<a href="https://colab.research.google.com/github/faezesarlakifar/text-emotion-recognition/blob/main/EmoPars_Emotion_Recognition_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q transformers

In [None]:
# @title Import Necessary Libraries
import pandas as pd
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
from torch.utils.data import DataLoader
from transformers import AdamW
import torch.nn as nn

### Hyper-parameters

In [None]:
batch_size = 32
learning_rate = 1e-5
num_epochs = 5

In [None]:
# Load the pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')


Downloading (…)lve/main/config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

In [None]:
from transformers import XLMRobertaForSequenceClassification, AdamW, get_linear_schedule_with_warmup

In [None]:
test = pd.read_csv('test_cleaned_emopars.csv', index_col=0)

In [None]:
train = pd.read_csv('train_cleaned_emopars.csv', index_col=0)

In [None]:
train.head()

Unnamed: 0.1,Unnamed: 0,text,Anger,Fear,Happiness,Hatred,Sadness,Wonder,primary_emotion
0,0,کرونا رو شکست میدهیم مرحله بعد چه گوهی میخوای ...,4,3,1,3,3,4,0
1,2,یکی از پدرسوختگی های برانداز اینه که ظاهرا ژست...,5,3,2,4,5,2,0
2,3,یکی از دوستای دبستانم,1,0,0,1,0,0,6
3,4,اینقدر گرفتار مسایل میشی که تخصص از دستت درد ...,2,1,0,1,0,4,5
4,5,شماهایی که توییتها رو میبینید و سکوت میکنید شم...,2,0,0,3,3,3,3


'Anger':0, 'Fear':1, 'Happiness':2, 'Hatred':3, 'Sadness':4, 'Wonder':5, 'Other':6

In [None]:
test = test.drop(['primary_emotion', 'Unnamed: 0'], axis=1)
test.head()

Unnamed: 0,text,Anger,Fear,Happiness,Hatred,Sadness,Wonder
0,من خیلی خودسانسوری می‌کنم تو اینستا هر چی فالو...,1,1,2,1,0,2
1,بعد اتمام جلسه مجلس روند بازار برگشت بورس,3,0,1,2,1,1
2,کاربران توییتر در جریان طوفان توییتری اعتراض ب...,4,0,0,2,2,0
3,وحشی شدن معده بعد از رسیدن به ایران اجتناب ناپ...,1,0,1,0,0,1
4,سحام نیوز بیانیه مشترک عربستان و امارات با پرو...,0,0,0,1,1,0


In [None]:
train = train.drop(['primary_emotion', 'Unnamed: 0'], axis=1)
train.head()

Unnamed: 0,text,Anger,Fear,Happiness,Hatred,Sadness,Wonder
0,کرونا رو شکست میدهیم مرحله بعد چه گوهی میخوای ...,4,3,1,3,3,4
1,یکی از پدرسوختگی های برانداز اینه که ظاهرا ژست...,5,3,2,4,5,2
2,یکی از دوستای دبستانم,1,0,0,1,0,0
3,اینقدر گرفتار مسایل میشی که تخصص از دستت درد ...,2,1,0,1,0,4
4,شماهایی که توییتها رو میبینید و سکوت میکنید شم...,2,0,0,3,3,3


In [None]:
targets = ["Anger", "Fear", "Happiness", "Hatred", "Sadness", "Wonder"]

## Preprocessing

In [None]:
def preprocess(df):
    # Normalize emotion labels
    emotion_columns = ['Anger', 'Fear', 'Happiness', 'Hatred', 'Sadness', 'Wonder']

    for col in emotion_columns:
        df[col] = df[col] / df[col].max()  # Normalize to the range [0, 1]

    # Apply threshold for binary labels
    threshold = 0.35
    for col in emotion_columns:
        df[col] = df[col].apply(lambda x: 1 if x >= threshold else 0)

    return df

In [None]:
train = preprocess(train)
test = preprocess(test)

In [None]:
X_train, y_train = train['text'].values.tolist(), train[targets].values.tolist()
X_test, y_test = test['text'].values.tolist(), test[targets].values.tolist()

In [None]:
class TextDataset(torch.utils.data.Dataset):
    def __init__(self, tokenizer, texts, labels, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            truncation=True,
            max_length=self.max_length,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )

        inputs = {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.float32)  # Use the provided numeric label directly
        }

        return inputs


In [None]:
train_dataset = TextDataset(tokenizer,X_train,y_train,max_length=128)
test_dataset = TextDataset(tokenizer,X_test,y_test,max_length=128)

In [None]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

## Defining Loss function

In [None]:
import torch.nn as nn

# Define the loss function
criterion = nn.BCEWithLogitsLoss()

In [None]:
# Check if CUDA (GPU) is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Modeling

In [None]:
from transformers import AutoModel

In [None]:
class XLMRobertaGRUClassifier(nn.Module):
    def __init__(self, num_classes):
        super(XLMRobertaGRUClassifier, self).__init__()
        self.num_classes = num_classes
        self.xlmroberta = AutoModel.from_pretrained("xlm-roberta-base")

        # Add a GRU layer
        self.gru = nn.GRU(self.xlmroberta.config.hidden_size, hidden_size=self.xlmroberta.config.hidden_size, num_layers=1, batch_first=True, bidirectional=True)

        # # Correct the hidden size for the linear layer
        self.linear = nn.Linear(self.xlmroberta.config.hidden_size*2, num_classes)  # Multiply by 2 for bidirectional GRU
        self.dropout = nn.Dropout(0.2)

    def forward(self, input_ids, attention_mask):
        outputs = self.xlmroberta(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        logits = outputs.last_hidden_state

        # Pass the logits through the GRU layer
        gru_output, _ = self.gru(logits)

        logits = self.linear(self.dropout(gru_output[:, -1, :]))
        return logits

## Model Training

In [None]:
# Assuming num_classes and your dataset are defined
num_classes = 6  # Change this to your actual number of classes
learning_rate = 1e-5
batch_size = 32
epochs = 5

model = XLMRobertaGRUClassifier(num_classes)
model.to(device)  # Move model to GPU if available

Downloading model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

XLMRobertaGRUClassifier(
  (xlmroberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=Tru

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import XLMRobertaTokenizer, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm

In [None]:
optimizer = AdamW(model.parameters(), lr=learning_rate)
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)



In [None]:
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        logits = model(input_ids, attention_mask)
        loss = criterion(logits, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Optional gradient clipping
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()
        print(f"batch loss: {loss.item():.4f}")

    average_loss = total_loss / len(train_dataloader)
    print('-' * 80)
    print(f"Epoch {epoch + 1} - Average Loss: {average_loss:.4f}")

# Save your trained model
torch.save(model.state_dict(), 'xlmroberta_gru_model.pth')


Epoch 1:   0%|          | 1/844 [00:01<25:23,  1.81s/it]

batch loss: 0.6809


Epoch 1:   0%|          | 2/844 [00:02<16:33,  1.18s/it]

batch loss: 0.6706


Epoch 1:   0%|          | 3/844 [00:03<13:46,  1.02it/s]

batch loss: 0.6609


Epoch 1:   0%|          | 4/844 [00:04<12:28,  1.12it/s]

batch loss: 0.6649


Epoch 1:   1%|          | 5/844 [00:04<11:44,  1.19it/s]

batch loss: 0.6625


Epoch 1:   1%|          | 6/844 [00:05<11:16,  1.24it/s]

batch loss: 0.6601


Epoch 1:   1%|          | 7/844 [00:06<10:57,  1.27it/s]

batch loss: 0.6480


Epoch 1:   1%|          | 8/844 [00:07<10:44,  1.30it/s]

batch loss: 0.6465


Epoch 1:   1%|          | 9/844 [00:07<10:37,  1.31it/s]

batch loss: 0.6293


Epoch 1:   1%|          | 10/844 [00:08<10:30,  1.32it/s]

batch loss: 0.6345


Epoch 1:   1%|▏         | 11/844 [00:09<10:28,  1.33it/s]

batch loss: 0.6365


Epoch 1:   1%|▏         | 12/844 [00:10<10:25,  1.33it/s]

batch loss: 0.6307


Epoch 1:   2%|▏         | 13/844 [00:10<10:24,  1.33it/s]

batch loss: 0.6296


Epoch 1:   2%|▏         | 14/844 [00:11<10:24,  1.33it/s]

batch loss: 0.6195


Epoch 1:   2%|▏         | 15/844 [00:12<10:21,  1.33it/s]

batch loss: 0.6281


Epoch 1:   2%|▏         | 16/844 [00:13<10:20,  1.33it/s]

batch loss: 0.5993


Epoch 1:   2%|▏         | 17/844 [00:13<10:19,  1.34it/s]

batch loss: 0.6146


Epoch 1:   2%|▏         | 18/844 [00:14<10:20,  1.33it/s]

batch loss: 0.5958


Epoch 1:   2%|▏         | 19/844 [00:15<10:17,  1.34it/s]

batch loss: 0.6000


Epoch 1:   2%|▏         | 20/844 [00:15<10:17,  1.33it/s]

batch loss: 0.5766


Epoch 1:   2%|▏         | 21/844 [00:16<10:16,  1.33it/s]

batch loss: 0.5821


Epoch 1:   3%|▎         | 22/844 [00:17<10:18,  1.33it/s]

batch loss: 0.6116


Epoch 1:   3%|▎         | 23/844 [00:18<10:19,  1.33it/s]

batch loss: 0.5527


Epoch 1:   3%|▎         | 24/844 [00:19<10:20,  1.32it/s]

batch loss: 0.5674


Epoch 1:   3%|▎         | 25/844 [00:19<10:18,  1.32it/s]

batch loss: 0.6065


Epoch 1:   3%|▎         | 26/844 [00:20<10:17,  1.33it/s]

batch loss: 0.5809


Epoch 1:   3%|▎         | 27/844 [00:21<10:21,  1.31it/s]

batch loss: 0.5534


Epoch 1:   3%|▎         | 28/844 [00:22<10:23,  1.31it/s]

batch loss: 0.5434


Epoch 1:   3%|▎         | 29/844 [00:22<10:24,  1.30it/s]

batch loss: 0.5056


Epoch 1:   4%|▎         | 30/844 [00:23<10:29,  1.29it/s]

batch loss: 0.5888


Epoch 1:   4%|▎         | 31/844 [00:24<10:28,  1.29it/s]

batch loss: 0.5105


Epoch 1:   4%|▍         | 32/844 [00:25<10:24,  1.30it/s]

batch loss: 0.5175


Epoch 1:   4%|▍         | 33/844 [00:25<10:21,  1.30it/s]

batch loss: 0.5156


Epoch 1:   4%|▍         | 34/844 [00:26<10:18,  1.31it/s]

batch loss: 0.4836


Epoch 1:   4%|▍         | 35/844 [00:27<10:16,  1.31it/s]

batch loss: 0.4598


Epoch 1:   4%|▍         | 36/844 [00:28<10:14,  1.31it/s]

batch loss: 0.5154


Epoch 1:   4%|▍         | 37/844 [00:28<10:14,  1.31it/s]

batch loss: 0.4570


Epoch 1:   5%|▍         | 38/844 [00:29<10:14,  1.31it/s]

batch loss: 0.4890


Epoch 1:   5%|▍         | 39/844 [00:30<10:14,  1.31it/s]

batch loss: 0.4706


Epoch 1:   5%|▍         | 40/844 [00:31<10:17,  1.30it/s]

batch loss: 0.5228


Epoch 1:   5%|▍         | 41/844 [00:32<10:28,  1.28it/s]

batch loss: 0.4667


Epoch 1:   5%|▍         | 42/844 [00:32<10:24,  1.28it/s]

batch loss: 0.4781


Epoch 1:   5%|▌         | 43/844 [00:33<10:20,  1.29it/s]

batch loss: 0.4656


Epoch 1:   5%|▌         | 44/844 [00:34<10:16,  1.30it/s]

batch loss: 0.4885


Epoch 1:   5%|▌         | 45/844 [00:35<10:14,  1.30it/s]

batch loss: 0.4255


Epoch 1:   5%|▌         | 46/844 [00:35<10:12,  1.30it/s]

batch loss: 0.4914


Epoch 1:   6%|▌         | 47/844 [00:36<10:12,  1.30it/s]

batch loss: 0.4885


Epoch 1:   6%|▌         | 48/844 [00:37<10:12,  1.30it/s]

batch loss: 0.4107


Epoch 1:   6%|▌         | 49/844 [00:38<10:10,  1.30it/s]

batch loss: 0.5461


Epoch 1:   6%|▌         | 50/844 [00:39<10:10,  1.30it/s]

batch loss: 0.4702


Epoch 1:   6%|▌         | 51/844 [00:39<10:09,  1.30it/s]

batch loss: 0.4542


Epoch 1:   6%|▌         | 52/844 [00:40<10:09,  1.30it/s]

batch loss: 0.5678


Epoch 1:   6%|▋         | 53/844 [00:41<10:07,  1.30it/s]

batch loss: 0.4938


Epoch 1:   6%|▋         | 54/844 [00:42<10:08,  1.30it/s]

batch loss: 0.4581


Epoch 1:   7%|▋         | 55/844 [00:42<10:07,  1.30it/s]

batch loss: 0.4368


Epoch 1:   7%|▋         | 56/844 [00:43<10:08,  1.30it/s]

batch loss: 0.4258


Epoch 1:   7%|▋         | 57/844 [00:44<10:09,  1.29it/s]

batch loss: 0.5172


Epoch 1:   7%|▋         | 58/844 [00:45<10:09,  1.29it/s]

batch loss: 0.4280


Epoch 1:   7%|▋         | 59/844 [00:45<10:10,  1.29it/s]

batch loss: 0.5056


Epoch 1:   7%|▋         | 60/844 [00:46<10:08,  1.29it/s]

batch loss: 0.4720


Epoch 1:   7%|▋         | 61/844 [00:47<10:06,  1.29it/s]

batch loss: 0.4325


Epoch 1:   7%|▋         | 62/844 [00:48<10:04,  1.29it/s]

batch loss: 0.4637


Epoch 1:   7%|▋         | 63/844 [00:49<10:03,  1.29it/s]

batch loss: 0.4849


Epoch 1:   8%|▊         | 64/844 [00:49<10:03,  1.29it/s]

batch loss: 0.6200


Epoch 1:   8%|▊         | 65/844 [00:50<10:01,  1.30it/s]

batch loss: 0.4200


Epoch 1:   8%|▊         | 66/844 [00:51<10:00,  1.30it/s]

batch loss: 0.4169


Epoch 1:   8%|▊         | 67/844 [00:52<09:59,  1.30it/s]

batch loss: 0.4744


Epoch 1:   8%|▊         | 68/844 [00:52<09:58,  1.30it/s]

batch loss: 0.4784


Epoch 1:   8%|▊         | 69/844 [00:53<09:58,  1.29it/s]

batch loss: 0.4358


Epoch 1:   8%|▊         | 70/844 [00:54<09:59,  1.29it/s]

batch loss: 0.5025


Epoch 1:   8%|▊         | 71/844 [00:55<09:58,  1.29it/s]

batch loss: 0.4368


Epoch 1:   9%|▊         | 72/844 [00:56<09:57,  1.29it/s]

batch loss: 0.5778


Epoch 1:   9%|▊         | 73/844 [00:56<09:59,  1.29it/s]

batch loss: 0.5427


Epoch 1:   9%|▉         | 74/844 [00:57<09:59,  1.29it/s]

batch loss: 0.4789


Epoch 1:   9%|▉         | 75/844 [00:58<10:00,  1.28it/s]

batch loss: 0.4568


Epoch 1:   9%|▉         | 76/844 [00:59<10:00,  1.28it/s]

batch loss: 0.5091


Epoch 1:   9%|▉         | 77/844 [00:59<09:59,  1.28it/s]

batch loss: 0.4792


Epoch 1:   9%|▉         | 78/844 [01:00<09:59,  1.28it/s]

batch loss: 0.4474


Epoch 1:   9%|▉         | 79/844 [01:01<09:57,  1.28it/s]

batch loss: 0.4643


Epoch 1:   9%|▉         | 80/844 [01:02<09:57,  1.28it/s]

batch loss: 0.5122


Epoch 1:  10%|▉         | 81/844 [01:03<10:19,  1.23it/s]

batch loss: 0.5412


Epoch 1:  10%|▉         | 82/844 [01:03<10:09,  1.25it/s]

batch loss: 0.4537


Epoch 1:  10%|▉         | 83/844 [01:04<10:05,  1.26it/s]

batch loss: 0.4601


Epoch 1:  10%|▉         | 84/844 [01:05<10:03,  1.26it/s]

batch loss: 0.4326


Epoch 1:  10%|█         | 85/844 [01:06<10:00,  1.26it/s]

batch loss: 0.4279


Epoch 1:  10%|█         | 86/844 [01:07<09:58,  1.27it/s]

batch loss: 0.4739


Epoch 1:  10%|█         | 87/844 [01:07<09:57,  1.27it/s]

batch loss: 0.5256


Epoch 1:  10%|█         | 88/844 [01:08<09:56,  1.27it/s]

batch loss: 0.4208


Epoch 1:  11%|█         | 89/844 [01:09<09:56,  1.27it/s]

batch loss: 0.5481


Epoch 1:  11%|█         | 90/844 [01:10<09:55,  1.27it/s]

batch loss: 0.5533


Epoch 1:  11%|█         | 91/844 [01:11<09:56,  1.26it/s]

batch loss: 0.4993


Epoch 1:  11%|█         | 92/844 [01:11<09:58,  1.26it/s]

batch loss: 0.4618


Epoch 1:  11%|█         | 93/844 [01:12<09:59,  1.25it/s]

batch loss: 0.4430


Epoch 1:  11%|█         | 94/844 [01:13<09:59,  1.25it/s]

batch loss: 0.4216


Epoch 1:  11%|█▏        | 95/844 [01:14<09:57,  1.25it/s]

batch loss: 0.4969


Epoch 1:  11%|█▏        | 96/844 [01:15<09:56,  1.25it/s]

batch loss: 0.4824


Epoch 1:  11%|█▏        | 97/844 [01:15<09:54,  1.26it/s]

batch loss: 0.5379


Epoch 1:  12%|█▏        | 98/844 [01:16<09:54,  1.26it/s]

batch loss: 0.4491


Epoch 1:  12%|█▏        | 99/844 [01:17<09:54,  1.25it/s]

batch loss: 0.5587


Epoch 1:  12%|█▏        | 100/844 [01:18<09:53,  1.25it/s]

batch loss: 0.4991


Epoch 1:  12%|█▏        | 101/844 [01:19<09:54,  1.25it/s]

batch loss: 0.4642


Epoch 1:  12%|█▏        | 102/844 [01:19<09:53,  1.25it/s]

batch loss: 0.5934


Epoch 1:  12%|█▏        | 103/844 [01:20<09:52,  1.25it/s]

batch loss: 0.5367


Epoch 1:  12%|█▏        | 104/844 [01:21<09:52,  1.25it/s]

batch loss: 0.5240


Epoch 1:  12%|█▏        | 105/844 [01:22<09:53,  1.25it/s]

batch loss: 0.4790


Epoch 1:  13%|█▎        | 106/844 [01:23<09:52,  1.24it/s]

batch loss: 0.4641


Epoch 1:  13%|█▎        | 107/844 [01:23<09:53,  1.24it/s]

batch loss: 0.4987


Epoch 1:  13%|█▎        | 108/844 [01:24<09:55,  1.24it/s]

batch loss: 0.4550


Epoch 1:  13%|█▎        | 109/844 [01:25<09:55,  1.23it/s]

batch loss: 0.4688


Epoch 1:  13%|█▎        | 110/844 [01:26<09:56,  1.23it/s]

batch loss: 0.4521


Epoch 1:  13%|█▎        | 111/844 [01:27<09:57,  1.23it/s]

batch loss: 0.4447


Epoch 1:  13%|█▎        | 112/844 [01:27<09:57,  1.23it/s]

batch loss: 0.5024


Epoch 1:  13%|█▎        | 113/844 [01:28<09:56,  1.23it/s]

batch loss: 0.5715


Epoch 1:  14%|█▎        | 114/844 [01:29<09:55,  1.23it/s]

batch loss: 0.5055


Epoch 1:  14%|█▎        | 115/844 [01:30<09:54,  1.23it/s]

batch loss: 0.4676


Epoch 1:  14%|█▎        | 116/844 [01:31<09:54,  1.22it/s]

batch loss: 0.4689


Epoch 1:  14%|█▍        | 117/844 [01:32<09:54,  1.22it/s]

batch loss: 0.4584


Epoch 1:  14%|█▍        | 118/844 [01:32<09:55,  1.22it/s]

batch loss: 0.4637


Epoch 1:  14%|█▍        | 119/844 [01:33<09:54,  1.22it/s]

batch loss: 0.4188


Epoch 1:  14%|█▍        | 120/844 [01:34<09:54,  1.22it/s]

batch loss: 0.5036


Epoch 1:  14%|█▍        | 121/844 [01:35<09:54,  1.22it/s]

batch loss: 0.4760


Epoch 1:  14%|█▍        | 122/844 [01:36<09:54,  1.21it/s]

batch loss: 0.4940


Epoch 1:  15%|█▍        | 123/844 [01:36<09:54,  1.21it/s]

batch loss: 0.4633


Epoch 1:  15%|█▍        | 124/844 [01:37<09:57,  1.20it/s]

batch loss: 0.4767


Epoch 1:  15%|█▍        | 125/844 [01:38<09:57,  1.20it/s]

batch loss: 0.3872


Epoch 1:  15%|█▍        | 126/844 [01:39<09:57,  1.20it/s]

batch loss: 0.3894


Epoch 1:  15%|█▌        | 127/844 [01:40<09:57,  1.20it/s]

batch loss: 0.4716


Epoch 1:  15%|█▌        | 128/844 [01:41<09:57,  1.20it/s]

batch loss: 0.3648


Epoch 1:  15%|█▌        | 129/844 [01:41<09:57,  1.20it/s]

batch loss: 0.5083


Epoch 1:  15%|█▌        | 130/844 [01:42<09:56,  1.20it/s]

batch loss: 0.4847


Epoch 1:  16%|█▌        | 131/844 [01:43<09:57,  1.19it/s]

batch loss: 0.4741


Epoch 1:  16%|█▌        | 132/844 [01:44<09:57,  1.19it/s]

batch loss: 0.4371


Epoch 1:  16%|█▌        | 133/844 [01:45<09:56,  1.19it/s]

batch loss: 0.4979


Epoch 1:  16%|█▌        | 134/844 [01:46<09:57,  1.19it/s]

batch loss: 0.5260


Epoch 1:  16%|█▌        | 135/844 [01:47<09:58,  1.18it/s]

batch loss: 0.5019


Epoch 1:  16%|█▌        | 136/844 [01:47<09:59,  1.18it/s]

batch loss: 0.4268


Epoch 1:  16%|█▌        | 137/844 [01:48<09:57,  1.18it/s]

batch loss: 0.4623


Epoch 1:  16%|█▋        | 138/844 [01:49<09:57,  1.18it/s]

batch loss: 0.5060


Epoch 1:  16%|█▋        | 139/844 [01:50<09:56,  1.18it/s]

batch loss: 0.4897


Epoch 1:  17%|█▋        | 140/844 [01:51<09:58,  1.18it/s]

batch loss: 0.4679


Epoch 1:  17%|█▋        | 141/844 [01:52<10:00,  1.17it/s]

batch loss: 0.4740


Epoch 1:  17%|█▋        | 142/844 [01:53<10:03,  1.16it/s]

batch loss: 0.4946


Epoch 1:  17%|█▋        | 143/844 [01:53<10:04,  1.16it/s]

batch loss: 0.5545


Epoch 1:  17%|█▋        | 144/844 [01:54<10:01,  1.16it/s]

batch loss: 0.4939


Epoch 1:  17%|█▋        | 145/844 [01:55<09:59,  1.17it/s]

batch loss: 0.5441


Epoch 1:  17%|█▋        | 146/844 [01:56<09:59,  1.16it/s]

batch loss: 0.4800


Epoch 1:  17%|█▋        | 147/844 [01:57<09:57,  1.17it/s]

batch loss: 0.5223


Epoch 1:  18%|█▊        | 148/844 [01:58<09:55,  1.17it/s]

batch loss: 0.4438


Epoch 1:  18%|█▊        | 149/844 [01:59<09:53,  1.17it/s]

batch loss: 0.4344


Epoch 1:  18%|█▊        | 150/844 [01:59<09:52,  1.17it/s]

batch loss: 0.4321


Epoch 1:  18%|█▊        | 151/844 [02:00<09:51,  1.17it/s]

batch loss: 0.3993


Epoch 1:  18%|█▊        | 152/844 [02:01<09:48,  1.18it/s]

batch loss: 0.3740


Epoch 1:  18%|█▊        | 153/844 [02:02<09:47,  1.18it/s]

batch loss: 0.5650


Epoch 1:  18%|█▊        | 154/844 [02:03<09:45,  1.18it/s]

batch loss: 0.5068


Epoch 1:  18%|█▊        | 155/844 [02:04<09:44,  1.18it/s]

batch loss: 0.4449


Epoch 1:  18%|█▊        | 156/844 [02:04<09:44,  1.18it/s]

batch loss: 0.5410


Epoch 1:  19%|█▊        | 157/844 [02:05<09:43,  1.18it/s]

batch loss: 0.4138


Epoch 1:  19%|█▊        | 158/844 [02:06<09:43,  1.18it/s]

batch loss: 0.4114


Epoch 1:  19%|█▉        | 159/844 [02:07<09:42,  1.18it/s]

batch loss: 0.4442


Epoch 1:  19%|█▉        | 160/844 [02:08<09:38,  1.18it/s]

batch loss: 0.4991


Epoch 1:  19%|█▉        | 161/844 [02:09<09:36,  1.18it/s]

batch loss: 0.4757


Epoch 1:  19%|█▉        | 162/844 [02:10<09:34,  1.19it/s]

batch loss: 0.4872


Epoch 1:  19%|█▉        | 163/844 [02:10<09:33,  1.19it/s]

batch loss: 0.4174


Epoch 1:  19%|█▉        | 164/844 [02:11<09:32,  1.19it/s]

batch loss: 0.4748


Epoch 1:  20%|█▉        | 165/844 [02:12<09:30,  1.19it/s]

batch loss: 0.4941


Epoch 1:  20%|█▉        | 166/844 [02:13<09:27,  1.19it/s]

batch loss: 0.4462


Epoch 1:  20%|█▉        | 167/844 [02:14<09:26,  1.19it/s]

batch loss: 0.4980


Epoch 1:  20%|█▉        | 168/844 [02:15<09:25,  1.19it/s]

batch loss: 0.4168


Epoch 1:  20%|██        | 169/844 [02:15<09:24,  1.20it/s]

batch loss: 0.4961


Epoch 1:  20%|██        | 170/844 [02:16<09:22,  1.20it/s]

batch loss: 0.4655


Epoch 1:  20%|██        | 171/844 [02:17<09:26,  1.19it/s]

batch loss: 0.5408


Epoch 1:  20%|██        | 172/844 [02:18<09:44,  1.15it/s]

batch loss: 0.4950


Epoch 1:  20%|██        | 173/844 [02:19<09:33,  1.17it/s]

batch loss: 0.4323


Epoch 1:  21%|██        | 174/844 [02:20<09:29,  1.18it/s]

batch loss: 0.4606


Epoch 1:  21%|██        | 175/844 [02:21<09:27,  1.18it/s]

batch loss: 0.4971


Epoch 1:  21%|██        | 176/844 [02:21<09:22,  1.19it/s]

batch loss: 0.5097


Epoch 1:  21%|██        | 177/844 [02:22<09:17,  1.20it/s]

batch loss: 0.4598


Epoch 1:  21%|██        | 178/844 [02:23<09:13,  1.20it/s]

batch loss: 0.5459


Epoch 1:  21%|██        | 179/844 [02:24<09:11,  1.20it/s]

batch loss: 0.4256


Epoch 1:  21%|██▏       | 180/844 [02:25<09:10,  1.21it/s]

batch loss: 0.4558


Epoch 1:  21%|██▏       | 181/844 [02:25<09:08,  1.21it/s]

batch loss: 0.4579


Epoch 1:  22%|██▏       | 182/844 [02:26<09:07,  1.21it/s]

batch loss: 0.3854


Epoch 1:  22%|██▏       | 183/844 [02:27<09:07,  1.21it/s]

batch loss: 0.5120


Epoch 1:  22%|██▏       | 184/844 [02:28<09:05,  1.21it/s]

batch loss: 0.4741


Epoch 1:  22%|██▏       | 185/844 [02:29<09:06,  1.21it/s]

batch loss: 0.4301


Epoch 1:  22%|██▏       | 186/844 [02:30<09:05,  1.21it/s]

batch loss: 0.4739


Epoch 1:  22%|██▏       | 187/844 [02:30<09:02,  1.21it/s]

batch loss: 0.5341


Epoch 1:  22%|██▏       | 188/844 [02:31<09:03,  1.21it/s]

batch loss: 0.5087


Epoch 1:  22%|██▏       | 189/844 [02:32<09:01,  1.21it/s]

batch loss: 0.4540


Epoch 1:  23%|██▎       | 190/844 [02:33<09:01,  1.21it/s]

batch loss: 0.4889


Epoch 1:  23%|██▎       | 191/844 [02:34<09:01,  1.21it/s]

batch loss: 0.5559


Epoch 1:  23%|██▎       | 192/844 [02:35<09:00,  1.21it/s]

batch loss: 0.4496


Epoch 1:  23%|██▎       | 193/844 [02:35<08:56,  1.21it/s]

batch loss: 0.4291


Epoch 1:  23%|██▎       | 194/844 [02:36<08:55,  1.21it/s]

batch loss: 0.3736


Epoch 1:  23%|██▎       | 195/844 [02:37<08:53,  1.22it/s]

batch loss: 0.4731


Epoch 1:  23%|██▎       | 196/844 [02:38<08:52,  1.22it/s]

batch loss: 0.6037


Epoch 1:  23%|██▎       | 197/844 [02:39<08:50,  1.22it/s]

batch loss: 0.4127


Epoch 1:  23%|██▎       | 198/844 [02:40<08:49,  1.22it/s]

batch loss: 0.4725


Epoch 1:  24%|██▎       | 199/844 [02:40<08:48,  1.22it/s]

batch loss: 0.5157


Epoch 1:  24%|██▎       | 200/844 [02:41<08:48,  1.22it/s]

batch loss: 0.4698


Epoch 1:  24%|██▍       | 201/844 [02:42<08:47,  1.22it/s]

batch loss: 0.4940


Epoch 1:  24%|██▍       | 202/844 [02:43<08:45,  1.22it/s]

batch loss: 0.5227


Epoch 1:  24%|██▍       | 203/844 [02:44<08:45,  1.22it/s]

batch loss: 0.5050


Epoch 1:  24%|██▍       | 204/844 [02:44<08:44,  1.22it/s]

batch loss: 0.4426


Epoch 1:  24%|██▍       | 205/844 [02:45<08:44,  1.22it/s]

batch loss: 0.4584


Epoch 1:  24%|██▍       | 206/844 [02:46<08:46,  1.21it/s]

batch loss: 0.3901


Epoch 1:  25%|██▍       | 207/844 [02:47<08:45,  1.21it/s]

batch loss: 0.5486


Epoch 1:  25%|██▍       | 208/844 [02:48<08:45,  1.21it/s]

batch loss: 0.4704


Epoch 1:  25%|██▍       | 209/844 [02:49<08:44,  1.21it/s]

batch loss: 0.5228


Epoch 1:  25%|██▍       | 210/844 [02:49<08:42,  1.21it/s]

batch loss: 0.3744


Epoch 1:  25%|██▌       | 211/844 [02:50<08:43,  1.21it/s]

batch loss: 0.4759


Epoch 1:  25%|██▌       | 212/844 [02:51<08:41,  1.21it/s]

batch loss: 0.4758


Epoch 1:  25%|██▌       | 213/844 [02:52<08:40,  1.21it/s]

batch loss: 0.4486


Epoch 1:  25%|██▌       | 214/844 [02:53<08:38,  1.21it/s]

batch loss: 0.4945


Epoch 1:  25%|██▌       | 215/844 [02:54<08:39,  1.21it/s]

batch loss: 0.4684


Epoch 1:  26%|██▌       | 216/844 [02:54<08:39,  1.21it/s]

batch loss: 0.5672


Epoch 1:  26%|██▌       | 217/844 [02:55<08:38,  1.21it/s]

batch loss: 0.4997


Epoch 1:  26%|██▌       | 218/844 [02:56<08:37,  1.21it/s]

batch loss: 0.4549


Epoch 1:  26%|██▌       | 219/844 [02:57<08:37,  1.21it/s]

batch loss: 0.4750


Epoch 1:  26%|██▌       | 220/844 [02:58<08:37,  1.21it/s]

batch loss: 0.5252


Epoch 1:  26%|██▌       | 221/844 [02:58<08:38,  1.20it/s]

batch loss: 0.6187


Epoch 1:  26%|██▋       | 222/844 [02:59<08:38,  1.20it/s]

batch loss: 0.4283


Epoch 1:  26%|██▋       | 223/844 [03:00<08:36,  1.20it/s]

batch loss: 0.4512


Epoch 1:  27%|██▋       | 224/844 [03:01<08:34,  1.20it/s]

batch loss: 0.4672


Epoch 1:  27%|██▋       | 225/844 [03:02<08:32,  1.21it/s]

batch loss: 0.4773


Epoch 1:  27%|██▋       | 226/844 [03:03<08:31,  1.21it/s]

batch loss: 0.4343


Epoch 1:  27%|██▋       | 227/844 [03:03<08:31,  1.21it/s]

batch loss: 0.4548


Epoch 1:  27%|██▋       | 228/844 [03:04<08:30,  1.21it/s]

batch loss: 0.4419


Epoch 1:  27%|██▋       | 229/844 [03:05<08:29,  1.21it/s]

batch loss: 0.4789


Epoch 1:  27%|██▋       | 230/844 [03:06<08:29,  1.21it/s]

batch loss: 0.4154


Epoch 1:  27%|██▋       | 231/844 [03:07<08:28,  1.21it/s]

batch loss: 0.5222


Epoch 1:  27%|██▋       | 232/844 [03:08<08:27,  1.21it/s]

batch loss: 0.4559


Epoch 1:  28%|██▊       | 233/844 [03:08<08:28,  1.20it/s]

batch loss: 0.4036


Epoch 1:  28%|██▊       | 234/844 [03:09<08:27,  1.20it/s]

batch loss: 0.4931


Epoch 1:  28%|██▊       | 235/844 [03:10<08:27,  1.20it/s]

batch loss: 0.4046


Epoch 1:  28%|██▊       | 236/844 [03:11<08:27,  1.20it/s]

batch loss: 0.4627


Epoch 1:  28%|██▊       | 237/844 [03:12<08:27,  1.20it/s]

batch loss: 0.4346


Epoch 1:  28%|██▊       | 238/844 [03:13<08:29,  1.19it/s]

batch loss: 0.4456


Epoch 1:  28%|██▊       | 239/844 [03:13<08:27,  1.19it/s]

batch loss: 0.4460


Epoch 1:  28%|██▊       | 240/844 [03:14<08:27,  1.19it/s]

batch loss: 0.5395


Epoch 1:  29%|██▊       | 241/844 [03:15<08:27,  1.19it/s]

batch loss: 0.4439


Epoch 1:  29%|██▊       | 242/844 [03:16<08:25,  1.19it/s]

batch loss: 0.4536


Epoch 1:  29%|██▉       | 243/844 [03:17<08:23,  1.19it/s]

batch loss: 0.4518


Epoch 1:  29%|██▉       | 244/844 [03:18<08:23,  1.19it/s]

batch loss: 0.3872


Epoch 1:  29%|██▉       | 245/844 [03:19<08:22,  1.19it/s]

batch loss: 0.4291


Epoch 1:  29%|██▉       | 246/844 [03:19<08:21,  1.19it/s]

batch loss: 0.5170


Epoch 1:  29%|██▉       | 247/844 [03:20<08:21,  1.19it/s]

batch loss: 0.3822


Epoch 1:  29%|██▉       | 248/844 [03:21<08:19,  1.19it/s]

batch loss: 0.5240


Epoch 1:  30%|██▉       | 249/844 [03:22<08:19,  1.19it/s]

batch loss: 0.4581


Epoch 1:  30%|██▉       | 250/844 [03:23<08:17,  1.19it/s]

batch loss: 0.4843


Epoch 1:  30%|██▉       | 251/844 [03:24<08:16,  1.19it/s]

batch loss: 0.4686


Epoch 1:  30%|██▉       | 252/844 [03:24<08:16,  1.19it/s]

batch loss: 0.4767


Epoch 1:  30%|██▉       | 253/844 [03:25<08:16,  1.19it/s]

batch loss: 0.3837


Epoch 1:  30%|███       | 254/844 [03:26<08:17,  1.19it/s]

batch loss: 0.4272


Epoch 1:  30%|███       | 255/844 [03:27<08:16,  1.19it/s]

batch loss: 0.5191


Epoch 1:  30%|███       | 256/844 [03:28<08:17,  1.18it/s]

batch loss: 0.5778


Epoch 1:  30%|███       | 257/844 [03:29<08:17,  1.18it/s]

batch loss: 0.4941


Epoch 1:  31%|███       | 258/844 [03:29<08:15,  1.18it/s]

batch loss: 0.4275


Epoch 1:  31%|███       | 259/844 [03:30<08:12,  1.19it/s]

batch loss: 0.3918


Epoch 1:  31%|███       | 260/844 [03:31<08:09,  1.19it/s]

batch loss: 0.4480


Epoch 1:  31%|███       | 261/844 [03:32<08:07,  1.20it/s]

batch loss: 0.4931


Epoch 1:  31%|███       | 262/844 [03:33<08:07,  1.19it/s]

batch loss: 0.5491


Epoch 1:  31%|███       | 263/844 [03:34<08:07,  1.19it/s]

batch loss: 0.3691


Epoch 1:  31%|███▏      | 264/844 [03:34<08:07,  1.19it/s]

batch loss: 0.5025


Epoch 1:  31%|███▏      | 265/844 [03:35<08:05,  1.19it/s]

batch loss: 0.4002


Epoch 1:  32%|███▏      | 266/844 [03:36<08:02,  1.20it/s]

batch loss: 0.4632


Epoch 1:  32%|███▏      | 267/844 [03:37<08:01,  1.20it/s]

batch loss: 0.4350


Epoch 1:  32%|███▏      | 268/844 [03:38<08:01,  1.20it/s]

batch loss: 0.4880


Epoch 1:  32%|███▏      | 269/844 [03:39<08:01,  1.19it/s]

batch loss: 0.5612


Epoch 1:  32%|███▏      | 270/844 [03:40<08:03,  1.19it/s]

batch loss: 0.5832


Epoch 1:  32%|███▏      | 271/844 [03:40<08:03,  1.19it/s]

batch loss: 0.5012


Epoch 1:  32%|███▏      | 272/844 [03:41<08:02,  1.19it/s]

batch loss: 0.4565


Epoch 1:  32%|███▏      | 273/844 [03:42<07:58,  1.19it/s]

batch loss: 0.3849


Epoch 1:  32%|███▏      | 274/844 [03:43<07:56,  1.20it/s]

batch loss: 0.4214


Epoch 1:  33%|███▎      | 275/844 [03:44<07:55,  1.20it/s]

batch loss: 0.4648


Epoch 1:  33%|███▎      | 276/844 [03:45<07:55,  1.19it/s]

batch loss: 0.5202


Epoch 1:  33%|███▎      | 277/844 [03:45<07:54,  1.19it/s]

batch loss: 0.4616


Epoch 1:  33%|███▎      | 278/844 [03:46<07:53,  1.19it/s]

batch loss: 0.5328


Epoch 1:  33%|███▎      | 279/844 [03:47<07:52,  1.19it/s]

batch loss: 0.5294


Epoch 1:  33%|███▎      | 280/844 [03:48<07:51,  1.20it/s]

batch loss: 0.5055


Epoch 1:  33%|███▎      | 281/844 [03:49<07:49,  1.20it/s]

batch loss: 0.5071


Epoch 1:  33%|███▎      | 282/844 [03:50<07:48,  1.20it/s]

batch loss: 0.4939


Epoch 1:  34%|███▎      | 283/844 [03:50<07:47,  1.20it/s]

batch loss: 0.4007


Epoch 1:  34%|███▎      | 284/844 [03:51<07:46,  1.20it/s]

batch loss: 0.4605


Epoch 1:  34%|███▍      | 285/844 [03:52<07:46,  1.20it/s]

batch loss: 0.5211


Epoch 1:  34%|███▍      | 286/844 [03:53<07:45,  1.20it/s]

batch loss: 0.5874


Epoch 1:  34%|███▍      | 287/844 [03:54<07:46,  1.19it/s]

batch loss: 0.5125


Epoch 1:  34%|███▍      | 288/844 [03:55<07:45,  1.20it/s]

batch loss: 0.4688


Epoch 1:  34%|███▍      | 289/844 [03:55<07:46,  1.19it/s]

batch loss: 0.4103


Epoch 1:  34%|███▍      | 290/844 [03:56<07:43,  1.20it/s]

batch loss: 0.4744


Epoch 1:  34%|███▍      | 291/844 [03:57<07:41,  1.20it/s]

batch loss: 0.4049


Epoch 1:  35%|███▍      | 292/844 [03:58<07:41,  1.20it/s]

batch loss: 0.4991


Epoch 1:  35%|███▍      | 293/844 [03:59<07:39,  1.20it/s]

batch loss: 0.5052


Epoch 1:  35%|███▍      | 294/844 [04:00<07:38,  1.20it/s]

batch loss: 0.4863


Epoch 1:  35%|███▍      | 295/844 [04:00<07:36,  1.20it/s]

batch loss: 0.4619


Epoch 1:  35%|███▌      | 296/844 [04:01<07:36,  1.20it/s]

batch loss: 0.4491


Epoch 1:  35%|███▌      | 297/844 [04:02<07:35,  1.20it/s]

batch loss: 0.4644


Epoch 1:  35%|███▌      | 298/844 [04:03<07:36,  1.20it/s]

batch loss: 0.5051


Epoch 1:  35%|███▌      | 299/844 [04:04<07:35,  1.20it/s]

batch loss: 0.4980


Epoch 1:  36%|███▌      | 300/844 [04:05<07:33,  1.20it/s]

batch loss: 0.4530


Epoch 1:  36%|███▌      | 301/844 [04:05<07:33,  1.20it/s]

batch loss: 0.5603


Epoch 1:  36%|███▌      | 302/844 [04:06<07:32,  1.20it/s]

batch loss: 0.5065


Epoch 1:  36%|███▌      | 303/844 [04:07<07:30,  1.20it/s]

batch loss: 0.4976


Epoch 1:  36%|███▌      | 304/844 [04:08<07:29,  1.20it/s]

batch loss: 0.4952


Epoch 1:  36%|███▌      | 305/844 [04:09<07:28,  1.20it/s]

batch loss: 0.4521


Epoch 1:  36%|███▋      | 306/844 [04:10<07:27,  1.20it/s]

batch loss: 0.4635


Epoch 1:  36%|███▋      | 307/844 [04:10<07:26,  1.20it/s]

batch loss: 0.4299


Epoch 1:  36%|███▋      | 308/844 [04:11<07:25,  1.20it/s]

batch loss: 0.4168


Epoch 1:  37%|███▋      | 309/844 [04:12<07:24,  1.20it/s]

batch loss: 0.4771


Epoch 1:  37%|███▋      | 310/844 [04:13<07:23,  1.20it/s]

batch loss: 0.4502


Epoch 1:  37%|███▋      | 311/844 [04:14<07:22,  1.20it/s]

batch loss: 0.4345


Epoch 1:  37%|███▋      | 312/844 [04:15<07:21,  1.20it/s]

batch loss: 0.4625


Epoch 1:  37%|███▋      | 313/844 [04:15<07:20,  1.20it/s]

batch loss: 0.4226


Epoch 1:  37%|███▋      | 314/844 [04:16<07:19,  1.21it/s]

batch loss: 0.5261


Epoch 1:  37%|███▋      | 315/844 [04:17<07:18,  1.21it/s]

batch loss: 0.4755


Epoch 1:  37%|███▋      | 316/844 [04:18<07:17,  1.21it/s]

batch loss: 0.4484


Epoch 1:  38%|███▊      | 317/844 [04:19<07:16,  1.21it/s]

batch loss: 0.5383


Epoch 1:  38%|███▊      | 318/844 [04:20<07:18,  1.20it/s]

batch loss: 0.4295


Epoch 1:  38%|███▊      | 319/844 [04:20<07:17,  1.20it/s]

batch loss: 0.4125


Epoch 1:  38%|███▊      | 320/844 [04:21<07:16,  1.20it/s]

batch loss: 0.3718


Epoch 1:  38%|███▊      | 321/844 [04:22<07:16,  1.20it/s]

batch loss: 0.4510


Epoch 1:  38%|███▊      | 322/844 [04:23<07:14,  1.20it/s]

batch loss: 0.4118


Epoch 1:  38%|███▊      | 323/844 [04:24<07:13,  1.20it/s]

batch loss: 0.4214


Epoch 1:  38%|███▊      | 324/844 [04:25<07:12,  1.20it/s]

batch loss: 0.4523


Epoch 1:  39%|███▊      | 325/844 [04:25<07:10,  1.20it/s]

batch loss: 0.4998


Epoch 1:  39%|███▊      | 326/844 [04:26<07:09,  1.21it/s]

batch loss: 0.4767


Epoch 1:  39%|███▊      | 327/844 [04:27<07:08,  1.21it/s]

batch loss: 0.4067


Epoch 1:  39%|███▉      | 328/844 [04:28<07:08,  1.21it/s]

batch loss: 0.5843


Epoch 1:  39%|███▉      | 329/844 [04:29<07:06,  1.21it/s]

batch loss: 0.5079


Epoch 1:  39%|███▉      | 330/844 [04:29<07:06,  1.21it/s]

batch loss: 0.4185


Epoch 1:  39%|███▉      | 331/844 [04:30<07:05,  1.20it/s]

batch loss: 0.4625


Epoch 1:  39%|███▉      | 332/844 [04:31<07:04,  1.21it/s]

batch loss: 0.5328


Epoch 1:  39%|███▉      | 333/844 [04:32<07:03,  1.21it/s]

batch loss: 0.4516


Epoch 1:  40%|███▉      | 334/844 [04:33<07:04,  1.20it/s]

batch loss: 0.4850


Epoch 1:  40%|███▉      | 335/844 [04:34<07:04,  1.20it/s]

batch loss: 0.3608


Epoch 1:  40%|███▉      | 336/844 [04:34<07:04,  1.20it/s]

batch loss: 0.4814


Epoch 1:  40%|███▉      | 337/844 [04:35<07:02,  1.20it/s]

batch loss: 0.4612


Epoch 1:  40%|████      | 338/844 [04:36<07:00,  1.20it/s]

batch loss: 0.5127


Epoch 1:  40%|████      | 339/844 [04:37<06:59,  1.20it/s]

batch loss: 0.4184


Epoch 1:  40%|████      | 340/844 [04:38<06:59,  1.20it/s]

batch loss: 0.4038


Epoch 1:  40%|████      | 341/844 [04:39<06:58,  1.20it/s]

batch loss: 0.4469


Epoch 1:  41%|████      | 342/844 [04:39<06:57,  1.20it/s]

batch loss: 0.3998


Epoch 1:  41%|████      | 343/844 [04:40<06:55,  1.20it/s]

batch loss: 0.4155


Epoch 1:  41%|████      | 344/844 [04:41<06:54,  1.21it/s]

batch loss: 0.4943


Epoch 1:  41%|████      | 345/844 [04:42<06:53,  1.21it/s]

batch loss: 0.5550


Epoch 1:  41%|████      | 346/844 [04:43<06:53,  1.21it/s]

batch loss: 0.4142


Epoch 1:  41%|████      | 347/844 [04:44<06:52,  1.21it/s]

batch loss: 0.3853


Epoch 1:  41%|████      | 348/844 [04:44<06:51,  1.20it/s]

batch loss: 0.5128


Epoch 1:  41%|████▏     | 349/844 [04:45<06:51,  1.20it/s]

batch loss: 0.4305


Epoch 1:  41%|████▏     | 350/844 [04:46<06:52,  1.20it/s]

batch loss: 0.4661


Epoch 1:  42%|████▏     | 351/844 [04:47<06:51,  1.20it/s]

batch loss: 0.4954


Epoch 1:  42%|████▏     | 352/844 [04:48<06:51,  1.20it/s]

batch loss: 0.4676


Epoch 1:  42%|████▏     | 353/844 [04:49<06:50,  1.20it/s]

batch loss: 0.4199


Epoch 1:  42%|████▏     | 354/844 [04:49<06:50,  1.19it/s]

batch loss: 0.4899


Epoch 1:  42%|████▏     | 355/844 [04:50<06:49,  1.19it/s]

batch loss: 0.5177


Epoch 1:  42%|████▏     | 356/844 [04:51<06:47,  1.20it/s]

batch loss: 0.5130


Epoch 1:  42%|████▏     | 357/844 [04:52<06:46,  1.20it/s]

batch loss: 0.5100


Epoch 1:  42%|████▏     | 358/844 [04:53<06:45,  1.20it/s]

batch loss: 0.5488


Epoch 1:  43%|████▎     | 359/844 [04:54<06:44,  1.20it/s]

batch loss: 0.3911


Epoch 1:  43%|████▎     | 360/844 [04:54<06:43,  1.20it/s]

batch loss: 0.5438


Epoch 1:  43%|████▎     | 361/844 [04:55<06:42,  1.20it/s]

batch loss: 0.4200


Epoch 1:  43%|████▎     | 362/844 [04:56<06:41,  1.20it/s]

batch loss: 0.4244


Epoch 1:  43%|████▎     | 363/844 [04:57<06:41,  1.20it/s]

batch loss: 0.4184


Epoch 1:  43%|████▎     | 364/844 [04:58<06:39,  1.20it/s]

batch loss: 0.5171


Epoch 1:  43%|████▎     | 365/844 [04:59<06:39,  1.20it/s]

batch loss: 0.4295


Epoch 1:  43%|████▎     | 366/844 [04:59<06:37,  1.20it/s]

batch loss: 0.4982


Epoch 1:  43%|████▎     | 367/844 [05:00<06:37,  1.20it/s]

batch loss: 0.4240


Epoch 1:  44%|████▎     | 368/844 [05:01<06:37,  1.20it/s]

batch loss: 0.4606


Epoch 1:  44%|████▎     | 369/844 [05:02<06:37,  1.20it/s]

batch loss: 0.4481


Epoch 1:  44%|████▍     | 370/844 [05:03<06:36,  1.20it/s]

batch loss: 0.4698


Epoch 1:  44%|████▍     | 371/844 [05:04<06:34,  1.20it/s]

batch loss: 0.4571


Epoch 1:  44%|████▍     | 372/844 [05:04<06:34,  1.20it/s]

batch loss: 0.4556


Epoch 1:  44%|████▍     | 373/844 [05:05<06:33,  1.20it/s]

batch loss: 0.4495


Epoch 1:  44%|████▍     | 374/844 [05:06<06:31,  1.20it/s]

batch loss: 0.4247


Epoch 1:  44%|████▍     | 375/844 [05:07<06:31,  1.20it/s]

batch loss: 0.4432


Epoch 1:  45%|████▍     | 376/844 [05:08<06:29,  1.20it/s]

batch loss: 0.5628


Epoch 1:  45%|████▍     | 377/844 [05:09<06:29,  1.20it/s]

batch loss: 0.4236


Epoch 1:  45%|████▍     | 378/844 [05:09<06:28,  1.20it/s]

batch loss: 0.5123


Epoch 1:  45%|████▍     | 379/844 [05:10<06:27,  1.20it/s]

batch loss: 0.4296


Epoch 1:  45%|████▌     | 380/844 [05:11<06:27,  1.20it/s]

batch loss: 0.5065


Epoch 1:  45%|████▌     | 381/844 [05:12<06:27,  1.20it/s]

batch loss: 0.4646


Epoch 1:  45%|████▌     | 382/844 [05:13<06:25,  1.20it/s]

batch loss: 0.4427


Epoch 1:  45%|████▌     | 383/844 [05:14<06:26,  1.19it/s]

batch loss: 0.4825


Epoch 1:  45%|████▌     | 384/844 [05:15<06:27,  1.19it/s]

batch loss: 0.4905


Epoch 1:  46%|████▌     | 385/844 [05:15<06:25,  1.19it/s]

batch loss: 0.4720


Epoch 1:  46%|████▌     | 386/844 [05:16<06:24,  1.19it/s]

batch loss: 0.4861


Epoch 1:  46%|████▌     | 387/844 [05:17<06:21,  1.20it/s]

batch loss: 0.4496


Epoch 1:  46%|████▌     | 388/844 [05:18<06:22,  1.19it/s]

batch loss: 0.4425


Epoch 1:  46%|████▌     | 389/844 [05:19<06:20,  1.20it/s]

batch loss: 0.5086


Epoch 1:  46%|████▌     | 390/844 [05:20<06:20,  1.19it/s]

batch loss: 0.5624


Epoch 1:  46%|████▋     | 391/844 [05:20<06:18,  1.20it/s]

batch loss: 0.4414


Epoch 1:  46%|████▋     | 392/844 [05:21<06:17,  1.20it/s]

batch loss: 0.4747


Epoch 1:  47%|████▋     | 393/844 [05:22<06:15,  1.20it/s]

batch loss: 0.4511


Epoch 1:  47%|████▋     | 394/844 [05:23<06:15,  1.20it/s]

batch loss: 0.5047


Epoch 1:  47%|████▋     | 395/844 [05:24<06:14,  1.20it/s]

batch loss: 0.4739


Epoch 1:  47%|████▋     | 396/844 [05:25<06:13,  1.20it/s]

batch loss: 0.4845


Epoch 1:  47%|████▋     | 397/844 [05:25<06:12,  1.20it/s]

batch loss: 0.4756


Epoch 1:  47%|████▋     | 398/844 [05:26<06:11,  1.20it/s]

batch loss: 0.3864


Epoch 1:  47%|████▋     | 399/844 [05:27<06:12,  1.20it/s]

batch loss: 0.4327


Epoch 1:  47%|████▋     | 400/844 [05:28<06:11,  1.20it/s]

batch loss: 0.4233


Epoch 1:  48%|████▊     | 401/844 [05:29<06:10,  1.20it/s]

batch loss: 0.5493


Epoch 1:  48%|████▊     | 402/844 [05:30<06:09,  1.20it/s]

batch loss: 0.4961


Epoch 1:  48%|████▊     | 403/844 [05:30<06:08,  1.20it/s]

batch loss: 0.4465


Epoch 1:  48%|████▊     | 404/844 [05:31<06:08,  1.19it/s]

batch loss: 0.4423


Epoch 1:  48%|████▊     | 405/844 [05:32<06:07,  1.20it/s]

batch loss: 0.5278


Epoch 1:  48%|████▊     | 406/844 [05:33<06:05,  1.20it/s]

batch loss: 0.4609


Epoch 1:  48%|████▊     | 407/844 [05:34<06:05,  1.20it/s]

batch loss: 0.4766


Epoch 1:  48%|████▊     | 408/844 [05:35<06:03,  1.20it/s]

batch loss: 0.4314


Epoch 1:  48%|████▊     | 409/844 [05:35<06:03,  1.20it/s]

batch loss: 0.4944


Epoch 1:  49%|████▊     | 410/844 [05:36<06:02,  1.20it/s]

batch loss: 0.4607


Epoch 1:  49%|████▊     | 411/844 [05:37<06:01,  1.20it/s]

batch loss: 0.4672


Epoch 1:  49%|████▉     | 412/844 [05:38<06:00,  1.20it/s]

batch loss: 0.4391


Epoch 1:  49%|████▉     | 413/844 [05:39<06:00,  1.20it/s]

batch loss: 0.4765


Epoch 1:  49%|████▉     | 414/844 [05:40<05:59,  1.20it/s]

batch loss: 0.5054


Epoch 1:  49%|████▉     | 415/844 [05:40<05:59,  1.19it/s]

batch loss: 0.5504


Epoch 1:  49%|████▉     | 416/844 [05:41<05:57,  1.20it/s]

batch loss: 0.4629


Epoch 1:  49%|████▉     | 417/844 [05:42<05:58,  1.19it/s]

batch loss: 0.4757


Epoch 1:  50%|████▉     | 418/844 [05:43<05:56,  1.19it/s]

batch loss: 0.4533


Epoch 1:  50%|████▉     | 419/844 [05:44<05:55,  1.19it/s]

batch loss: 0.5250


Epoch 1:  50%|████▉     | 420/844 [05:45<05:54,  1.19it/s]

batch loss: 0.5027


Epoch 1:  50%|████▉     | 421/844 [05:45<05:54,  1.19it/s]

batch loss: 0.4745


Epoch 1:  50%|█████     | 422/844 [05:46<05:52,  1.20it/s]

batch loss: 0.4733


Epoch 1:  50%|█████     | 423/844 [05:47<05:51,  1.20it/s]

batch loss: 0.5126


Epoch 1:  50%|█████     | 424/844 [05:48<05:49,  1.20it/s]

batch loss: 0.4377


Epoch 1:  50%|█████     | 425/844 [05:49<05:49,  1.20it/s]

batch loss: 0.5025


Epoch 1:  50%|█████     | 426/844 [05:50<05:49,  1.20it/s]

batch loss: 0.5118


Epoch 1:  51%|█████     | 427/844 [05:50<05:48,  1.20it/s]

batch loss: 0.4750


Epoch 1:  51%|█████     | 428/844 [05:51<05:47,  1.20it/s]

batch loss: 0.4512


Epoch 1:  51%|█████     | 429/844 [05:52<05:47,  1.20it/s]

batch loss: 0.5277


Epoch 1:  51%|█████     | 430/844 [05:53<05:45,  1.20it/s]

batch loss: 0.4413


Epoch 1:  51%|█████     | 431/844 [05:54<05:45,  1.19it/s]

batch loss: 0.4981


Epoch 1:  51%|█████     | 432/844 [05:55<05:46,  1.19it/s]

batch loss: 0.4570


Epoch 1:  51%|█████▏    | 433/844 [05:55<05:45,  1.19it/s]

batch loss: 0.5489


Epoch 1:  51%|█████▏    | 434/844 [05:56<05:44,  1.19it/s]

batch loss: 0.4287


Epoch 1:  52%|█████▏    | 435/844 [05:57<05:42,  1.19it/s]

batch loss: 0.4387


Epoch 1:  52%|█████▏    | 436/844 [05:58<05:40,  1.20it/s]

batch loss: 0.4619


Epoch 1:  52%|█████▏    | 437/844 [05:59<05:39,  1.20it/s]

batch loss: 0.4492


Epoch 1:  52%|█████▏    | 438/844 [06:00<05:39,  1.19it/s]

batch loss: 0.4582


Epoch 1:  52%|█████▏    | 439/844 [06:01<05:37,  1.20it/s]

batch loss: 0.5382


Epoch 1:  52%|█████▏    | 440/844 [06:01<05:36,  1.20it/s]

batch loss: 0.4541


Epoch 1:  52%|█████▏    | 441/844 [06:02<05:35,  1.20it/s]

batch loss: 0.3373


Epoch 1:  52%|█████▏    | 442/844 [06:03<05:35,  1.20it/s]

batch loss: 0.5553


Epoch 1:  52%|█████▏    | 443/844 [06:04<05:34,  1.20it/s]

batch loss: 0.4178


Epoch 1:  53%|█████▎    | 444/844 [06:05<05:33,  1.20it/s]

batch loss: 0.4503


Epoch 1:  53%|█████▎    | 445/844 [06:05<05:32,  1.20it/s]

batch loss: 0.4911


Epoch 1:  53%|█████▎    | 446/844 [06:06<05:32,  1.20it/s]

batch loss: 0.5349


Epoch 1:  53%|█████▎    | 447/844 [06:07<05:31,  1.20it/s]

batch loss: 0.4082


Epoch 1:  53%|█████▎    | 448/844 [06:08<05:32,  1.19it/s]

batch loss: 0.4768


Epoch 1:  53%|█████▎    | 449/844 [06:09<05:30,  1.20it/s]

batch loss: 0.4526


Epoch 1:  53%|█████▎    | 450/844 [06:10<05:29,  1.20it/s]

batch loss: 0.4029


Epoch 1:  53%|█████▎    | 451/844 [06:11<05:28,  1.20it/s]

batch loss: 0.4681


Epoch 1:  54%|█████▎    | 452/844 [06:11<05:27,  1.20it/s]

batch loss: 0.4681


Epoch 1:  54%|█████▎    | 453/844 [06:12<05:25,  1.20it/s]

batch loss: 0.5985


Epoch 1:  54%|█████▍    | 454/844 [06:13<05:24,  1.20it/s]

batch loss: 0.4446


Epoch 1:  54%|█████▍    | 455/844 [06:14<05:23,  1.20it/s]

batch loss: 0.4949


Epoch 1:  54%|█████▍    | 456/844 [06:15<05:23,  1.20it/s]

batch loss: 0.3837


Epoch 1:  54%|█████▍    | 457/844 [06:16<05:23,  1.20it/s]

batch loss: 0.4795


Epoch 1:  54%|█████▍    | 458/844 [06:16<05:22,  1.20it/s]

batch loss: 0.4998


Epoch 1:  54%|█████▍    | 459/844 [06:17<05:20,  1.20it/s]

batch loss: 0.4452


Epoch 1:  55%|█████▍    | 460/844 [06:18<05:19,  1.20it/s]

batch loss: 0.4700


Epoch 1:  55%|█████▍    | 461/844 [06:19<05:19,  1.20it/s]

batch loss: 0.4600


Epoch 1:  55%|█████▍    | 462/844 [06:20<05:19,  1.20it/s]

batch loss: 0.4866


Epoch 1:  55%|█████▍    | 463/844 [06:21<05:18,  1.20it/s]

batch loss: 0.5387


Epoch 1:  55%|█████▍    | 464/844 [06:21<05:18,  1.19it/s]

batch loss: 0.4634


Epoch 1:  55%|█████▌    | 465/844 [06:22<05:18,  1.19it/s]

batch loss: 0.4696


Epoch 1:  55%|█████▌    | 466/844 [06:23<05:16,  1.19it/s]

batch loss: 0.4779


Epoch 1:  55%|█████▌    | 467/844 [06:24<05:14,  1.20it/s]

batch loss: 0.5734


Epoch 1:  55%|█████▌    | 468/844 [06:25<05:13,  1.20it/s]

batch loss: 0.4054


Epoch 1:  56%|█████▌    | 469/844 [06:26<05:13,  1.20it/s]

batch loss: 0.5570


Epoch 1:  56%|█████▌    | 470/844 [06:26<05:12,  1.20it/s]

batch loss: 0.4625


Epoch 1:  56%|█████▌    | 471/844 [06:27<05:11,  1.20it/s]

batch loss: 0.4380


Epoch 1:  56%|█████▌    | 472/844 [06:28<05:09,  1.20it/s]

batch loss: 0.4786


Epoch 1:  56%|█████▌    | 473/844 [06:29<05:08,  1.20it/s]

batch loss: 0.4668


Epoch 1:  56%|█████▌    | 474/844 [06:30<05:08,  1.20it/s]

batch loss: 0.4713


Epoch 1:  56%|█████▋    | 475/844 [06:31<05:07,  1.20it/s]

batch loss: 0.4824


Epoch 1:  56%|█████▋    | 476/844 [06:31<05:06,  1.20it/s]

batch loss: 0.4705


Epoch 1:  57%|█████▋    | 477/844 [06:32<05:05,  1.20it/s]

batch loss: 0.4542


Epoch 1:  57%|█████▋    | 478/844 [06:33<05:04,  1.20it/s]

batch loss: 0.4096


Epoch 1:  57%|█████▋    | 479/844 [06:34<05:03,  1.20it/s]

batch loss: 0.4660


Epoch 1:  57%|█████▋    | 480/844 [06:35<05:04,  1.20it/s]

batch loss: 0.5872


Epoch 1:  57%|█████▋    | 481/844 [06:36<05:02,  1.20it/s]

batch loss: 0.5064


Epoch 1:  57%|█████▋    | 482/844 [06:36<05:02,  1.20it/s]

batch loss: 0.5148


Epoch 1:  57%|█████▋    | 483/844 [06:37<05:01,  1.20it/s]

batch loss: 0.4579


Epoch 1:  57%|█████▋    | 484/844 [06:38<05:00,  1.20it/s]

batch loss: 0.4754


Epoch 1:  57%|█████▋    | 485/844 [06:39<04:59,  1.20it/s]

batch loss: 0.4584


Epoch 1:  58%|█████▊    | 486/844 [06:40<04:58,  1.20it/s]

batch loss: 0.5226


Epoch 1:  58%|█████▊    | 487/844 [06:41<04:57,  1.20it/s]

batch loss: 0.4388


Epoch 1:  58%|█████▊    | 488/844 [06:41<04:57,  1.20it/s]

batch loss: 0.5216


Epoch 1:  58%|█████▊    | 489/844 [06:42<04:56,  1.20it/s]

batch loss: 0.4597


Epoch 1:  58%|█████▊    | 490/844 [06:43<04:55,  1.20it/s]

batch loss: 0.4364


Epoch 1:  58%|█████▊    | 491/844 [06:44<04:53,  1.20it/s]

batch loss: 0.4382


Epoch 1:  58%|█████▊    | 492/844 [06:45<04:52,  1.20it/s]

batch loss: 0.3935


Epoch 1:  58%|█████▊    | 493/844 [06:46<04:52,  1.20it/s]

batch loss: 0.5571


Epoch 1:  59%|█████▊    | 494/844 [06:46<04:51,  1.20it/s]

batch loss: 0.4755


Epoch 1:  59%|█████▊    | 495/844 [06:47<04:51,  1.20it/s]

batch loss: 0.5343


Epoch 1:  59%|█████▉    | 496/844 [06:48<04:50,  1.20it/s]

batch loss: 0.5188


Epoch 1:  59%|█████▉    | 497/844 [06:49<04:50,  1.19it/s]

batch loss: 0.4670


Epoch 1:  59%|█████▉    | 498/844 [06:50<04:50,  1.19it/s]

batch loss: 0.4823


Epoch 1:  59%|█████▉    | 499/844 [06:51<04:48,  1.19it/s]

batch loss: 0.3982


Epoch 1:  59%|█████▉    | 500/844 [06:51<04:47,  1.20it/s]

batch loss: 0.5265


Epoch 1:  59%|█████▉    | 501/844 [06:52<04:46,  1.20it/s]

batch loss: 0.4421


Epoch 1:  59%|█████▉    | 502/844 [06:53<04:45,  1.20it/s]

batch loss: 0.4863


Epoch 1:  60%|█████▉    | 503/844 [06:54<04:44,  1.20it/s]

batch loss: 0.4880


Epoch 1:  60%|█████▉    | 504/844 [06:55<04:42,  1.20it/s]

batch loss: 0.4465


Epoch 1:  60%|█████▉    | 505/844 [06:56<04:41,  1.20it/s]

batch loss: 0.5582


Epoch 1:  60%|█████▉    | 506/844 [06:56<04:41,  1.20it/s]

batch loss: 0.4524


Epoch 1:  60%|██████    | 507/844 [06:57<04:40,  1.20it/s]

batch loss: 0.4620


Epoch 1:  60%|██████    | 508/844 [06:58<04:39,  1.20it/s]

batch loss: 0.4950


Epoch 1:  60%|██████    | 509/844 [06:59<04:38,  1.20it/s]

batch loss: 0.4563


Epoch 1:  60%|██████    | 510/844 [07:00<04:37,  1.20it/s]

batch loss: 0.4658


Epoch 1:  61%|██████    | 511/844 [07:01<04:36,  1.20it/s]

batch loss: 0.5405


Epoch 1:  61%|██████    | 512/844 [07:01<04:36,  1.20it/s]

batch loss: 0.4225


Epoch 1:  61%|██████    | 513/844 [07:02<04:36,  1.20it/s]

batch loss: 0.4667


Epoch 1:  61%|██████    | 514/844 [07:03<04:35,  1.20it/s]

batch loss: 0.4598


Epoch 1:  61%|██████    | 515/844 [07:04<04:35,  1.19it/s]

batch loss: 0.4787


Epoch 1:  61%|██████    | 516/844 [07:05<04:34,  1.20it/s]

batch loss: 0.5516


Epoch 1:  61%|██████▏   | 517/844 [07:06<04:32,  1.20it/s]

batch loss: 0.5744


Epoch 1:  61%|██████▏   | 518/844 [07:06<04:31,  1.20it/s]

batch loss: 0.4841


Epoch 1:  61%|██████▏   | 519/844 [07:07<04:30,  1.20it/s]

batch loss: 0.5080


Epoch 1:  62%|██████▏   | 520/844 [07:08<04:29,  1.20it/s]

batch loss: 0.4898


Epoch 1:  62%|██████▏   | 521/844 [07:09<04:28,  1.20it/s]

batch loss: 0.4528


Epoch 1:  62%|██████▏   | 522/844 [07:10<04:29,  1.20it/s]

batch loss: 0.5027


Epoch 1:  62%|██████▏   | 523/844 [07:11<04:27,  1.20it/s]

batch loss: 0.5410


Epoch 1:  62%|██████▏   | 524/844 [07:11<04:26,  1.20it/s]

batch loss: 0.4765


Epoch 1:  62%|██████▏   | 525/844 [07:12<04:25,  1.20it/s]

batch loss: 0.4220


Epoch 1:  62%|██████▏   | 526/844 [07:13<04:25,  1.20it/s]

batch loss: 0.5146


Epoch 1:  62%|██████▏   | 527/844 [07:14<04:24,  1.20it/s]

batch loss: 0.4980


Epoch 1:  63%|██████▎   | 528/844 [07:15<04:24,  1.19it/s]

batch loss: 0.5247


Epoch 1:  63%|██████▎   | 529/844 [07:16<04:24,  1.19it/s]

batch loss: 0.4682


Epoch 1:  63%|██████▎   | 530/844 [07:16<04:22,  1.19it/s]

batch loss: 0.5592


Epoch 1:  63%|██████▎   | 531/844 [07:17<04:22,  1.19it/s]

batch loss: 0.4903


Epoch 1:  63%|██████▎   | 532/844 [07:18<04:21,  1.19it/s]

batch loss: 0.4771


Epoch 1:  63%|██████▎   | 533/844 [07:19<04:20,  1.19it/s]

batch loss: 0.4745


Epoch 1:  63%|██████▎   | 534/844 [07:20<04:18,  1.20it/s]

batch loss: 0.4449


Epoch 1:  63%|██████▎   | 535/844 [07:21<04:17,  1.20it/s]

batch loss: 0.4420


Epoch 1:  64%|██████▎   | 536/844 [07:21<04:16,  1.20it/s]

batch loss: 0.4740


Epoch 1:  64%|██████▎   | 537/844 [07:22<04:15,  1.20it/s]

batch loss: 0.4933


Epoch 1:  64%|██████▎   | 538/844 [07:23<04:14,  1.20it/s]

batch loss: 0.5751


Epoch 1:  64%|██████▍   | 539/844 [07:24<04:13,  1.20it/s]

batch loss: 0.5131


Epoch 1:  64%|██████▍   | 540/844 [07:25<04:12,  1.20it/s]

batch loss: 0.5805


Epoch 1:  64%|██████▍   | 541/844 [07:26<04:11,  1.20it/s]

batch loss: 0.4191


Epoch 1:  64%|██████▍   | 542/844 [07:26<04:10,  1.20it/s]

batch loss: 0.4876


Epoch 1:  64%|██████▍   | 543/844 [07:27<04:10,  1.20it/s]

batch loss: 0.5292


Epoch 1:  64%|██████▍   | 544/844 [07:28<04:10,  1.20it/s]

batch loss: 0.4853


Epoch 1:  65%|██████▍   | 545/844 [07:29<04:10,  1.20it/s]

batch loss: 0.4975


Epoch 1:  65%|██████▍   | 546/844 [07:30<04:09,  1.19it/s]

batch loss: 0.4453


Epoch 1:  65%|██████▍   | 547/844 [07:31<04:08,  1.20it/s]

batch loss: 0.5130


Epoch 1:  65%|██████▍   | 548/844 [07:31<04:07,  1.20it/s]

batch loss: 0.4707


Epoch 1:  65%|██████▌   | 549/844 [07:32<04:06,  1.20it/s]

batch loss: 0.5352


Epoch 1:  65%|██████▌   | 550/844 [07:33<04:05,  1.20it/s]

batch loss: 0.4891


Epoch 1:  65%|██████▌   | 551/844 [07:34<04:04,  1.20it/s]

batch loss: 0.4525


Epoch 1:  65%|██████▌   | 552/844 [07:35<04:02,  1.20it/s]

batch loss: 0.4958


Epoch 1:  66%|██████▌   | 553/844 [07:36<04:01,  1.20it/s]

batch loss: 0.4681


Epoch 1:  66%|██████▌   | 554/844 [07:36<04:00,  1.20it/s]

batch loss: 0.4675


Epoch 1:  66%|██████▌   | 555/844 [07:37<04:00,  1.20it/s]

batch loss: 0.4370


Epoch 1:  66%|██████▌   | 556/844 [07:38<03:59,  1.20it/s]

batch loss: 0.4035


Epoch 1:  66%|██████▌   | 557/844 [07:39<03:58,  1.20it/s]

batch loss: 0.4674


Epoch 1:  66%|██████▌   | 558/844 [07:40<03:57,  1.21it/s]

batch loss: 0.4857


Epoch 1:  66%|██████▌   | 559/844 [07:41<03:56,  1.21it/s]

batch loss: 0.4779


Epoch 1:  66%|██████▋   | 560/844 [07:41<03:56,  1.20it/s]

batch loss: 0.5287


Epoch 1:  66%|██████▋   | 561/844 [07:42<03:55,  1.20it/s]

batch loss: 0.4861


Epoch 1:  67%|██████▋   | 562/844 [07:43<03:54,  1.20it/s]

batch loss: 0.4904


Epoch 1:  67%|██████▋   | 563/844 [07:44<03:54,  1.20it/s]

batch loss: 0.4128


Epoch 1:  67%|██████▋   | 564/844 [07:45<03:53,  1.20it/s]

batch loss: 0.4863


Epoch 1:  67%|██████▋   | 565/844 [07:46<03:53,  1.20it/s]

batch loss: 0.4199


Epoch 1:  67%|██████▋   | 566/844 [07:46<03:52,  1.20it/s]

batch loss: 0.4460


Epoch 1:  67%|██████▋   | 567/844 [07:47<03:51,  1.19it/s]

batch loss: 0.4555


Epoch 1:  67%|██████▋   | 568/844 [07:48<03:50,  1.20it/s]

batch loss: 0.4576


Epoch 1:  67%|██████▋   | 569/844 [07:49<03:49,  1.20it/s]

batch loss: 0.4917


Epoch 1:  68%|██████▊   | 570/844 [07:50<03:48,  1.20it/s]

batch loss: 0.4433


Epoch 1:  68%|██████▊   | 571/844 [07:51<03:47,  1.20it/s]

batch loss: 0.3846


Epoch 1:  68%|██████▊   | 572/844 [07:51<03:47,  1.20it/s]

batch loss: 0.4820


Epoch 1:  68%|██████▊   | 573/844 [07:52<03:46,  1.19it/s]

batch loss: 0.4677


Epoch 1:  68%|██████▊   | 574/844 [07:53<03:46,  1.19it/s]

batch loss: 0.5378


Epoch 1:  68%|██████▊   | 575/844 [07:54<03:44,  1.20it/s]

batch loss: 0.4855


Epoch 1:  68%|██████▊   | 576/844 [07:55<03:43,  1.20it/s]

batch loss: 0.4522


Epoch 1:  68%|██████▊   | 577/844 [07:56<03:43,  1.19it/s]

batch loss: 0.5039


Epoch 1:  68%|██████▊   | 578/844 [07:56<03:42,  1.19it/s]

batch loss: 0.4760


Epoch 1:  69%|██████▊   | 579/844 [07:57<03:42,  1.19it/s]

batch loss: 0.4628


Epoch 1:  69%|██████▊   | 580/844 [07:58<03:42,  1.19it/s]

batch loss: 0.5485


Epoch 1:  69%|██████▉   | 581/844 [07:59<03:40,  1.19it/s]

batch loss: 0.4890


Epoch 1:  69%|██████▉   | 582/844 [08:00<03:39,  1.19it/s]

batch loss: 0.5223


Epoch 1:  69%|██████▉   | 583/844 [08:01<03:37,  1.20it/s]

batch loss: 0.4400


Epoch 1:  69%|██████▉   | 584/844 [08:01<03:37,  1.20it/s]

batch loss: 0.4665


Epoch 1:  69%|██████▉   | 585/844 [08:02<03:35,  1.20it/s]

batch loss: 0.4007


Epoch 1:  69%|██████▉   | 586/844 [08:03<03:34,  1.20it/s]

batch loss: 0.4405


Epoch 1:  70%|██████▉   | 587/844 [08:04<03:33,  1.20it/s]

batch loss: 0.4587


Epoch 1:  70%|██████▉   | 588/844 [08:05<03:32,  1.20it/s]

batch loss: 0.4613


Epoch 1:  70%|██████▉   | 589/844 [08:06<03:32,  1.20it/s]

batch loss: 0.4646


Epoch 1:  70%|██████▉   | 590/844 [08:06<03:31,  1.20it/s]

batch loss: 0.4710


Epoch 1:  70%|███████   | 591/844 [08:07<03:30,  1.20it/s]

batch loss: 0.4201


Epoch 1:  70%|███████   | 592/844 [08:08<03:29,  1.20it/s]

batch loss: 0.4444


Epoch 1:  70%|███████   | 593/844 [08:09<03:29,  1.20it/s]

batch loss: 0.4917


Epoch 1:  70%|███████   | 594/844 [08:10<03:28,  1.20it/s]

batch loss: 0.4730


Epoch 1:  70%|███████   | 595/844 [08:11<03:27,  1.20it/s]

batch loss: 0.4989


Epoch 1:  71%|███████   | 596/844 [08:11<03:27,  1.20it/s]

batch loss: 0.4108


Epoch 1:  71%|███████   | 597/844 [08:12<03:26,  1.20it/s]

batch loss: 0.4562


Epoch 1:  71%|███████   | 598/844 [08:13<03:25,  1.20it/s]

batch loss: 0.4240


Epoch 1:  71%|███████   | 599/844 [08:14<03:24,  1.20it/s]

batch loss: 0.4017


Epoch 1:  71%|███████   | 600/844 [08:15<03:23,  1.20it/s]

batch loss: 0.5225


Epoch 1:  71%|███████   | 601/844 [08:16<03:22,  1.20it/s]

batch loss: 0.4340


Epoch 1:  71%|███████▏  | 602/844 [08:16<03:21,  1.20it/s]

batch loss: 0.4760


Epoch 1:  71%|███████▏  | 603/844 [08:17<03:20,  1.20it/s]

batch loss: 0.4495


Epoch 1:  72%|███████▏  | 604/844 [08:18<03:19,  1.20it/s]

batch loss: 0.5096


Epoch 1:  72%|███████▏  | 605/844 [08:19<03:18,  1.20it/s]

batch loss: 0.4436


Epoch 1:  72%|███████▏  | 606/844 [08:20<03:18,  1.20it/s]

batch loss: 0.4972


Epoch 1:  72%|███████▏  | 607/844 [08:21<03:17,  1.20it/s]

batch loss: 0.5655


Epoch 1:  72%|███████▏  | 608/844 [08:21<03:17,  1.20it/s]

batch loss: 0.5250


Epoch 1:  72%|███████▏  | 609/844 [08:22<03:16,  1.20it/s]

batch loss: 0.4391


Epoch 1:  72%|███████▏  | 610/844 [08:23<03:15,  1.20it/s]

batch loss: 0.5547


Epoch 1:  72%|███████▏  | 611/844 [08:24<03:14,  1.20it/s]

batch loss: 0.4292


Epoch 1:  73%|███████▎  | 612/844 [08:25<03:14,  1.19it/s]

batch loss: 0.4597


Epoch 1:  73%|███████▎  | 613/844 [08:26<03:13,  1.20it/s]

batch loss: 0.4659


Epoch 1:  73%|███████▎  | 614/844 [08:26<03:12,  1.20it/s]

batch loss: 0.5108


Epoch 1:  73%|███████▎  | 615/844 [08:27<03:11,  1.19it/s]

batch loss: 0.4930


Epoch 1:  73%|███████▎  | 616/844 [08:28<03:10,  1.20it/s]

batch loss: 0.5035


Epoch 1:  73%|███████▎  | 617/844 [08:29<03:09,  1.20it/s]

batch loss: 0.4288


Epoch 1:  73%|███████▎  | 618/844 [08:30<03:08,  1.20it/s]

batch loss: 0.4194


Epoch 1:  73%|███████▎  | 619/844 [08:31<03:07,  1.20it/s]

batch loss: 0.4830


Epoch 1:  73%|███████▎  | 620/844 [08:31<03:06,  1.20it/s]

batch loss: 0.4465


Epoch 1:  74%|███████▎  | 621/844 [08:32<03:06,  1.20it/s]

batch loss: 0.4697


Epoch 1:  74%|███████▎  | 622/844 [08:33<03:04,  1.20it/s]

batch loss: 0.5204


Epoch 1:  74%|███████▍  | 623/844 [08:34<03:03,  1.20it/s]

batch loss: 0.5085


Epoch 1:  74%|███████▍  | 624/844 [08:35<03:03,  1.20it/s]

batch loss: 0.3873


Epoch 1:  74%|███████▍  | 625/844 [08:36<03:02,  1.20it/s]

batch loss: 0.5289


Epoch 1:  74%|███████▍  | 626/844 [08:36<03:01,  1.20it/s]

batch loss: 0.4132


Epoch 1:  74%|███████▍  | 627/844 [08:37<03:00,  1.20it/s]

batch loss: 0.4438


Epoch 1:  74%|███████▍  | 628/844 [08:38<03:00,  1.20it/s]

batch loss: 0.4347


Epoch 1:  75%|███████▍  | 629/844 [08:39<02:59,  1.19it/s]

batch loss: 0.4709


Epoch 1:  75%|███████▍  | 630/844 [08:40<02:59,  1.20it/s]

batch loss: 0.5130


Epoch 1:  75%|███████▍  | 631/844 [08:41<02:58,  1.19it/s]

batch loss: 0.4841


Epoch 1:  75%|███████▍  | 632/844 [08:42<02:57,  1.20it/s]

batch loss: 0.4063


Epoch 1:  75%|███████▌  | 633/844 [08:42<02:56,  1.20it/s]

batch loss: 0.4522


Epoch 1:  75%|███████▌  | 634/844 [08:43<02:54,  1.20it/s]

batch loss: 0.4950


Epoch 1:  75%|███████▌  | 635/844 [08:44<02:53,  1.20it/s]

batch loss: 0.4966


Epoch 1:  75%|███████▌  | 636/844 [08:45<02:53,  1.20it/s]

batch loss: 0.4142


Epoch 1:  75%|███████▌  | 637/844 [08:46<02:52,  1.20it/s]

batch loss: 0.4959


Epoch 1:  76%|███████▌  | 638/844 [08:47<02:51,  1.20it/s]

batch loss: 0.4885


Epoch 1:  76%|███████▌  | 639/844 [08:47<02:50,  1.20it/s]

batch loss: 0.4439


Epoch 1:  76%|███████▌  | 640/844 [08:48<02:49,  1.20it/s]

batch loss: 0.4294


Epoch 1:  76%|███████▌  | 641/844 [08:49<02:48,  1.20it/s]

batch loss: 0.4250


Epoch 1:  76%|███████▌  | 642/844 [08:50<02:48,  1.20it/s]

batch loss: 0.5249


Epoch 1:  76%|███████▌  | 643/844 [08:51<02:47,  1.20it/s]

batch loss: 0.4517


Epoch 1:  76%|███████▋  | 644/844 [08:51<02:46,  1.20it/s]

batch loss: 0.4890


Epoch 1:  76%|███████▋  | 645/844 [08:52<02:45,  1.20it/s]

batch loss: 0.4237


Epoch 1:  77%|███████▋  | 646/844 [08:53<02:44,  1.20it/s]

batch loss: 0.5010


Epoch 1:  77%|███████▋  | 647/844 [08:54<02:44,  1.20it/s]

batch loss: 0.4686


Epoch 1:  77%|███████▋  | 648/844 [08:55<02:43,  1.20it/s]

batch loss: 0.4665


Epoch 1:  77%|███████▋  | 649/844 [08:56<02:42,  1.20it/s]

batch loss: 0.4278


Epoch 1:  77%|███████▋  | 650/844 [08:56<02:41,  1.20it/s]

batch loss: 0.4839


Epoch 1:  77%|███████▋  | 651/844 [08:57<02:40,  1.20it/s]

batch loss: 0.5106


Epoch 1:  77%|███████▋  | 652/844 [08:58<02:39,  1.20it/s]

batch loss: 0.4783


Epoch 1:  77%|███████▋  | 653/844 [08:59<02:39,  1.20it/s]

batch loss: 0.5032


Epoch 1:  77%|███████▋  | 654/844 [09:00<02:38,  1.20it/s]

batch loss: 0.4788


Epoch 1:  78%|███████▊  | 655/844 [09:01<02:37,  1.20it/s]

batch loss: 0.4625


Epoch 1:  78%|███████▊  | 656/844 [09:02<02:36,  1.20it/s]

batch loss: 0.4316


Epoch 1:  78%|███████▊  | 657/844 [09:02<02:35,  1.20it/s]

batch loss: 0.4667


Epoch 1:  78%|███████▊  | 658/844 [09:03<02:35,  1.20it/s]

batch loss: 0.4078


Epoch 1:  78%|███████▊  | 659/844 [09:04<02:34,  1.19it/s]

batch loss: 0.5088


Epoch 1:  78%|███████▊  | 660/844 [09:05<02:34,  1.19it/s]

batch loss: 0.4230


Epoch 1:  78%|███████▊  | 661/844 [09:06<02:45,  1.10it/s]

batch loss: 0.3903


Epoch 1:  78%|███████▊  | 662/844 [09:07<02:38,  1.15it/s]

batch loss: 0.4800


Epoch 1:  79%|███████▊  | 663/844 [09:08<02:34,  1.17it/s]

batch loss: 0.5252


Epoch 1:  79%|███████▊  | 664/844 [09:08<02:33,  1.17it/s]

batch loss: 0.4519


Epoch 1:  79%|███████▉  | 665/844 [09:09<02:32,  1.18it/s]

batch loss: 0.4134


Epoch 1:  79%|███████▉  | 666/844 [09:10<02:30,  1.19it/s]

batch loss: 0.4403


Epoch 1:  79%|███████▉  | 667/844 [09:11<02:28,  1.19it/s]

batch loss: 0.4339


Epoch 1:  79%|███████▉  | 668/844 [09:12<02:26,  1.20it/s]

batch loss: 0.3746


Epoch 1:  79%|███████▉  | 669/844 [09:13<02:25,  1.20it/s]

batch loss: 0.5069


Epoch 1:  79%|███████▉  | 670/844 [09:13<02:24,  1.20it/s]

batch loss: 0.4922


Epoch 1:  80%|███████▉  | 671/844 [09:14<02:24,  1.20it/s]

batch loss: 0.5172


Epoch 1:  80%|███████▉  | 672/844 [09:15<02:22,  1.20it/s]

batch loss: 0.3911


Epoch 1:  80%|███████▉  | 673/844 [09:16<02:21,  1.20it/s]

batch loss: 0.4740


Epoch 1:  80%|███████▉  | 674/844 [09:17<02:21,  1.20it/s]

batch loss: 0.5219


Epoch 1:  80%|███████▉  | 675/844 [09:18<02:20,  1.20it/s]

batch loss: 0.4368


Epoch 1:  80%|████████  | 676/844 [09:18<02:20,  1.20it/s]

batch loss: 0.5049


Epoch 1:  80%|████████  | 677/844 [09:19<02:19,  1.20it/s]

batch loss: 0.4774


Epoch 1:  80%|████████  | 678/844 [09:20<02:18,  1.20it/s]

batch loss: 0.5340


Epoch 1:  80%|████████  | 679/844 [09:21<02:17,  1.20it/s]

batch loss: 0.4105


Epoch 1:  81%|████████  | 680/844 [09:22<02:16,  1.20it/s]

batch loss: 0.4789


Epoch 1:  81%|████████  | 681/844 [09:23<02:15,  1.20it/s]

batch loss: 0.5226


Epoch 1:  81%|████████  | 682/844 [09:23<02:14,  1.20it/s]

batch loss: 0.5344


Epoch 1:  81%|████████  | 683/844 [09:24<02:13,  1.20it/s]

batch loss: 0.4572


Epoch 1:  81%|████████  | 684/844 [09:25<02:13,  1.20it/s]

batch loss: 0.4499


Epoch 1:  81%|████████  | 685/844 [09:26<02:12,  1.20it/s]

batch loss: 0.4579


Epoch 1:  81%|████████▏ | 686/844 [09:27<02:11,  1.20it/s]

batch loss: 0.4276


Epoch 1:  81%|████████▏ | 687/844 [09:28<02:10,  1.20it/s]

batch loss: 0.4348


Epoch 1:  82%|████████▏ | 688/844 [09:28<02:09,  1.20it/s]

batch loss: 0.5256


Epoch 1:  82%|████████▏ | 689/844 [09:29<02:09,  1.20it/s]

batch loss: 0.4300


Epoch 1:  82%|████████▏ | 690/844 [09:30<02:08,  1.20it/s]

batch loss: 0.4889


Epoch 1:  82%|████████▏ | 691/844 [09:31<02:07,  1.20it/s]

batch loss: 0.4596


Epoch 1:  82%|████████▏ | 692/844 [09:32<02:06,  1.20it/s]

batch loss: 0.4876


Epoch 1:  82%|████████▏ | 693/844 [09:33<02:05,  1.20it/s]

batch loss: 0.4743


Epoch 1:  82%|████████▏ | 694/844 [09:33<02:05,  1.20it/s]

batch loss: 0.4745


Epoch 1:  82%|████████▏ | 695/844 [09:34<02:04,  1.20it/s]

batch loss: 0.4407


Epoch 1:  82%|████████▏ | 696/844 [09:35<02:03,  1.20it/s]

batch loss: 0.4706


Epoch 1:  83%|████████▎ | 697/844 [09:36<02:02,  1.20it/s]

batch loss: 0.4376


Epoch 1:  83%|████████▎ | 698/844 [09:37<02:01,  1.20it/s]

batch loss: 0.4720


Epoch 1:  83%|████████▎ | 699/844 [09:38<02:00,  1.20it/s]

batch loss: 0.4661


Epoch 1:  83%|████████▎ | 700/844 [09:38<01:59,  1.20it/s]

batch loss: 0.5318


Epoch 1:  83%|████████▎ | 701/844 [09:39<01:58,  1.20it/s]

batch loss: 0.4782


Epoch 1:  83%|████████▎ | 702/844 [09:40<01:58,  1.20it/s]

batch loss: 0.4751


Epoch 1:  83%|████████▎ | 703/844 [09:41<01:57,  1.20it/s]

batch loss: 0.4615


Epoch 1:  83%|████████▎ | 704/844 [09:42<01:56,  1.20it/s]

batch loss: 0.4439


Epoch 1:  84%|████████▎ | 705/844 [09:43<01:55,  1.20it/s]

batch loss: 0.4263


Epoch 1:  84%|████████▎ | 706/844 [09:43<01:54,  1.20it/s]

batch loss: 0.5570


Epoch 1:  84%|████████▍ | 707/844 [09:44<01:54,  1.20it/s]

batch loss: 0.4330


Epoch 1:  84%|████████▍ | 708/844 [09:45<01:54,  1.19it/s]

batch loss: 0.4522


Epoch 1:  84%|████████▍ | 709/844 [09:46<01:53,  1.19it/s]

batch loss: 0.4008


Epoch 1:  84%|████████▍ | 710/844 [09:47<01:51,  1.20it/s]

batch loss: 0.4737


Epoch 1:  84%|████████▍ | 711/844 [09:48<01:51,  1.20it/s]

batch loss: 0.4718


Epoch 1:  84%|████████▍ | 712/844 [09:48<01:50,  1.19it/s]

batch loss: 0.5455


Epoch 1:  84%|████████▍ | 713/844 [09:49<01:49,  1.20it/s]

batch loss: 0.3865


Epoch 1:  85%|████████▍ | 714/844 [09:50<01:48,  1.20it/s]

batch loss: 0.4448


Epoch 1:  85%|████████▍ | 715/844 [09:51<01:47,  1.20it/s]

batch loss: 0.4800


Epoch 1:  85%|████████▍ | 716/844 [09:52<01:46,  1.20it/s]

batch loss: 0.4479


Epoch 1:  85%|████████▍ | 717/844 [09:53<01:45,  1.20it/s]

batch loss: 0.4940


Epoch 1:  85%|████████▌ | 718/844 [09:53<01:45,  1.20it/s]

batch loss: 0.5051


Epoch 1:  85%|████████▌ | 719/844 [09:54<01:44,  1.19it/s]

batch loss: 0.4931


Epoch 1:  85%|████████▌ | 720/844 [09:55<01:43,  1.20it/s]

batch loss: 0.4801


Epoch 1:  85%|████████▌ | 721/844 [09:56<01:42,  1.20it/s]

batch loss: 0.4861


Epoch 1:  86%|████████▌ | 722/844 [09:57<01:41,  1.20it/s]

batch loss: 0.5013


Epoch 1:  86%|████████▌ | 723/844 [09:58<01:41,  1.20it/s]

batch loss: 0.4598


Epoch 1:  86%|████████▌ | 724/844 [09:58<01:40,  1.19it/s]

batch loss: 0.4658


Epoch 1:  86%|████████▌ | 725/844 [09:59<01:39,  1.19it/s]

batch loss: 0.4993


Epoch 1:  86%|████████▌ | 726/844 [10:00<01:38,  1.19it/s]

batch loss: 0.4484


Epoch 1:  86%|████████▌ | 727/844 [10:01<01:37,  1.20it/s]

batch loss: 0.4114


Epoch 1:  86%|████████▋ | 728/844 [10:02<01:36,  1.20it/s]

batch loss: 0.5729


Epoch 1:  86%|████████▋ | 729/844 [10:03<01:36,  1.19it/s]

batch loss: 0.4316


Epoch 1:  86%|████████▋ | 730/844 [10:03<01:35,  1.19it/s]

batch loss: 0.4318


Epoch 1:  87%|████████▋ | 731/844 [10:04<01:34,  1.19it/s]

batch loss: 0.4823


Epoch 1:  87%|████████▋ | 732/844 [10:05<01:33,  1.20it/s]

batch loss: 0.4844


Epoch 1:  87%|████████▋ | 733/844 [10:06<01:32,  1.20it/s]

batch loss: 0.5276


Epoch 1:  87%|████████▋ | 734/844 [10:07<01:31,  1.20it/s]

batch loss: 0.4768


Epoch 1:  87%|████████▋ | 735/844 [10:08<01:30,  1.20it/s]

batch loss: 0.4887


Epoch 1:  87%|████████▋ | 736/844 [10:08<01:30,  1.20it/s]

batch loss: 0.5504


Epoch 1:  87%|████████▋ | 737/844 [10:09<01:29,  1.20it/s]

batch loss: 0.5508


Epoch 1:  87%|████████▋ | 738/844 [10:10<01:28,  1.20it/s]

batch loss: 0.4787


Epoch 1:  88%|████████▊ | 739/844 [10:11<01:27,  1.20it/s]

batch loss: 0.4934


Epoch 1:  88%|████████▊ | 740/844 [10:12<01:27,  1.19it/s]

batch loss: 0.4465


Epoch 1:  88%|████████▊ | 741/844 [10:13<01:26,  1.19it/s]

batch loss: 0.4569


Epoch 1:  88%|████████▊ | 742/844 [10:13<01:25,  1.19it/s]

batch loss: 0.5160


Epoch 1:  88%|████████▊ | 743/844 [10:14<01:24,  1.20it/s]

batch loss: 0.4646


Epoch 1:  88%|████████▊ | 744/844 [10:15<01:23,  1.19it/s]

batch loss: 0.4237


Epoch 1:  88%|████████▊ | 745/844 [10:16<01:22,  1.20it/s]

batch loss: 0.4742


Epoch 1:  88%|████████▊ | 746/844 [10:17<01:21,  1.20it/s]

batch loss: 0.4330


Epoch 1:  89%|████████▊ | 747/844 [10:18<01:20,  1.20it/s]

batch loss: 0.4690


Epoch 1:  89%|████████▊ | 748/844 [10:18<01:20,  1.20it/s]

batch loss: 0.4236


Epoch 1:  89%|████████▊ | 749/844 [10:19<01:19,  1.20it/s]

batch loss: 0.4147


Epoch 1:  89%|████████▉ | 750/844 [10:20<01:18,  1.20it/s]

batch loss: 0.4505


Epoch 1:  89%|████████▉ | 751/844 [10:21<01:17,  1.20it/s]

batch loss: 0.5173


Epoch 1:  89%|████████▉ | 752/844 [10:22<01:16,  1.20it/s]

batch loss: 0.4990


Epoch 1:  89%|████████▉ | 753/844 [10:23<01:15,  1.20it/s]

batch loss: 0.4511


Epoch 1:  89%|████████▉ | 754/844 [10:23<01:14,  1.20it/s]

batch loss: 0.5880


Epoch 1:  89%|████████▉ | 755/844 [10:24<01:14,  1.20it/s]

batch loss: 0.4054


Epoch 1:  90%|████████▉ | 756/844 [10:25<01:13,  1.19it/s]

batch loss: 0.5239


Epoch 1:  90%|████████▉ | 757/844 [10:26<01:12,  1.20it/s]

batch loss: 0.5149


Epoch 1:  90%|████████▉ | 758/844 [10:27<01:11,  1.20it/s]

batch loss: 0.5328


Epoch 1:  90%|████████▉ | 759/844 [10:28<01:10,  1.20it/s]

batch loss: 0.4083


Epoch 1:  90%|█████████ | 760/844 [10:28<01:10,  1.20it/s]

batch loss: 0.5240


Epoch 1:  90%|█████████ | 761/844 [10:29<01:09,  1.20it/s]

batch loss: 0.4689


Epoch 1:  90%|█████████ | 762/844 [10:30<01:08,  1.20it/s]

batch loss: 0.4287


Epoch 1:  90%|█████████ | 763/844 [10:31<01:07,  1.20it/s]

batch loss: 0.5293


Epoch 1:  91%|█████████ | 764/844 [10:32<01:06,  1.20it/s]

batch loss: 0.4698


Epoch 1:  91%|█████████ | 765/844 [10:33<01:05,  1.20it/s]

batch loss: 0.4800


Epoch 1:  91%|█████████ | 766/844 [10:33<01:05,  1.20it/s]

batch loss: 0.5448


Epoch 1:  91%|█████████ | 767/844 [10:34<01:04,  1.20it/s]

batch loss: 0.4017


Epoch 1:  91%|█████████ | 768/844 [10:35<01:03,  1.20it/s]

batch loss: 0.4776


Epoch 1:  91%|█████████ | 769/844 [10:36<01:02,  1.20it/s]

batch loss: 0.4541


Epoch 1:  91%|█████████ | 770/844 [10:37<01:01,  1.20it/s]

batch loss: 0.4862


Epoch 1:  91%|█████████▏| 771/844 [10:38<01:01,  1.20it/s]

batch loss: 0.4829


Epoch 1:  91%|█████████▏| 772/844 [10:38<01:00,  1.19it/s]

batch loss: 0.4522


Epoch 1:  92%|█████████▏| 773/844 [10:39<00:59,  1.20it/s]

batch loss: 0.4483


Epoch 1:  92%|█████████▏| 774/844 [10:40<00:58,  1.20it/s]

batch loss: 0.4391


Epoch 1:  92%|█████████▏| 775/844 [10:41<00:57,  1.20it/s]

batch loss: 0.4432


Epoch 1:  92%|█████████▏| 776/844 [10:42<00:56,  1.20it/s]

batch loss: 0.4537


Epoch 1:  92%|█████████▏| 777/844 [10:43<00:55,  1.20it/s]

batch loss: 0.4753


Epoch 1:  92%|█████████▏| 778/844 [10:43<00:55,  1.20it/s]

batch loss: 0.4257


Epoch 1:  92%|█████████▏| 779/844 [10:44<00:54,  1.20it/s]

batch loss: 0.4564


Epoch 1:  92%|█████████▏| 780/844 [10:45<00:53,  1.20it/s]

batch loss: 0.4591


Epoch 1:  93%|█████████▎| 781/844 [10:46<00:52,  1.20it/s]

batch loss: 0.4996


Epoch 1:  93%|█████████▎| 782/844 [10:47<00:51,  1.20it/s]

batch loss: 0.4054


Epoch 1:  93%|█████████▎| 783/844 [10:48<00:50,  1.20it/s]

batch loss: 0.5534


Epoch 1:  93%|█████████▎| 784/844 [10:48<00:50,  1.20it/s]

batch loss: 0.5125


Epoch 1:  93%|█████████▎| 785/844 [10:49<00:49,  1.20it/s]

batch loss: 0.4907


Epoch 1:  93%|█████████▎| 786/844 [10:50<00:48,  1.20it/s]

batch loss: 0.4254


Epoch 1:  93%|█████████▎| 787/844 [10:51<00:47,  1.20it/s]

batch loss: 0.4623


Epoch 1:  93%|█████████▎| 788/844 [10:52<00:46,  1.20it/s]

batch loss: 0.5167


Epoch 1:  93%|█████████▎| 789/844 [10:53<00:45,  1.20it/s]

batch loss: 0.5297


Epoch 1:  94%|█████████▎| 790/844 [10:53<00:45,  1.20it/s]

batch loss: 0.4732


Epoch 1:  94%|█████████▎| 791/844 [10:54<00:44,  1.20it/s]

batch loss: 0.4886


Epoch 1:  94%|█████████▍| 792/844 [10:55<00:43,  1.20it/s]

batch loss: 0.4830


Epoch 1:  94%|█████████▍| 793/844 [10:56<00:42,  1.20it/s]

batch loss: 0.4976


Epoch 1:  94%|█████████▍| 794/844 [10:57<00:41,  1.20it/s]

batch loss: 0.5035


Epoch 1:  94%|█████████▍| 795/844 [10:58<00:40,  1.20it/s]

batch loss: 0.5132


Epoch 1:  94%|█████████▍| 796/844 [10:58<00:40,  1.20it/s]

batch loss: 0.4316


Epoch 1:  94%|█████████▍| 797/844 [10:59<00:39,  1.19it/s]

batch loss: 0.5346


Epoch 1:  95%|█████████▍| 798/844 [11:00<00:38,  1.20it/s]

batch loss: 0.4581


Epoch 1:  95%|█████████▍| 799/844 [11:01<00:37,  1.20it/s]

batch loss: 0.4726


Epoch 1:  95%|█████████▍| 800/844 [11:02<00:36,  1.20it/s]

batch loss: 0.4290


Epoch 1:  95%|█████████▍| 801/844 [11:03<00:35,  1.20it/s]

batch loss: 0.5188


Epoch 1:  95%|█████████▌| 802/844 [11:03<00:34,  1.20it/s]

batch loss: 0.4661


Epoch 1:  95%|█████████▌| 803/844 [11:04<00:34,  1.20it/s]

batch loss: 0.4818


Epoch 1:  95%|█████████▌| 804/844 [11:05<00:33,  1.19it/s]

batch loss: 0.4376


Epoch 1:  95%|█████████▌| 805/844 [11:06<00:32,  1.19it/s]

batch loss: 0.4449


Epoch 1:  95%|█████████▌| 806/844 [11:07<00:31,  1.19it/s]

batch loss: 0.3836


Epoch 1:  96%|█████████▌| 807/844 [11:08<00:31,  1.19it/s]

batch loss: 0.4536


Epoch 1:  96%|█████████▌| 808/844 [11:09<00:30,  1.19it/s]

batch loss: 0.5004


Epoch 1:  96%|█████████▌| 809/844 [11:09<00:29,  1.19it/s]

batch loss: 0.4303


Epoch 1:  96%|█████████▌| 810/844 [11:10<00:28,  1.20it/s]

batch loss: 0.4242


Epoch 1:  96%|█████████▌| 811/844 [11:11<00:27,  1.20it/s]

batch loss: 0.4000


Epoch 1:  96%|█████████▌| 812/844 [11:12<00:26,  1.20it/s]

batch loss: 0.4906


Epoch 1:  96%|█████████▋| 813/844 [11:13<00:25,  1.20it/s]

batch loss: 0.4658


Epoch 1:  96%|█████████▋| 814/844 [11:14<00:25,  1.20it/s]

batch loss: 0.4857


Epoch 1:  97%|█████████▋| 815/844 [11:14<00:24,  1.20it/s]

batch loss: 0.3790


Epoch 1:  97%|█████████▋| 816/844 [11:15<00:23,  1.20it/s]

batch loss: 0.4546


Epoch 1:  97%|█████████▋| 817/844 [11:16<00:22,  1.20it/s]

batch loss: 0.4973


Epoch 1:  97%|█████████▋| 818/844 [11:17<00:21,  1.20it/s]

batch loss: 0.5892


Epoch 1:  97%|█████████▋| 819/844 [11:18<00:20,  1.20it/s]

batch loss: 0.4710


Epoch 1:  97%|█████████▋| 820/844 [11:19<00:20,  1.19it/s]

batch loss: 0.4780


Epoch 1:  97%|█████████▋| 821/844 [11:19<00:19,  1.19it/s]

batch loss: 0.4549


Epoch 1:  97%|█████████▋| 822/844 [11:20<00:18,  1.19it/s]

batch loss: 0.4671


Epoch 1:  98%|█████████▊| 823/844 [11:21<00:17,  1.20it/s]

batch loss: 0.4686


Epoch 1:  98%|█████████▊| 824/844 [11:22<00:16,  1.20it/s]

batch loss: 0.4240


Epoch 1:  98%|█████████▊| 825/844 [11:23<00:15,  1.20it/s]

batch loss: 0.4420


Epoch 1:  98%|█████████▊| 826/844 [11:24<00:15,  1.20it/s]

batch loss: 0.4891


Epoch 1:  98%|█████████▊| 827/844 [11:24<00:14,  1.20it/s]

batch loss: 0.5041


Epoch 1:  98%|█████████▊| 828/844 [11:25<00:13,  1.20it/s]

batch loss: 0.4537


Epoch 1:  98%|█████████▊| 829/844 [11:26<00:12,  1.20it/s]

batch loss: 0.4855


Epoch 1:  98%|█████████▊| 830/844 [11:27<00:11,  1.20it/s]

batch loss: 0.4795


Epoch 1:  98%|█████████▊| 831/844 [11:28<00:10,  1.20it/s]

batch loss: 0.5054


Epoch 1:  99%|█████████▊| 832/844 [11:29<00:09,  1.20it/s]

batch loss: 0.4422


Epoch 1:  99%|█████████▊| 833/844 [11:29<00:09,  1.20it/s]

batch loss: 0.4656


Epoch 1:  99%|█████████▉| 834/844 [11:30<00:08,  1.20it/s]

batch loss: 0.4777


Epoch 1:  99%|█████████▉| 835/844 [11:31<00:07,  1.20it/s]

batch loss: 0.5063


Epoch 1:  99%|█████████▉| 836/844 [11:32<00:06,  1.20it/s]

batch loss: 0.4834


Epoch 1:  99%|█████████▉| 837/844 [11:33<00:05,  1.20it/s]

batch loss: 0.4833


Epoch 1:  99%|█████████▉| 838/844 [11:34<00:05,  1.20it/s]

batch loss: 0.5040


Epoch 1:  99%|█████████▉| 839/844 [11:34<00:04,  1.19it/s]

batch loss: 0.5182


Epoch 1: 100%|█████████▉| 840/844 [11:35<00:03,  1.19it/s]

batch loss: 0.5276


Epoch 1: 100%|█████████▉| 841/844 [11:36<00:02,  1.19it/s]

batch loss: 0.4856


Epoch 1: 100%|█████████▉| 842/844 [11:37<00:01,  1.20it/s]

batch loss: 0.4585


Epoch 1: 100%|█████████▉| 843/844 [11:38<00:00,  1.20it/s]

batch loss: 0.5316


Epoch 1: 100%|██████████| 844/844 [11:38<00:00,  1.21it/s]


batch loss: 0.5462
--------------------------------------------------------------------------------
Epoch 1 - Average Loss: 0.4785


Epoch 2:   0%|          | 1/844 [00:00<11:33,  1.21it/s]

batch loss: 0.5028


Epoch 2:   0%|          | 2/844 [00:01<11:37,  1.21it/s]

batch loss: 0.4743


Epoch 2:   0%|          | 3/844 [00:02<11:39,  1.20it/s]

batch loss: 0.5089


Epoch 2:   0%|          | 4/844 [00:03<11:39,  1.20it/s]

batch loss: 0.4141


Epoch 2:   1%|          | 5/844 [00:04<11:39,  1.20it/s]

batch loss: 0.5018


Epoch 2:   1%|          | 6/844 [00:05<11:40,  1.20it/s]

batch loss: 0.4699


Epoch 2:   1%|          | 7/844 [00:05<11:38,  1.20it/s]

batch loss: 0.4735


Epoch 2:   1%|          | 8/844 [00:06<11:37,  1.20it/s]

batch loss: 0.5153


Epoch 2:   1%|          | 9/844 [00:07<11:38,  1.20it/s]

batch loss: 0.4777


Epoch 2:   1%|          | 10/844 [00:08<11:41,  1.19it/s]

batch loss: 0.4252


Epoch 2:   1%|▏         | 11/844 [00:09<11:42,  1.19it/s]

batch loss: 0.4375


Epoch 2:   1%|▏         | 12/844 [00:10<11:42,  1.18it/s]

batch loss: 0.4127


Epoch 2:   2%|▏         | 13/844 [00:10<11:36,  1.19it/s]

batch loss: 0.5250


Epoch 2:   2%|▏         | 14/844 [00:11<11:33,  1.20it/s]

batch loss: 0.4804


Epoch 2:   2%|▏         | 15/844 [00:12<11:31,  1.20it/s]

batch loss: 0.5551


Epoch 2:   2%|▏         | 16/844 [00:13<11:29,  1.20it/s]

batch loss: 0.5042


Epoch 2:   2%|▏         | 17/844 [00:14<11:27,  1.20it/s]

batch loss: 0.4444


Epoch 2:   2%|▏         | 18/844 [00:15<11:27,  1.20it/s]

batch loss: 0.4272


Epoch 2:   2%|▏         | 19/844 [00:15<11:25,  1.20it/s]

batch loss: 0.4025


Epoch 2:   2%|▏         | 20/844 [00:16<11:27,  1.20it/s]

batch loss: 0.4656


Epoch 2:   2%|▏         | 21/844 [00:17<11:26,  1.20it/s]

batch loss: 0.4687


Epoch 2:   3%|▎         | 22/844 [00:18<11:25,  1.20it/s]

batch loss: 0.4706


Epoch 2:   3%|▎         | 23/844 [00:19<11:24,  1.20it/s]

batch loss: 0.4302


Epoch 2:   3%|▎         | 24/844 [00:20<11:22,  1.20it/s]

batch loss: 0.4983


Epoch 2:   3%|▎         | 25/844 [00:20<11:25,  1.20it/s]

batch loss: 0.5082


Epoch 2:   3%|▎         | 26/844 [00:21<11:24,  1.20it/s]

batch loss: 0.4376


Epoch 2:   3%|▎         | 27/844 [00:22<11:24,  1.19it/s]

batch loss: 0.4276


Epoch 2:   3%|▎         | 28/844 [00:23<11:24,  1.19it/s]

batch loss: 0.4635


Epoch 2:   3%|▎         | 29/844 [00:24<11:21,  1.20it/s]

batch loss: 0.4106


Epoch 2:   4%|▎         | 30/844 [00:25<11:20,  1.20it/s]

batch loss: 0.4846


Epoch 2:   4%|▎         | 31/844 [00:25<11:17,  1.20it/s]

batch loss: 0.4623


Epoch 2:   4%|▍         | 32/844 [00:26<11:18,  1.20it/s]

batch loss: 0.5151


Epoch 2:   4%|▍         | 33/844 [00:27<11:18,  1.20it/s]

batch loss: 0.3918


Epoch 2:   4%|▍         | 34/844 [00:28<11:15,  1.20it/s]

batch loss: 0.3863


Epoch 2:   4%|▍         | 35/844 [00:29<11:13,  1.20it/s]

batch loss: 0.4618


Epoch 2:   4%|▍         | 36/844 [00:30<11:12,  1.20it/s]

batch loss: 0.4523


Epoch 2:   4%|▍         | 37/844 [00:30<11:13,  1.20it/s]

batch loss: 0.4767


Epoch 2:   5%|▍         | 38/844 [00:31<11:11,  1.20it/s]

batch loss: 0.4594


Epoch 2:   5%|▍         | 39/844 [00:32<11:11,  1.20it/s]

batch loss: 0.4124


Epoch 2:   5%|▍         | 40/844 [00:33<11:09,  1.20it/s]

batch loss: 0.4530


Epoch 2:   5%|▍         | 41/844 [00:34<11:08,  1.20it/s]

batch loss: 0.4810


Epoch 2:   5%|▍         | 42/844 [00:35<11:08,  1.20it/s]

batch loss: 0.5020


Epoch 2:   5%|▌         | 43/844 [00:35<11:10,  1.20it/s]

batch loss: 0.4031


Epoch 2:   5%|▌         | 44/844 [00:36<11:09,  1.19it/s]

batch loss: 0.4988


Epoch 2:   5%|▌         | 45/844 [00:37<11:09,  1.19it/s]

batch loss: 0.4711


Epoch 2:   5%|▌         | 46/844 [00:38<11:07,  1.20it/s]

batch loss: 0.4637


Epoch 2:   6%|▌         | 47/844 [00:39<11:06,  1.20it/s]

batch loss: 0.4686


Epoch 2:   6%|▌         | 48/844 [00:40<11:06,  1.19it/s]

batch loss: 0.4507


Epoch 2:   6%|▌         | 49/844 [00:40<11:04,  1.20it/s]

batch loss: 0.4200


Epoch 2:   6%|▌         | 50/844 [00:41<11:01,  1.20it/s]

batch loss: 0.5023


Epoch 2:   6%|▌         | 51/844 [00:42<11:01,  1.20it/s]

batch loss: 0.5578


Epoch 2:   6%|▌         | 52/844 [00:43<11:00,  1.20it/s]

batch loss: 0.5094


Epoch 2:   6%|▋         | 53/844 [00:44<10:59,  1.20it/s]

batch loss: 0.4857


Epoch 2:   6%|▋         | 54/844 [00:45<10:59,  1.20it/s]

batch loss: 0.3900


Epoch 2:   7%|▋         | 55/844 [00:45<10:56,  1.20it/s]

batch loss: 0.5037


Epoch 2:   7%|▋         | 56/844 [00:46<10:56,  1.20it/s]

batch loss: 0.4371


Epoch 2:   7%|▋         | 57/844 [00:47<10:53,  1.20it/s]

batch loss: 0.3776


Epoch 2:   7%|▋         | 58/844 [00:48<10:55,  1.20it/s]

batch loss: 0.4856


Epoch 2:   7%|▋         | 59/844 [00:49<10:55,  1.20it/s]

batch loss: 0.5607


Epoch 2:   7%|▋         | 60/844 [00:50<10:56,  1.20it/s]

batch loss: 0.4853


Epoch 2:   7%|▋         | 61/844 [00:50<10:53,  1.20it/s]

batch loss: 0.5057


Epoch 2:   7%|▋         | 62/844 [00:51<10:54,  1.19it/s]

batch loss: 0.4810


Epoch 2:   7%|▋         | 63/844 [00:52<10:52,  1.20it/s]

batch loss: 0.4531


Epoch 2:   8%|▊         | 64/844 [00:53<10:50,  1.20it/s]

batch loss: 0.5251


Epoch 2:   8%|▊         | 65/844 [00:54<10:49,  1.20it/s]

batch loss: 0.4730


Epoch 2:   8%|▊         | 66/844 [00:55<10:48,  1.20it/s]

batch loss: 0.4582


Epoch 2:   8%|▊         | 67/844 [00:55<10:46,  1.20it/s]

batch loss: 0.4818


Epoch 2:   8%|▊         | 68/844 [00:56<10:44,  1.20it/s]

batch loss: 0.4454


Epoch 2:   8%|▊         | 69/844 [00:57<10:43,  1.20it/s]

batch loss: 0.5754


Epoch 2:   8%|▊         | 70/844 [00:58<10:43,  1.20it/s]

batch loss: 0.4315


Epoch 2:   8%|▊         | 71/844 [00:59<10:41,  1.20it/s]

batch loss: 0.4443


Epoch 2:   9%|▊         | 72/844 [01:00<10:40,  1.20it/s]

batch loss: 0.4263


Epoch 2:   9%|▊         | 73/844 [01:00<10:40,  1.20it/s]

batch loss: 0.4270


Epoch 2:   9%|▉         | 74/844 [01:01<10:40,  1.20it/s]

batch loss: 0.5555


Epoch 2:   9%|▉         | 75/844 [01:02<10:40,  1.20it/s]

batch loss: 0.5192


Epoch 2:   9%|▉         | 76/844 [01:03<10:38,  1.20it/s]

batch loss: 0.4262


Epoch 2:   9%|▉         | 77/844 [01:04<10:38,  1.20it/s]

batch loss: 0.4187


Epoch 2:   9%|▉         | 78/844 [01:05<10:41,  1.19it/s]

batch loss: 0.4419


Epoch 2:   9%|▉         | 79/844 [01:05<10:39,  1.20it/s]

batch loss: 0.4840


Epoch 2:   9%|▉         | 80/844 [01:06<10:38,  1.20it/s]

batch loss: 0.4702


Epoch 2:  10%|▉         | 81/844 [01:07<10:36,  1.20it/s]

batch loss: 0.5035


Epoch 2:  10%|▉         | 82/844 [01:08<10:35,  1.20it/s]

batch loss: 0.4895


Epoch 2:  10%|▉         | 83/844 [01:09<10:32,  1.20it/s]

batch loss: 0.4223


Epoch 2:  10%|▉         | 84/844 [01:10<10:31,  1.20it/s]

batch loss: 0.4916


Epoch 2:  10%|█         | 85/844 [01:10<10:31,  1.20it/s]

batch loss: 0.4667


Epoch 2:  10%|█         | 86/844 [01:11<10:30,  1.20it/s]

batch loss: 0.4947


Epoch 2:  10%|█         | 87/844 [01:12<10:29,  1.20it/s]

batch loss: 0.4210


Epoch 2:  10%|█         | 88/844 [01:13<10:28,  1.20it/s]

batch loss: 0.4201


Epoch 2:  11%|█         | 89/844 [01:14<10:29,  1.20it/s]

batch loss: 0.5230


Epoch 2:  11%|█         | 90/844 [01:15<10:26,  1.20it/s]

batch loss: 0.4767


Epoch 2:  11%|█         | 91/844 [01:15<10:28,  1.20it/s]

batch loss: 0.4403


Epoch 2:  11%|█         | 92/844 [01:16<10:27,  1.20it/s]

batch loss: 0.5023


Epoch 2:  11%|█         | 93/844 [01:17<10:27,  1.20it/s]

batch loss: 0.5866


Epoch 2:  11%|█         | 94/844 [01:18<10:26,  1.20it/s]

batch loss: 0.4627


Epoch 2:  11%|█▏        | 95/844 [01:19<10:25,  1.20it/s]

batch loss: 0.4786


Epoch 2:  11%|█▏        | 96/844 [01:20<10:23,  1.20it/s]

batch loss: 0.5029


Epoch 2:  11%|█▏        | 97/844 [01:20<10:23,  1.20it/s]

batch loss: 0.4724


Epoch 2:  12%|█▏        | 98/844 [01:21<10:22,  1.20it/s]

batch loss: 0.5309


Epoch 2:  12%|█▏        | 99/844 [01:22<10:21,  1.20it/s]

batch loss: 0.5429


Epoch 2:  12%|█▏        | 100/844 [01:23<10:22,  1.20it/s]

batch loss: 0.4575


Epoch 2:  12%|█▏        | 101/844 [01:24<10:19,  1.20it/s]

batch loss: 0.4710


Epoch 2:  12%|█▏        | 102/844 [01:25<10:17,  1.20it/s]

batch loss: 0.5607


Epoch 2:  12%|█▏        | 103/844 [01:25<10:16,  1.20it/s]

batch loss: 0.4573


Epoch 2:  12%|█▏        | 104/844 [01:26<10:14,  1.20it/s]

batch loss: 0.4791


Epoch 2:  12%|█▏        | 105/844 [01:27<10:14,  1.20it/s]

batch loss: 0.5240


Epoch 2:  13%|█▎        | 106/844 [01:28<10:15,  1.20it/s]

batch loss: 0.5356


Epoch 2:  13%|█▎        | 107/844 [01:29<10:16,  1.20it/s]

batch loss: 0.4833


Epoch 2:  13%|█▎        | 108/844 [01:30<10:16,  1.19it/s]

batch loss: 0.4371


Epoch 2:  13%|█▎        | 109/844 [01:30<10:13,  1.20it/s]

batch loss: 0.5145


Epoch 2:  13%|█▎        | 110/844 [01:31<10:16,  1.19it/s]

batch loss: 0.4859


Epoch 2:  13%|█▎        | 111/844 [01:32<10:13,  1.20it/s]

batch loss: 0.4670


Epoch 2:  13%|█▎        | 112/844 [01:33<10:12,  1.20it/s]

batch loss: 0.4913


Epoch 2:  13%|█▎        | 113/844 [01:34<10:11,  1.20it/s]

batch loss: 0.4388


Epoch 2:  14%|█▎        | 114/844 [01:35<10:11,  1.19it/s]

batch loss: 0.4825


Epoch 2:  14%|█▎        | 115/844 [01:35<10:09,  1.20it/s]

batch loss: 0.4357


Epoch 2:  14%|█▎        | 116/844 [01:36<10:08,  1.20it/s]

batch loss: 0.4892


Epoch 2:  14%|█▍        | 117/844 [01:37<10:08,  1.19it/s]

batch loss: 0.4724


Epoch 2:  14%|█▍        | 118/844 [01:38<10:07,  1.19it/s]

batch loss: 0.4537


Epoch 2:  14%|█▍        | 119/844 [01:39<10:06,  1.20it/s]

batch loss: 0.5203


Epoch 2:  14%|█▍        | 120/844 [01:40<10:06,  1.19it/s]

batch loss: 0.4492


Epoch 2:  14%|█▍        | 121/844 [01:40<10:04,  1.20it/s]

batch loss: 0.4920


Epoch 2:  14%|█▍        | 122/844 [01:41<10:03,  1.20it/s]

batch loss: 0.5190


Epoch 2:  15%|█▍        | 123/844 [01:42<10:05,  1.19it/s]

batch loss: 0.5230


Epoch 2:  15%|█▍        | 124/844 [01:43<10:05,  1.19it/s]

batch loss: 0.4523


Epoch 2:  15%|█▍        | 125/844 [01:44<10:06,  1.19it/s]

batch loss: 0.5125


Epoch 2:  15%|█▍        | 126/844 [01:45<10:02,  1.19it/s]

batch loss: 0.4708


Epoch 2:  15%|█▌        | 127/844 [01:46<10:02,  1.19it/s]

batch loss: 0.4944


Epoch 2:  15%|█▌        | 128/844 [01:46<10:01,  1.19it/s]

batch loss: 0.4390


Epoch 2:  15%|█▌        | 129/844 [01:47<10:01,  1.19it/s]

batch loss: 0.5507


Epoch 2:  15%|█▌        | 130/844 [01:48<10:00,  1.19it/s]

batch loss: 0.3862


Epoch 2:  16%|█▌        | 131/844 [01:49<09:57,  1.19it/s]

batch loss: 0.5162


Epoch 2:  16%|█▌        | 132/844 [01:50<09:56,  1.19it/s]

batch loss: 0.3705


Epoch 2:  16%|█▌        | 133/844 [01:51<09:55,  1.19it/s]

batch loss: 0.5234


Epoch 2:  16%|█▌        | 134/844 [01:51<09:53,  1.20it/s]

batch loss: 0.4861


Epoch 2:  16%|█▌        | 135/844 [01:52<09:53,  1.20it/s]

batch loss: 0.4960


Epoch 2:  16%|█▌        | 136/844 [01:53<09:53,  1.19it/s]

batch loss: 0.4861


Epoch 2:  16%|█▌        | 137/844 [01:54<09:52,  1.19it/s]

batch loss: 0.4586


Epoch 2:  16%|█▋        | 138/844 [01:55<09:51,  1.19it/s]

batch loss: 0.5203


Epoch 2:  16%|█▋        | 139/844 [01:56<09:50,  1.19it/s]

batch loss: 0.4546


Epoch 2:  17%|█▋        | 140/844 [01:56<09:50,  1.19it/s]

batch loss: 0.5195


Epoch 2:  17%|█▋        | 141/844 [01:57<09:52,  1.19it/s]

batch loss: 0.4639


Epoch 2:  17%|█▋        | 142/844 [01:58<09:53,  1.18it/s]

batch loss: 0.4395


Epoch 2:  17%|█▋        | 143/844 [01:59<09:51,  1.18it/s]

batch loss: 0.4533


Epoch 2:  17%|█▋        | 144/844 [02:00<09:49,  1.19it/s]

batch loss: 0.4548


Epoch 2:  17%|█▋        | 145/844 [02:01<09:46,  1.19it/s]

batch loss: 0.4508


Epoch 2:  17%|█▋        | 146/844 [02:01<09:44,  1.19it/s]

batch loss: 0.4819


Epoch 2:  17%|█▋        | 147/844 [02:02<09:43,  1.19it/s]

batch loss: 0.5091


Epoch 2:  18%|█▊        | 148/844 [02:03<09:43,  1.19it/s]

batch loss: 0.4560


Epoch 2:  18%|█▊        | 149/844 [02:04<09:42,  1.19it/s]

batch loss: 0.4700


Epoch 2:  18%|█▊        | 150/844 [02:05<09:41,  1.19it/s]

batch loss: 0.5280


Epoch 2:  18%|█▊        | 151/844 [02:06<09:38,  1.20it/s]

batch loss: 0.5362


Epoch 2:  18%|█▊        | 152/844 [02:06<09:38,  1.20it/s]

batch loss: 0.5272


Epoch 2:  18%|█▊        | 153/844 [02:07<09:39,  1.19it/s]

batch loss: 0.4956


Epoch 2:  18%|█▊        | 154/844 [02:08<09:37,  1.19it/s]

batch loss: 0.4698


Epoch 2:  18%|█▊        | 155/844 [02:09<09:38,  1.19it/s]

batch loss: 0.4022


Epoch 2:  18%|█▊        | 156/844 [02:10<09:36,  1.19it/s]

batch loss: 0.4858


Epoch 2:  19%|█▊        | 157/844 [02:11<09:36,  1.19it/s]

batch loss: 0.4088


Epoch 2:  19%|█▊        | 158/844 [02:12<09:36,  1.19it/s]

batch loss: 0.5694


Epoch 2:  19%|█▉        | 159/844 [02:12<09:35,  1.19it/s]

batch loss: 0.4999


Epoch 2:  19%|█▉        | 160/844 [02:13<09:35,  1.19it/s]

batch loss: 0.4895


Epoch 2:  19%|█▉        | 161/844 [02:14<09:33,  1.19it/s]

batch loss: 0.4269


Epoch 2:  19%|█▉        | 162/844 [02:15<09:31,  1.19it/s]

batch loss: 0.4465


Epoch 2:  19%|█▉        | 163/844 [02:16<09:31,  1.19it/s]

batch loss: 0.4603


Epoch 2:  19%|█▉        | 164/844 [02:17<09:29,  1.19it/s]

batch loss: 0.5189


Epoch 2:  20%|█▉        | 165/844 [02:17<09:28,  1.19it/s]

batch loss: 0.3864


Epoch 2:  20%|█▉        | 166/844 [02:18<09:26,  1.20it/s]

batch loss: 0.4902


Epoch 2:  20%|█▉        | 167/844 [02:19<09:25,  1.20it/s]

batch loss: 0.5263


Epoch 2:  20%|█▉        | 168/844 [02:20<09:24,  1.20it/s]

batch loss: 0.5343


Epoch 2:  20%|██        | 169/844 [02:21<09:24,  1.20it/s]

batch loss: 0.4984


Epoch 2:  20%|██        | 170/844 [02:22<09:23,  1.20it/s]

batch loss: 0.4954


Epoch 2:  20%|██        | 171/844 [02:22<09:21,  1.20it/s]

batch loss: 0.4938


Epoch 2:  20%|██        | 172/844 [02:23<09:22,  1.19it/s]

batch loss: 0.5545


Epoch 2:  20%|██        | 173/844 [02:24<09:23,  1.19it/s]

batch loss: 0.5054


Epoch 2:  21%|██        | 174/844 [02:25<09:20,  1.19it/s]

batch loss: 0.4692


Epoch 2:  21%|██        | 175/844 [02:26<09:21,  1.19it/s]

batch loss: 0.3753


Epoch 2:  21%|██        | 176/844 [02:27<09:19,  1.19it/s]

batch loss: 0.4749


Epoch 2:  21%|██        | 177/844 [02:27<09:18,  1.19it/s]

batch loss: 0.4789


Epoch 2:  21%|██        | 178/844 [02:28<09:17,  1.20it/s]

batch loss: 0.4831


Epoch 2:  21%|██        | 179/844 [02:29<09:17,  1.19it/s]

batch loss: 0.4697


Epoch 2:  21%|██▏       | 180/844 [02:30<09:15,  1.20it/s]

batch loss: 0.4728


Epoch 2:  21%|██▏       | 181/844 [02:31<09:13,  1.20it/s]

batch loss: 0.4146


Epoch 2:  22%|██▏       | 182/844 [02:32<09:12,  1.20it/s]

batch loss: 0.5038


Epoch 2:  22%|██▏       | 183/844 [02:32<09:13,  1.19it/s]

batch loss: 0.3896


Epoch 2:  22%|██▏       | 184/844 [02:33<09:10,  1.20it/s]

batch loss: 0.4598


Epoch 2:  22%|██▏       | 185/844 [02:34<09:09,  1.20it/s]

batch loss: 0.5079


Epoch 2:  22%|██▏       | 186/844 [02:35<09:07,  1.20it/s]

batch loss: 0.4785


Epoch 2:  22%|██▏       | 187/844 [02:36<09:06,  1.20it/s]

batch loss: 0.4277


Epoch 2:  22%|██▏       | 188/844 [02:37<09:06,  1.20it/s]

batch loss: 0.5256


Epoch 2:  22%|██▏       | 189/844 [02:37<09:07,  1.20it/s]

batch loss: 0.4621


Epoch 2:  23%|██▎       | 190/844 [02:38<09:08,  1.19it/s]

batch loss: 0.4473


Epoch 2:  23%|██▎       | 191/844 [02:39<09:05,  1.20it/s]

batch loss: 0.4187


Epoch 2:  23%|██▎       | 192/844 [02:40<09:04,  1.20it/s]

batch loss: 0.4822


Epoch 2:  23%|██▎       | 193/844 [02:41<09:03,  1.20it/s]

batch loss: 0.4721


Epoch 2:  23%|██▎       | 194/844 [02:42<09:00,  1.20it/s]

batch loss: 0.4429


Epoch 2:  23%|██▎       | 195/844 [02:42<09:00,  1.20it/s]

batch loss: 0.4315


Epoch 2:  23%|██▎       | 196/844 [02:43<08:59,  1.20it/s]

batch loss: 0.5033


Epoch 2:  23%|██▎       | 197/844 [02:44<08:59,  1.20it/s]

batch loss: 0.4587


Epoch 2:  23%|██▎       | 198/844 [02:45<08:58,  1.20it/s]

batch loss: 0.4730


Epoch 2:  24%|██▎       | 199/844 [02:46<08:57,  1.20it/s]

batch loss: 0.4480


Epoch 2:  24%|██▎       | 200/844 [02:47<08:56,  1.20it/s]

batch loss: 0.3803


Epoch 2:  24%|██▍       | 201/844 [02:47<08:56,  1.20it/s]

batch loss: 0.4623


Epoch 2:  24%|██▍       | 202/844 [02:48<08:55,  1.20it/s]

batch loss: 0.4672


Epoch 2:  24%|██▍       | 203/844 [02:49<08:54,  1.20it/s]

batch loss: 0.4311


Epoch 2:  24%|██▍       | 204/844 [02:50<08:52,  1.20it/s]

batch loss: 0.4034


Epoch 2:  24%|██▍       | 205/844 [02:51<08:54,  1.19it/s]

batch loss: 0.4886


Epoch 2:  24%|██▍       | 206/844 [02:52<08:55,  1.19it/s]

batch loss: 0.5646


Epoch 2:  25%|██▍       | 207/844 [02:52<08:56,  1.19it/s]

batch loss: 0.4214


Epoch 2:  25%|██▍       | 208/844 [02:53<08:56,  1.19it/s]

batch loss: 0.4557


Epoch 2:  25%|██▍       | 209/844 [02:54<08:53,  1.19it/s]

batch loss: 0.5159


Epoch 2:  25%|██▍       | 210/844 [02:55<08:51,  1.19it/s]

batch loss: 0.4740


Epoch 2:  25%|██▌       | 211/844 [02:56<08:49,  1.20it/s]

batch loss: 0.4615


Epoch 2:  25%|██▌       | 212/844 [02:57<08:47,  1.20it/s]

batch loss: 0.4769


Epoch 2:  25%|██▌       | 213/844 [02:57<08:45,  1.20it/s]

batch loss: 0.4995


Epoch 2:  25%|██▌       | 214/844 [02:58<08:43,  1.20it/s]

batch loss: 0.4769


Epoch 2:  25%|██▌       | 215/844 [02:59<08:43,  1.20it/s]

batch loss: 0.4929


Epoch 2:  26%|██▌       | 216/844 [03:00<08:42,  1.20it/s]

batch loss: 0.3996


Epoch 2:  26%|██▌       | 217/844 [03:01<08:41,  1.20it/s]

batch loss: 0.4720


Epoch 2:  26%|██▌       | 218/844 [03:02<08:40,  1.20it/s]

batch loss: 0.3703


Epoch 2:  26%|██▌       | 219/844 [03:02<08:39,  1.20it/s]

batch loss: 0.4648


Epoch 2:  26%|██▌       | 220/844 [03:03<08:39,  1.20it/s]

batch loss: 0.4512


Epoch 2:  26%|██▌       | 221/844 [03:04<08:41,  1.19it/s]

batch loss: 0.4750


Epoch 2:  26%|██▋       | 222/844 [03:05<08:39,  1.20it/s]

batch loss: 0.4473


Epoch 2:  26%|██▋       | 223/844 [03:06<08:37,  1.20it/s]

batch loss: 0.4221


Epoch 2:  27%|██▋       | 224/844 [03:07<08:38,  1.20it/s]

batch loss: 0.4600


Epoch 2:  27%|██▋       | 225/844 [03:07<08:35,  1.20it/s]

batch loss: 0.4393


Epoch 2:  27%|██▋       | 226/844 [03:08<08:35,  1.20it/s]

batch loss: 0.4942


Epoch 2:  27%|██▋       | 227/844 [03:09<08:33,  1.20it/s]

batch loss: 0.3882


Epoch 2:  27%|██▋       | 228/844 [03:10<08:31,  1.20it/s]

batch loss: 0.4834


Epoch 2:  27%|██▋       | 229/844 [03:11<08:31,  1.20it/s]

batch loss: 0.4266


Epoch 2:  27%|██▋       | 230/844 [03:12<08:29,  1.21it/s]

batch loss: 0.3541


Epoch 2:  27%|██▋       | 231/844 [03:12<08:28,  1.21it/s]

batch loss: 0.3824


Epoch 2:  27%|██▋       | 232/844 [03:13<08:28,  1.20it/s]

batch loss: 0.4487


Epoch 2:  28%|██▊       | 233/844 [03:14<08:28,  1.20it/s]

batch loss: 0.5473


Epoch 2:  28%|██▊       | 234/844 [03:15<08:27,  1.20it/s]

batch loss: 0.5333


Epoch 2:  28%|██▊       | 235/844 [03:16<08:25,  1.20it/s]

batch loss: 0.4474


Epoch 2:  28%|██▊       | 236/844 [03:17<08:24,  1.20it/s]

batch loss: 0.4529


Epoch 2:  28%|██▊       | 237/844 [03:17<08:24,  1.20it/s]

batch loss: 0.4513


Epoch 2:  28%|██▊       | 238/844 [03:18<08:25,  1.20it/s]

batch loss: 0.4063


Epoch 2:  28%|██▊       | 239/844 [03:19<08:26,  1.20it/s]

batch loss: 0.3706


Epoch 2:  28%|██▊       | 240/844 [03:20<08:24,  1.20it/s]

batch loss: 0.5027


Epoch 2:  29%|██▊       | 241/844 [03:21<08:22,  1.20it/s]

batch loss: 0.4980


Epoch 2:  29%|██▊       | 242/844 [03:22<08:20,  1.20it/s]

batch loss: 0.4948


Epoch 2:  29%|██▉       | 243/844 [03:22<08:20,  1.20it/s]

batch loss: 0.4538


Epoch 2:  29%|██▉       | 244/844 [03:23<08:19,  1.20it/s]

batch loss: 0.5393


Epoch 2:  29%|██▉       | 245/844 [03:24<08:18,  1.20it/s]

batch loss: 0.4312


Epoch 2:  29%|██▉       | 246/844 [03:25<08:17,  1.20it/s]

batch loss: 0.4698


Epoch 2:  29%|██▉       | 247/844 [03:26<08:17,  1.20it/s]

batch loss: 0.5742


Epoch 2:  29%|██▉       | 248/844 [03:27<08:16,  1.20it/s]

batch loss: 0.5290


Epoch 2:  30%|██▉       | 249/844 [03:27<08:14,  1.20it/s]

batch loss: 0.4413


Epoch 2:  30%|██▉       | 250/844 [03:28<08:13,  1.20it/s]

batch loss: 0.4875


Epoch 2:  30%|██▉       | 251/844 [03:29<08:12,  1.20it/s]

batch loss: 0.4701


Epoch 2:  30%|██▉       | 252/844 [03:30<08:12,  1.20it/s]

batch loss: 0.3875


Epoch 2:  30%|██▉       | 253/844 [03:31<08:12,  1.20it/s]

batch loss: 0.4221


Epoch 2:  30%|███       | 254/844 [03:32<08:12,  1.20it/s]

batch loss: 0.5124


Epoch 2:  30%|███       | 255/844 [03:32<08:12,  1.19it/s]

batch loss: 0.5587


Epoch 2:  30%|███       | 256/844 [03:33<08:11,  1.20it/s]

batch loss: 0.4654


Epoch 2:  30%|███       | 257/844 [03:34<08:10,  1.20it/s]

batch loss: 0.4910


Epoch 2:  31%|███       | 258/844 [03:35<08:11,  1.19it/s]

batch loss: 0.4437


Epoch 2:  31%|███       | 259/844 [03:36<08:09,  1.19it/s]

batch loss: 0.4971


Epoch 2:  31%|███       | 260/844 [03:37<08:08,  1.19it/s]

batch loss: 0.4492


Epoch 2:  31%|███       | 261/844 [03:37<08:08,  1.19it/s]

batch loss: 0.4215


Epoch 2:  31%|███       | 262/844 [03:38<08:06,  1.20it/s]

batch loss: 0.5329


Epoch 2:  31%|███       | 263/844 [03:39<08:03,  1.20it/s]

batch loss: 0.4572


Epoch 2:  31%|███▏      | 264/844 [03:40<08:02,  1.20it/s]

batch loss: 0.4626


Epoch 2:  31%|███▏      | 265/844 [03:41<08:02,  1.20it/s]

batch loss: 0.4506


Epoch 2:  32%|███▏      | 266/844 [03:42<08:03,  1.20it/s]

batch loss: 0.5089


Epoch 2:  32%|███▏      | 267/844 [03:42<08:01,  1.20it/s]

batch loss: 0.4692


Epoch 2:  32%|███▏      | 268/844 [03:43<08:00,  1.20it/s]

batch loss: 0.4473


Epoch 2:  32%|███▏      | 269/844 [03:44<08:00,  1.20it/s]

batch loss: 0.4640


Epoch 2:  32%|███▏      | 270/844 [03:45<07:59,  1.20it/s]

batch loss: 0.4911


Epoch 2:  32%|███▏      | 271/844 [03:46<07:59,  1.19it/s]

batch loss: 0.4357


Epoch 2:  32%|███▏      | 272/844 [03:47<07:59,  1.19it/s]

batch loss: 0.5290


Epoch 2:  32%|███▏      | 273/844 [03:48<08:00,  1.19it/s]

batch loss: 0.4301


Epoch 2:  32%|███▏      | 274/844 [03:48<07:57,  1.19it/s]

batch loss: 0.4476


Epoch 2:  33%|███▎      | 275/844 [03:49<07:55,  1.20it/s]

batch loss: 0.5051


Epoch 2:  33%|███▎      | 276/844 [03:50<07:54,  1.20it/s]

batch loss: 0.4990


Epoch 2:  33%|███▎      | 277/844 [03:51<07:53,  1.20it/s]

batch loss: 0.3910


Epoch 2:  33%|███▎      | 278/844 [03:52<07:52,  1.20it/s]

batch loss: 0.4146


Epoch 2:  33%|███▎      | 279/844 [03:53<07:51,  1.20it/s]

batch loss: 0.5021


Epoch 2:  33%|███▎      | 280/844 [03:53<07:49,  1.20it/s]

batch loss: 0.4490


Epoch 2:  33%|███▎      | 281/844 [03:54<07:49,  1.20it/s]

batch loss: 0.4474


Epoch 2:  33%|███▎      | 282/844 [03:55<07:50,  1.19it/s]

batch loss: 0.4245


Epoch 2:  34%|███▎      | 283/844 [03:56<07:48,  1.20it/s]

batch loss: 0.4374


Epoch 2:  34%|███▎      | 284/844 [03:57<07:47,  1.20it/s]

batch loss: 0.4508


Epoch 2:  34%|███▍      | 285/844 [03:58<07:48,  1.19it/s]

batch loss: 0.4840


Epoch 2:  34%|███▍      | 286/844 [03:58<07:49,  1.19it/s]

batch loss: 0.4568


Epoch 2:  34%|███▍      | 287/844 [03:59<07:50,  1.18it/s]

batch loss: 0.4947


Epoch 2:  34%|███▍      | 288/844 [04:00<07:48,  1.19it/s]

batch loss: 0.4030


Epoch 2:  34%|███▍      | 289/844 [04:01<07:47,  1.19it/s]

batch loss: 0.5230


Epoch 2:  34%|███▍      | 290/844 [04:02<07:45,  1.19it/s]

batch loss: 0.5138


Epoch 2:  34%|███▍      | 291/844 [04:03<07:44,  1.19it/s]

batch loss: 0.4777


Epoch 2:  35%|███▍      | 292/844 [04:03<07:43,  1.19it/s]

batch loss: 0.4250


Epoch 2:  35%|███▍      | 293/844 [04:04<07:43,  1.19it/s]

batch loss: 0.4414


Epoch 2:  35%|███▍      | 294/844 [04:05<07:42,  1.19it/s]

batch loss: 0.5395


Epoch 2:  35%|███▍      | 295/844 [04:06<07:40,  1.19it/s]

batch loss: 0.4613


Epoch 2:  35%|███▌      | 296/844 [04:07<07:38,  1.20it/s]

batch loss: 0.4371


Epoch 2:  35%|███▌      | 297/844 [04:08<07:36,  1.20it/s]

batch loss: 0.5115


Epoch 2:  35%|███▌      | 298/844 [04:08<07:35,  1.20it/s]

batch loss: 0.5162


Epoch 2:  35%|███▌      | 299/844 [04:09<07:36,  1.19it/s]

batch loss: 0.4598


Epoch 2:  36%|███▌      | 300/844 [04:10<07:34,  1.20it/s]

batch loss: 0.4863


Epoch 2:  36%|███▌      | 301/844 [04:11<07:32,  1.20it/s]

batch loss: 0.4634


Epoch 2:  36%|███▌      | 302/844 [04:12<07:31,  1.20it/s]

batch loss: 0.3770


Epoch 2:  36%|███▌      | 303/844 [04:13<07:30,  1.20it/s]

batch loss: 0.5195


Epoch 2:  36%|███▌      | 304/844 [04:13<07:30,  1.20it/s]

batch loss: 0.4702


Epoch 2:  36%|███▌      | 305/844 [04:14<07:32,  1.19it/s]

batch loss: 0.4839


Epoch 2:  36%|███▋      | 306/844 [04:15<07:30,  1.19it/s]

batch loss: 0.4368


Epoch 2:  36%|███▋      | 307/844 [04:16<07:28,  1.20it/s]

batch loss: 0.4721


Epoch 2:  36%|███▋      | 308/844 [04:17<07:29,  1.19it/s]

batch loss: 0.4805


Epoch 2:  37%|███▋      | 309/844 [04:18<07:28,  1.19it/s]

batch loss: 0.4705


Epoch 2:  37%|███▋      | 310/844 [04:18<07:27,  1.19it/s]

batch loss: 0.4671


Epoch 2:  37%|███▋      | 311/844 [04:19<07:26,  1.19it/s]

batch loss: 0.5022


Epoch 2:  37%|███▋      | 312/844 [04:20<07:27,  1.19it/s]

batch loss: 0.4488


Epoch 2:  37%|███▋      | 313/844 [04:21<07:24,  1.19it/s]

batch loss: 0.4234


Epoch 2:  37%|███▋      | 314/844 [04:22<07:21,  1.20it/s]

batch loss: 0.4902


Epoch 2:  37%|███▋      | 315/844 [04:23<07:21,  1.20it/s]

batch loss: 0.4553


Epoch 2:  37%|███▋      | 316/844 [04:24<07:22,  1.19it/s]

batch loss: 0.4822


Epoch 2:  38%|███▊      | 317/844 [04:24<07:20,  1.20it/s]

batch loss: 0.4577


Epoch 2:  38%|███▊      | 318/844 [04:25<07:19,  1.20it/s]

batch loss: 0.3955


Epoch 2:  38%|███▊      | 319/844 [04:26<07:19,  1.19it/s]

batch loss: 0.5501


Epoch 2:  38%|███▊      | 320/844 [04:27<07:17,  1.20it/s]

batch loss: 0.6023


Epoch 2:  38%|███▊      | 321/844 [04:28<07:16,  1.20it/s]

batch loss: 0.4318


Epoch 2:  38%|███▊      | 322/844 [04:29<07:17,  1.19it/s]

batch loss: 0.4258


Epoch 2:  38%|███▊      | 323/844 [04:29<07:18,  1.19it/s]

batch loss: 0.4921


Epoch 2:  38%|███▊      | 324/844 [04:30<07:16,  1.19it/s]

batch loss: 0.4820


Epoch 2:  39%|███▊      | 325/844 [04:31<07:16,  1.19it/s]

batch loss: 0.4564


Epoch 2:  39%|███▊      | 326/844 [04:32<07:13,  1.19it/s]

batch loss: 0.4732


Epoch 2:  39%|███▊      | 327/844 [04:33<07:13,  1.19it/s]

batch loss: 0.4808


Epoch 2:  39%|███▉      | 328/844 [04:34<07:13,  1.19it/s]

batch loss: 0.4621


Epoch 2:  39%|███▉      | 329/844 [04:34<07:10,  1.20it/s]

batch loss: 0.4268


Epoch 2:  39%|███▉      | 330/844 [04:35<07:09,  1.20it/s]

batch loss: 0.4319


Epoch 2:  39%|███▉      | 331/844 [04:36<07:09,  1.20it/s]

batch loss: 0.4143


Epoch 2:  39%|███▉      | 332/844 [04:37<07:07,  1.20it/s]

batch loss: 0.4328


Epoch 2:  39%|███▉      | 333/844 [04:38<07:05,  1.20it/s]

batch loss: 0.4751


Epoch 2:  40%|███▉      | 334/844 [04:39<07:04,  1.20it/s]

batch loss: 0.4603


Epoch 2:  40%|███▉      | 335/844 [04:39<07:04,  1.20it/s]

batch loss: 0.5065


Epoch 2:  40%|███▉      | 336/844 [04:40<07:02,  1.20it/s]

batch loss: 0.4332


Epoch 2:  40%|███▉      | 337/844 [04:41<07:04,  1.19it/s]

batch loss: 0.4877


Epoch 2:  40%|████      | 338/844 [04:42<07:03,  1.19it/s]

batch loss: 0.3708


Epoch 2:  40%|████      | 339/844 [04:43<07:01,  1.20it/s]

batch loss: 0.3994


Epoch 2:  40%|████      | 340/844 [04:44<07:01,  1.20it/s]

batch loss: 0.5121


Epoch 2:  40%|████      | 341/844 [04:44<07:00,  1.20it/s]

batch loss: 0.4344


Epoch 2:  41%|████      | 342/844 [04:45<07:00,  1.19it/s]

batch loss: 0.5136


Epoch 2:  41%|████      | 343/844 [04:46<06:58,  1.20it/s]

batch loss: 0.4588


Epoch 2:  41%|████      | 344/844 [04:47<06:57,  1.20it/s]

batch loss: 0.4439


Epoch 2:  41%|████      | 345/844 [04:48<06:56,  1.20it/s]

batch loss: 0.4443


Epoch 2:  41%|████      | 346/844 [04:49<06:55,  1.20it/s]

batch loss: 0.4912


Epoch 2:  41%|████      | 347/844 [04:49<06:53,  1.20it/s]

batch loss: 0.5316


Epoch 2:  41%|████      | 348/844 [04:50<06:52,  1.20it/s]

batch loss: 0.4811


Epoch 2:  41%|████▏     | 349/844 [04:51<06:51,  1.20it/s]

batch loss: 0.4405


Epoch 2:  41%|████▏     | 350/844 [04:52<06:49,  1.20it/s]

batch loss: 0.4606


Epoch 2:  42%|████▏     | 351/844 [04:53<06:49,  1.20it/s]

batch loss: 0.6007


Epoch 2:  42%|████▏     | 352/844 [04:54<06:49,  1.20it/s]

batch loss: 0.5035


Epoch 2:  42%|████▏     | 353/844 [04:54<06:49,  1.20it/s]

batch loss: 0.4565


Epoch 2:  42%|████▏     | 354/844 [04:55<06:48,  1.20it/s]

batch loss: 0.4311


Epoch 2:  42%|████▏     | 355/844 [04:56<06:49,  1.19it/s]

batch loss: 0.5367


Epoch 2:  42%|████▏     | 356/844 [04:57<06:47,  1.20it/s]

batch loss: 0.4974


Epoch 2:  42%|████▏     | 357/844 [04:58<06:45,  1.20it/s]

batch loss: 0.4942


Epoch 2:  42%|████▏     | 358/844 [04:59<06:44,  1.20it/s]

batch loss: 0.4670


Epoch 2:  43%|████▎     | 359/844 [04:59<06:44,  1.20it/s]

batch loss: 0.4517


Epoch 2:  43%|████▎     | 360/844 [05:00<06:42,  1.20it/s]

batch loss: 0.5017


Epoch 2:  43%|████▎     | 361/844 [05:01<06:42,  1.20it/s]

batch loss: 0.4592


Epoch 2:  43%|████▎     | 362/844 [05:02<06:42,  1.20it/s]

batch loss: 0.4432


Epoch 2:  43%|████▎     | 363/844 [05:03<06:40,  1.20it/s]

batch loss: 0.5667


Epoch 2:  43%|████▎     | 364/844 [05:04<06:39,  1.20it/s]

batch loss: 0.4674


Epoch 2:  43%|████▎     | 365/844 [05:04<06:38,  1.20it/s]

batch loss: 0.4995


Epoch 2:  43%|████▎     | 366/844 [05:05<06:37,  1.20it/s]

batch loss: 0.5152


Epoch 2:  43%|████▎     | 367/844 [05:06<06:36,  1.20it/s]

batch loss: 0.4510


Epoch 2:  44%|████▎     | 368/844 [05:07<06:35,  1.20it/s]

batch loss: 0.4351


Epoch 2:  44%|████▎     | 369/844 [05:08<06:34,  1.20it/s]

batch loss: 0.5005


Epoch 2:  44%|████▍     | 370/844 [05:09<06:33,  1.20it/s]

batch loss: 0.3815


Epoch 2:  44%|████▍     | 371/844 [05:09<06:33,  1.20it/s]

batch loss: 0.4815


Epoch 2:  44%|████▍     | 372/844 [05:10<06:34,  1.20it/s]

batch loss: 0.4835


Epoch 2:  44%|████▍     | 373/844 [05:11<06:35,  1.19it/s]

batch loss: 0.4428


Epoch 2:  44%|████▍     | 374/844 [05:12<06:33,  1.20it/s]

batch loss: 0.5213


Epoch 2:  44%|████▍     | 375/844 [05:13<06:31,  1.20it/s]

batch loss: 0.3651


Epoch 2:  45%|████▍     | 376/844 [05:14<06:30,  1.20it/s]

batch loss: 0.5207


Epoch 2:  45%|████▍     | 377/844 [05:14<06:29,  1.20it/s]

batch loss: 0.4390


Epoch 2:  45%|████▍     | 378/844 [05:15<06:27,  1.20it/s]

batch loss: 0.4777


Epoch 2:  45%|████▍     | 379/844 [05:16<06:26,  1.20it/s]

batch loss: 0.4382


Epoch 2:  45%|████▌     | 380/844 [05:17<06:26,  1.20it/s]

batch loss: 0.5321


Epoch 2:  45%|████▌     | 381/844 [05:18<06:24,  1.20it/s]

batch loss: 0.4977


Epoch 2:  45%|████▌     | 382/844 [05:19<06:23,  1.21it/s]

batch loss: 0.5440


Epoch 2:  45%|████▌     | 383/844 [05:19<06:22,  1.20it/s]

batch loss: 0.4381


Epoch 2:  45%|████▌     | 384/844 [05:20<06:22,  1.20it/s]

batch loss: 0.5048


Epoch 2:  46%|████▌     | 385/844 [05:21<06:22,  1.20it/s]

batch loss: 0.5130


Epoch 2:  46%|████▌     | 386/844 [05:22<06:22,  1.20it/s]

batch loss: 0.4672


Epoch 2:  46%|████▌     | 387/844 [05:23<06:21,  1.20it/s]

batch loss: 0.4580


Epoch 2:  46%|████▌     | 388/844 [05:24<06:22,  1.19it/s]

batch loss: 0.4922


Epoch 2:  46%|████▌     | 389/844 [05:24<06:19,  1.20it/s]

batch loss: 0.4895


Epoch 2:  46%|████▌     | 390/844 [05:25<06:18,  1.20it/s]

batch loss: 0.4303


Epoch 2:  46%|████▋     | 391/844 [05:26<06:17,  1.20it/s]

batch loss: 0.5063


Epoch 2:  46%|████▋     | 392/844 [05:27<06:16,  1.20it/s]

batch loss: 0.4653


Epoch 2:  47%|████▋     | 393/844 [05:28<06:15,  1.20it/s]

batch loss: 0.4283


Epoch 2:  47%|████▋     | 394/844 [05:29<06:15,  1.20it/s]

batch loss: 0.4295


Epoch 2:  47%|████▋     | 395/844 [05:29<06:13,  1.20it/s]

batch loss: 0.4930


Epoch 2:  47%|████▋     | 396/844 [05:30<06:13,  1.20it/s]

batch loss: 0.5300


Epoch 2:  47%|████▋     | 397/844 [05:31<06:11,  1.20it/s]

batch loss: 0.4844


Epoch 2:  47%|████▋     | 398/844 [05:32<06:11,  1.20it/s]

batch loss: 0.5354


Epoch 2:  47%|████▋     | 399/844 [05:33<06:10,  1.20it/s]

batch loss: 0.4909


Epoch 2:  47%|████▋     | 400/844 [05:34<06:10,  1.20it/s]

batch loss: 0.5106


Epoch 2:  48%|████▊     | 401/844 [05:34<06:08,  1.20it/s]

batch loss: 0.4916


Epoch 2:  48%|████▊     | 402/844 [05:35<06:07,  1.20it/s]

batch loss: 0.4410


Epoch 2:  48%|████▊     | 403/844 [05:36<06:08,  1.20it/s]

batch loss: 0.4527


Epoch 2:  48%|████▊     | 404/844 [05:37<06:09,  1.19it/s]

batch loss: 0.4634


Epoch 2:  48%|████▊     | 405/844 [05:38<06:07,  1.20it/s]

batch loss: 0.3931


Epoch 2:  48%|████▊     | 406/844 [05:39<06:06,  1.19it/s]

batch loss: 0.4940


Epoch 2:  48%|████▊     | 407/844 [05:39<06:05,  1.20it/s]

batch loss: 0.4795


Epoch 2:  48%|████▊     | 408/844 [05:40<06:04,  1.20it/s]

batch loss: 0.4039


Epoch 2:  48%|████▊     | 409/844 [05:41<06:03,  1.20it/s]

batch loss: 0.4659


Epoch 2:  49%|████▊     | 410/844 [05:42<06:01,  1.20it/s]

batch loss: 0.5449


Epoch 2:  49%|████▊     | 411/844 [05:43<06:00,  1.20it/s]

batch loss: 0.5127


Epoch 2:  49%|████▉     | 412/844 [05:44<05:59,  1.20it/s]

batch loss: 0.5668


Epoch 2:  49%|████▉     | 413/844 [05:44<05:58,  1.20it/s]

batch loss: 0.4966


Epoch 2:  49%|████▉     | 414/844 [05:45<05:58,  1.20it/s]

batch loss: 0.4029


Epoch 2:  49%|████▉     | 415/844 [05:46<05:56,  1.20it/s]

batch loss: 0.5044


Epoch 2:  49%|████▉     | 416/844 [05:47<05:55,  1.20it/s]

batch loss: 0.4125


Epoch 2:  49%|████▉     | 417/844 [05:48<05:55,  1.20it/s]

batch loss: 0.4565


Epoch 2:  50%|████▉     | 418/844 [05:49<05:53,  1.20it/s]

batch loss: 0.5084


Epoch 2:  50%|████▉     | 419/844 [05:49<05:52,  1.20it/s]

batch loss: 0.4401


Epoch 2:  50%|████▉     | 420/844 [05:50<05:54,  1.20it/s]

batch loss: 0.4950


Epoch 2:  50%|████▉     | 421/844 [05:51<05:52,  1.20it/s]

batch loss: 0.4962


Epoch 2:  50%|█████     | 422/844 [05:52<05:51,  1.20it/s]

batch loss: 0.4490


Epoch 2:  50%|█████     | 423/844 [05:53<05:49,  1.20it/s]

batch loss: 0.4300


Epoch 2:  50%|█████     | 424/844 [05:54<05:49,  1.20it/s]

batch loss: 0.5207


Epoch 2:  50%|█████     | 425/844 [05:54<05:48,  1.20it/s]

batch loss: 0.4878


Epoch 2:  50%|█████     | 426/844 [05:55<05:47,  1.20it/s]

batch loss: 0.4929


Epoch 2:  51%|█████     | 427/844 [05:56<05:46,  1.20it/s]

batch loss: 0.5763


Epoch 2:  51%|█████     | 428/844 [05:57<05:46,  1.20it/s]

batch loss: 0.3796


Epoch 2:  51%|█████     | 429/844 [05:58<05:45,  1.20it/s]

batch loss: 0.5085


Epoch 2:  51%|█████     | 430/844 [05:59<05:45,  1.20it/s]

batch loss: 0.4532


Epoch 2:  51%|█████     | 431/844 [05:59<05:44,  1.20it/s]

batch loss: 0.5135


Epoch 2:  51%|█████     | 432/844 [06:00<05:43,  1.20it/s]

batch loss: 0.4933


Epoch 2:  51%|█████▏    | 433/844 [06:01<05:42,  1.20it/s]

batch loss: 0.3836


Epoch 2:  51%|█████▏    | 434/844 [06:02<05:40,  1.20it/s]

batch loss: 0.4544


Epoch 2:  52%|█████▏    | 435/844 [06:03<05:40,  1.20it/s]

batch loss: 0.5091


Epoch 2:  52%|█████▏    | 436/844 [06:04<05:41,  1.20it/s]

batch loss: 0.4054


Epoch 2:  52%|█████▏    | 437/844 [06:04<05:40,  1.19it/s]

batch loss: 0.4888


Epoch 2:  52%|█████▏    | 438/844 [06:05<05:39,  1.20it/s]

batch loss: 0.4028


Epoch 2:  52%|█████▏    | 439/844 [06:06<05:38,  1.20it/s]

batch loss: 0.5047


Epoch 2:  52%|█████▏    | 440/844 [06:07<05:37,  1.20it/s]

batch loss: 0.4956


Epoch 2:  52%|█████▏    | 441/844 [06:08<05:36,  1.20it/s]

batch loss: 0.5237


Epoch 2:  52%|█████▏    | 442/844 [06:09<05:36,  1.19it/s]

batch loss: 0.4162


Epoch 2:  52%|█████▏    | 443/844 [06:09<05:35,  1.20it/s]

batch loss: 0.4659


Epoch 2:  53%|█████▎    | 444/844 [06:10<05:33,  1.20it/s]

batch loss: 0.3935


Epoch 2:  53%|█████▎    | 445/844 [06:11<05:32,  1.20it/s]

batch loss: 0.4528


Epoch 2:  53%|█████▎    | 446/844 [06:12<05:33,  1.19it/s]

batch loss: 0.4306


Epoch 2:  53%|█████▎    | 447/844 [06:13<05:31,  1.20it/s]

batch loss: 0.4105


Epoch 2:  53%|█████▎    | 448/844 [06:14<05:29,  1.20it/s]

batch loss: 0.4512


Epoch 2:  53%|█████▎    | 449/844 [06:14<05:27,  1.20it/s]

batch loss: 0.4113


Epoch 2:  53%|█████▎    | 450/844 [06:15<05:27,  1.20it/s]

batch loss: 0.4699


Epoch 2:  53%|█████▎    | 451/844 [06:16<05:26,  1.20it/s]

batch loss: 0.4705


Epoch 2:  54%|█████▎    | 452/844 [06:17<05:27,  1.20it/s]

batch loss: 0.4119


Epoch 2:  54%|█████▎    | 453/844 [06:18<05:26,  1.20it/s]

batch loss: 0.4848


Epoch 2:  54%|█████▍    | 454/844 [06:19<05:27,  1.19it/s]

batch loss: 0.5690


Epoch 2:  54%|█████▍    | 455/844 [06:19<05:26,  1.19it/s]

batch loss: 0.5323


Epoch 2:  54%|█████▍    | 456/844 [06:20<05:24,  1.20it/s]

batch loss: 0.4927


Epoch 2:  54%|█████▍    | 457/844 [06:21<05:23,  1.20it/s]

batch loss: 0.5176


Epoch 2:  54%|█████▍    | 458/844 [06:22<05:21,  1.20it/s]

batch loss: 0.4639


Epoch 2:  54%|█████▍    | 459/844 [06:23<05:20,  1.20it/s]

batch loss: 0.4800


Epoch 2:  55%|█████▍    | 460/844 [06:24<05:20,  1.20it/s]

batch loss: 0.4746


Epoch 2:  55%|█████▍    | 461/844 [06:24<05:19,  1.20it/s]

batch loss: 0.5122


Epoch 2:  55%|█████▍    | 462/844 [06:25<05:18,  1.20it/s]

batch loss: 0.5352


Epoch 2:  55%|█████▍    | 463/844 [06:26<05:17,  1.20it/s]

batch loss: 0.4740


Epoch 2:  55%|█████▍    | 464/844 [06:27<05:16,  1.20it/s]

batch loss: 0.5328


Epoch 2:  55%|█████▌    | 465/844 [06:28<05:17,  1.19it/s]

batch loss: 0.4478


Epoch 2:  55%|█████▌    | 466/844 [06:29<05:15,  1.20it/s]

batch loss: 0.4491


Epoch 2:  55%|█████▌    | 467/844 [06:29<05:14,  1.20it/s]

batch loss: 0.4632


Epoch 2:  55%|█████▌    | 468/844 [06:30<05:13,  1.20it/s]

batch loss: 0.4228


Epoch 2:  56%|█████▌    | 469/844 [06:31<05:13,  1.20it/s]

batch loss: 0.3939


Epoch 2:  56%|█████▌    | 470/844 [06:32<05:14,  1.19it/s]

batch loss: 0.4420


Epoch 2:  56%|█████▌    | 471/844 [06:33<05:12,  1.19it/s]

batch loss: 0.4954


Epoch 2:  56%|█████▌    | 472/844 [06:34<05:12,  1.19it/s]

batch loss: 0.5063


Epoch 2:  56%|█████▌    | 473/844 [06:34<05:10,  1.19it/s]

batch loss: 0.4191


Epoch 2:  56%|█████▌    | 474/844 [06:35<05:09,  1.20it/s]

batch loss: 0.4777


Epoch 2:  56%|█████▋    | 475/844 [06:36<05:07,  1.20it/s]

batch loss: 0.4743


Epoch 2:  56%|█████▋    | 476/844 [06:37<05:06,  1.20it/s]

batch loss: 0.5213


Epoch 2:  57%|█████▋    | 477/844 [06:38<05:06,  1.20it/s]

batch loss: 0.4611


Epoch 2:  57%|█████▋    | 478/844 [06:39<05:05,  1.20it/s]

batch loss: 0.4817


Epoch 2:  57%|█████▋    | 479/844 [06:39<05:05,  1.20it/s]

batch loss: 0.4566


Epoch 2:  57%|█████▋    | 480/844 [06:40<05:03,  1.20it/s]

batch loss: 0.4498


Epoch 2:  57%|█████▋    | 481/844 [06:41<05:02,  1.20it/s]

batch loss: 0.3365


Epoch 2:  57%|█████▋    | 482/844 [06:42<05:02,  1.20it/s]

batch loss: 0.4810


Epoch 2:  57%|█████▋    | 483/844 [06:43<05:00,  1.20it/s]

batch loss: 0.4652


Epoch 2:  57%|█████▋    | 484/844 [06:44<05:00,  1.20it/s]

batch loss: 0.3545


Epoch 2:  57%|█████▋    | 485/844 [06:45<04:59,  1.20it/s]

batch loss: 0.4276


Epoch 2:  58%|█████▊    | 486/844 [06:45<04:58,  1.20it/s]

batch loss: 0.4528


Epoch 2:  58%|█████▊    | 487/844 [06:46<04:58,  1.20it/s]

batch loss: 0.4202


Epoch 2:  58%|█████▊    | 488/844 [06:47<04:57,  1.20it/s]

batch loss: 0.4443


Epoch 2:  58%|█████▊    | 489/844 [06:48<04:55,  1.20it/s]

batch loss: 0.5374


Epoch 2:  58%|█████▊    | 490/844 [06:49<04:54,  1.20it/s]

batch loss: 0.4336


Epoch 2:  58%|█████▊    | 491/844 [06:50<04:54,  1.20it/s]

batch loss: 0.5198


Epoch 2:  58%|█████▊    | 492/844 [06:50<04:54,  1.20it/s]

batch loss: 0.4986


Epoch 2:  58%|█████▊    | 493/844 [06:51<04:52,  1.20it/s]

batch loss: 0.4314


Epoch 2:  59%|█████▊    | 494/844 [06:52<04:51,  1.20it/s]

batch loss: 0.4323


Epoch 2:  59%|█████▊    | 495/844 [06:53<04:50,  1.20it/s]

batch loss: 0.4154


Epoch 2:  59%|█████▉    | 496/844 [06:54<04:49,  1.20it/s]

batch loss: 0.4322


Epoch 2:  59%|█████▉    | 497/844 [06:54<04:48,  1.20it/s]

batch loss: 0.4284


Epoch 2:  59%|█████▉    | 498/844 [06:55<04:47,  1.21it/s]

batch loss: 0.5323


Epoch 2:  59%|█████▉    | 499/844 [06:56<04:46,  1.20it/s]

batch loss: 0.5047


Epoch 2:  59%|█████▉    | 500/844 [06:57<04:45,  1.21it/s]

batch loss: 0.4655


Epoch 2:  59%|█████▉    | 501/844 [06:58<04:46,  1.20it/s]

batch loss: 0.4678


Epoch 2:  59%|█████▉    | 502/844 [06:59<04:45,  1.20it/s]

batch loss: 0.5322


Epoch 2:  60%|█████▉    | 503/844 [06:59<04:44,  1.20it/s]

batch loss: 0.5036


Epoch 2:  60%|█████▉    | 504/844 [07:00<04:45,  1.19it/s]

batch loss: 0.5038


Epoch 2:  60%|█████▉    | 505/844 [07:01<04:43,  1.20it/s]

batch loss: 0.5750


Epoch 2:  60%|█████▉    | 506/844 [07:02<04:41,  1.20it/s]

batch loss: 0.4700


Epoch 2:  60%|██████    | 507/844 [07:03<04:41,  1.20it/s]

batch loss: 0.5335


Epoch 2:  60%|██████    | 508/844 [07:04<04:39,  1.20it/s]

batch loss: 0.4869


Epoch 2:  60%|██████    | 509/844 [07:05<04:39,  1.20it/s]

batch loss: 0.5457


Epoch 2:  60%|██████    | 510/844 [07:05<04:38,  1.20it/s]

batch loss: 0.4130


Epoch 2:  61%|██████    | 511/844 [07:06<04:38,  1.20it/s]

batch loss: 0.4535


Epoch 2:  61%|██████    | 512/844 [07:07<04:36,  1.20it/s]

batch loss: 0.5263


Epoch 2:  61%|██████    | 513/844 [07:08<04:36,  1.20it/s]

batch loss: 0.5375


Epoch 2:  61%|██████    | 514/844 [07:09<04:35,  1.20it/s]

batch loss: 0.4195


Epoch 2:  61%|██████    | 515/844 [07:10<04:35,  1.20it/s]

batch loss: 0.4884


Epoch 2:  61%|██████    | 516/844 [07:10<04:33,  1.20it/s]

batch loss: 0.5467


Epoch 2:  61%|██████▏   | 517/844 [07:11<04:32,  1.20it/s]

batch loss: 0.5067


Epoch 2:  61%|██████▏   | 518/844 [07:12<04:32,  1.20it/s]

batch loss: 0.4380


Epoch 2:  61%|██████▏   | 519/844 [07:13<04:33,  1.19it/s]

batch loss: 0.5571


Epoch 2:  62%|██████▏   | 520/844 [07:14<04:31,  1.19it/s]

batch loss: 0.4694


Epoch 2:  62%|██████▏   | 521/844 [07:15<04:30,  1.20it/s]

batch loss: 0.4731


Epoch 2:  62%|██████▏   | 522/844 [07:15<04:30,  1.19it/s]

batch loss: 0.4937


Epoch 2:  62%|██████▏   | 523/844 [07:16<04:28,  1.20it/s]

batch loss: 0.5115


Epoch 2:  62%|██████▏   | 524/844 [07:17<04:27,  1.20it/s]

batch loss: 0.4771


Epoch 2:  62%|██████▏   | 525/844 [07:18<04:26,  1.20it/s]

batch loss: 0.4525


Epoch 2:  62%|██████▏   | 526/844 [07:19<04:25,  1.20it/s]

batch loss: 0.4396


Epoch 2:  62%|██████▏   | 527/844 [07:20<04:24,  1.20it/s]

batch loss: 0.4784


Epoch 2:  63%|██████▎   | 528/844 [07:20<04:22,  1.20it/s]

batch loss: 0.4857


Epoch 2:  63%|██████▎   | 529/844 [07:21<04:22,  1.20it/s]

batch loss: 0.4244


Epoch 2:  63%|██████▎   | 530/844 [07:22<04:21,  1.20it/s]

batch loss: 0.4764


Epoch 2:  63%|██████▎   | 531/844 [07:23<04:20,  1.20it/s]

batch loss: 0.5740


Epoch 2:  63%|██████▎   | 532/844 [07:24<04:20,  1.20it/s]

batch loss: 0.4384


Epoch 2:  63%|██████▎   | 533/844 [07:25<04:19,  1.20it/s]

batch loss: 0.4675


Epoch 2:  63%|██████▎   | 534/844 [07:25<04:19,  1.19it/s]

batch loss: 0.4958


Epoch 2:  63%|██████▎   | 535/844 [07:26<04:19,  1.19it/s]

batch loss: 0.4806


Epoch 2:  64%|██████▎   | 536/844 [07:27<04:17,  1.19it/s]

batch loss: 0.4312


Epoch 2:  64%|██████▎   | 537/844 [07:28<04:17,  1.19it/s]

batch loss: 0.4384


Epoch 2:  64%|██████▎   | 538/844 [07:29<04:15,  1.20it/s]

batch loss: 0.4732


Epoch 2:  64%|██████▍   | 539/844 [07:30<04:15,  1.19it/s]

batch loss: 0.4414


Epoch 2:  64%|██████▍   | 540/844 [07:30<04:14,  1.20it/s]

batch loss: 0.5184


Epoch 2:  64%|██████▍   | 541/844 [07:31<04:13,  1.20it/s]

batch loss: 0.4102


Epoch 2:  64%|██████▍   | 542/844 [07:32<04:12,  1.20it/s]

batch loss: 0.6090


Epoch 2:  64%|██████▍   | 543/844 [07:33<04:11,  1.20it/s]

batch loss: 0.4623


Epoch 2:  64%|██████▍   | 544/844 [07:34<04:09,  1.20it/s]

batch loss: 0.4982


Epoch 2:  65%|██████▍   | 545/844 [07:35<04:09,  1.20it/s]

batch loss: 0.4704


Epoch 2:  65%|██████▍   | 546/844 [07:35<04:08,  1.20it/s]

batch loss: 0.4492


Epoch 2:  65%|██████▍   | 547/844 [07:36<04:07,  1.20it/s]

batch loss: 0.4592


Epoch 2:  65%|██████▍   | 548/844 [07:37<04:07,  1.19it/s]

batch loss: 0.4240


Epoch 2:  65%|██████▌   | 549/844 [07:38<04:06,  1.20it/s]

batch loss: 0.5045


Epoch 2:  65%|██████▌   | 550/844 [07:39<04:05,  1.20it/s]

batch loss: 0.4369


Epoch 2:  65%|██████▌   | 551/844 [07:40<04:05,  1.19it/s]

batch loss: 0.5006


Epoch 2:  65%|██████▌   | 552/844 [07:40<04:04,  1.19it/s]

batch loss: 0.5246


Epoch 2:  66%|██████▌   | 553/844 [07:41<04:04,  1.19it/s]

batch loss: 0.4625


Epoch 2:  66%|██████▌   | 554/844 [07:42<04:02,  1.19it/s]

batch loss: 0.4164


Epoch 2:  66%|██████▌   | 555/844 [07:43<04:01,  1.20it/s]

batch loss: 0.5004


Epoch 2:  66%|██████▌   | 556/844 [07:44<04:00,  1.20it/s]

batch loss: 0.5231


Epoch 2:  66%|██████▌   | 557/844 [07:45<03:59,  1.20it/s]

batch loss: 0.4299


Epoch 2:  66%|██████▌   | 558/844 [07:45<03:58,  1.20it/s]

batch loss: 0.4318


Epoch 2:  66%|██████▌   | 559/844 [07:46<03:58,  1.20it/s]

batch loss: 0.4304


Epoch 2:  66%|██████▋   | 560/844 [07:47<03:56,  1.20it/s]

batch loss: 0.3974


Epoch 2:  66%|██████▋   | 561/844 [07:48<03:55,  1.20it/s]

batch loss: 0.3576


Epoch 2:  67%|██████▋   | 562/844 [07:49<03:55,  1.20it/s]

batch loss: 0.4302


Epoch 2:  67%|██████▋   | 563/844 [07:50<03:54,  1.20it/s]

batch loss: 0.4117


Epoch 2:  67%|██████▋   | 564/844 [07:50<03:53,  1.20it/s]

batch loss: 0.3884


Epoch 2:  67%|██████▋   | 565/844 [07:51<03:52,  1.20it/s]

batch loss: 0.3764


Epoch 2:  67%|██████▋   | 566/844 [07:52<03:51,  1.20it/s]

batch loss: 0.4391


Epoch 2:  67%|██████▋   | 567/844 [07:53<03:51,  1.20it/s]

batch loss: 0.4985


Epoch 2:  67%|██████▋   | 568/844 [07:54<03:50,  1.20it/s]

batch loss: 0.4885


Epoch 2:  67%|██████▋   | 569/844 [07:55<03:50,  1.19it/s]

batch loss: 0.4790


Epoch 2:  68%|██████▊   | 570/844 [07:55<03:50,  1.19it/s]

batch loss: 0.4714


Epoch 2:  68%|██████▊   | 571/844 [07:56<03:49,  1.19it/s]

batch loss: 0.5346


Epoch 2:  68%|██████▊   | 572/844 [07:57<03:48,  1.19it/s]

batch loss: 0.4983


Epoch 2:  68%|██████▊   | 573/844 [07:58<03:46,  1.20it/s]

batch loss: 0.4589


Epoch 2:  68%|██████▊   | 574/844 [07:59<03:46,  1.19it/s]

batch loss: 0.4706


Epoch 2:  68%|██████▊   | 575/844 [08:00<03:45,  1.19it/s]

batch loss: 0.5118


Epoch 2:  68%|██████▊   | 576/844 [08:01<03:44,  1.19it/s]

batch loss: 0.4390


Epoch 2:  68%|██████▊   | 577/844 [08:01<03:43,  1.20it/s]

batch loss: 0.3709


Epoch 2:  68%|██████▊   | 578/844 [08:02<03:41,  1.20it/s]

batch loss: 0.4540


Epoch 2:  69%|██████▊   | 579/844 [08:03<03:41,  1.20it/s]

batch loss: 0.4031


Epoch 2:  69%|██████▊   | 580/844 [08:04<03:40,  1.20it/s]

batch loss: 0.5390


Epoch 2:  69%|██████▉   | 581/844 [08:05<03:39,  1.20it/s]

batch loss: 0.4827


Epoch 2:  69%|██████▉   | 582/844 [08:06<03:38,  1.20it/s]

batch loss: 0.4449


Epoch 2:  69%|██████▉   | 583/844 [08:06<03:37,  1.20it/s]

batch loss: 0.4612


Epoch 2:  69%|██████▉   | 584/844 [08:07<03:36,  1.20it/s]

batch loss: 0.4476


Epoch 2:  69%|██████▉   | 585/844 [08:08<03:36,  1.20it/s]

batch loss: 0.4209


Epoch 2:  69%|██████▉   | 586/844 [08:09<03:34,  1.20it/s]

batch loss: 0.3857


Epoch 2:  70%|██████▉   | 587/844 [08:10<03:33,  1.20it/s]

batch loss: 0.4684


Epoch 2:  70%|██████▉   | 588/844 [08:11<03:33,  1.20it/s]

batch loss: 0.4328


Epoch 2:  70%|██████▉   | 589/844 [08:11<03:33,  1.20it/s]

batch loss: 0.5041


Epoch 2:  70%|██████▉   | 590/844 [08:12<03:32,  1.20it/s]

batch loss: 0.4139


Epoch 2:  70%|███████   | 591/844 [08:13<03:30,  1.20it/s]

batch loss: 0.4558


Epoch 2:  70%|███████   | 592/844 [08:14<03:30,  1.19it/s]

batch loss: 0.3848


Epoch 2:  70%|███████   | 593/844 [08:15<03:29,  1.20it/s]

batch loss: 0.4950


Epoch 2:  70%|███████   | 594/844 [08:16<03:28,  1.20it/s]

batch loss: 0.5271


Epoch 2:  70%|███████   | 595/844 [08:16<03:26,  1.20it/s]

batch loss: 0.4121


Epoch 2:  71%|███████   | 596/844 [08:17<03:25,  1.20it/s]

batch loss: 0.5062


Epoch 2:  71%|███████   | 597/844 [08:18<03:25,  1.20it/s]

batch loss: 0.6587


Epoch 2:  71%|███████   | 598/844 [08:19<03:24,  1.20it/s]

batch loss: 0.5068


Epoch 2:  71%|███████   | 599/844 [08:20<03:22,  1.21it/s]

batch loss: 0.4595


Epoch 2:  71%|███████   | 600/844 [08:20<03:22,  1.20it/s]

batch loss: 0.4509


Epoch 2:  71%|███████   | 601/844 [08:21<03:21,  1.20it/s]

batch loss: 0.3574


Epoch 2:  71%|███████▏  | 602/844 [08:22<03:21,  1.20it/s]

batch loss: 0.4835


Epoch 2:  71%|███████▏  | 603/844 [08:23<03:21,  1.20it/s]

batch loss: 0.4556


Epoch 2:  72%|███████▏  | 604/844 [08:24<03:20,  1.20it/s]

batch loss: 0.4462


Epoch 2:  72%|███████▏  | 605/844 [08:25<03:19,  1.20it/s]

batch loss: 0.4199


Epoch 2:  72%|███████▏  | 606/844 [08:25<03:18,  1.20it/s]

batch loss: 0.4923


Epoch 2:  72%|███████▏  | 607/844 [08:26<03:17,  1.20it/s]

batch loss: 0.3855


Epoch 2:  72%|███████▏  | 608/844 [08:27<03:16,  1.20it/s]

batch loss: 0.4018


Epoch 2:  72%|███████▏  | 609/844 [08:28<03:15,  1.20it/s]

batch loss: 0.4232


Epoch 2:  72%|███████▏  | 610/844 [08:29<03:14,  1.20it/s]

batch loss: 0.4714


Epoch 2:  72%|███████▏  | 611/844 [08:30<03:13,  1.20it/s]

batch loss: 0.4070


Epoch 2:  73%|███████▎  | 612/844 [08:30<03:12,  1.20it/s]

batch loss: 0.4073


Epoch 2:  73%|███████▎  | 613/844 [08:31<03:12,  1.20it/s]

batch loss: 0.5616


Epoch 2:  73%|███████▎  | 614/844 [08:32<03:11,  1.20it/s]

batch loss: 0.5072


Epoch 2:  73%|███████▎  | 615/844 [08:33<03:10,  1.20it/s]

batch loss: 0.5019


Epoch 2:  73%|███████▎  | 616/844 [08:34<03:09,  1.20it/s]

batch loss: 0.4393


Epoch 2:  73%|███████▎  | 617/844 [08:35<03:09,  1.20it/s]

batch loss: 0.4688


Epoch 2:  73%|███████▎  | 618/844 [08:35<03:09,  1.19it/s]

batch loss: 0.4743


Epoch 2:  73%|███████▎  | 619/844 [08:36<03:08,  1.20it/s]

batch loss: 0.4851


Epoch 2:  73%|███████▎  | 620/844 [08:37<03:07,  1.20it/s]

batch loss: 0.4371


Epoch 2:  74%|███████▎  | 621/844 [08:38<03:05,  1.20it/s]

batch loss: 0.5535


Epoch 2:  74%|███████▎  | 622/844 [08:39<03:04,  1.20it/s]

batch loss: 0.5113


Epoch 2:  74%|███████▍  | 623/844 [08:40<03:04,  1.20it/s]

batch loss: 0.4330


Epoch 2:  74%|███████▍  | 624/844 [08:40<03:03,  1.20it/s]

batch loss: 0.4904


Epoch 2:  74%|███████▍  | 625/844 [08:41<03:02,  1.20it/s]

batch loss: 0.4298


Epoch 2:  74%|███████▍  | 626/844 [08:42<03:01,  1.20it/s]

batch loss: 0.4380


Epoch 2:  74%|███████▍  | 627/844 [08:43<03:00,  1.20it/s]

batch loss: 0.4646


Epoch 2:  74%|███████▍  | 628/844 [08:44<03:00,  1.20it/s]

batch loss: 0.4639


Epoch 2:  75%|███████▍  | 629/844 [08:45<02:59,  1.20it/s]

batch loss: 0.4503


Epoch 2:  75%|███████▍  | 630/844 [08:45<02:58,  1.20it/s]

batch loss: 0.4765


Epoch 2:  75%|███████▍  | 631/844 [08:46<02:56,  1.21it/s]

batch loss: 0.5326


Epoch 2:  75%|███████▍  | 632/844 [08:47<02:56,  1.20it/s]

batch loss: 0.5385


Epoch 2:  75%|███████▌  | 633/844 [08:48<02:55,  1.20it/s]

batch loss: 0.4448


Epoch 2:  75%|███████▌  | 634/844 [08:49<02:55,  1.20it/s]

batch loss: 0.4744


Epoch 2:  75%|███████▌  | 635/844 [08:50<02:53,  1.20it/s]

batch loss: 0.4949


Epoch 2:  75%|███████▌  | 636/844 [08:50<02:53,  1.20it/s]

batch loss: 0.4630


Epoch 2:  75%|███████▌  | 637/844 [08:51<02:53,  1.20it/s]

batch loss: 0.5608


Epoch 2:  76%|███████▌  | 638/844 [08:52<02:52,  1.19it/s]

batch loss: 0.5117


Epoch 2:  76%|███████▌  | 639/844 [08:53<02:51,  1.20it/s]

batch loss: 0.5111


Epoch 2:  76%|███████▌  | 640/844 [08:54<02:50,  1.19it/s]

batch loss: 0.4837


Epoch 2:  76%|███████▌  | 641/844 [08:55<02:49,  1.20it/s]

batch loss: 0.4700


Epoch 2:  76%|███████▌  | 642/844 [08:55<02:48,  1.20it/s]

batch loss: 0.4687


Epoch 2:  76%|███████▌  | 643/844 [08:56<02:47,  1.20it/s]

batch loss: 0.4177


Epoch 2:  76%|███████▋  | 644/844 [08:57<02:46,  1.20it/s]

batch loss: 0.4709


Epoch 2:  76%|███████▋  | 645/844 [08:58<02:45,  1.20it/s]

batch loss: 0.5483


Epoch 2:  77%|███████▋  | 646/844 [08:59<02:44,  1.20it/s]

batch loss: 0.4536


Epoch 2:  77%|███████▋  | 647/844 [09:00<02:43,  1.20it/s]

batch loss: 0.5020


Epoch 2:  77%|███████▋  | 648/844 [09:00<02:42,  1.20it/s]

batch loss: 0.4457


Epoch 2:  77%|███████▋  | 649/844 [09:01<02:42,  1.20it/s]

batch loss: 0.4335


Epoch 2:  77%|███████▋  | 650/844 [09:02<02:42,  1.20it/s]

batch loss: 0.4615


Epoch 2:  77%|███████▋  | 651/844 [09:03<02:41,  1.20it/s]

batch loss: 0.4660


Epoch 2:  77%|███████▋  | 652/844 [09:04<02:40,  1.20it/s]

batch loss: 0.4474


Epoch 2:  77%|███████▋  | 653/844 [09:05<02:39,  1.20it/s]

batch loss: 0.4611


Epoch 2:  77%|███████▋  | 654/844 [09:05<02:38,  1.20it/s]

batch loss: 0.4296


Epoch 2:  78%|███████▊  | 655/844 [09:06<02:38,  1.19it/s]

batch loss: 0.4987


Epoch 2:  78%|███████▊  | 656/844 [09:07<02:37,  1.20it/s]

batch loss: 0.4218


Epoch 2:  78%|███████▊  | 657/844 [09:08<02:35,  1.20it/s]

batch loss: 0.4179


Epoch 2:  78%|███████▊  | 658/844 [09:09<02:34,  1.20it/s]

batch loss: 0.5038


Epoch 2:  78%|███████▊  | 659/844 [09:10<02:34,  1.20it/s]

batch loss: 0.4510


Epoch 2:  78%|███████▊  | 660/844 [09:11<02:33,  1.20it/s]

batch loss: 0.4911


Epoch 2:  78%|███████▊  | 661/844 [09:11<02:32,  1.20it/s]

batch loss: 0.4164


Epoch 2:  78%|███████▊  | 662/844 [09:12<02:32,  1.20it/s]

batch loss: 0.5033


Epoch 2:  79%|███████▊  | 663/844 [09:13<02:31,  1.20it/s]

batch loss: 0.4486


Epoch 2:  79%|███████▊  | 664/844 [09:14<02:29,  1.20it/s]

batch loss: 0.5133


Epoch 2:  79%|███████▉  | 665/844 [09:15<02:29,  1.20it/s]

batch loss: 0.4667


Epoch 2:  79%|███████▉  | 666/844 [09:16<02:28,  1.20it/s]

batch loss: 0.4839


Epoch 2:  79%|███████▉  | 667/844 [09:16<02:27,  1.20it/s]

batch loss: 0.4899


Epoch 2:  79%|███████▉  | 668/844 [09:17<02:27,  1.20it/s]

batch loss: 0.4748


Epoch 2:  79%|███████▉  | 669/844 [09:18<02:26,  1.20it/s]

batch loss: 0.5196


Epoch 2:  79%|███████▉  | 670/844 [09:19<02:25,  1.20it/s]

batch loss: 0.4889


Epoch 2:  80%|███████▉  | 671/844 [09:20<02:24,  1.20it/s]

batch loss: 0.4986


Epoch 2:  80%|███████▉  | 672/844 [09:21<02:23,  1.20it/s]

batch loss: 0.4506


Epoch 2:  80%|███████▉  | 673/844 [09:21<02:22,  1.20it/s]

batch loss: 0.4180


Epoch 2:  80%|███████▉  | 674/844 [09:22<02:21,  1.20it/s]

batch loss: 0.4633


Epoch 2:  80%|███████▉  | 675/844 [09:23<02:20,  1.20it/s]

batch loss: 0.5054


Epoch 2:  80%|████████  | 676/844 [09:24<02:19,  1.20it/s]

batch loss: 0.4429


Epoch 2:  80%|████████  | 677/844 [09:25<02:19,  1.20it/s]

batch loss: 0.5667


Epoch 2:  80%|████████  | 678/844 [09:26<02:18,  1.20it/s]

batch loss: 0.4676


Epoch 2:  80%|████████  | 679/844 [09:26<02:18,  1.19it/s]

batch loss: 0.4674


Epoch 2:  81%|████████  | 680/844 [09:27<02:17,  1.20it/s]

batch loss: 0.4710


Epoch 2:  81%|████████  | 681/844 [09:28<02:16,  1.20it/s]

batch loss: 0.5096


Epoch 2:  81%|████████  | 682/844 [09:29<02:15,  1.20it/s]

batch loss: 0.4853


Epoch 2:  81%|████████  | 683/844 [09:30<02:14,  1.20it/s]

batch loss: 0.4661


Epoch 2:  81%|████████  | 684/844 [09:31<02:13,  1.20it/s]

batch loss: 0.4445


Epoch 2:  81%|████████  | 685/844 [09:31<02:12,  1.20it/s]

batch loss: 0.3802


Epoch 2:  81%|████████▏ | 686/844 [09:32<02:12,  1.19it/s]

batch loss: 0.4666


Epoch 2:  81%|████████▏ | 687/844 [09:33<02:11,  1.20it/s]

batch loss: 0.5170


Epoch 2:  82%|████████▏ | 688/844 [09:34<02:10,  1.20it/s]

batch loss: 0.4760


Epoch 2:  82%|████████▏ | 689/844 [09:35<02:09,  1.20it/s]

batch loss: 0.4377


Epoch 2:  82%|████████▏ | 690/844 [09:36<02:08,  1.20it/s]

batch loss: 0.4571


Epoch 2:  82%|████████▏ | 691/844 [09:36<02:07,  1.20it/s]

batch loss: 0.4638


Epoch 2:  82%|████████▏ | 692/844 [09:37<02:06,  1.20it/s]

batch loss: 0.4770


Epoch 2:  82%|████████▏ | 693/844 [09:38<02:05,  1.20it/s]

batch loss: 0.5132


Epoch 2:  82%|████████▏ | 694/844 [09:39<02:04,  1.20it/s]

batch loss: 0.4604


Epoch 2:  82%|████████▏ | 695/844 [09:40<02:04,  1.20it/s]

batch loss: 0.4811


Epoch 2:  82%|████████▏ | 696/844 [09:41<02:03,  1.20it/s]

batch loss: 0.4062


Epoch 2:  83%|████████▎ | 697/844 [09:41<02:02,  1.20it/s]

batch loss: 0.4357


Epoch 2:  83%|████████▎ | 698/844 [09:42<02:01,  1.20it/s]

batch loss: 0.3449


Epoch 2:  83%|████████▎ | 699/844 [09:43<02:00,  1.20it/s]

batch loss: 0.4304


Epoch 2:  83%|████████▎ | 700/844 [09:44<02:00,  1.20it/s]

batch loss: 0.4476


Epoch 2:  83%|████████▎ | 701/844 [09:45<01:59,  1.20it/s]

batch loss: 0.5578


Epoch 2:  83%|████████▎ | 702/844 [09:46<01:58,  1.20it/s]

batch loss: 0.4423


Epoch 2:  83%|████████▎ | 703/844 [09:46<01:57,  1.20it/s]

batch loss: 0.5136


Epoch 2:  83%|████████▎ | 704/844 [09:47<01:57,  1.19it/s]

batch loss: 0.4777


Epoch 2:  84%|████████▎ | 705/844 [09:48<01:56,  1.20it/s]

batch loss: 0.4155


Epoch 2:  84%|████████▎ | 706/844 [09:49<01:55,  1.20it/s]

batch loss: 0.4741


Epoch 2:  84%|████████▍ | 707/844 [09:50<01:54,  1.20it/s]

batch loss: 0.4617


Epoch 2:  84%|████████▍ | 708/844 [09:51<01:53,  1.20it/s]

batch loss: 0.4425


Epoch 2:  84%|████████▍ | 709/844 [09:51<01:52,  1.20it/s]

batch loss: 0.4295


Epoch 2:  84%|████████▍ | 710/844 [09:52<01:52,  1.20it/s]

batch loss: 0.4198


Epoch 2:  84%|████████▍ | 711/844 [09:53<01:51,  1.19it/s]

batch loss: 0.5145


Epoch 2:  84%|████████▍ | 712/844 [09:54<01:50,  1.20it/s]

batch loss: 0.5162


Epoch 2:  84%|████████▍ | 713/844 [09:55<01:49,  1.20it/s]

batch loss: 0.4315


Epoch 2:  85%|████████▍ | 714/844 [09:56<01:48,  1.20it/s]

batch loss: 0.4957


Epoch 2:  85%|████████▍ | 715/844 [09:56<01:47,  1.20it/s]

batch loss: 0.3983


Epoch 2:  85%|████████▍ | 716/844 [09:57<01:46,  1.20it/s]

batch loss: 0.4700


Epoch 2:  85%|████████▍ | 717/844 [09:58<01:46,  1.19it/s]

batch loss: 0.4119


Epoch 2:  85%|████████▌ | 718/844 [09:59<01:45,  1.20it/s]

batch loss: 0.4765


Epoch 2:  85%|████████▌ | 719/844 [10:00<01:44,  1.20it/s]

batch loss: 0.4491


Epoch 2:  85%|████████▌ | 720/844 [10:01<01:43,  1.20it/s]

batch loss: 0.4919


Epoch 2:  85%|████████▌ | 721/844 [10:01<01:42,  1.20it/s]

batch loss: 0.4684


Epoch 2:  86%|████████▌ | 722/844 [10:02<01:41,  1.20it/s]

batch loss: 0.4913


Epoch 2:  86%|████████▌ | 723/844 [10:03<01:40,  1.20it/s]

batch loss: 0.4909


Epoch 2:  86%|████████▌ | 724/844 [10:04<01:39,  1.20it/s]

batch loss: 0.4960


Epoch 2:  86%|████████▌ | 725/844 [10:05<01:39,  1.20it/s]

batch loss: 0.4686


Epoch 2:  86%|████████▌ | 726/844 [10:06<01:38,  1.20it/s]

batch loss: 0.5036


Epoch 2:  86%|████████▌ | 727/844 [10:06<01:37,  1.20it/s]

batch loss: 0.5405


Epoch 2:  86%|████████▋ | 728/844 [10:07<01:36,  1.20it/s]

batch loss: 0.4930


Epoch 2:  86%|████████▋ | 729/844 [10:08<01:35,  1.20it/s]

batch loss: 0.4843


Epoch 2:  86%|████████▋ | 730/844 [10:09<01:34,  1.20it/s]

batch loss: 0.4194


Epoch 2:  87%|████████▋ | 731/844 [10:10<01:34,  1.20it/s]

batch loss: 0.5329


Epoch 2:  87%|████████▋ | 732/844 [10:11<01:33,  1.20it/s]

batch loss: 0.3949


Epoch 2:  87%|████████▋ | 733/844 [10:11<01:32,  1.20it/s]

batch loss: 0.4735


Epoch 2:  87%|████████▋ | 734/844 [10:12<01:32,  1.19it/s]

batch loss: 0.4567


Epoch 2:  87%|████████▋ | 735/844 [10:13<01:31,  1.19it/s]

batch loss: 0.5057


Epoch 2:  87%|████████▋ | 736/844 [10:14<01:30,  1.20it/s]

batch loss: 0.5426


Epoch 2:  87%|████████▋ | 737/844 [10:15<01:29,  1.19it/s]

batch loss: 0.3845


Epoch 2:  87%|████████▋ | 738/844 [10:16<01:28,  1.20it/s]

batch loss: 0.4616


Epoch 2:  88%|████████▊ | 739/844 [10:16<01:27,  1.20it/s]

batch loss: 0.4604


Epoch 2:  88%|████████▊ | 740/844 [10:17<01:26,  1.20it/s]

batch loss: 0.4621


Epoch 2:  88%|████████▊ | 741/844 [10:18<01:25,  1.20it/s]

batch loss: 0.5140


Epoch 2:  88%|████████▊ | 742/844 [10:19<01:24,  1.20it/s]

batch loss: 0.5526


Epoch 2:  88%|████████▊ | 743/844 [10:20<01:24,  1.20it/s]

batch loss: 0.4901


Epoch 2:  88%|████████▊ | 744/844 [10:21<01:23,  1.20it/s]

batch loss: 0.4828


Epoch 2:  88%|████████▊ | 745/844 [10:21<01:22,  1.20it/s]

batch loss: 0.4628


Epoch 2:  88%|████████▊ | 746/844 [10:22<01:21,  1.20it/s]

batch loss: 0.4316


Epoch 2:  89%|████████▊ | 747/844 [10:23<01:20,  1.20it/s]

batch loss: 0.4663


Epoch 2:  89%|████████▊ | 748/844 [10:24<01:20,  1.20it/s]

batch loss: 0.5168


Epoch 2:  89%|████████▊ | 749/844 [10:25<01:19,  1.20it/s]

batch loss: 0.4098


Epoch 2:  89%|████████▉ | 750/844 [10:26<01:18,  1.20it/s]

batch loss: 0.4639


Epoch 2:  89%|████████▉ | 751/844 [10:26<01:17,  1.20it/s]

batch loss: 0.4729


Epoch 2:  89%|████████▉ | 752/844 [10:27<01:16,  1.20it/s]

batch loss: 0.5648


Epoch 2:  89%|████████▉ | 753/844 [10:28<01:15,  1.20it/s]

batch loss: 0.4372


Epoch 2:  89%|████████▉ | 754/844 [10:29<01:14,  1.20it/s]

batch loss: 0.4331


Epoch 2:  89%|████████▉ | 755/844 [10:30<01:14,  1.20it/s]

batch loss: 0.3994


Epoch 2:  90%|████████▉ | 756/844 [10:31<01:13,  1.20it/s]

batch loss: 0.4474


Epoch 2:  90%|████████▉ | 757/844 [10:31<01:12,  1.20it/s]

batch loss: 0.4511


Epoch 2:  90%|████████▉ | 758/844 [10:32<01:11,  1.20it/s]

batch loss: 0.4489


Epoch 2:  90%|████████▉ | 759/844 [10:33<01:10,  1.20it/s]

batch loss: 0.4792


Epoch 2:  90%|█████████ | 760/844 [10:34<01:10,  1.20it/s]

batch loss: 0.4386


Epoch 2:  90%|█████████ | 761/844 [10:35<01:09,  1.20it/s]

batch loss: 0.4560


Epoch 2:  90%|█████████ | 762/844 [10:36<01:08,  1.20it/s]

batch loss: 0.5168


Epoch 2:  90%|█████████ | 763/844 [10:36<01:07,  1.20it/s]

batch loss: 0.4522


Epoch 2:  91%|█████████ | 764/844 [10:37<01:06,  1.20it/s]

batch loss: 0.5124


Epoch 2:  91%|█████████ | 765/844 [10:38<01:05,  1.20it/s]

batch loss: 0.4510


Epoch 2:  91%|█████████ | 766/844 [10:39<01:05,  1.20it/s]

batch loss: 0.4785


Epoch 2:  91%|█████████ | 767/844 [10:40<01:04,  1.20it/s]

batch loss: 0.4069


Epoch 2:  91%|█████████ | 768/844 [10:41<01:03,  1.20it/s]

batch loss: 0.4389


Epoch 2:  91%|█████████ | 769/844 [10:41<01:02,  1.20it/s]

batch loss: 0.4428


Epoch 2:  91%|█████████ | 770/844 [10:42<01:01,  1.20it/s]

batch loss: 0.5002


Epoch 2:  91%|█████████▏| 771/844 [10:43<01:00,  1.20it/s]

batch loss: 0.4993


Epoch 2:  91%|█████████▏| 772/844 [10:44<00:59,  1.20it/s]

batch loss: 0.4048


Epoch 2:  92%|█████████▏| 773/844 [10:45<00:58,  1.21it/s]

batch loss: 0.4504


Epoch 2:  92%|█████████▏| 774/844 [10:46<00:58,  1.20it/s]

batch loss: 0.4835


Epoch 2:  92%|█████████▏| 775/844 [10:46<00:57,  1.20it/s]

batch loss: 0.5108


Epoch 2:  92%|█████████▏| 776/844 [10:47<00:56,  1.20it/s]

batch loss: 0.4468


Epoch 2:  92%|█████████▏| 777/844 [10:48<00:55,  1.20it/s]

batch loss: 0.4192


Epoch 2:  92%|█████████▏| 778/844 [10:49<00:54,  1.20it/s]

batch loss: 0.4523


Epoch 2:  92%|█████████▏| 779/844 [10:50<00:54,  1.20it/s]

batch loss: 0.4368


Epoch 2:  92%|█████████▏| 780/844 [10:51<00:53,  1.20it/s]

batch loss: 0.4337


Epoch 2:  93%|█████████▎| 781/844 [10:51<00:52,  1.19it/s]

batch loss: 0.5727


Epoch 2:  93%|█████████▎| 782/844 [10:52<00:51,  1.20it/s]

batch loss: 0.3761


Epoch 2:  93%|█████████▎| 783/844 [10:53<00:51,  1.19it/s]

batch loss: 0.5056


Epoch 2:  93%|█████████▎| 784/844 [10:54<00:50,  1.20it/s]

batch loss: 0.4651


Epoch 2:  93%|█████████▎| 785/844 [10:55<00:49,  1.20it/s]

batch loss: 0.4768


Epoch 2:  93%|█████████▎| 786/844 [10:56<00:48,  1.20it/s]

batch loss: 0.5136


Epoch 2:  93%|█████████▎| 787/844 [10:56<00:47,  1.20it/s]

batch loss: 0.4135


Epoch 2:  93%|█████████▎| 788/844 [10:57<00:46,  1.20it/s]

batch loss: 0.4488


Epoch 2:  93%|█████████▎| 789/844 [10:58<00:45,  1.20it/s]

batch loss: 0.4069


Epoch 2:  94%|█████████▎| 790/844 [10:59<00:44,  1.20it/s]

batch loss: 0.5209


Epoch 2:  94%|█████████▎| 791/844 [11:00<00:44,  1.20it/s]

batch loss: 0.4111


Epoch 2:  94%|█████████▍| 792/844 [11:01<00:43,  1.20it/s]

batch loss: 0.5293


Epoch 2:  94%|█████████▍| 793/844 [11:01<00:42,  1.20it/s]

batch loss: 0.4055


Epoch 2:  94%|█████████▍| 794/844 [11:02<00:41,  1.20it/s]

batch loss: 0.4850


Epoch 2:  94%|█████████▍| 795/844 [11:03<00:40,  1.20it/s]

batch loss: 0.3916


Epoch 2:  94%|█████████▍| 796/844 [11:04<00:39,  1.20it/s]

batch loss: 0.4287


Epoch 2:  94%|█████████▍| 797/844 [11:05<00:39,  1.20it/s]

batch loss: 0.4340


Epoch 2:  95%|█████████▍| 798/844 [11:06<00:38,  1.20it/s]

batch loss: 0.4411


Epoch 2:  95%|█████████▍| 799/844 [11:06<00:37,  1.20it/s]

batch loss: 0.4529


Epoch 2:  95%|█████████▍| 800/844 [11:07<00:36,  1.19it/s]

batch loss: 0.5142


Epoch 2:  95%|█████████▍| 801/844 [11:08<00:35,  1.20it/s]

batch loss: 0.5350


Epoch 2:  95%|█████████▌| 802/844 [11:09<00:34,  1.20it/s]

batch loss: 0.4297


Epoch 2:  95%|█████████▌| 803/844 [11:10<00:34,  1.20it/s]

batch loss: 0.5058


Epoch 2:  95%|█████████▌| 804/844 [11:11<00:33,  1.20it/s]

batch loss: 0.4336


Epoch 2:  95%|█████████▌| 805/844 [11:11<00:32,  1.20it/s]

batch loss: 0.4468


Epoch 2:  95%|█████████▌| 806/844 [11:12<00:31,  1.20it/s]

batch loss: 0.4934


Epoch 2:  96%|█████████▌| 807/844 [11:13<00:30,  1.20it/s]

batch loss: 0.3925


Epoch 2:  96%|█████████▌| 808/844 [11:14<00:29,  1.20it/s]

batch loss: 0.4673


Epoch 2:  96%|█████████▌| 809/844 [11:15<00:29,  1.20it/s]

batch loss: 0.4500


Epoch 2:  96%|█████████▌| 810/844 [11:16<00:28,  1.20it/s]

batch loss: 0.3966


Epoch 2:  96%|█████████▌| 811/844 [11:16<00:27,  1.20it/s]

batch loss: 0.3849


Epoch 2:  96%|█████████▌| 812/844 [11:17<00:26,  1.20it/s]

batch loss: 0.4041


Epoch 2:  96%|█████████▋| 813/844 [11:18<00:25,  1.20it/s]

batch loss: 0.4403


Epoch 2:  96%|█████████▋| 814/844 [11:19<00:25,  1.19it/s]

batch loss: 0.4274


Epoch 2:  97%|█████████▋| 815/844 [11:20<00:24,  1.19it/s]

batch loss: 0.4846


Epoch 2:  97%|█████████▋| 816/844 [11:21<00:23,  1.20it/s]

batch loss: 0.4964


Epoch 2:  97%|█████████▋| 817/844 [11:21<00:22,  1.20it/s]

batch loss: 0.4120


Epoch 2:  97%|█████████▋| 818/844 [11:22<00:21,  1.19it/s]

batch loss: 0.4878


Epoch 2:  97%|█████████▋| 819/844 [11:23<00:20,  1.19it/s]

batch loss: 0.4656


Epoch 2:  97%|█████████▋| 820/844 [11:24<00:20,  1.20it/s]

batch loss: 0.5025


Epoch 2:  97%|█████████▋| 821/844 [11:25<00:19,  1.20it/s]

batch loss: 0.4324


Epoch 2:  97%|█████████▋| 822/844 [11:26<00:18,  1.20it/s]

batch loss: 0.4463


Epoch 2:  98%|█████████▊| 823/844 [11:26<00:17,  1.20it/s]

batch loss: 0.4380


Epoch 2:  98%|█████████▊| 824/844 [11:27<00:16,  1.20it/s]

batch loss: 0.4944


Epoch 2:  98%|█████████▊| 825/844 [11:28<00:15,  1.20it/s]

batch loss: 0.4480


Epoch 2:  98%|█████████▊| 826/844 [11:29<00:14,  1.20it/s]

batch loss: 0.4968


Epoch 2:  98%|█████████▊| 827/844 [11:30<00:14,  1.21it/s]

batch loss: 0.4242


Epoch 2:  98%|█████████▊| 828/844 [11:31<00:13,  1.20it/s]

batch loss: 0.5272


Epoch 2:  98%|█████████▊| 829/844 [11:31<00:12,  1.20it/s]

batch loss: 0.4563


Epoch 2:  98%|█████████▊| 830/844 [11:32<00:11,  1.19it/s]

batch loss: 0.4075


Epoch 2:  98%|█████████▊| 831/844 [11:33<00:10,  1.20it/s]

batch loss: 0.4620


Epoch 2:  99%|█████████▊| 832/844 [11:34<00:10,  1.20it/s]

batch loss: 0.4243


Epoch 2:  99%|█████████▊| 833/844 [11:35<00:09,  1.20it/s]

batch loss: 0.4700


Epoch 2:  99%|█████████▉| 834/844 [11:36<00:08,  1.20it/s]

batch loss: 0.5450


Epoch 2:  99%|█████████▉| 835/844 [11:36<00:07,  1.20it/s]

batch loss: 0.4624


Epoch 2:  99%|█████████▉| 836/844 [11:37<00:06,  1.20it/s]

batch loss: 0.5664


Epoch 2:  99%|█████████▉| 837/844 [11:38<00:05,  1.20it/s]

batch loss: 0.5030


Epoch 2:  99%|█████████▉| 838/844 [11:39<00:04,  1.20it/s]

batch loss: 0.4966


Epoch 2:  99%|█████████▉| 839/844 [11:40<00:04,  1.20it/s]

batch loss: 0.4404


Epoch 2: 100%|█████████▉| 840/844 [11:41<00:03,  1.20it/s]

batch loss: 0.4351


Epoch 2: 100%|█████████▉| 841/844 [11:41<00:02,  1.20it/s]

batch loss: 0.5141


Epoch 2: 100%|█████████▉| 842/844 [11:42<00:01,  1.20it/s]

batch loss: 0.5270


Epoch 2: 100%|█████████▉| 843/844 [11:43<00:00,  1.20it/s]

batch loss: 0.4559


Epoch 2: 100%|██████████| 844/844 [11:44<00:00,  1.20it/s]


batch loss: 0.3738
--------------------------------------------------------------------------------
Epoch 2 - Average Loss: 0.4689


Epoch 3:   0%|          | 1/844 [00:00<11:41,  1.20it/s]

batch loss: 0.4240


Epoch 3:   0%|          | 2/844 [00:01<11:40,  1.20it/s]

batch loss: 0.4413


Epoch 3:   0%|          | 3/844 [00:02<11:42,  1.20it/s]

batch loss: 0.4821


Epoch 3:   0%|          | 4/844 [00:03<11:44,  1.19it/s]

batch loss: 0.4597


Epoch 3:   1%|          | 5/844 [00:04<11:44,  1.19it/s]

batch loss: 0.5068


Epoch 3:   1%|          | 6/844 [00:05<11:41,  1.19it/s]

batch loss: 0.5567


Epoch 3:   1%|          | 7/844 [00:05<11:38,  1.20it/s]

batch loss: 0.4142


Epoch 3:   1%|          | 8/844 [00:06<11:35,  1.20it/s]

batch loss: 0.4840


Epoch 3:   1%|          | 9/844 [00:07<11:36,  1.20it/s]

batch loss: 0.4758


Epoch 3:   1%|          | 10/844 [00:08<11:35,  1.20it/s]

batch loss: 0.4308


Epoch 3:   1%|▏         | 11/844 [00:09<11:35,  1.20it/s]

batch loss: 0.4812


Epoch 3:   1%|▏         | 12/844 [00:10<11:35,  1.20it/s]

batch loss: 0.5186


Epoch 3:   2%|▏         | 13/844 [00:10<11:34,  1.20it/s]

batch loss: 0.4653


Epoch 3:   2%|▏         | 14/844 [00:11<11:34,  1.19it/s]

batch loss: 0.5370


Epoch 3:   2%|▏         | 15/844 [00:12<11:32,  1.20it/s]

batch loss: 0.4743


Epoch 3:   2%|▏         | 16/844 [00:13<11:30,  1.20it/s]

batch loss: 0.5127


Epoch 3:   2%|▏         | 17/844 [00:14<11:27,  1.20it/s]

batch loss: 0.4462


Epoch 3:   2%|▏         | 18/844 [00:15<11:27,  1.20it/s]

batch loss: 0.4315


Epoch 3:   2%|▏         | 19/844 [00:15<11:28,  1.20it/s]

batch loss: 0.4766


Epoch 3:   2%|▏         | 20/844 [00:16<11:28,  1.20it/s]

batch loss: 0.4502


Epoch 3:   2%|▏         | 21/844 [00:17<11:27,  1.20it/s]

batch loss: 0.4789


Epoch 3:   3%|▎         | 22/844 [00:18<11:29,  1.19it/s]

batch loss: 0.4905


Epoch 3:   3%|▎         | 23/844 [00:19<11:25,  1.20it/s]

batch loss: 0.4943


Epoch 3:   3%|▎         | 24/844 [00:20<11:23,  1.20it/s]

batch loss: 0.4237


Epoch 3:   3%|▎         | 25/844 [00:20<11:22,  1.20it/s]

batch loss: 0.3834


Epoch 3:   3%|▎         | 26/844 [00:21<11:22,  1.20it/s]

batch loss: 0.4290


Epoch 3:   3%|▎         | 27/844 [00:22<11:21,  1.20it/s]

batch loss: 0.4134


Epoch 3:   3%|▎         | 28/844 [00:23<11:22,  1.20it/s]

batch loss: 0.4075


Epoch 3:   3%|▎         | 29/844 [00:24<11:19,  1.20it/s]

batch loss: 0.5390


Epoch 3:   4%|▎         | 30/844 [00:25<11:17,  1.20it/s]

batch loss: 0.4818


Epoch 3:   4%|▎         | 31/844 [00:25<11:16,  1.20it/s]

batch loss: 0.4314


Epoch 3:   4%|▍         | 32/844 [00:26<11:15,  1.20it/s]

batch loss: 0.4610


Epoch 3:   4%|▍         | 33/844 [00:27<11:15,  1.20it/s]

batch loss: 0.4766


Epoch 3:   4%|▍         | 34/844 [00:28<11:16,  1.20it/s]

batch loss: 0.4990


Epoch 3:   4%|▍         | 35/844 [00:29<11:14,  1.20it/s]

batch loss: 0.3689


Epoch 3:   4%|▍         | 36/844 [00:30<11:12,  1.20it/s]

batch loss: 0.4933


Epoch 3:   4%|▍         | 37/844 [00:30<11:13,  1.20it/s]

batch loss: 0.4558


Epoch 3:   5%|▍         | 38/844 [00:31<11:14,  1.19it/s]

batch loss: 0.4452


Epoch 3:   5%|▍         | 39/844 [00:32<11:16,  1.19it/s]

batch loss: 0.3758


Epoch 3:   5%|▍         | 40/844 [00:33<11:12,  1.20it/s]

batch loss: 0.4520


Epoch 3:   5%|▍         | 41/844 [00:34<11:10,  1.20it/s]

batch loss: 0.4204


Epoch 3:   5%|▍         | 42/844 [00:35<11:09,  1.20it/s]

batch loss: 0.4750


Epoch 3:   5%|▌         | 43/844 [00:35<11:08,  1.20it/s]

batch loss: 0.4593


Epoch 3:   5%|▌         | 44/844 [00:36<11:09,  1.19it/s]

batch loss: 0.4535


Epoch 3:   5%|▌         | 45/844 [00:37<11:07,  1.20it/s]

batch loss: 0.4415


Epoch 3:   5%|▌         | 46/844 [00:38<11:05,  1.20it/s]

batch loss: 0.4846


Epoch 3:   6%|▌         | 47/844 [00:39<11:04,  1.20it/s]

batch loss: 0.4065


Epoch 3:   6%|▌         | 48/844 [00:40<11:03,  1.20it/s]

batch loss: 0.5011


Epoch 3:   6%|▌         | 49/844 [00:40<11:02,  1.20it/s]

batch loss: 0.4742


Epoch 3:   6%|▌         | 50/844 [00:41<11:00,  1.20it/s]

batch loss: 0.5353


Epoch 3:   6%|▌         | 51/844 [00:42<11:00,  1.20it/s]

batch loss: 0.4520


Epoch 3:   6%|▌         | 52/844 [00:43<11:02,  1.20it/s]

batch loss: 0.4779


Epoch 3:   6%|▋         | 53/844 [00:44<10:59,  1.20it/s]

batch loss: 0.4427


Epoch 3:   6%|▋         | 54/844 [00:45<11:01,  1.19it/s]

batch loss: 0.4950


Epoch 3:   7%|▋         | 55/844 [00:45<10:59,  1.20it/s]

batch loss: 0.4113


Epoch 3:   7%|▋         | 56/844 [00:46<10:59,  1.20it/s]

batch loss: 0.5613


Epoch 3:   7%|▋         | 57/844 [00:47<10:59,  1.19it/s]

batch loss: 0.4330


Epoch 3:   7%|▋         | 58/844 [00:48<10:56,  1.20it/s]

batch loss: 0.5315


Epoch 3:   7%|▋         | 59/844 [00:49<10:54,  1.20it/s]

batch loss: 0.4070


Epoch 3:   7%|▋         | 60/844 [00:50<10:54,  1.20it/s]

batch loss: 0.5364


Epoch 3:   7%|▋         | 61/844 [00:50<10:53,  1.20it/s]

batch loss: 0.4289


Epoch 3:   7%|▋         | 62/844 [00:51<10:52,  1.20it/s]

batch loss: 0.4462


Epoch 3:   7%|▋         | 63/844 [00:52<10:50,  1.20it/s]

batch loss: 0.5579


Epoch 3:   8%|▊         | 64/844 [00:53<10:48,  1.20it/s]

batch loss: 0.5237


Epoch 3:   8%|▊         | 65/844 [00:54<10:48,  1.20it/s]

batch loss: 0.4684


Epoch 3:   8%|▊         | 66/844 [00:55<10:47,  1.20it/s]

batch loss: 0.4796


Epoch 3:   8%|▊         | 67/844 [00:55<10:48,  1.20it/s]

batch loss: 0.4220


Epoch 3:   8%|▊         | 68/844 [00:56<10:47,  1.20it/s]

batch loss: 0.4307


Epoch 3:   8%|▊         | 69/844 [00:57<10:45,  1.20it/s]

batch loss: 0.4671


Epoch 3:   8%|▊         | 70/844 [00:58<10:44,  1.20it/s]

batch loss: 0.3865


Epoch 3:   8%|▊         | 71/844 [00:59<10:44,  1.20it/s]

batch loss: 0.5066


Epoch 3:   9%|▊         | 72/844 [01:00<10:43,  1.20it/s]

batch loss: 0.3794


Epoch 3:   9%|▊         | 73/844 [01:00<10:44,  1.20it/s]

batch loss: 0.3835


Epoch 3:   9%|▉         | 74/844 [01:01<10:41,  1.20it/s]

batch loss: 0.4931


Epoch 3:   9%|▉         | 75/844 [01:02<10:40,  1.20it/s]

batch loss: 0.4485


Epoch 3:   9%|▉         | 76/844 [01:03<10:39,  1.20it/s]

batch loss: 0.3933


Epoch 3:   9%|▉         | 77/844 [01:04<10:39,  1.20it/s]

batch loss: 0.4137


Epoch 3:   9%|▉         | 78/844 [01:05<10:39,  1.20it/s]

batch loss: 0.4603


Epoch 3:   9%|▉         | 79/844 [01:05<10:38,  1.20it/s]

batch loss: 0.4847


Epoch 3:   9%|▉         | 80/844 [01:06<10:35,  1.20it/s]

batch loss: 0.3637


Epoch 3:  10%|▉         | 81/844 [01:07<10:34,  1.20it/s]

batch loss: 0.4412


Epoch 3:  10%|▉         | 82/844 [01:08<10:34,  1.20it/s]

batch loss: 0.4975


Epoch 3:  10%|▉         | 83/844 [01:09<10:36,  1.20it/s]

batch loss: 0.4013


Epoch 3:  10%|▉         | 84/844 [01:10<10:35,  1.20it/s]

batch loss: 0.4183


Epoch 3:  10%|█         | 85/844 [01:10<10:33,  1.20it/s]

batch loss: 0.4611


Epoch 3:  10%|█         | 86/844 [01:11<10:34,  1.19it/s]

batch loss: 0.4003


Epoch 3:  10%|█         | 87/844 [01:12<10:32,  1.20it/s]

batch loss: 0.4229


Epoch 3:  10%|█         | 88/844 [01:13<10:31,  1.20it/s]

batch loss: 0.5145


Epoch 3:  11%|█         | 89/844 [01:14<10:29,  1.20it/s]

batch loss: 0.5218


Epoch 3:  11%|█         | 90/844 [01:15<10:29,  1.20it/s]

batch loss: 0.4179


Epoch 3:  11%|█         | 91/844 [01:15<10:27,  1.20it/s]

batch loss: 0.4708


Epoch 3:  11%|█         | 92/844 [01:16<10:26,  1.20it/s]

batch loss: 0.4649


Epoch 3:  11%|█         | 93/844 [01:17<10:25,  1.20it/s]

batch loss: 0.4554


Epoch 3:  11%|█         | 94/844 [01:18<10:24,  1.20it/s]

batch loss: 0.4585


Epoch 3:  11%|█▏        | 95/844 [01:19<10:24,  1.20it/s]

batch loss: 0.4524


Epoch 3:  11%|█▏        | 96/844 [01:20<10:23,  1.20it/s]

batch loss: 0.3851


Epoch 3:  11%|█▏        | 97/844 [01:20<10:23,  1.20it/s]

batch loss: 0.3889


Epoch 3:  12%|█▏        | 98/844 [01:21<10:20,  1.20it/s]

batch loss: 0.4710


Epoch 3:  12%|█▏        | 99/844 [01:22<10:18,  1.20it/s]

batch loss: 0.4631


Epoch 3:  12%|█▏        | 100/844 [01:23<10:19,  1.20it/s]

batch loss: 0.4703


Epoch 3:  12%|█▏        | 101/844 [01:24<10:20,  1.20it/s]

batch loss: 0.3819


Epoch 3:  12%|█▏        | 102/844 [01:25<10:20,  1.20it/s]

batch loss: 0.4701


Epoch 3:  12%|█▏        | 103/844 [01:25<10:18,  1.20it/s]

batch loss: 0.4229


Epoch 3:  12%|█▏        | 104/844 [01:26<10:19,  1.19it/s]

batch loss: 0.4496


Epoch 3:  12%|█▏        | 105/844 [01:27<10:17,  1.20it/s]

batch loss: 0.4941


Epoch 3:  13%|█▎        | 106/844 [01:28<10:15,  1.20it/s]

batch loss: 0.3692


Epoch 3:  13%|█▎        | 107/844 [01:29<10:15,  1.20it/s]

batch loss: 0.5221


Epoch 3:  13%|█▎        | 108/844 [01:30<10:13,  1.20it/s]

batch loss: 0.4283


Epoch 3:  13%|█▎        | 109/844 [01:30<10:12,  1.20it/s]

batch loss: 0.4593


Epoch 3:  13%|█▎        | 110/844 [01:31<10:13,  1.20it/s]

batch loss: 0.4864


Epoch 3:  13%|█▎        | 111/844 [01:32<10:12,  1.20it/s]

batch loss: 0.4738


Epoch 3:  13%|█▎        | 112/844 [01:33<10:11,  1.20it/s]

batch loss: 0.4877


Epoch 3:  13%|█▎        | 113/844 [01:34<10:10,  1.20it/s]

batch loss: 0.4383


Epoch 3:  14%|█▎        | 114/844 [01:35<10:10,  1.20it/s]

batch loss: 0.3839


Epoch 3:  14%|█▎        | 115/844 [01:35<10:09,  1.20it/s]

batch loss: 0.4022


Epoch 3:  14%|█▎        | 116/844 [01:36<10:08,  1.20it/s]

batch loss: 0.4485


Epoch 3:  14%|█▍        | 117/844 [01:37<10:08,  1.20it/s]

batch loss: 0.4692


Epoch 3:  14%|█▍        | 118/844 [01:38<10:07,  1.19it/s]

batch loss: 0.3580


Epoch 3:  14%|█▍        | 119/844 [01:39<10:07,  1.19it/s]

batch loss: 0.4750


Epoch 3:  14%|█▍        | 120/844 [01:40<10:06,  1.19it/s]

batch loss: 0.4830


Epoch 3:  14%|█▍        | 121/844 [01:40<10:05,  1.19it/s]

batch loss: 0.4323


Epoch 3:  14%|█▍        | 122/844 [01:41<10:03,  1.20it/s]

batch loss: 0.5259


Epoch 3:  15%|█▍        | 123/844 [01:42<10:02,  1.20it/s]

batch loss: 0.4044


Epoch 3:  15%|█▍        | 124/844 [01:43<10:01,  1.20it/s]

batch loss: 0.4939


Epoch 3:  15%|█▍        | 125/844 [01:44<09:58,  1.20it/s]

batch loss: 0.5496


Epoch 3:  15%|█▍        | 126/844 [01:45<09:57,  1.20it/s]

batch loss: 0.5409


Epoch 3:  15%|█▌        | 127/844 [01:45<09:56,  1.20it/s]

batch loss: 0.5130


Epoch 3:  15%|█▌        | 128/844 [01:46<09:55,  1.20it/s]

batch loss: 0.3915


Epoch 3:  15%|█▌        | 129/844 [01:47<09:53,  1.20it/s]

batch loss: 0.4947


Epoch 3:  15%|█▌        | 130/844 [01:48<09:52,  1.21it/s]

batch loss: 0.4607


Epoch 3:  16%|█▌        | 131/844 [01:49<09:52,  1.20it/s]

batch loss: 0.4207


Epoch 3:  16%|█▌        | 132/844 [01:50<09:51,  1.20it/s]

batch loss: 0.5072


Epoch 3:  16%|█▌        | 133/844 [01:50<09:52,  1.20it/s]

batch loss: 0.3939


Epoch 3:  16%|█▌        | 134/844 [01:51<09:51,  1.20it/s]

batch loss: 0.5087


Epoch 3:  16%|█▌        | 135/844 [01:52<09:54,  1.19it/s]

batch loss: 0.4281


Epoch 3:  16%|█▌        | 136/844 [01:53<09:51,  1.20it/s]

batch loss: 0.4759


Epoch 3:  16%|█▌        | 137/844 [01:54<09:49,  1.20it/s]

batch loss: 0.3950


Epoch 3:  16%|█▋        | 138/844 [01:55<09:47,  1.20it/s]

batch loss: 0.4903


Epoch 3:  16%|█▋        | 139/844 [01:55<09:47,  1.20it/s]

batch loss: 0.4597


Epoch 3:  17%|█▋        | 140/844 [01:56<09:48,  1.20it/s]

batch loss: 0.4993


Epoch 3:  17%|█▋        | 141/844 [01:57<09:47,  1.20it/s]

batch loss: 0.5013


Epoch 3:  17%|█▋        | 142/844 [01:58<09:44,  1.20it/s]

batch loss: 0.4369


Epoch 3:  17%|█▋        | 143/844 [01:59<09:42,  1.20it/s]

batch loss: 0.4870


Epoch 3:  17%|█▋        | 144/844 [02:00<09:41,  1.20it/s]

batch loss: 0.5280


Epoch 3:  17%|█▋        | 145/844 [02:00<09:40,  1.20it/s]

batch loss: 0.4409


Epoch 3:  17%|█▋        | 146/844 [02:01<09:40,  1.20it/s]

batch loss: 0.5221


Epoch 3:  17%|█▋        | 147/844 [02:02<09:41,  1.20it/s]

batch loss: 0.3941


Epoch 3:  18%|█▊        | 148/844 [02:03<09:40,  1.20it/s]

batch loss: 0.4297


Epoch 3:  18%|█▊        | 149/844 [02:04<09:39,  1.20it/s]

batch loss: 0.4969


Epoch 3:  18%|█▊        | 150/844 [02:05<09:38,  1.20it/s]

batch loss: 0.4222


Epoch 3:  18%|█▊        | 151/844 [02:05<09:38,  1.20it/s]

batch loss: 0.5030


Epoch 3:  18%|█▊        | 152/844 [02:06<09:36,  1.20it/s]

batch loss: 0.4503


Epoch 3:  18%|█▊        | 153/844 [02:07<09:38,  1.19it/s]

batch loss: 0.4237


Epoch 3:  18%|█▊        | 154/844 [02:08<09:38,  1.19it/s]

batch loss: 0.3435


Epoch 3:  18%|█▊        | 155/844 [02:09<09:34,  1.20it/s]

batch loss: 0.3856


Epoch 3:  18%|█▊        | 156/844 [02:10<09:33,  1.20it/s]

batch loss: 0.4522


Epoch 3:  19%|█▊        | 157/844 [02:10<09:31,  1.20it/s]

batch loss: 0.4140


Epoch 3:  19%|█▊        | 158/844 [02:11<09:30,  1.20it/s]

batch loss: 0.4786


Epoch 3:  19%|█▉        | 159/844 [02:12<09:30,  1.20it/s]

batch loss: 0.4987


Epoch 3:  19%|█▉        | 160/844 [02:13<09:29,  1.20it/s]

batch loss: 0.5084


Epoch 3:  19%|█▉        | 161/844 [02:14<09:28,  1.20it/s]

batch loss: 0.4523


Epoch 3:  19%|█▉        | 162/844 [02:15<09:27,  1.20it/s]

batch loss: 0.5034


Epoch 3:  19%|█▉        | 163/844 [02:15<09:27,  1.20it/s]

batch loss: 0.4632


Epoch 3:  19%|█▉        | 164/844 [02:16<09:27,  1.20it/s]

batch loss: 0.4879


Epoch 3:  20%|█▉        | 165/844 [02:17<09:26,  1.20it/s]

batch loss: 0.4709


Epoch 3:  20%|█▉        | 166/844 [02:18<09:26,  1.20it/s]

batch loss: 0.4622


Epoch 3:  20%|█▉        | 167/844 [02:19<09:25,  1.20it/s]

batch loss: 0.4428


Epoch 3:  20%|█▉        | 168/844 [02:20<09:24,  1.20it/s]

batch loss: 0.4706


Epoch 3:  20%|██        | 169/844 [02:21<09:26,  1.19it/s]

batch loss: 0.4474


Epoch 3:  20%|██        | 170/844 [02:21<09:27,  1.19it/s]

batch loss: 0.4515


Epoch 3:  20%|██        | 171/844 [02:22<09:24,  1.19it/s]

batch loss: 0.3927


Epoch 3:  20%|██        | 172/844 [02:23<09:21,  1.20it/s]

batch loss: 0.5287


Epoch 3:  20%|██        | 173/844 [02:24<09:19,  1.20it/s]

batch loss: 0.4720


Epoch 3:  21%|██        | 174/844 [02:25<09:18,  1.20it/s]

batch loss: 0.4443


Epoch 3:  21%|██        | 175/844 [02:26<09:16,  1.20it/s]

batch loss: 0.4050


Epoch 3:  21%|██        | 176/844 [02:26<09:16,  1.20it/s]

batch loss: 0.5253


Epoch 3:  21%|██        | 177/844 [02:27<09:14,  1.20it/s]

batch loss: 0.4617


Epoch 3:  21%|██        | 178/844 [02:28<09:13,  1.20it/s]

batch loss: 0.3950


Epoch 3:  21%|██        | 179/844 [02:29<09:12,  1.20it/s]

batch loss: 0.4675


Epoch 3:  21%|██▏       | 180/844 [02:30<09:11,  1.20it/s]

batch loss: 0.4933


Epoch 3:  21%|██▏       | 181/844 [02:30<09:10,  1.20it/s]

batch loss: 0.4067


Epoch 3:  22%|██▏       | 182/844 [02:31<09:10,  1.20it/s]

batch loss: 0.5002


Epoch 3:  22%|██▏       | 183/844 [02:32<09:10,  1.20it/s]

batch loss: 0.5493


Epoch 3:  22%|██▏       | 184/844 [02:33<09:10,  1.20it/s]

batch loss: 0.4821


Epoch 3:  22%|██▏       | 185/844 [02:34<09:09,  1.20it/s]

batch loss: 0.4098


Epoch 3:  22%|██▏       | 186/844 [02:35<09:09,  1.20it/s]

batch loss: 0.4659


Epoch 3:  22%|██▏       | 187/844 [02:36<09:07,  1.20it/s]

batch loss: 0.5203


Epoch 3:  22%|██▏       | 188/844 [02:36<09:08,  1.20it/s]

batch loss: 0.5069


Epoch 3:  22%|██▏       | 189/844 [02:37<09:08,  1.19it/s]

batch loss: 0.4877


Epoch 3:  23%|██▎       | 190/844 [02:38<09:06,  1.20it/s]

batch loss: 0.5475


Epoch 3:  23%|██▎       | 191/844 [02:39<09:04,  1.20it/s]

batch loss: 0.5066


Epoch 3:  23%|██▎       | 192/844 [02:40<09:03,  1.20it/s]

batch loss: 0.5216


Epoch 3:  23%|██▎       | 193/844 [02:41<09:03,  1.20it/s]

batch loss: 0.4806


Epoch 3:  23%|██▎       | 194/844 [02:41<09:02,  1.20it/s]

batch loss: 0.4675


Epoch 3:  23%|██▎       | 195/844 [02:42<09:01,  1.20it/s]

batch loss: 0.5163


Epoch 3:  23%|██▎       | 196/844 [02:43<08:58,  1.20it/s]

batch loss: 0.5394


Epoch 3:  23%|██▎       | 197/844 [02:44<08:57,  1.20it/s]

batch loss: 0.4988


Epoch 3:  23%|██▎       | 198/844 [02:45<08:56,  1.20it/s]

batch loss: 0.3844


Epoch 3:  24%|██▎       | 199/844 [02:46<08:59,  1.20it/s]

batch loss: 0.4275


Epoch 3:  24%|██▎       | 200/844 [02:46<08:58,  1.20it/s]

batch loss: 0.4578


Epoch 3:  24%|██▍       | 201/844 [02:47<08:57,  1.20it/s]

batch loss: 0.4310


Epoch 3:  24%|██▍       | 202/844 [02:48<08:55,  1.20it/s]

batch loss: 0.4231


Epoch 3:  24%|██▍       | 203/844 [02:49<08:53,  1.20it/s]

batch loss: 0.5349


Epoch 3:  24%|██▍       | 204/844 [02:50<08:52,  1.20it/s]

batch loss: 0.4489


Epoch 3:  24%|██▍       | 205/844 [02:51<08:51,  1.20it/s]

batch loss: 0.4698


Epoch 3:  24%|██▍       | 206/844 [02:51<08:51,  1.20it/s]

batch loss: 0.4286


Epoch 3:  25%|██▍       | 207/844 [02:52<08:52,  1.20it/s]

batch loss: 0.4580


Epoch 3:  25%|██▍       | 208/844 [02:53<08:49,  1.20it/s]

batch loss: 0.4390


Epoch 3:  25%|██▍       | 209/844 [02:54<08:48,  1.20it/s]

batch loss: 0.5153


Epoch 3:  25%|██▍       | 210/844 [02:55<08:47,  1.20it/s]

batch loss: 0.5366


Epoch 3:  25%|██▌       | 211/844 [02:56<08:47,  1.20it/s]

batch loss: 0.4431


Epoch 3:  25%|██▌       | 212/844 [02:56<08:47,  1.20it/s]

batch loss: 0.5114


Epoch 3:  25%|██▌       | 213/844 [02:57<08:45,  1.20it/s]

batch loss: 0.4279


Epoch 3:  25%|██▌       | 214/844 [02:58<08:44,  1.20it/s]

batch loss: 0.4609


Epoch 3:  25%|██▌       | 215/844 [02:59<08:45,  1.20it/s]

batch loss: 0.4367


Epoch 3:  26%|██▌       | 216/844 [03:00<08:45,  1.20it/s]

batch loss: 0.4800


Epoch 3:  26%|██▌       | 217/844 [03:01<08:44,  1.20it/s]

batch loss: 0.5019


Epoch 3:  26%|██▌       | 218/844 [03:01<08:43,  1.20it/s]

batch loss: 0.5358


Epoch 3:  26%|██▌       | 219/844 [03:02<08:41,  1.20it/s]

batch loss: 0.4552


Epoch 3:  26%|██▌       | 220/844 [03:03<08:40,  1.20it/s]

batch loss: 0.5098


Epoch 3:  26%|██▌       | 221/844 [03:04<08:40,  1.20it/s]

batch loss: 0.4704


Epoch 3:  26%|██▋       | 222/844 [03:05<08:40,  1.19it/s]

batch loss: 0.4786


Epoch 3:  26%|██▋       | 223/844 [03:06<08:37,  1.20it/s]

batch loss: 0.4679


Epoch 3:  27%|██▋       | 224/844 [03:06<08:36,  1.20it/s]

batch loss: 0.3787


Epoch 3:  27%|██▋       | 225/844 [03:07<08:34,  1.20it/s]

batch loss: 0.4690


Epoch 3:  27%|██▋       | 226/844 [03:08<08:35,  1.20it/s]

batch loss: 0.5250


Epoch 3:  27%|██▋       | 227/844 [03:09<08:34,  1.20it/s]

batch loss: 0.4332


Epoch 3:  27%|██▋       | 228/844 [03:10<08:34,  1.20it/s]

batch loss: 0.4812


Epoch 3:  27%|██▋       | 229/844 [03:11<08:31,  1.20it/s]

batch loss: 0.4133


Epoch 3:  27%|██▋       | 230/844 [03:11<08:31,  1.20it/s]

batch loss: 0.4591


Epoch 3:  27%|██▋       | 231/844 [03:12<08:30,  1.20it/s]

batch loss: 0.4665


Epoch 3:  27%|██▋       | 232/844 [03:13<08:30,  1.20it/s]

batch loss: 0.3678


Epoch 3:  28%|██▊       | 233/844 [03:14<08:29,  1.20it/s]

batch loss: 0.4772


Epoch 3:  28%|██▊       | 234/844 [03:15<08:28,  1.20it/s]

batch loss: 0.4085


Epoch 3:  28%|██▊       | 235/844 [03:16<08:26,  1.20it/s]

batch loss: 0.4619


Epoch 3:  28%|██▊       | 236/844 [03:16<08:25,  1.20it/s]

batch loss: 0.5317


Epoch 3:  28%|██▊       | 237/844 [03:17<08:25,  1.20it/s]

batch loss: 0.4594


Epoch 3:  28%|██▊       | 238/844 [03:18<08:25,  1.20it/s]

batch loss: 0.5553


Epoch 3:  28%|██▊       | 239/844 [03:19<08:24,  1.20it/s]

batch loss: 0.4249


Epoch 3:  28%|██▊       | 240/844 [03:20<08:21,  1.20it/s]

batch loss: 0.5290


Epoch 3:  29%|██▊       | 241/844 [03:21<08:21,  1.20it/s]

batch loss: 0.4487


Epoch 3:  29%|██▊       | 242/844 [03:21<08:20,  1.20it/s]

batch loss: 0.4408


Epoch 3:  29%|██▉       | 243/844 [03:22<08:19,  1.20it/s]

batch loss: 0.4266


Epoch 3:  29%|██▉       | 244/844 [03:23<08:20,  1.20it/s]

batch loss: 0.4108


Epoch 3:  29%|██▉       | 245/844 [03:24<08:19,  1.20it/s]

batch loss: 0.4607


Epoch 3:  29%|██▉       | 246/844 [03:25<08:18,  1.20it/s]

batch loss: 0.4411


Epoch 3:  29%|██▉       | 247/844 [03:26<08:17,  1.20it/s]

batch loss: 0.4713


Epoch 3:  29%|██▉       | 248/844 [03:26<08:19,  1.19it/s]

batch loss: 0.4574


Epoch 3:  30%|██▉       | 249/844 [03:27<08:17,  1.20it/s]

batch loss: 0.4201


Epoch 3:  30%|██▉       | 250/844 [03:28<08:16,  1.20it/s]

batch loss: 0.5108


Epoch 3:  30%|██▉       | 251/844 [03:29<08:15,  1.20it/s]

batch loss: 0.5552


Epoch 3:  30%|██▉       | 252/844 [03:30<08:13,  1.20it/s]

batch loss: 0.4570


Epoch 3:  30%|██▉       | 253/844 [03:31<08:12,  1.20it/s]

batch loss: 0.4779


Epoch 3:  30%|███       | 254/844 [03:31<08:11,  1.20it/s]

batch loss: 0.4209


Epoch 3:  30%|███       | 255/844 [03:32<08:10,  1.20it/s]

batch loss: 0.5432


Epoch 3:  30%|███       | 256/844 [03:33<08:09,  1.20it/s]

batch loss: 0.4775


Epoch 3:  30%|███       | 257/844 [03:34<08:08,  1.20it/s]

batch loss: 0.5450


Epoch 3:  31%|███       | 258/844 [03:35<08:07,  1.20it/s]

batch loss: 0.5049


Epoch 3:  31%|███       | 259/844 [03:36<08:06,  1.20it/s]

batch loss: 0.4462


Epoch 3:  31%|███       | 260/844 [03:36<08:05,  1.20it/s]

batch loss: 0.5582


Epoch 3:  31%|███       | 261/844 [03:37<08:07,  1.20it/s]

batch loss: 0.4110


Epoch 3:  31%|███       | 262/844 [03:38<08:05,  1.20it/s]

batch loss: 0.4909


Epoch 3:  31%|███       | 263/844 [03:39<08:04,  1.20it/s]

batch loss: 0.4803


Epoch 3:  31%|███▏      | 264/844 [03:40<08:05,  1.19it/s]

batch loss: 0.4681


Epoch 3:  31%|███▏      | 265/844 [03:41<08:06,  1.19it/s]

batch loss: 0.5683


Epoch 3:  32%|███▏      | 266/844 [03:41<08:03,  1.19it/s]

batch loss: 0.5362


Epoch 3:  32%|███▏      | 267/844 [03:42<08:01,  1.20it/s]

batch loss: 0.4610


Epoch 3:  32%|███▏      | 268/844 [03:43<08:01,  1.20it/s]

batch loss: 0.4820


Epoch 3:  32%|███▏      | 269/844 [03:44<08:01,  1.19it/s]

batch loss: 0.4319


Epoch 3:  32%|███▏      | 270/844 [03:45<08:00,  1.19it/s]

batch loss: 0.5368


Epoch 3:  32%|███▏      | 271/844 [03:46<07:58,  1.20it/s]

batch loss: 0.5074


Epoch 3:  32%|███▏      | 272/844 [03:46<07:56,  1.20it/s]

batch loss: 0.4446


Epoch 3:  32%|███▏      | 273/844 [03:47<07:55,  1.20it/s]

batch loss: 0.4152


Epoch 3:  32%|███▏      | 274/844 [03:48<07:54,  1.20it/s]

batch loss: 0.4743


Epoch 3:  33%|███▎      | 275/844 [03:49<07:54,  1.20it/s]

batch loss: 0.5161


Epoch 3:  33%|███▎      | 276/844 [03:50<07:54,  1.20it/s]

batch loss: 0.4251


Epoch 3:  33%|███▎      | 277/844 [03:51<07:52,  1.20it/s]

batch loss: 0.4565


Epoch 3:  33%|███▎      | 278/844 [03:51<07:50,  1.20it/s]

batch loss: 0.4531


Epoch 3:  33%|███▎      | 279/844 [03:52<07:49,  1.20it/s]

batch loss: 0.4371


Epoch 3:  33%|███▎      | 280/844 [03:53<07:49,  1.20it/s]

batch loss: 0.5179


Epoch 3:  33%|███▎      | 281/844 [03:54<07:50,  1.20it/s]

batch loss: 0.4713


Epoch 3:  33%|███▎      | 282/844 [03:55<07:50,  1.19it/s]

batch loss: 0.4860


Epoch 3:  34%|███▎      | 283/844 [03:56<07:49,  1.19it/s]

batch loss: 0.4638


Epoch 3:  34%|███▎      | 284/844 [03:56<07:47,  1.20it/s]

batch loss: 0.4198


Epoch 3:  34%|███▍      | 285/844 [03:57<07:45,  1.20it/s]

batch loss: 0.4446


Epoch 3:  34%|███▍      | 286/844 [03:58<07:45,  1.20it/s]

batch loss: 0.4648


Epoch 3:  34%|███▍      | 287/844 [03:59<07:43,  1.20it/s]

batch loss: 0.5211


Epoch 3:  34%|███▍      | 288/844 [04:00<07:42,  1.20it/s]

batch loss: 0.5133


Epoch 3:  34%|███▍      | 289/844 [04:01<07:41,  1.20it/s]

batch loss: 0.5182


Epoch 3:  34%|███▍      | 290/844 [04:01<07:41,  1.20it/s]

batch loss: 0.4923


Epoch 3:  34%|███▍      | 291/844 [04:02<07:42,  1.20it/s]

batch loss: 0.4626


Epoch 3:  35%|███▍      | 292/844 [04:03<07:40,  1.20it/s]

batch loss: 0.5334


Epoch 3:  35%|███▍      | 293/844 [04:04<07:39,  1.20it/s]

batch loss: 0.4824


Epoch 3:  35%|███▍      | 294/844 [04:05<07:37,  1.20it/s]

batch loss: 0.5119


Epoch 3:  35%|███▍      | 295/844 [04:06<07:37,  1.20it/s]

batch loss: 0.4913


Epoch 3:  35%|███▌      | 296/844 [04:06<07:37,  1.20it/s]

batch loss: 0.4229


Epoch 3:  35%|███▌      | 297/844 [04:07<07:36,  1.20it/s]

batch loss: 0.4872


Epoch 3:  35%|███▌      | 298/844 [04:08<07:35,  1.20it/s]

batch loss: 0.4808


Epoch 3:  35%|███▌      | 299/844 [04:09<07:34,  1.20it/s]

batch loss: 0.3977


Epoch 3:  36%|███▌      | 300/844 [04:10<07:35,  1.20it/s]

batch loss: 0.5138


Epoch 3:  36%|███▌      | 301/844 [04:11<07:34,  1.20it/s]

batch loss: 0.4557


Epoch 3:  36%|███▌      | 302/844 [04:11<07:33,  1.20it/s]

batch loss: 0.4781


Epoch 3:  36%|███▌      | 303/844 [04:12<07:33,  1.19it/s]

batch loss: 0.4860


Epoch 3:  36%|███▌      | 304/844 [04:13<07:33,  1.19it/s]

batch loss: 0.4281


Epoch 3:  36%|███▌      | 305/844 [04:14<07:32,  1.19it/s]

batch loss: 0.4501


Epoch 3:  36%|███▋      | 306/844 [04:15<07:29,  1.20it/s]

batch loss: 0.4917


Epoch 3:  36%|███▋      | 307/844 [04:16<07:28,  1.20it/s]

batch loss: 0.5089


Epoch 3:  36%|███▋      | 308/844 [04:16<07:28,  1.19it/s]

batch loss: 0.4811


Epoch 3:  37%|███▋      | 309/844 [04:17<07:27,  1.20it/s]

batch loss: 0.4467


Epoch 3:  37%|███▋      | 310/844 [04:18<07:25,  1.20it/s]

batch loss: 0.4412


Epoch 3:  37%|███▋      | 311/844 [04:19<07:23,  1.20it/s]

batch loss: 0.5446


Epoch 3:  37%|███▋      | 312/844 [04:20<07:23,  1.20it/s]

batch loss: 0.5428


Epoch 3:  37%|███▋      | 313/844 [04:21<07:23,  1.20it/s]

batch loss: 0.4374


Epoch 3:  37%|███▋      | 314/844 [04:21<07:22,  1.20it/s]

batch loss: 0.4767


Epoch 3:  37%|███▋      | 315/844 [04:22<07:21,  1.20it/s]

batch loss: 0.5338


Epoch 3:  37%|███▋      | 316/844 [04:23<07:21,  1.20it/s]

batch loss: 0.4840


Epoch 3:  38%|███▊      | 317/844 [04:24<07:20,  1.20it/s]

batch loss: 0.4511


Epoch 3:  38%|███▊      | 318/844 [04:25<07:20,  1.20it/s]

batch loss: 0.4023


Epoch 3:  38%|███▊      | 319/844 [04:26<07:20,  1.19it/s]

batch loss: 0.4441


Epoch 3:  38%|███▊      | 320/844 [04:26<07:20,  1.19it/s]

batch loss: 0.4943


Epoch 3:  38%|███▊      | 321/844 [04:27<07:18,  1.19it/s]

batch loss: 0.4514


Epoch 3:  38%|███▊      | 322/844 [04:28<07:17,  1.19it/s]

batch loss: 0.5416


Epoch 3:  38%|███▊      | 323/844 [04:29<07:16,  1.19it/s]

batch loss: 0.5270


Epoch 3:  38%|███▊      | 324/844 [04:30<07:14,  1.20it/s]

batch loss: 0.5687


Epoch 3:  39%|███▊      | 325/844 [04:31<07:11,  1.20it/s]

batch loss: 0.3971


Epoch 3:  39%|███▊      | 326/844 [04:31<07:10,  1.20it/s]

batch loss: 0.4820


Epoch 3:  39%|███▊      | 327/844 [04:32<07:10,  1.20it/s]

batch loss: 0.4291


Epoch 3:  39%|███▉      | 328/844 [04:33<07:09,  1.20it/s]

batch loss: 0.3722


Epoch 3:  39%|███▉      | 329/844 [04:34<07:09,  1.20it/s]

batch loss: 0.4515


Epoch 3:  39%|███▉      | 330/844 [04:35<07:08,  1.20it/s]

batch loss: 0.4135


Epoch 3:  39%|███▉      | 331/844 [04:36<07:06,  1.20it/s]

batch loss: 0.4170


Epoch 3:  39%|███▉      | 332/844 [04:36<07:06,  1.20it/s]

batch loss: 0.4522


Epoch 3:  39%|███▉      | 333/844 [04:37<07:05,  1.20it/s]

batch loss: 0.4220


Epoch 3:  40%|███▉      | 334/844 [04:38<07:06,  1.20it/s]

batch loss: 0.5105


Epoch 3:  40%|███▉      | 335/844 [04:39<07:07,  1.19it/s]

batch loss: 0.5082


Epoch 3:  40%|███▉      | 336/844 [04:40<07:08,  1.19it/s]

batch loss: 0.4218


Epoch 3:  40%|███▉      | 337/844 [04:41<07:04,  1.19it/s]

batch loss: 0.4443


Epoch 3:  40%|████      | 338/844 [04:42<07:03,  1.20it/s]

batch loss: 0.5574


Epoch 3:  40%|████      | 339/844 [04:42<07:04,  1.19it/s]

batch loss: 0.5043


Epoch 3:  40%|████      | 340/844 [04:43<07:02,  1.19it/s]

batch loss: 0.4337


Epoch 3:  40%|████      | 341/844 [04:44<07:01,  1.19it/s]

batch loss: 0.4347


Epoch 3:  41%|████      | 342/844 [04:45<06:59,  1.20it/s]

batch loss: 0.4353


Epoch 3:  41%|████      | 343/844 [04:46<06:56,  1.20it/s]

batch loss: 0.5306


Epoch 3:  41%|████      | 344/844 [04:47<06:57,  1.20it/s]

batch loss: 0.4787


Epoch 3:  41%|████      | 345/844 [04:47<06:56,  1.20it/s]

batch loss: 0.4510


Epoch 3:  41%|████      | 346/844 [04:48<06:56,  1.19it/s]

batch loss: 0.4584


Epoch 3:  41%|████      | 347/844 [04:49<06:54,  1.20it/s]

batch loss: 0.4814


Epoch 3:  41%|████      | 348/844 [04:50<06:52,  1.20it/s]

batch loss: 0.4186


Epoch 3:  41%|████▏     | 349/844 [04:51<06:52,  1.20it/s]

batch loss: 0.4973


Epoch 3:  41%|████▏     | 350/844 [04:52<06:54,  1.19it/s]

batch loss: 0.4506


Epoch 3:  42%|████▏     | 351/844 [04:52<06:54,  1.19it/s]

batch loss: 0.4789


Epoch 3:  42%|████▏     | 352/844 [04:53<06:52,  1.19it/s]

batch loss: 0.4770


Epoch 3:  42%|████▏     | 353/844 [04:54<06:49,  1.20it/s]

batch loss: 0.4389


Epoch 3:  42%|████▏     | 354/844 [04:55<06:50,  1.19it/s]

batch loss: 0.4753


Epoch 3:  42%|████▏     | 355/844 [04:56<06:48,  1.20it/s]

batch loss: 0.5163


Epoch 3:  42%|████▏     | 356/844 [04:57<06:47,  1.20it/s]

batch loss: 0.3981


Epoch 3:  42%|████▏     | 357/844 [04:57<06:45,  1.20it/s]

batch loss: 0.5141


Epoch 3:  42%|████▏     | 358/844 [04:58<06:43,  1.20it/s]

batch loss: 0.4903


Epoch 3:  43%|████▎     | 359/844 [04:59<06:43,  1.20it/s]

batch loss: 0.4007


Epoch 3:  43%|████▎     | 360/844 [05:00<06:42,  1.20it/s]

batch loss: 0.4297


Epoch 3:  43%|████▎     | 361/844 [05:01<06:42,  1.20it/s]

batch loss: 0.4221


Epoch 3:  43%|████▎     | 362/844 [05:02<06:41,  1.20it/s]

batch loss: 0.4371


Epoch 3:  43%|████▎     | 363/844 [05:02<06:39,  1.20it/s]

batch loss: 0.3918


Epoch 3:  43%|████▎     | 364/844 [05:03<06:39,  1.20it/s]

batch loss: 0.4530


Epoch 3:  43%|████▎     | 365/844 [05:04<06:39,  1.20it/s]

batch loss: 0.5294


Epoch 3:  43%|████▎     | 366/844 [05:05<06:39,  1.20it/s]

batch loss: 0.4308


Epoch 3:  43%|████▎     | 367/844 [05:06<06:38,  1.20it/s]

batch loss: 0.4550


Epoch 3:  44%|████▎     | 368/844 [05:07<06:37,  1.20it/s]

batch loss: 0.4352


Epoch 3:  44%|████▎     | 369/844 [05:07<06:36,  1.20it/s]

batch loss: 0.5178


Epoch 3:  44%|████▍     | 370/844 [05:08<06:36,  1.20it/s]

batch loss: 0.4039


Epoch 3:  44%|████▍     | 371/844 [05:09<06:35,  1.20it/s]

batch loss: 0.4659


Epoch 3:  44%|████▍     | 372/844 [05:10<06:35,  1.19it/s]

batch loss: 0.4582


Epoch 3:  44%|████▍     | 373/844 [05:11<06:33,  1.20it/s]

batch loss: 0.4848


Epoch 3:  44%|████▍     | 374/844 [05:12<06:31,  1.20it/s]

batch loss: 0.5446


Epoch 3:  44%|████▍     | 375/844 [05:12<06:30,  1.20it/s]

batch loss: 0.4413


Epoch 3:  45%|████▍     | 376/844 [05:13<06:29,  1.20it/s]

batch loss: 0.4338


Epoch 3:  45%|████▍     | 377/844 [05:14<06:28,  1.20it/s]

batch loss: 0.4238


Epoch 3:  45%|████▍     | 378/844 [05:15<06:27,  1.20it/s]

batch loss: 0.4479


Epoch 3:  45%|████▍     | 379/844 [05:16<06:27,  1.20it/s]

batch loss: 0.3884


Epoch 3:  45%|████▌     | 380/844 [05:17<06:26,  1.20it/s]

batch loss: 0.4891


Epoch 3:  45%|████▌     | 381/844 [05:17<06:24,  1.20it/s]

batch loss: 0.4570


Epoch 3:  45%|████▌     | 382/844 [05:18<06:23,  1.20it/s]

batch loss: 0.5188


Epoch 3:  45%|████▌     | 383/844 [05:19<06:24,  1.20it/s]

batch loss: 0.4600


Epoch 3:  45%|████▌     | 384/844 [05:20<06:24,  1.20it/s]

batch loss: 0.4228


Epoch 3:  46%|████▌     | 385/844 [05:21<06:22,  1.20it/s]

batch loss: 0.4799


Epoch 3:  46%|████▌     | 386/844 [05:22<06:21,  1.20it/s]

batch loss: 0.4386


Epoch 3:  46%|████▌     | 387/844 [05:22<06:20,  1.20it/s]

batch loss: 0.4801


Epoch 3:  46%|████▌     | 388/844 [05:23<06:19,  1.20it/s]

batch loss: 0.4245


Epoch 3:  46%|████▌     | 389/844 [05:24<06:17,  1.20it/s]

batch loss: 0.5012


Epoch 3:  46%|████▌     | 390/844 [05:25<06:17,  1.20it/s]

batch loss: 0.4993


Epoch 3:  46%|████▋     | 391/844 [05:26<06:15,  1.21it/s]

batch loss: 0.5123


Epoch 3:  46%|████▋     | 392/844 [05:27<06:14,  1.21it/s]

batch loss: 0.5457


Epoch 3:  47%|████▋     | 393/844 [05:27<06:14,  1.20it/s]

batch loss: 0.4694


Epoch 3:  47%|████▋     | 394/844 [05:28<06:13,  1.20it/s]

batch loss: 0.4535


Epoch 3:  47%|████▋     | 395/844 [05:29<06:12,  1.20it/s]

batch loss: 0.5448


Epoch 3:  47%|████▋     | 396/844 [05:30<06:12,  1.20it/s]

batch loss: 0.4434


Epoch 3:  47%|████▋     | 397/844 [05:31<06:12,  1.20it/s]

batch loss: 0.4524


Epoch 3:  47%|████▋     | 398/844 [05:32<06:11,  1.20it/s]

batch loss: 0.4425


Epoch 3:  47%|████▋     | 399/844 [05:32<06:10,  1.20it/s]

batch loss: 0.5069


Epoch 3:  47%|████▋     | 400/844 [05:33<06:10,  1.20it/s]

batch loss: 0.5157


Epoch 3:  48%|████▊     | 401/844 [05:34<06:08,  1.20it/s]

batch loss: 0.4096


Epoch 3:  48%|████▊     | 402/844 [05:35<06:08,  1.20it/s]

batch loss: 0.4446


Epoch 3:  48%|████▊     | 403/844 [05:36<06:09,  1.19it/s]

batch loss: 0.4289


Epoch 3:  48%|████▊     | 404/844 [05:37<06:07,  1.20it/s]

batch loss: 0.3972


Epoch 3:  48%|████▊     | 405/844 [05:37<06:05,  1.20it/s]

batch loss: 0.4498


Epoch 3:  48%|████▊     | 406/844 [05:38<06:03,  1.20it/s]

batch loss: 0.4709


Epoch 3:  48%|████▊     | 407/844 [05:39<06:04,  1.20it/s]

batch loss: 0.4649


Epoch 3:  48%|████▊     | 408/844 [05:40<06:03,  1.20it/s]

batch loss: 0.4029


Epoch 3:  48%|████▊     | 409/844 [05:41<06:03,  1.20it/s]

batch loss: 0.4761


Epoch 3:  49%|████▊     | 410/844 [05:42<06:01,  1.20it/s]

batch loss: 0.5104


Epoch 3:  49%|████▊     | 411/844 [05:42<06:01,  1.20it/s]

batch loss: 0.4464


Epoch 3:  49%|████▉     | 412/844 [05:43<05:59,  1.20it/s]

batch loss: 0.5012


Epoch 3:  49%|████▉     | 413/844 [05:44<05:58,  1.20it/s]

batch loss: 0.4817


Epoch 3:  49%|████▉     | 414/844 [05:45<05:57,  1.20it/s]

batch loss: 0.4557


Epoch 3:  49%|████▉     | 415/844 [05:46<05:56,  1.20it/s]

batch loss: 0.4766


Epoch 3:  49%|████▉     | 416/844 [05:47<05:57,  1.20it/s]

batch loss: 0.3439


Epoch 3:  49%|████▉     | 417/844 [05:47<05:56,  1.20it/s]

batch loss: 0.5411


Epoch 3:  50%|████▉     | 418/844 [05:48<05:55,  1.20it/s]

batch loss: 0.4500


Epoch 3:  50%|████▉     | 419/844 [05:49<05:54,  1.20it/s]

batch loss: 0.4150


Epoch 3:  50%|████▉     | 420/844 [05:50<05:54,  1.19it/s]

batch loss: 0.4068


Epoch 3:  50%|████▉     | 421/844 [05:51<05:53,  1.20it/s]

batch loss: 0.4480


Epoch 3:  50%|█████     | 422/844 [05:52<05:51,  1.20it/s]

batch loss: 0.4427


Epoch 3:  50%|█████     | 423/844 [05:52<05:49,  1.20it/s]

batch loss: 0.4903


Epoch 3:  50%|█████     | 424/844 [05:53<05:49,  1.20it/s]

batch loss: 0.4094


Epoch 3:  50%|█████     | 425/844 [05:54<05:48,  1.20it/s]

batch loss: 0.5555


Epoch 3:  50%|█████     | 426/844 [05:55<05:47,  1.20it/s]

batch loss: 0.4841


Epoch 3:  51%|█████     | 427/844 [05:56<05:46,  1.20it/s]

batch loss: 0.4061


Epoch 3:  51%|█████     | 428/844 [05:57<05:45,  1.20it/s]

batch loss: 0.4993


Epoch 3:  51%|█████     | 429/844 [05:57<05:44,  1.21it/s]

batch loss: 0.5306


Epoch 3:  51%|█████     | 430/844 [05:58<05:43,  1.20it/s]

batch loss: 0.4282


Epoch 3:  51%|█████     | 431/844 [05:59<05:43,  1.20it/s]

batch loss: 0.4695


Epoch 3:  51%|█████     | 432/844 [06:00<05:43,  1.20it/s]

batch loss: 0.5454


Epoch 3:  51%|█████▏    | 433/844 [06:01<05:42,  1.20it/s]

batch loss: 0.5025


Epoch 3:  51%|█████▏    | 434/844 [06:02<05:41,  1.20it/s]

batch loss: 0.4841


Epoch 3:  52%|█████▏    | 435/844 [06:02<05:40,  1.20it/s]

batch loss: 0.4704


Epoch 3:  52%|█████▏    | 436/844 [06:03<05:40,  1.20it/s]

batch loss: 0.5123


Epoch 3:  52%|█████▏    | 437/844 [06:04<05:40,  1.20it/s]

batch loss: 0.4888


Epoch 3:  52%|█████▏    | 438/844 [06:05<05:38,  1.20it/s]

batch loss: 0.4336


Epoch 3:  52%|█████▏    | 439/844 [06:06<05:37,  1.20it/s]

batch loss: 0.4496


Epoch 3:  52%|█████▏    | 440/844 [06:07<05:37,  1.20it/s]

batch loss: 0.4923


Epoch 3:  52%|█████▏    | 441/844 [06:07<05:36,  1.20it/s]

batch loss: 0.4525


Epoch 3:  52%|█████▏    | 442/844 [06:08<05:34,  1.20it/s]

batch loss: 0.4078


Epoch 3:  52%|█████▏    | 443/844 [06:09<05:34,  1.20it/s]

batch loss: 0.4519


Epoch 3:  53%|█████▎    | 444/844 [06:10<05:34,  1.20it/s]

batch loss: 0.4783


Epoch 3:  53%|█████▎    | 445/844 [06:11<05:33,  1.20it/s]

batch loss: 0.5276


Epoch 3:  53%|█████▎    | 446/844 [06:12<05:32,  1.20it/s]

batch loss: 0.4920


Epoch 3:  53%|█████▎    | 447/844 [06:12<05:30,  1.20it/s]

batch loss: 0.4674


Epoch 3:  53%|█████▎    | 448/844 [06:13<05:29,  1.20it/s]

batch loss: 0.4285


Epoch 3:  53%|█████▎    | 449/844 [06:14<05:29,  1.20it/s]

batch loss: 0.4678


Epoch 3:  53%|█████▎    | 450/844 [06:15<05:28,  1.20it/s]

batch loss: 0.4814


Epoch 3:  53%|█████▎    | 451/844 [06:16<05:27,  1.20it/s]

batch loss: 0.5211


Epoch 3:  54%|█████▎    | 452/844 [06:17<05:27,  1.20it/s]

batch loss: 0.4641


Epoch 3:  54%|█████▎    | 453/844 [06:17<05:26,  1.20it/s]

batch loss: 0.3837


Epoch 3:  54%|█████▍    | 454/844 [06:18<05:25,  1.20it/s]

batch loss: 0.4741


Epoch 3:  54%|█████▍    | 455/844 [06:19<05:24,  1.20it/s]

batch loss: 0.4217


Epoch 3:  54%|█████▍    | 456/844 [06:20<05:23,  1.20it/s]

batch loss: 0.5177


Epoch 3:  54%|█████▍    | 457/844 [06:21<05:21,  1.20it/s]

batch loss: 0.4792


Epoch 3:  54%|█████▍    | 458/844 [06:22<05:21,  1.20it/s]

batch loss: 0.4304


Epoch 3:  54%|█████▍    | 459/844 [06:22<05:22,  1.20it/s]

batch loss: 0.3980


Epoch 3:  55%|█████▍    | 460/844 [06:23<05:20,  1.20it/s]

batch loss: 0.5073


Epoch 3:  55%|█████▍    | 461/844 [06:24<05:18,  1.20it/s]

batch loss: 0.4020


Epoch 3:  55%|█████▍    | 462/844 [06:25<05:17,  1.20it/s]

batch loss: 0.4673


Epoch 3:  55%|█████▍    | 463/844 [06:26<05:16,  1.20it/s]

batch loss: 0.4582


Epoch 3:  55%|█████▍    | 464/844 [06:27<05:16,  1.20it/s]

batch loss: 0.4785


Epoch 3:  55%|█████▌    | 465/844 [06:27<05:16,  1.20it/s]

batch loss: 0.3925


Epoch 3:  55%|█████▌    | 466/844 [06:28<05:15,  1.20it/s]

batch loss: 0.5141


Epoch 3:  55%|█████▌    | 467/844 [06:29<05:14,  1.20it/s]

batch loss: 0.5071


Epoch 3:  55%|█████▌    | 468/844 [06:30<05:14,  1.19it/s]

batch loss: 0.4534


Epoch 3:  56%|█████▌    | 469/844 [06:31<05:12,  1.20it/s]

batch loss: 0.4138


Epoch 3:  56%|█████▌    | 470/844 [06:32<05:11,  1.20it/s]

batch loss: 0.4890


Epoch 3:  56%|█████▌    | 471/844 [06:32<05:11,  1.20it/s]

batch loss: 0.5186


Epoch 3:  56%|█████▌    | 472/844 [06:33<05:10,  1.20it/s]

batch loss: 0.3795


Epoch 3:  56%|█████▌    | 473/844 [06:34<05:10,  1.20it/s]

batch loss: 0.5587


Epoch 3:  56%|█████▌    | 474/844 [06:35<05:09,  1.20it/s]

batch loss: 0.5147


Epoch 3:  56%|█████▋    | 475/844 [06:36<05:08,  1.20it/s]

batch loss: 0.4920


Epoch 3:  56%|█████▋    | 476/844 [06:37<05:06,  1.20it/s]

batch loss: 0.4665


Epoch 3:  57%|█████▋    | 477/844 [06:37<05:05,  1.20it/s]

batch loss: 0.3857


Epoch 3:  57%|█████▋    | 478/844 [06:38<05:04,  1.20it/s]

batch loss: 0.4869


Epoch 3:  57%|█████▋    | 479/844 [06:39<05:05,  1.19it/s]

batch loss: 0.4056


Epoch 3:  57%|█████▋    | 480/844 [06:40<05:04,  1.20it/s]

batch loss: 0.3963


Epoch 3:  57%|█████▋    | 481/844 [06:41<05:03,  1.19it/s]

batch loss: 0.4774


Epoch 3:  57%|█████▋    | 482/844 [06:42<05:02,  1.20it/s]

batch loss: 0.4023


Epoch 3:  57%|█████▋    | 483/844 [06:42<05:01,  1.20it/s]

batch loss: 0.4348


Epoch 3:  57%|█████▋    | 484/844 [06:43<05:01,  1.19it/s]

batch loss: 0.4428


Epoch 3:  57%|█████▋    | 485/844 [06:44<05:01,  1.19it/s]

batch loss: 0.3999


Epoch 3:  58%|█████▊    | 486/844 [06:45<05:00,  1.19it/s]

batch loss: 0.4460


Epoch 3:  58%|█████▊    | 487/844 [06:46<04:58,  1.20it/s]

batch loss: 0.4075


Epoch 3:  58%|█████▊    | 488/844 [06:47<04:57,  1.20it/s]

batch loss: 0.4802


Epoch 3:  58%|█████▊    | 489/844 [06:47<04:55,  1.20it/s]

batch loss: 0.4761


Epoch 3:  58%|█████▊    | 490/844 [06:48<04:55,  1.20it/s]

batch loss: 0.4761


Epoch 3:  58%|█████▊    | 491/844 [06:49<04:54,  1.20it/s]

batch loss: 0.5121


Epoch 3:  58%|█████▊    | 492/844 [06:50<04:53,  1.20it/s]

batch loss: 0.4061


Epoch 3:  58%|█████▊    | 493/844 [06:51<04:52,  1.20it/s]

batch loss: 0.4396


Epoch 3:  59%|█████▊    | 494/844 [06:52<04:50,  1.20it/s]

batch loss: 0.5268


Epoch 3:  59%|█████▊    | 495/844 [06:52<04:50,  1.20it/s]

batch loss: 0.4467


Epoch 3:  59%|█████▉    | 496/844 [06:53<04:49,  1.20it/s]

batch loss: 0.4373


Epoch 3:  59%|█████▉    | 497/844 [06:54<04:48,  1.20it/s]

batch loss: 0.4227


Epoch 3:  59%|█████▉    | 498/844 [06:55<04:49,  1.20it/s]

batch loss: 0.3776


Epoch 3:  59%|█████▉    | 499/844 [06:56<04:49,  1.19it/s]

batch loss: 0.5027


Epoch 3:  59%|█████▉    | 500/844 [06:57<04:48,  1.19it/s]

batch loss: 0.5067


Epoch 3:  59%|█████▉    | 501/844 [06:57<04:46,  1.20it/s]

batch loss: 0.4844


Epoch 3:  59%|█████▉    | 502/844 [06:58<04:46,  1.20it/s]

batch loss: 0.4439


Epoch 3:  60%|█████▉    | 503/844 [06:59<04:45,  1.20it/s]

batch loss: 0.5082


Epoch 3:  60%|█████▉    | 504/844 [07:00<04:44,  1.20it/s]

batch loss: 0.4168


Epoch 3:  60%|█████▉    | 505/844 [07:01<04:43,  1.20it/s]

batch loss: 0.4330


Epoch 3:  60%|█████▉    | 506/844 [07:02<04:41,  1.20it/s]

batch loss: 0.4607


Epoch 3:  60%|██████    | 507/844 [07:02<04:40,  1.20it/s]

batch loss: 0.4856


Epoch 3:  60%|██████    | 508/844 [07:03<04:40,  1.20it/s]

batch loss: 0.5494


Epoch 3:  60%|██████    | 509/844 [07:04<04:40,  1.19it/s]

batch loss: 0.5173


Epoch 3:  60%|██████    | 510/844 [07:05<04:38,  1.20it/s]

batch loss: 0.4978


Epoch 3:  61%|██████    | 511/844 [07:06<04:37,  1.20it/s]

batch loss: 0.4141


Epoch 3:  61%|██████    | 512/844 [07:07<04:35,  1.20it/s]

batch loss: 0.5174


Epoch 3:  61%|██████    | 513/844 [07:07<04:34,  1.20it/s]

batch loss: 0.4209


Epoch 3:  61%|██████    | 514/844 [07:08<04:34,  1.20it/s]

batch loss: 0.4195


Epoch 3:  61%|██████    | 515/844 [07:09<04:34,  1.20it/s]

batch loss: 0.4946


Epoch 3:  61%|██████    | 516/844 [07:10<04:33,  1.20it/s]

batch loss: 0.4303


Epoch 3:  61%|██████▏   | 517/844 [07:11<04:34,  1.19it/s]

batch loss: 0.5684


Epoch 3:  61%|██████▏   | 518/844 [07:12<04:32,  1.20it/s]

batch loss: 0.3790


Epoch 3:  61%|██████▏   | 519/844 [07:12<04:31,  1.20it/s]

batch loss: 0.4494


Epoch 3:  62%|██████▏   | 520/844 [07:13<04:31,  1.19it/s]

batch loss: 0.5066


Epoch 3:  62%|██████▏   | 521/844 [07:14<04:30,  1.20it/s]

batch loss: 0.4938


Epoch 3:  62%|██████▏   | 522/844 [07:15<04:28,  1.20it/s]

batch loss: 0.5756


Epoch 3:  62%|██████▏   | 523/844 [07:16<04:28,  1.20it/s]

batch loss: 0.5162


Epoch 3:  62%|██████▏   | 524/844 [07:17<04:26,  1.20it/s]

batch loss: 0.5217


Epoch 3:  62%|██████▏   | 525/844 [07:17<04:25,  1.20it/s]

batch loss: 0.4879


Epoch 3:  62%|██████▏   | 526/844 [07:18<04:24,  1.20it/s]

batch loss: 0.4339


Epoch 3:  62%|██████▏   | 527/844 [07:19<04:23,  1.20it/s]

batch loss: 0.5078


Epoch 3:  63%|██████▎   | 528/844 [07:20<04:22,  1.20it/s]

batch loss: 0.4008


Epoch 3:  63%|██████▎   | 529/844 [07:21<04:21,  1.21it/s]

batch loss: 0.4306


Epoch 3:  63%|██████▎   | 530/844 [07:22<04:20,  1.21it/s]

batch loss: 0.5239


Epoch 3:  63%|██████▎   | 531/844 [07:22<04:20,  1.20it/s]

batch loss: 0.4289


Epoch 3:  63%|██████▎   | 532/844 [07:23<04:20,  1.20it/s]

batch loss: 0.4773


Epoch 3:  63%|██████▎   | 533/844 [07:24<04:19,  1.20it/s]

batch loss: 0.5046


Epoch 3:  63%|██████▎   | 534/844 [07:25<04:19,  1.20it/s]

batch loss: 0.5135


Epoch 3:  63%|██████▎   | 535/844 [07:26<04:17,  1.20it/s]

batch loss: 0.4616


Epoch 3:  64%|██████▎   | 536/844 [07:27<04:16,  1.20it/s]

batch loss: 0.4552


Epoch 3:  64%|██████▎   | 537/844 [07:27<04:15,  1.20it/s]

batch loss: 0.4175


Epoch 3:  64%|██████▎   | 538/844 [07:28<04:14,  1.20it/s]

batch loss: 0.4621


Epoch 3:  64%|██████▍   | 539/844 [07:29<04:13,  1.20it/s]

batch loss: 0.4645


Epoch 3:  64%|██████▍   | 540/844 [07:30<04:12,  1.20it/s]

batch loss: 0.5308


Epoch 3:  64%|██████▍   | 541/844 [07:31<04:12,  1.20it/s]

batch loss: 0.4858


Epoch 3:  64%|██████▍   | 542/844 [07:32<04:11,  1.20it/s]

batch loss: 0.5229


Epoch 3:  64%|██████▍   | 543/844 [07:32<04:10,  1.20it/s]

batch loss: 0.4471


Epoch 3:  64%|██████▍   | 544/844 [07:33<04:09,  1.20it/s]

batch loss: 0.4182


Epoch 3:  65%|██████▍   | 545/844 [07:34<04:08,  1.20it/s]

batch loss: 0.4812


Epoch 3:  65%|██████▍   | 546/844 [07:35<04:07,  1.20it/s]

batch loss: 0.4559


Epoch 3:  65%|██████▍   | 547/844 [07:36<04:07,  1.20it/s]

batch loss: 0.4059


Epoch 3:  65%|██████▍   | 548/844 [07:37<04:06,  1.20it/s]

batch loss: 0.4093


Epoch 3:  65%|██████▌   | 549/844 [07:37<04:05,  1.20it/s]

batch loss: 0.3703


Epoch 3:  65%|██████▌   | 550/844 [07:38<04:04,  1.20it/s]

batch loss: 0.5269


Epoch 3:  65%|██████▌   | 551/844 [07:39<04:03,  1.20it/s]

batch loss: 0.4307


Epoch 3:  65%|██████▌   | 552/844 [07:40<04:03,  1.20it/s]

batch loss: 0.4675


Epoch 3:  66%|██████▌   | 553/844 [07:41<04:02,  1.20it/s]

batch loss: 0.4334


Epoch 3:  66%|██████▌   | 554/844 [07:42<04:01,  1.20it/s]

batch loss: 0.4831


Epoch 3:  66%|██████▌   | 555/844 [07:42<04:00,  1.20it/s]

batch loss: 0.4613


Epoch 3:  66%|██████▌   | 556/844 [07:43<03:59,  1.20it/s]

batch loss: 0.5135


Epoch 3:  66%|██████▌   | 557/844 [07:44<03:58,  1.20it/s]

batch loss: 0.4943


Epoch 3:  66%|██████▌   | 558/844 [07:45<03:57,  1.20it/s]

batch loss: 0.4878


Epoch 3:  66%|██████▌   | 559/844 [07:46<03:57,  1.20it/s]

batch loss: 0.5358


Epoch 3:  66%|██████▋   | 560/844 [07:47<03:56,  1.20it/s]

batch loss: 0.4167


Epoch 3:  66%|██████▋   | 561/844 [07:47<03:56,  1.20it/s]

batch loss: 0.5220


Epoch 3:  67%|██████▋   | 562/844 [07:48<03:54,  1.20it/s]

batch loss: 0.5286


Epoch 3:  67%|██████▋   | 563/844 [07:49<03:54,  1.20it/s]

batch loss: 0.4524


Epoch 3:  67%|██████▋   | 564/844 [07:50<03:53,  1.20it/s]

batch loss: 0.4000


Epoch 3:  67%|██████▋   | 565/844 [07:51<03:53,  1.20it/s]

batch loss: 0.4179


Epoch 3:  67%|██████▋   | 566/844 [07:52<03:51,  1.20it/s]

batch loss: 0.4328


Epoch 3:  67%|██████▋   | 567/844 [07:52<03:52,  1.19it/s]

batch loss: 0.5071


Epoch 3:  67%|██████▋   | 568/844 [07:53<03:50,  1.20it/s]

batch loss: 0.4186


Epoch 3:  67%|██████▋   | 569/844 [07:54<03:49,  1.20it/s]

batch loss: 0.5057


Epoch 3:  68%|██████▊   | 570/844 [07:55<03:49,  1.20it/s]

batch loss: 0.4256


Epoch 3:  68%|██████▊   | 571/844 [07:56<03:47,  1.20it/s]

batch loss: 0.4567


Epoch 3:  68%|██████▊   | 572/844 [07:57<03:46,  1.20it/s]

batch loss: 0.5029


Epoch 3:  68%|██████▊   | 573/844 [07:57<03:45,  1.20it/s]

batch loss: 0.4503


Epoch 3:  68%|██████▊   | 574/844 [07:58<03:44,  1.20it/s]

batch loss: 0.4578


Epoch 3:  68%|██████▊   | 575/844 [07:59<03:44,  1.20it/s]

batch loss: 0.4697


Epoch 3:  68%|██████▊   | 576/844 [08:00<03:42,  1.20it/s]

batch loss: 0.4644


Epoch 3:  68%|██████▊   | 577/844 [08:01<03:42,  1.20it/s]

batch loss: 0.4876


Epoch 3:  68%|██████▊   | 578/844 [08:02<03:41,  1.20it/s]

batch loss: 0.3524


Epoch 3:  69%|██████▊   | 579/844 [08:02<03:40,  1.20it/s]

batch loss: 0.4366


Epoch 3:  69%|██████▊   | 580/844 [08:03<03:39,  1.20it/s]

batch loss: 0.4632


Epoch 3:  69%|██████▉   | 581/844 [08:04<03:39,  1.20it/s]

batch loss: 0.4151


Epoch 3:  69%|██████▉   | 582/844 [08:05<03:38,  1.20it/s]

batch loss: 0.5393


Epoch 3:  69%|██████▉   | 583/844 [08:06<03:39,  1.19it/s]

batch loss: 0.5039


Epoch 3:  69%|██████▉   | 584/844 [08:07<03:38,  1.19it/s]

batch loss: 0.4642


Epoch 3:  69%|██████▉   | 585/844 [08:07<03:36,  1.19it/s]

batch loss: 0.4810


Epoch 3:  69%|██████▉   | 586/844 [08:08<03:35,  1.20it/s]

batch loss: 0.4532


Epoch 3:  70%|██████▉   | 587/844 [08:09<03:34,  1.20it/s]

batch loss: 0.3717


Epoch 3:  70%|██████▉   | 588/844 [08:10<03:34,  1.19it/s]

batch loss: 0.4460


Epoch 3:  70%|██████▉   | 589/844 [08:11<03:33,  1.20it/s]

batch loss: 0.4406


Epoch 3:  70%|██████▉   | 590/844 [08:12<03:32,  1.20it/s]

batch loss: 0.4913


Epoch 3:  70%|███████   | 591/844 [08:12<03:30,  1.20it/s]

batch loss: 0.3859


Epoch 3:  70%|███████   | 592/844 [08:13<03:29,  1.20it/s]

batch loss: 0.4305


Epoch 3:  70%|███████   | 593/844 [08:14<03:28,  1.20it/s]

batch loss: 0.4750


Epoch 3:  70%|███████   | 594/844 [08:15<03:27,  1.20it/s]

batch loss: 0.4688


Epoch 3:  70%|███████   | 595/844 [08:16<03:27,  1.20it/s]

batch loss: 0.4485


Epoch 3:  71%|███████   | 596/844 [08:17<03:26,  1.20it/s]

batch loss: 0.4197


Epoch 3:  71%|███████   | 597/844 [08:17<03:25,  1.20it/s]

batch loss: 0.5074


Epoch 3:  71%|███████   | 598/844 [08:18<03:24,  1.20it/s]

batch loss: 0.4826


Epoch 3:  71%|███████   | 599/844 [08:19<03:23,  1.20it/s]

batch loss: 0.4290


Epoch 3:  71%|███████   | 600/844 [08:20<03:23,  1.20it/s]

batch loss: 0.5496


Epoch 3:  71%|███████   | 601/844 [08:21<03:22,  1.20it/s]

batch loss: 0.5463


Epoch 3:  71%|███████▏  | 602/844 [08:22<03:22,  1.20it/s]

batch loss: 0.5479


Epoch 3:  71%|███████▏  | 603/844 [08:22<03:21,  1.20it/s]

batch loss: 0.4639


Epoch 3:  72%|███████▏  | 604/844 [08:23<03:20,  1.20it/s]

batch loss: 0.4295


Epoch 3:  72%|███████▏  | 605/844 [08:24<03:19,  1.20it/s]

batch loss: 0.4363


Epoch 3:  72%|███████▏  | 606/844 [08:25<03:18,  1.20it/s]

batch loss: 0.4933


Epoch 3:  72%|███████▏  | 607/844 [08:26<03:17,  1.20it/s]

batch loss: 0.4632


Epoch 3:  72%|███████▏  | 608/844 [08:27<03:16,  1.20it/s]

batch loss: 0.4465


Epoch 3:  72%|███████▏  | 609/844 [08:27<03:16,  1.20it/s]

batch loss: 0.4699


Epoch 3:  72%|███████▏  | 610/844 [08:28<03:14,  1.20it/s]

batch loss: 0.4947


Epoch 3:  72%|███████▏  | 611/844 [08:29<03:13,  1.20it/s]

batch loss: 0.4598


Epoch 3:  73%|███████▎  | 612/844 [08:30<03:12,  1.20it/s]

batch loss: 0.4911


Epoch 3:  73%|███████▎  | 613/844 [08:31<03:12,  1.20it/s]

batch loss: 0.4532


Epoch 3:  73%|███████▎  | 614/844 [08:32<03:11,  1.20it/s]

batch loss: 0.4972


Epoch 3:  73%|███████▎  | 615/844 [08:32<03:10,  1.20it/s]

batch loss: 0.5289


Epoch 3:  73%|███████▎  | 616/844 [08:33<03:10,  1.20it/s]

batch loss: 0.4705


Epoch 3:  73%|███████▎  | 617/844 [08:34<03:10,  1.19it/s]

batch loss: 0.4265


Epoch 3:  73%|███████▎  | 618/844 [08:35<03:08,  1.20it/s]

batch loss: 0.4081


Epoch 3:  73%|███████▎  | 619/844 [08:36<03:07,  1.20it/s]

batch loss: 0.4730


Epoch 3:  73%|███████▎  | 620/844 [08:37<03:06,  1.20it/s]

batch loss: 0.4844


Epoch 3:  74%|███████▎  | 621/844 [08:37<03:05,  1.20it/s]

batch loss: 0.3992


Epoch 3:  74%|███████▎  | 622/844 [08:38<03:04,  1.20it/s]

batch loss: 0.4591


Epoch 3:  74%|███████▍  | 623/844 [08:39<03:03,  1.20it/s]

batch loss: 0.5028


Epoch 3:  74%|███████▍  | 624/844 [08:40<03:02,  1.21it/s]

batch loss: 0.4355


Epoch 3:  74%|███████▍  | 625/844 [08:41<03:01,  1.21it/s]

batch loss: 0.5115


Epoch 3:  74%|███████▍  | 626/844 [08:42<03:00,  1.21it/s]

batch loss: 0.5154


Epoch 3:  74%|███████▍  | 627/844 [08:42<03:00,  1.20it/s]

batch loss: 0.4666


Epoch 3:  74%|███████▍  | 628/844 [08:43<02:59,  1.20it/s]

batch loss: 0.4561


Epoch 3:  75%|███████▍  | 629/844 [08:44<02:58,  1.21it/s]

batch loss: 0.5054


Epoch 3:  75%|███████▍  | 630/844 [08:45<02:57,  1.21it/s]

batch loss: 0.4376


Epoch 3:  75%|███████▍  | 631/844 [08:46<02:56,  1.21it/s]

batch loss: 0.5333


Epoch 3:  75%|███████▍  | 632/844 [08:47<02:56,  1.20it/s]

batch loss: 0.4469


Epoch 3:  75%|███████▌  | 633/844 [08:47<02:55,  1.20it/s]

batch loss: 0.4972


Epoch 3:  75%|███████▌  | 634/844 [08:48<02:54,  1.20it/s]

batch loss: 0.5019


Epoch 3:  75%|███████▌  | 635/844 [08:49<02:54,  1.20it/s]

batch loss: 0.4638


Epoch 3:  75%|███████▌  | 636/844 [08:50<02:53,  1.20it/s]

batch loss: 0.4437


Epoch 3:  75%|███████▌  | 637/844 [08:51<02:52,  1.20it/s]

batch loss: 0.4370


Epoch 3:  76%|███████▌  | 638/844 [08:52<02:51,  1.20it/s]

batch loss: 0.4171


Epoch 3:  76%|███████▌  | 639/844 [08:52<02:50,  1.20it/s]

batch loss: 0.4896


Epoch 3:  76%|███████▌  | 640/844 [08:53<02:49,  1.20it/s]

batch loss: 0.4272


Epoch 3:  76%|███████▌  | 641/844 [08:54<02:48,  1.21it/s]

batch loss: 0.5077


Epoch 3:  76%|███████▌  | 642/844 [08:55<02:47,  1.20it/s]

batch loss: 0.4973


Epoch 3:  76%|███████▌  | 643/844 [08:56<02:46,  1.21it/s]

batch loss: 0.4658


Epoch 3:  76%|███████▋  | 644/844 [08:57<02:45,  1.21it/s]

batch loss: 0.4658


Epoch 3:  76%|███████▋  | 645/844 [08:57<02:45,  1.20it/s]

batch loss: 0.4757


Epoch 3:  77%|███████▋  | 646/844 [08:58<02:44,  1.20it/s]

batch loss: 0.5803


Epoch 3:  77%|███████▋  | 647/844 [08:59<02:43,  1.20it/s]

batch loss: 0.4344


Epoch 3:  77%|███████▋  | 648/844 [09:00<02:43,  1.20it/s]

batch loss: 0.4280


Epoch 3:  77%|███████▋  | 649/844 [09:01<02:42,  1.20it/s]

batch loss: 0.4965


Epoch 3:  77%|███████▋  | 650/844 [09:02<02:41,  1.20it/s]

batch loss: 0.4663


Epoch 3:  77%|███████▋  | 651/844 [09:02<02:40,  1.20it/s]

batch loss: 0.4918


Epoch 3:  77%|███████▋  | 652/844 [09:03<02:39,  1.20it/s]

batch loss: 0.4689


Epoch 3:  77%|███████▋  | 653/844 [09:04<02:39,  1.20it/s]

batch loss: 0.4189


Epoch 3:  77%|███████▋  | 654/844 [09:05<02:38,  1.20it/s]

batch loss: 0.4610


Epoch 3:  78%|███████▊  | 655/844 [09:06<02:36,  1.20it/s]

batch loss: 0.4811


Epoch 3:  78%|███████▊  | 656/844 [09:07<02:36,  1.20it/s]

batch loss: 0.4948


Epoch 3:  78%|███████▊  | 657/844 [09:07<02:35,  1.20it/s]

batch loss: 0.4108


Epoch 3:  78%|███████▊  | 658/844 [09:08<02:34,  1.20it/s]

batch loss: 0.4140


Epoch 3:  78%|███████▊  | 659/844 [09:09<02:33,  1.20it/s]

batch loss: 0.4252


Epoch 3:  78%|███████▊  | 660/844 [09:10<02:33,  1.20it/s]

batch loss: 0.4646


Epoch 3:  78%|███████▊  | 661/844 [09:11<02:32,  1.20it/s]

batch loss: 0.5263


Epoch 3:  78%|███████▊  | 662/844 [09:12<02:31,  1.20it/s]

batch loss: 0.4110


Epoch 3:  79%|███████▊  | 663/844 [09:12<02:30,  1.20it/s]

batch loss: 0.4776


Epoch 3:  79%|███████▊  | 664/844 [09:13<02:29,  1.20it/s]

batch loss: 0.4551


Epoch 3:  79%|███████▉  | 665/844 [09:14<02:29,  1.20it/s]

batch loss: 0.4577


Epoch 3:  79%|███████▉  | 666/844 [09:15<02:28,  1.20it/s]

batch loss: 0.4189


Epoch 3:  79%|███████▉  | 667/844 [09:16<02:28,  1.20it/s]

batch loss: 0.4853


Epoch 3:  79%|███████▉  | 668/844 [09:17<02:26,  1.20it/s]

batch loss: 0.4939


Epoch 3:  79%|███████▉  | 669/844 [09:17<02:26,  1.20it/s]

batch loss: 0.4824


Epoch 3:  79%|███████▉  | 670/844 [09:18<02:24,  1.20it/s]

batch loss: 0.4501


Epoch 3:  80%|███████▉  | 671/844 [09:19<02:24,  1.20it/s]

batch loss: 0.4660


Epoch 3:  80%|███████▉  | 672/844 [09:20<02:23,  1.20it/s]

batch loss: 0.4532


Epoch 3:  80%|███████▉  | 673/844 [09:21<02:22,  1.20it/s]

batch loss: 0.5430


Epoch 3:  80%|███████▉  | 674/844 [09:22<02:21,  1.20it/s]

batch loss: 0.4389


Epoch 3:  80%|███████▉  | 675/844 [09:22<02:20,  1.20it/s]

batch loss: 0.5021


Epoch 3:  80%|████████  | 676/844 [09:23<02:19,  1.20it/s]

batch loss: 0.4608


Epoch 3:  80%|████████  | 677/844 [09:24<02:18,  1.20it/s]

batch loss: 0.4392


Epoch 3:  80%|████████  | 678/844 [09:25<02:17,  1.20it/s]

batch loss: 0.4149


Epoch 3:  80%|████████  | 679/844 [09:26<02:17,  1.20it/s]

batch loss: 0.4799


Epoch 3:  81%|████████  | 680/844 [09:27<02:16,  1.20it/s]

batch loss: 0.4187


Epoch 3:  81%|████████  | 681/844 [09:27<02:15,  1.20it/s]

batch loss: 0.5029


Epoch 3:  81%|████████  | 682/844 [09:28<02:14,  1.20it/s]

batch loss: 0.4130


Epoch 3:  81%|████████  | 683/844 [09:29<02:14,  1.20it/s]

batch loss: 0.5462


Epoch 3:  81%|████████  | 684/844 [09:30<02:13,  1.20it/s]

batch loss: 0.4861


Epoch 3:  81%|████████  | 685/844 [09:31<02:12,  1.20it/s]

batch loss: 0.4320


Epoch 3:  81%|████████▏ | 686/844 [09:32<02:11,  1.20it/s]

batch loss: 0.5352


Epoch 3:  81%|████████▏ | 687/844 [09:32<02:10,  1.20it/s]

batch loss: 0.4697


Epoch 3:  82%|████████▏ | 688/844 [09:33<02:09,  1.20it/s]

batch loss: 0.5026


Epoch 3:  82%|████████▏ | 689/844 [09:34<02:08,  1.20it/s]

batch loss: 0.4656


Epoch 3:  82%|████████▏ | 690/844 [09:35<02:07,  1.20it/s]

batch loss: 0.4892


Epoch 3:  82%|████████▏ | 691/844 [09:36<02:06,  1.21it/s]

batch loss: 0.4837


Epoch 3:  82%|████████▏ | 692/844 [09:37<02:06,  1.21it/s]

batch loss: 0.4651


Epoch 3:  82%|████████▏ | 693/844 [09:37<02:05,  1.21it/s]

batch loss: 0.4020


Epoch 3:  82%|████████▏ | 694/844 [09:38<02:04,  1.20it/s]

batch loss: 0.3473


Epoch 3:  82%|████████▏ | 695/844 [09:39<02:03,  1.20it/s]

batch loss: 0.4167


Epoch 3:  82%|████████▏ | 696/844 [09:40<02:02,  1.20it/s]

batch loss: 0.5006


Epoch 3:  83%|████████▎ | 697/844 [09:41<02:02,  1.20it/s]

batch loss: 0.4138


Epoch 3:  83%|████████▎ | 698/844 [09:42<02:01,  1.20it/s]

batch loss: 0.4495


Epoch 3:  83%|████████▎ | 699/844 [09:42<02:00,  1.20it/s]

batch loss: 0.4774


Epoch 3:  83%|████████▎ | 700/844 [09:43<02:00,  1.20it/s]

batch loss: 0.4768


Epoch 3:  83%|████████▎ | 701/844 [09:44<01:59,  1.20it/s]

batch loss: 0.4174


Epoch 3:  83%|████████▎ | 702/844 [09:45<01:58,  1.20it/s]

batch loss: 0.5061


Epoch 3:  83%|████████▎ | 703/844 [09:46<01:57,  1.20it/s]

batch loss: 0.5480


Epoch 3:  83%|████████▎ | 704/844 [09:47<01:56,  1.20it/s]

batch loss: 0.5023


Epoch 3:  84%|████████▎ | 705/844 [09:47<01:55,  1.20it/s]

batch loss: 0.5221


Epoch 3:  84%|████████▎ | 706/844 [09:48<01:54,  1.20it/s]

batch loss: 0.4056


Epoch 3:  84%|████████▍ | 707/844 [09:49<01:54,  1.20it/s]

batch loss: 0.4796


Epoch 3:  84%|████████▍ | 708/844 [09:50<01:53,  1.20it/s]

batch loss: 0.4348


Epoch 3:  84%|████████▍ | 709/844 [09:51<01:52,  1.20it/s]

batch loss: 0.4703


Epoch 3:  84%|████████▍ | 710/844 [09:51<01:51,  1.20it/s]

batch loss: 0.4459


Epoch 3:  84%|████████▍ | 711/844 [09:52<01:50,  1.20it/s]

batch loss: 0.5432


Epoch 3:  84%|████████▍ | 712/844 [09:53<01:49,  1.20it/s]

batch loss: 0.4581


Epoch 3:  84%|████████▍ | 713/844 [09:54<01:48,  1.20it/s]

batch loss: 0.4159


Epoch 3:  85%|████████▍ | 714/844 [09:55<01:47,  1.20it/s]

batch loss: 0.4944


Epoch 3:  85%|████████▍ | 715/844 [09:56<01:47,  1.20it/s]

batch loss: 0.4922


Epoch 3:  85%|████████▍ | 716/844 [09:56<01:46,  1.20it/s]

batch loss: 0.5057


Epoch 3:  85%|████████▍ | 717/844 [09:57<01:45,  1.20it/s]

batch loss: 0.4406


Epoch 3:  85%|████████▌ | 718/844 [09:58<01:44,  1.20it/s]

batch loss: 0.4510


Epoch 3:  85%|████████▌ | 719/844 [09:59<01:44,  1.20it/s]

batch loss: 0.4588


Epoch 3:  85%|████████▌ | 720/844 [10:00<01:43,  1.20it/s]

batch loss: 0.4306


Epoch 3:  85%|████████▌ | 721/844 [10:01<01:42,  1.20it/s]

batch loss: 0.5061


Epoch 3:  86%|████████▌ | 722/844 [10:01<01:41,  1.20it/s]

batch loss: 0.5398


Epoch 3:  86%|████████▌ | 723/844 [10:02<01:40,  1.20it/s]

batch loss: 0.4905


Epoch 3:  86%|████████▌ | 724/844 [10:03<01:39,  1.21it/s]

batch loss: 0.4999


Epoch 3:  86%|████████▌ | 725/844 [10:04<01:38,  1.21it/s]

batch loss: 0.5023


Epoch 3:  86%|████████▌ | 726/844 [10:05<01:37,  1.21it/s]

batch loss: 0.4473


Epoch 3:  86%|████████▌ | 727/844 [10:06<01:37,  1.20it/s]

batch loss: 0.4141


Epoch 3:  86%|████████▋ | 728/844 [10:06<01:36,  1.21it/s]

batch loss: 0.4762


Epoch 3:  86%|████████▋ | 729/844 [10:07<01:35,  1.21it/s]

batch loss: 0.4820


Epoch 3:  86%|████████▋ | 730/844 [10:08<01:34,  1.21it/s]

batch loss: 0.4029


Epoch 3:  87%|████████▋ | 731/844 [10:09<01:34,  1.19it/s]

batch loss: 0.4499


Epoch 3:  87%|████████▋ | 732/844 [10:10<01:34,  1.19it/s]

batch loss: 0.4866


Epoch 3:  87%|████████▋ | 733/844 [10:11<01:33,  1.19it/s]

batch loss: 0.4697


Epoch 3:  87%|████████▋ | 734/844 [10:11<01:32,  1.19it/s]

batch loss: 0.4524


Epoch 3:  87%|████████▋ | 735/844 [10:12<01:31,  1.19it/s]

batch loss: 0.4321


Epoch 3:  87%|████████▋ | 736/844 [10:13<01:30,  1.19it/s]

batch loss: 0.4189


Epoch 3:  87%|████████▋ | 737/844 [10:14<01:29,  1.19it/s]

batch loss: 0.4239


Epoch 3:  87%|████████▋ | 738/844 [10:15<01:28,  1.20it/s]

batch loss: 0.4068


Epoch 3:  88%|████████▊ | 739/844 [10:16<01:27,  1.20it/s]

batch loss: 0.4194


Epoch 3:  88%|████████▊ | 740/844 [10:16<01:26,  1.20it/s]

batch loss: 0.5068


Epoch 3:  88%|████████▊ | 741/844 [10:17<01:25,  1.20it/s]

batch loss: 0.4203


Epoch 3:  88%|████████▊ | 742/844 [10:18<01:24,  1.20it/s]

batch loss: 0.4882


Epoch 3:  88%|████████▊ | 743/844 [10:19<01:24,  1.20it/s]

batch loss: 0.4494


Epoch 3:  88%|████████▊ | 744/844 [10:20<01:23,  1.20it/s]

batch loss: 0.4876


Epoch 3:  88%|████████▊ | 745/844 [10:21<01:21,  1.21it/s]

batch loss: 0.5079


Epoch 3:  88%|████████▊ | 746/844 [10:21<01:21,  1.21it/s]

batch loss: 0.4101


Epoch 3:  89%|████████▊ | 747/844 [10:22<01:20,  1.20it/s]

batch loss: 0.4011


Epoch 3:  89%|████████▊ | 748/844 [10:23<01:19,  1.20it/s]

batch loss: 0.5204


Epoch 3:  89%|████████▊ | 749/844 [10:24<01:19,  1.20it/s]

batch loss: 0.4889


Epoch 3:  89%|████████▉ | 750/844 [10:25<01:18,  1.20it/s]

batch loss: 0.4709


Epoch 3:  89%|████████▉ | 751/844 [10:26<01:17,  1.20it/s]

batch loss: 0.5519


Epoch 3:  89%|████████▉ | 752/844 [10:26<01:16,  1.20it/s]

batch loss: 0.4563


Epoch 3:  89%|████████▉ | 753/844 [10:27<01:15,  1.20it/s]

batch loss: 0.3456


Epoch 3:  89%|████████▉ | 754/844 [10:28<01:14,  1.20it/s]

batch loss: 0.4489


Epoch 3:  89%|████████▉ | 755/844 [10:29<01:13,  1.20it/s]

batch loss: 0.4840


Epoch 3:  90%|████████▉ | 756/844 [10:30<01:13,  1.20it/s]

batch loss: 0.4651


Epoch 3:  90%|████████▉ | 757/844 [10:31<01:12,  1.20it/s]

batch loss: 0.4585


Epoch 3:  90%|████████▉ | 758/844 [10:31<01:11,  1.20it/s]

batch loss: 0.4189


Epoch 3:  90%|████████▉ | 759/844 [10:32<01:10,  1.20it/s]

batch loss: 0.4035


Epoch 3:  90%|█████████ | 760/844 [10:33<01:09,  1.20it/s]

batch loss: 0.4570


Epoch 3:  90%|█████████ | 761/844 [10:34<01:08,  1.20it/s]

batch loss: 0.5120


Epoch 3:  90%|█████████ | 762/844 [10:35<01:08,  1.21it/s]

batch loss: 0.5007


Epoch 3:  90%|█████████ | 763/844 [10:36<01:07,  1.20it/s]

batch loss: 0.3815


Epoch 3:  91%|█████████ | 764/844 [10:36<01:06,  1.20it/s]

batch loss: 0.4824


Epoch 3:  91%|█████████ | 765/844 [10:37<01:05,  1.20it/s]

batch loss: 0.4470


Epoch 3:  91%|█████████ | 766/844 [10:38<01:04,  1.20it/s]

batch loss: 0.4469


Epoch 3:  91%|█████████ | 767/844 [10:39<01:04,  1.20it/s]

batch loss: 0.4986


Epoch 3:  91%|█████████ | 768/844 [10:40<01:03,  1.20it/s]

batch loss: 0.3444


Epoch 3:  91%|█████████ | 769/844 [10:41<01:02,  1.20it/s]

batch loss: 0.4925


Epoch 3:  91%|█████████ | 770/844 [10:41<01:01,  1.20it/s]

batch loss: 0.4700


Epoch 3:  91%|█████████▏| 771/844 [10:42<01:00,  1.20it/s]

batch loss: 0.4333


Epoch 3:  91%|█████████▏| 772/844 [10:43<00:59,  1.20it/s]

batch loss: 0.4721


Epoch 3:  92%|█████████▏| 773/844 [10:44<00:59,  1.20it/s]

batch loss: 0.4451


Epoch 3:  92%|█████████▏| 774/844 [10:45<00:58,  1.20it/s]

batch loss: 0.4507


Epoch 3:  92%|█████████▏| 775/844 [10:46<00:57,  1.20it/s]

batch loss: 0.4708


Epoch 3:  92%|█████████▏| 776/844 [10:46<00:56,  1.21it/s]

batch loss: 0.4316


Epoch 3:  92%|█████████▏| 777/844 [10:47<00:55,  1.21it/s]

batch loss: 0.4395


Epoch 3:  92%|█████████▏| 778/844 [10:48<00:54,  1.20it/s]

batch loss: 0.5135


Epoch 3:  92%|█████████▏| 779/844 [10:49<00:53,  1.20it/s]

batch loss: 0.5519


Epoch 3:  92%|█████████▏| 780/844 [10:50<00:53,  1.20it/s]

batch loss: 0.4750


Epoch 3:  93%|█████████▎| 781/844 [10:51<00:52,  1.20it/s]

batch loss: 0.4557


Epoch 3:  93%|█████████▎| 782/844 [10:51<00:51,  1.20it/s]

batch loss: 0.4524


Epoch 3:  93%|█████████▎| 783/844 [10:52<00:50,  1.20it/s]

batch loss: 0.3902


Epoch 3:  93%|█████████▎| 784/844 [10:53<00:50,  1.20it/s]

batch loss: 0.4885


Epoch 3:  93%|█████████▎| 785/844 [10:54<00:49,  1.20it/s]

batch loss: 0.4343


Epoch 3:  93%|█████████▎| 786/844 [10:55<00:48,  1.20it/s]

batch loss: 0.5147


Epoch 3:  93%|█████████▎| 787/844 [10:56<00:47,  1.20it/s]

batch loss: 0.4516


Epoch 3:  93%|█████████▎| 788/844 [10:56<00:46,  1.20it/s]

batch loss: 0.4036


Epoch 3:  93%|█████████▎| 789/844 [10:57<00:45,  1.20it/s]

batch loss: 0.4416


Epoch 3:  94%|█████████▎| 790/844 [10:58<00:44,  1.20it/s]

batch loss: 0.5154


Epoch 3:  94%|█████████▎| 791/844 [10:59<00:43,  1.21it/s]

batch loss: 0.4553


Epoch 3:  94%|█████████▍| 792/844 [11:00<00:43,  1.21it/s]

batch loss: 0.4943


Epoch 3:  94%|█████████▍| 793/844 [11:01<00:42,  1.21it/s]

batch loss: 0.4042


Epoch 3:  94%|█████████▍| 794/844 [11:01<00:41,  1.21it/s]

batch loss: 0.4606


Epoch 3:  94%|█████████▍| 795/844 [11:02<00:40,  1.20it/s]

batch loss: 0.4316


Epoch 3:  94%|█████████▍| 796/844 [11:03<00:39,  1.20it/s]

batch loss: 0.5002


Epoch 3:  94%|█████████▍| 797/844 [11:04<00:39,  1.20it/s]

batch loss: 0.3812


Epoch 3:  95%|█████████▍| 798/844 [11:05<00:38,  1.20it/s]

batch loss: 0.4746


Epoch 3:  95%|█████████▍| 799/844 [11:06<00:37,  1.20it/s]

batch loss: 0.4560


Epoch 3:  95%|█████████▍| 800/844 [11:06<00:36,  1.20it/s]

batch loss: 0.5309


Epoch 3:  95%|█████████▍| 801/844 [11:07<00:35,  1.20it/s]

batch loss: 0.5685


Epoch 3:  95%|█████████▌| 802/844 [11:08<00:35,  1.19it/s]

batch loss: 0.5891


Epoch 3:  95%|█████████▌| 803/844 [11:09<00:34,  1.20it/s]

batch loss: 0.4955


Epoch 3:  95%|█████████▌| 804/844 [11:10<00:33,  1.20it/s]

batch loss: 0.3707


Epoch 3:  95%|█████████▌| 805/844 [11:11<00:32,  1.20it/s]

batch loss: 0.4275


Epoch 3:  95%|█████████▌| 806/844 [11:11<00:31,  1.20it/s]

batch loss: 0.4205


Epoch 3:  96%|█████████▌| 807/844 [11:12<00:30,  1.20it/s]

batch loss: 0.4526


Epoch 3:  96%|█████████▌| 808/844 [11:13<00:29,  1.20it/s]

batch loss: 0.3795


Epoch 3:  96%|█████████▌| 809/844 [11:14<00:29,  1.20it/s]

batch loss: 0.4567


Epoch 3:  96%|█████████▌| 810/844 [11:15<00:28,  1.20it/s]

batch loss: 0.4023


Epoch 3:  96%|█████████▌| 811/844 [11:16<00:27,  1.20it/s]

batch loss: 0.5003


Epoch 3:  96%|█████████▌| 812/844 [11:16<00:26,  1.20it/s]

batch loss: 0.5042


Epoch 3:  96%|█████████▋| 813/844 [11:17<00:25,  1.20it/s]

batch loss: 0.4815


Epoch 3:  96%|█████████▋| 814/844 [11:18<00:25,  1.20it/s]

batch loss: 0.5752


Epoch 3:  97%|█████████▋| 815/844 [11:19<00:24,  1.20it/s]

batch loss: 0.5146


Epoch 3:  97%|█████████▋| 816/844 [11:20<00:23,  1.20it/s]

batch loss: 0.4761


Epoch 3:  97%|█████████▋| 817/844 [11:21<00:22,  1.20it/s]

batch loss: 0.4884


Epoch 3:  97%|█████████▋| 818/844 [11:21<00:21,  1.20it/s]

batch loss: 0.4105


Epoch 3:  97%|█████████▋| 819/844 [11:22<00:20,  1.20it/s]

batch loss: 0.4620


Epoch 3:  97%|█████████▋| 820/844 [11:23<00:19,  1.21it/s]

batch loss: 0.4450


Epoch 3:  97%|█████████▋| 821/844 [11:24<00:19,  1.20it/s]

batch loss: 0.4734


Epoch 3:  97%|█████████▋| 822/844 [11:25<00:18,  1.20it/s]

batch loss: 0.4842


Epoch 3:  98%|█████████▊| 823/844 [11:26<00:17,  1.20it/s]

batch loss: 0.4469


Epoch 3:  98%|█████████▊| 824/844 [11:26<00:16,  1.20it/s]

batch loss: 0.4526


Epoch 3:  98%|█████████▊| 825/844 [11:27<00:15,  1.20it/s]

batch loss: 0.4375


Epoch 3:  98%|█████████▊| 826/844 [11:28<00:14,  1.20it/s]

batch loss: 0.4088


Epoch 3:  98%|█████████▊| 827/844 [11:29<00:14,  1.21it/s]

batch loss: 0.4775


Epoch 3:  98%|█████████▊| 828/844 [11:30<00:13,  1.20it/s]

batch loss: 0.4209


Epoch 3:  98%|█████████▊| 829/844 [11:31<00:12,  1.20it/s]

batch loss: 0.4664


Epoch 3:  98%|█████████▊| 830/844 [11:31<00:11,  1.20it/s]

batch loss: 0.5609


Epoch 3:  98%|█████████▊| 831/844 [11:32<00:10,  1.20it/s]

batch loss: 0.4527


Epoch 3:  99%|█████████▊| 832/844 [11:33<00:10,  1.20it/s]

batch loss: 0.4865


Epoch 3:  99%|█████████▊| 833/844 [11:34<00:09,  1.20it/s]

batch loss: 0.5039


Epoch 3:  99%|█████████▉| 834/844 [11:35<00:08,  1.20it/s]

batch loss: 0.4754


Epoch 3:  99%|█████████▉| 835/844 [11:36<00:07,  1.20it/s]

batch loss: 0.4299


Epoch 3:  99%|█████████▉| 836/844 [11:36<00:06,  1.20it/s]

batch loss: 0.4335


Epoch 3:  99%|█████████▉| 837/844 [11:37<00:05,  1.20it/s]

batch loss: 0.4748


Epoch 3:  99%|█████████▉| 838/844 [11:38<00:04,  1.21it/s]

batch loss: 0.4326


Epoch 3:  99%|█████████▉| 839/844 [11:39<00:04,  1.20it/s]

batch loss: 0.4101


Epoch 3: 100%|█████████▉| 840/844 [11:40<00:03,  1.20it/s]

batch loss: 0.4089


Epoch 3: 100%|█████████▉| 841/844 [11:41<00:02,  1.20it/s]

batch loss: 0.4727


Epoch 3: 100%|█████████▉| 842/844 [11:41<00:01,  1.20it/s]

batch loss: 0.5084


Epoch 3: 100%|█████████▉| 843/844 [11:42<00:00,  1.20it/s]

batch loss: 0.5012


Epoch 3: 100%|██████████| 844/844 [11:43<00:00,  1.20it/s]


batch loss: 0.4360
--------------------------------------------------------------------------------
Epoch 3 - Average Loss: 0.4645


Epoch 4:   0%|          | 1/844 [00:00<11:41,  1.20it/s]

batch loss: 0.4542


Epoch 4:   0%|          | 2/844 [00:01<11:40,  1.20it/s]

batch loss: 0.4868


Epoch 4:   0%|          | 3/844 [00:02<11:43,  1.19it/s]

batch loss: 0.4227


Epoch 4:   0%|          | 4/844 [00:03<11:43,  1.19it/s]

batch loss: 0.4651


Epoch 4:   1%|          | 5/844 [00:04<11:45,  1.19it/s]

batch loss: 0.4474


Epoch 4:   1%|          | 6/844 [00:05<11:46,  1.19it/s]

batch loss: 0.3408


Epoch 4:   1%|          | 7/844 [00:05<11:40,  1.19it/s]

batch loss: 0.4243


Epoch 4:   1%|          | 8/844 [00:06<11:38,  1.20it/s]

batch loss: 0.4515


Epoch 4:   1%|          | 9/844 [00:07<11:35,  1.20it/s]

batch loss: 0.3900


Epoch 4:   1%|          | 10/844 [00:08<11:34,  1.20it/s]

batch loss: 0.4196


Epoch 4:   1%|▏         | 11/844 [00:09<11:33,  1.20it/s]

batch loss: 0.4778


Epoch 4:   1%|▏         | 12/844 [00:10<11:32,  1.20it/s]

batch loss: 0.5166


Epoch 4:   2%|▏         | 13/844 [00:10<11:31,  1.20it/s]

batch loss: 0.5243


Epoch 4:   2%|▏         | 14/844 [00:11<11:30,  1.20it/s]

batch loss: 0.4001


Epoch 4:   2%|▏         | 15/844 [00:12<11:32,  1.20it/s]

batch loss: 0.4070


Epoch 4:   2%|▏         | 16/844 [00:13<11:29,  1.20it/s]

batch loss: 0.3736


Epoch 4:   2%|▏         | 17/844 [00:14<11:28,  1.20it/s]

batch loss: 0.4124


Epoch 4:   2%|▏         | 18/844 [00:15<11:27,  1.20it/s]

batch loss: 0.5093


Epoch 4:   2%|▏         | 19/844 [00:15<11:28,  1.20it/s]

batch loss: 0.4401


Epoch 4:   2%|▏         | 20/844 [00:16<11:27,  1.20it/s]

batch loss: 0.3790


Epoch 4:   2%|▏         | 21/844 [00:17<11:30,  1.19it/s]

batch loss: 0.5116


Epoch 4:   3%|▎         | 22/844 [00:18<11:26,  1.20it/s]

batch loss: 0.4858


Epoch 4:   3%|▎         | 23/844 [00:19<11:24,  1.20it/s]

batch loss: 0.4003


Epoch 4:   3%|▎         | 24/844 [00:20<11:24,  1.20it/s]

batch loss: 0.4267


Epoch 4:   3%|▎         | 25/844 [00:20<11:24,  1.20it/s]

batch loss: 0.5437


Epoch 4:   3%|▎         | 26/844 [00:21<11:23,  1.20it/s]

batch loss: 0.5724


Epoch 4:   3%|▎         | 27/844 [00:22<11:21,  1.20it/s]

batch loss: 0.4634


Epoch 4:   3%|▎         | 28/844 [00:23<11:20,  1.20it/s]

batch loss: 0.4823


Epoch 4:   3%|▎         | 29/844 [00:24<11:20,  1.20it/s]

batch loss: 0.4061


Epoch 4:   4%|▎         | 30/844 [00:25<11:18,  1.20it/s]

batch loss: 0.3826


Epoch 4:   4%|▎         | 31/844 [00:25<11:17,  1.20it/s]

batch loss: 0.5038


Epoch 4:   4%|▍         | 32/844 [00:26<11:17,  1.20it/s]

batch loss: 0.4142


Epoch 4:   4%|▍         | 33/844 [00:27<11:16,  1.20it/s]

batch loss: 0.4581


Epoch 4:   4%|▍         | 34/844 [00:28<11:16,  1.20it/s]

batch loss: 0.4157


Epoch 4:   4%|▍         | 35/844 [00:29<11:15,  1.20it/s]

batch loss: 0.4603


Epoch 4:   4%|▍         | 36/844 [00:30<11:15,  1.20it/s]

batch loss: 0.4556


Epoch 4:   4%|▍         | 37/844 [00:30<11:13,  1.20it/s]

batch loss: 0.5297


Epoch 4:   5%|▍         | 38/844 [00:31<11:13,  1.20it/s]

batch loss: 0.4790


Epoch 4:   5%|▍         | 39/844 [00:32<11:14,  1.19it/s]

batch loss: 0.4884


Epoch 4:   5%|▍         | 40/844 [00:33<11:12,  1.20it/s]

batch loss: 0.4901


Epoch 4:   5%|▍         | 41/844 [00:34<11:08,  1.20it/s]

batch loss: 0.4664


Epoch 4:   5%|▍         | 42/844 [00:35<11:08,  1.20it/s]

batch loss: 0.4636


Epoch 4:   5%|▌         | 43/844 [00:35<11:06,  1.20it/s]

batch loss: 0.4773


Epoch 4:   5%|▌         | 44/844 [00:36<11:05,  1.20it/s]

batch loss: 0.4875


Epoch 4:   5%|▌         | 45/844 [00:37<11:05,  1.20it/s]

batch loss: 0.4869


Epoch 4:   5%|▌         | 46/844 [00:38<11:06,  1.20it/s]

batch loss: 0.4376


Epoch 4:   6%|▌         | 47/844 [00:39<11:04,  1.20it/s]

batch loss: 0.3895


Epoch 4:   6%|▌         | 48/844 [00:40<11:02,  1.20it/s]

batch loss: 0.4471


Epoch 4:   6%|▌         | 49/844 [00:40<11:00,  1.20it/s]

batch loss: 0.4856


Epoch 4:   6%|▌         | 50/844 [00:41<10:59,  1.20it/s]

batch loss: 0.4462


Epoch 4:   6%|▌         | 51/844 [00:42<10:59,  1.20it/s]

batch loss: 0.4072


Epoch 4:   6%|▌         | 52/844 [00:43<11:00,  1.20it/s]

batch loss: 0.3863


Epoch 4:   6%|▋         | 53/844 [00:44<11:00,  1.20it/s]

batch loss: 0.4035


Epoch 4:   6%|▋         | 54/844 [00:45<10:57,  1.20it/s]

batch loss: 0.4109


Epoch 4:   7%|▋         | 55/844 [00:45<10:59,  1.20it/s]

batch loss: 0.4602


Epoch 4:   7%|▋         | 56/844 [00:46<11:00,  1.19it/s]

batch loss: 0.4265


Epoch 4:   7%|▋         | 57/844 [00:47<10:57,  1.20it/s]

batch loss: 0.4925


Epoch 4:   7%|▋         | 58/844 [00:48<10:58,  1.19it/s]

batch loss: 0.3621


Epoch 4:   7%|▋         | 59/844 [00:49<10:55,  1.20it/s]

batch loss: 0.4587


Epoch 4:   7%|▋         | 60/844 [00:50<10:54,  1.20it/s]

batch loss: 0.3504


Epoch 4:   7%|▋         | 61/844 [00:50<10:54,  1.20it/s]

batch loss: 0.5213


Epoch 4:   7%|▋         | 62/844 [00:51<10:53,  1.20it/s]

batch loss: 0.5148


Epoch 4:   7%|▋         | 63/844 [00:52<10:53,  1.20it/s]

batch loss: 0.4034


Epoch 4:   8%|▊         | 64/844 [00:53<10:51,  1.20it/s]

batch loss: 0.3965


Epoch 4:   8%|▊         | 65/844 [00:54<10:49,  1.20it/s]

batch loss: 0.4927


Epoch 4:   8%|▊         | 66/844 [00:55<10:47,  1.20it/s]

batch loss: 0.5311


Epoch 4:   8%|▊         | 67/844 [00:55<10:46,  1.20it/s]

batch loss: 0.3993


Epoch 4:   8%|▊         | 68/844 [00:56<10:47,  1.20it/s]

batch loss: 0.4719


Epoch 4:   8%|▊         | 69/844 [00:57<10:48,  1.19it/s]

batch loss: 0.4427


Epoch 4:   8%|▊         | 70/844 [00:58<10:46,  1.20it/s]

batch loss: 0.3926


Epoch 4:   8%|▊         | 71/844 [00:59<10:44,  1.20it/s]

batch loss: 0.4131


Epoch 4:   9%|▊         | 72/844 [01:00<10:44,  1.20it/s]

batch loss: 0.4290


Epoch 4:   9%|▊         | 73/844 [01:00<10:43,  1.20it/s]

batch loss: 0.4126


Epoch 4:   9%|▉         | 74/844 [01:01<10:39,  1.20it/s]

batch loss: 0.4368


Epoch 4:   9%|▉         | 75/844 [01:02<10:40,  1.20it/s]

batch loss: 0.4910


Epoch 4:   9%|▉         | 76/844 [01:03<10:38,  1.20it/s]

batch loss: 0.4188


Epoch 4:   9%|▉         | 77/844 [01:04<10:37,  1.20it/s]

batch loss: 0.4517


Epoch 4:   9%|▉         | 78/844 [01:05<10:36,  1.20it/s]

batch loss: 0.4680


Epoch 4:   9%|▉         | 79/844 [01:05<10:35,  1.20it/s]

batch loss: 0.4721


Epoch 4:   9%|▉         | 80/844 [01:06<10:34,  1.21it/s]

batch loss: 0.4097


Epoch 4:  10%|▉         | 81/844 [01:07<10:31,  1.21it/s]

batch loss: 0.5644


Epoch 4:  10%|▉         | 82/844 [01:08<10:33,  1.20it/s]

batch loss: 0.4430


Epoch 4:  10%|▉         | 83/844 [01:09<10:32,  1.20it/s]

batch loss: 0.4782


Epoch 4:  10%|▉         | 84/844 [01:10<10:32,  1.20it/s]

batch loss: 0.4584


Epoch 4:  10%|█         | 85/844 [01:10<10:31,  1.20it/s]

batch loss: 0.4454


Epoch 4:  10%|█         | 86/844 [01:11<10:31,  1.20it/s]

batch loss: 0.4279


Epoch 4:  10%|█         | 87/844 [01:12<10:29,  1.20it/s]

batch loss: 0.4202


Epoch 4:  10%|█         | 88/844 [01:13<10:29,  1.20it/s]

batch loss: 0.4501


Epoch 4:  11%|█         | 89/844 [01:14<10:28,  1.20it/s]

batch loss: 0.4196


Epoch 4:  11%|█         | 90/844 [01:15<10:26,  1.20it/s]

batch loss: 0.4061


Epoch 4:  11%|█         | 91/844 [01:15<10:26,  1.20it/s]

batch loss: 0.4841


Epoch 4:  11%|█         | 92/844 [01:16<10:25,  1.20it/s]

batch loss: 0.5189


Epoch 4:  11%|█         | 93/844 [01:17<10:24,  1.20it/s]

batch loss: 0.4270


Epoch 4:  11%|█         | 94/844 [01:18<10:25,  1.20it/s]

batch loss: 0.4904


Epoch 4:  11%|█▏        | 95/844 [01:19<10:23,  1.20it/s]

batch loss: 0.4317


Epoch 4:  11%|█▏        | 96/844 [01:20<10:21,  1.20it/s]

batch loss: 0.4255


Epoch 4:  11%|█▏        | 97/844 [01:20<10:19,  1.21it/s]

batch loss: 0.3581


Epoch 4:  12%|█▏        | 98/844 [01:21<10:18,  1.21it/s]

batch loss: 0.4320


Epoch 4:  12%|█▏        | 99/844 [01:22<10:19,  1.20it/s]

batch loss: 0.4427


Epoch 4:  12%|█▏        | 100/844 [01:23<10:18,  1.20it/s]

batch loss: 0.4296


Epoch 4:  12%|█▏        | 101/844 [01:24<10:20,  1.20it/s]

batch loss: 0.4477


Epoch 4:  12%|█▏        | 102/844 [01:25<10:19,  1.20it/s]

batch loss: 0.3813


Epoch 4:  12%|█▏        | 103/844 [01:25<10:17,  1.20it/s]

batch loss: 0.4772


Epoch 4:  12%|█▏        | 104/844 [01:26<10:16,  1.20it/s]

batch loss: 0.4853


Epoch 4:  12%|█▏        | 105/844 [01:27<10:16,  1.20it/s]

batch loss: 0.4661


Epoch 4:  13%|█▎        | 106/844 [01:28<10:16,  1.20it/s]

batch loss: 0.4534


Epoch 4:  13%|█▎        | 107/844 [01:29<10:13,  1.20it/s]

batch loss: 0.4385


Epoch 4:  13%|█▎        | 108/844 [01:30<10:11,  1.20it/s]

batch loss: 0.4267


Epoch 4:  13%|█▎        | 109/844 [01:30<10:11,  1.20it/s]

batch loss: 0.4249


Epoch 4:  13%|█▎        | 110/844 [01:31<10:09,  1.20it/s]

batch loss: 0.5045


Epoch 4:  13%|█▎        | 111/844 [01:32<10:08,  1.20it/s]

batch loss: 0.3915


Epoch 4:  13%|█▎        | 112/844 [01:33<10:07,  1.20it/s]

batch loss: 0.4638


Epoch 4:  13%|█▎        | 113/844 [01:34<10:06,  1.21it/s]

batch loss: 0.4692


Epoch 4:  14%|█▎        | 114/844 [01:34<10:05,  1.21it/s]

batch loss: 0.4360


Epoch 4:  14%|█▎        | 115/844 [01:35<10:03,  1.21it/s]

batch loss: 0.4363


Epoch 4:  14%|█▎        | 116/844 [01:36<10:03,  1.21it/s]

batch loss: 0.4481


Epoch 4:  14%|█▍        | 117/844 [01:37<10:04,  1.20it/s]

batch loss: 0.3993


Epoch 4:  14%|█▍        | 118/844 [01:38<10:02,  1.20it/s]

batch loss: 0.4555


Epoch 4:  14%|█▍        | 119/844 [01:39<10:03,  1.20it/s]

batch loss: 0.4860


Epoch 4:  14%|█▍        | 120/844 [01:39<10:04,  1.20it/s]

batch loss: 0.4679


Epoch 4:  14%|█▍        | 121/844 [01:40<10:04,  1.20it/s]

batch loss: 0.4950


Epoch 4:  14%|█▍        | 122/844 [01:41<10:04,  1.20it/s]

batch loss: 0.3674


Epoch 4:  15%|█▍        | 123/844 [01:42<10:00,  1.20it/s]

batch loss: 0.5869


Epoch 4:  15%|█▍        | 124/844 [01:43<09:59,  1.20it/s]

batch loss: 0.4343


Epoch 4:  15%|█▍        | 125/844 [01:44<09:58,  1.20it/s]

batch loss: 0.4895


Epoch 4:  15%|█▍        | 126/844 [01:44<09:57,  1.20it/s]

batch loss: 0.4147


Epoch 4:  15%|█▌        | 127/844 [01:45<09:55,  1.20it/s]

batch loss: 0.4028


Epoch 4:  15%|█▌        | 128/844 [01:46<09:57,  1.20it/s]

batch loss: 0.4360


Epoch 4:  15%|█▌        | 129/844 [01:47<09:55,  1.20it/s]

batch loss: 0.4089


Epoch 4:  15%|█▌        | 130/844 [01:48<09:54,  1.20it/s]

batch loss: 0.4159


Epoch 4:  16%|█▌        | 131/844 [01:49<09:54,  1.20it/s]

batch loss: 0.4764


Epoch 4:  16%|█▌        | 132/844 [01:50<09:54,  1.20it/s]

batch loss: 0.4224


Epoch 4:  16%|█▌        | 133/844 [01:50<09:52,  1.20it/s]

batch loss: 0.4482


Epoch 4:  16%|█▌        | 134/844 [01:51<09:52,  1.20it/s]

batch loss: 0.4326


Epoch 4:  16%|█▌        | 135/844 [01:52<09:51,  1.20it/s]

batch loss: 0.4191


Epoch 4:  16%|█▌        | 136/844 [01:53<09:48,  1.20it/s]

batch loss: 0.5193


Epoch 4:  16%|█▌        | 137/844 [01:54<09:49,  1.20it/s]

batch loss: 0.4954


Epoch 4:  16%|█▋        | 138/844 [01:55<09:48,  1.20it/s]

batch loss: 0.3436


Epoch 4:  16%|█▋        | 139/844 [01:55<09:47,  1.20it/s]

batch loss: 0.5100


Epoch 4:  17%|█▋        | 140/844 [01:56<09:46,  1.20it/s]

batch loss: 0.5179


Epoch 4:  17%|█▋        | 141/844 [01:57<09:44,  1.20it/s]

batch loss: 0.4682


Epoch 4:  17%|█▋        | 142/844 [01:58<09:43,  1.20it/s]

batch loss: 0.4116


Epoch 4:  17%|█▋        | 143/844 [01:59<09:42,  1.20it/s]

batch loss: 0.4496


Epoch 4:  17%|█▋        | 144/844 [01:59<09:41,  1.20it/s]

batch loss: 0.4280


Epoch 4:  17%|█▋        | 145/844 [02:00<09:43,  1.20it/s]

batch loss: 0.4358


Epoch 4:  17%|█▋        | 146/844 [02:01<09:42,  1.20it/s]

batch loss: 0.4684


Epoch 4:  17%|█▋        | 147/844 [02:02<09:40,  1.20it/s]

batch loss: 0.4597


Epoch 4:  18%|█▊        | 148/844 [02:03<09:39,  1.20it/s]

batch loss: 0.5696


Epoch 4:  18%|█▊        | 149/844 [02:04<09:37,  1.20it/s]

batch loss: 0.4075


Epoch 4:  18%|█▊        | 150/844 [02:04<09:37,  1.20it/s]

batch loss: 0.4276


Epoch 4:  18%|█▊        | 151/844 [02:05<09:39,  1.19it/s]

batch loss: 0.5001


Epoch 4:  18%|█▊        | 152/844 [02:06<09:40,  1.19it/s]

batch loss: 0.4883


Epoch 4:  18%|█▊        | 153/844 [02:07<09:39,  1.19it/s]

batch loss: 0.4609


Epoch 4:  18%|█▊        | 154/844 [02:08<09:35,  1.20it/s]

batch loss: 0.4816


Epoch 4:  18%|█▊        | 155/844 [02:09<09:35,  1.20it/s]

batch loss: 0.5063


Epoch 4:  18%|█▊        | 156/844 [02:10<09:33,  1.20it/s]

batch loss: 0.4259


Epoch 4:  19%|█▊        | 157/844 [02:10<09:32,  1.20it/s]

batch loss: 0.4571


Epoch 4:  19%|█▊        | 158/844 [02:11<09:31,  1.20it/s]

batch loss: 0.5125


Epoch 4:  19%|█▉        | 159/844 [02:12<09:29,  1.20it/s]

batch loss: 0.4388


Epoch 4:  19%|█▉        | 160/844 [02:13<09:29,  1.20it/s]

batch loss: 0.4279


Epoch 4:  19%|█▉        | 161/844 [02:14<09:28,  1.20it/s]

batch loss: 0.3756


Epoch 4:  19%|█▉        | 162/844 [02:14<09:27,  1.20it/s]

batch loss: 0.4270


Epoch 4:  19%|█▉        | 163/844 [02:15<09:25,  1.20it/s]

batch loss: 0.4435


Epoch 4:  19%|█▉        | 164/844 [02:16<09:25,  1.20it/s]

batch loss: 0.4918


Epoch 4:  20%|█▉        | 165/844 [02:17<09:25,  1.20it/s]

batch loss: 0.4006


Epoch 4:  20%|█▉        | 166/844 [02:18<09:24,  1.20it/s]

batch loss: 0.4572


Epoch 4:  20%|█▉        | 167/844 [02:19<09:25,  1.20it/s]

batch loss: 0.4288


Epoch 4:  20%|█▉        | 168/844 [02:20<09:24,  1.20it/s]

batch loss: 0.5042


Epoch 4:  20%|██        | 169/844 [02:20<09:23,  1.20it/s]

batch loss: 0.4346


Epoch 4:  20%|██        | 170/844 [02:21<09:23,  1.20it/s]

batch loss: 0.4380


Epoch 4:  20%|██        | 171/844 [02:22<09:22,  1.20it/s]

batch loss: 0.4447


Epoch 4:  20%|██        | 172/844 [02:23<09:21,  1.20it/s]

batch loss: 0.4243


Epoch 4:  20%|██        | 173/844 [02:24<09:18,  1.20it/s]

batch loss: 0.4427


Epoch 4:  21%|██        | 174/844 [02:25<09:19,  1.20it/s]

batch loss: 0.4006


Epoch 4:  21%|██        | 175/844 [02:25<09:17,  1.20it/s]

batch loss: 0.4619


Epoch 4:  21%|██        | 176/844 [02:26<09:15,  1.20it/s]

batch loss: 0.4658


Epoch 4:  21%|██        | 177/844 [02:27<09:14,  1.20it/s]

batch loss: 0.4546


Epoch 4:  21%|██        | 178/844 [02:28<09:13,  1.20it/s]

batch loss: 0.4973


Epoch 4:  21%|██        | 179/844 [02:29<09:12,  1.20it/s]

batch loss: 0.4094


Epoch 4:  21%|██▏       | 180/844 [02:29<09:11,  1.20it/s]

batch loss: 0.5456


Epoch 4:  21%|██▏       | 181/844 [02:30<09:11,  1.20it/s]

batch loss: 0.5374


Epoch 4:  22%|██▏       | 182/844 [02:31<09:11,  1.20it/s]

batch loss: 0.4591


Epoch 4:  22%|██▏       | 183/844 [02:32<09:10,  1.20it/s]

batch loss: 0.4680


Epoch 4:  22%|██▏       | 184/844 [02:33<09:10,  1.20it/s]

batch loss: 0.4398


Epoch 4:  22%|██▏       | 185/844 [02:34<09:09,  1.20it/s]

batch loss: 0.3952


Epoch 4:  22%|██▏       | 186/844 [02:34<09:08,  1.20it/s]

batch loss: 0.4480


Epoch 4:  22%|██▏       | 187/844 [02:35<09:06,  1.20it/s]

batch loss: 0.4543


Epoch 4:  22%|██▏       | 188/844 [02:36<09:05,  1.20it/s]

batch loss: 0.4879


Epoch 4:  22%|██▏       | 189/844 [02:37<09:05,  1.20it/s]

batch loss: 0.4194


Epoch 4:  23%|██▎       | 190/844 [02:38<09:04,  1.20it/s]

batch loss: 0.4494


Epoch 4:  23%|██▎       | 191/844 [02:39<09:02,  1.20it/s]

batch loss: 0.3298


Epoch 4:  23%|██▎       | 192/844 [02:39<09:03,  1.20it/s]

batch loss: 0.4373


Epoch 4:  23%|██▎       | 193/844 [02:40<09:02,  1.20it/s]

batch loss: 0.4442


Epoch 4:  23%|██▎       | 194/844 [02:41<09:00,  1.20it/s]

batch loss: 0.4859


Epoch 4:  23%|██▎       | 195/844 [02:42<09:00,  1.20it/s]

batch loss: 0.4395


Epoch 4:  23%|██▎       | 196/844 [02:43<09:01,  1.20it/s]

batch loss: 0.4303


Epoch 4:  23%|██▎       | 197/844 [02:44<08:59,  1.20it/s]

batch loss: 0.4159


Epoch 4:  23%|██▎       | 198/844 [02:44<08:58,  1.20it/s]

batch loss: 0.5163


Epoch 4:  24%|██▎       | 199/844 [02:45<08:56,  1.20it/s]

batch loss: 0.4306


Epoch 4:  24%|██▎       | 200/844 [02:46<08:59,  1.19it/s]

batch loss: 0.5880


Epoch 4:  24%|██▍       | 201/844 [02:47<08:56,  1.20it/s]

batch loss: 0.4386


Epoch 4:  24%|██▍       | 202/844 [02:48<08:55,  1.20it/s]

batch loss: 0.4958


Epoch 4:  24%|██▍       | 203/844 [02:49<08:54,  1.20it/s]

batch loss: 0.3997


Epoch 4:  24%|██▍       | 204/844 [02:49<08:53,  1.20it/s]

batch loss: 0.5204


Epoch 4:  24%|██▍       | 205/844 [02:50<08:53,  1.20it/s]

batch loss: 0.4490


Epoch 4:  24%|██▍       | 206/844 [02:51<08:52,  1.20it/s]

batch loss: 0.4982


Epoch 4:  25%|██▍       | 207/844 [02:52<08:50,  1.20it/s]

batch loss: 0.4289


Epoch 4:  25%|██▍       | 208/844 [02:53<08:49,  1.20it/s]

batch loss: 0.4246


Epoch 4:  25%|██▍       | 209/844 [02:54<08:48,  1.20it/s]

batch loss: 0.3915


Epoch 4:  25%|██▍       | 210/844 [02:54<08:46,  1.20it/s]

batch loss: 0.4850


Epoch 4:  25%|██▌       | 211/844 [02:55<08:46,  1.20it/s]

batch loss: 0.4965


Epoch 4:  25%|██▌       | 212/844 [02:56<08:46,  1.20it/s]

batch loss: 0.4501


Epoch 4:  25%|██▌       | 213/844 [02:57<08:46,  1.20it/s]

batch loss: 0.5079


Epoch 4:  25%|██▌       | 214/844 [02:58<08:44,  1.20it/s]

batch loss: 0.5053


Epoch 4:  25%|██▌       | 215/844 [02:59<08:42,  1.20it/s]

batch loss: 0.5868


Epoch 4:  26%|██▌       | 216/844 [02:59<08:42,  1.20it/s]

batch loss: 0.5289


Epoch 4:  26%|██▌       | 217/844 [03:00<08:44,  1.20it/s]

batch loss: 0.4292


Epoch 4:  26%|██▌       | 218/844 [03:01<08:46,  1.19it/s]

batch loss: 0.4448


Epoch 4:  26%|██▌       | 219/844 [03:02<08:45,  1.19it/s]

batch loss: 0.3995


Epoch 4:  26%|██▌       | 220/844 [03:03<08:42,  1.20it/s]

batch loss: 0.3913


Epoch 4:  26%|██▌       | 221/844 [03:04<08:42,  1.19it/s]

batch loss: 0.4312


Epoch 4:  26%|██▋       | 222/844 [03:05<08:39,  1.20it/s]

batch loss: 0.4210


Epoch 4:  26%|██▋       | 223/844 [03:05<08:38,  1.20it/s]

batch loss: 0.4345


Epoch 4:  27%|██▋       | 224/844 [03:06<08:37,  1.20it/s]

batch loss: 0.4648


Epoch 4:  27%|██▋       | 225/844 [03:07<08:34,  1.20it/s]

batch loss: 0.5044


Epoch 4:  27%|██▋       | 226/844 [03:08<08:33,  1.20it/s]

batch loss: 0.4864


Epoch 4:  27%|██▋       | 227/844 [03:09<08:33,  1.20it/s]

batch loss: 0.3984


Epoch 4:  27%|██▋       | 228/844 [03:10<08:31,  1.20it/s]

batch loss: 0.4832


Epoch 4:  27%|██▋       | 229/844 [03:10<08:30,  1.20it/s]

batch loss: 0.5239


Epoch 4:  27%|██▋       | 230/844 [03:11<08:30,  1.20it/s]

batch loss: 0.5204


Epoch 4:  27%|██▋       | 231/844 [03:12<08:29,  1.20it/s]

batch loss: 0.4900


Epoch 4:  27%|██▋       | 232/844 [03:13<08:28,  1.20it/s]

batch loss: 0.4861


Epoch 4:  28%|██▊       | 233/844 [03:14<08:31,  1.20it/s]

batch loss: 0.4611


Epoch 4:  28%|██▊       | 234/844 [03:15<08:29,  1.20it/s]

batch loss: 0.4857


Epoch 4:  28%|██▊       | 235/844 [03:15<08:30,  1.19it/s]

batch loss: 0.4615


Epoch 4:  28%|██▊       | 236/844 [03:16<08:28,  1.20it/s]

batch loss: 0.4919


Epoch 4:  28%|██▊       | 237/844 [03:17<08:25,  1.20it/s]

batch loss: 0.4689


Epoch 4:  28%|██▊       | 238/844 [03:18<08:24,  1.20it/s]

batch loss: 0.5278


Epoch 4:  28%|██▊       | 239/844 [03:19<08:23,  1.20it/s]

batch loss: 0.4726


Epoch 4:  28%|██▊       | 240/844 [03:20<08:23,  1.20it/s]

batch loss: 0.5114


Epoch 4:  29%|██▊       | 241/844 [03:20<08:21,  1.20it/s]

batch loss: 0.4358


Epoch 4:  29%|██▊       | 242/844 [03:21<08:20,  1.20it/s]

batch loss: 0.4425


Epoch 4:  29%|██▉       | 243/844 [03:22<08:21,  1.20it/s]

batch loss: 0.5145


Epoch 4:  29%|██▉       | 244/844 [03:23<08:19,  1.20it/s]

batch loss: 0.4155


Epoch 4:  29%|██▉       | 245/844 [03:24<08:18,  1.20it/s]

batch loss: 0.4643


Epoch 4:  29%|██▉       | 246/844 [03:24<08:16,  1.20it/s]

batch loss: 0.4525


Epoch 4:  29%|██▉       | 247/844 [03:25<08:15,  1.20it/s]

batch loss: 0.5376


Epoch 4:  29%|██▉       | 248/844 [03:26<08:14,  1.20it/s]

batch loss: 0.4601


Epoch 4:  30%|██▉       | 249/844 [03:27<08:14,  1.20it/s]

batch loss: 0.4823


Epoch 4:  30%|██▉       | 250/844 [03:28<08:15,  1.20it/s]

batch loss: 0.4940


Epoch 4:  30%|██▉       | 251/844 [03:29<08:15,  1.20it/s]

batch loss: 0.5289


Epoch 4:  30%|██▉       | 252/844 [03:30<08:15,  1.19it/s]

batch loss: 0.5370


Epoch 4:  30%|██▉       | 253/844 [03:30<08:14,  1.20it/s]

batch loss: 0.4766


Epoch 4:  30%|███       | 254/844 [03:31<08:12,  1.20it/s]

batch loss: 0.4631


Epoch 4:  30%|███       | 255/844 [03:32<08:10,  1.20it/s]

batch loss: 0.4513


Epoch 4:  30%|███       | 256/844 [03:33<08:08,  1.20it/s]

batch loss: 0.4349


Epoch 4:  30%|███       | 257/844 [03:34<08:08,  1.20it/s]

batch loss: 0.4464


Epoch 4:  31%|███       | 258/844 [03:35<08:09,  1.20it/s]

batch loss: 0.5150


Epoch 4:  31%|███       | 259/844 [03:35<08:07,  1.20it/s]

batch loss: 0.5090


Epoch 4:  31%|███       | 260/844 [03:36<08:06,  1.20it/s]

batch loss: 0.4497


Epoch 4:  31%|███       | 261/844 [03:37<08:05,  1.20it/s]

batch loss: 0.4924


Epoch 4:  31%|███       | 262/844 [03:38<08:04,  1.20it/s]

batch loss: 0.4327


Epoch 4:  31%|███       | 263/844 [03:39<08:04,  1.20it/s]

batch loss: 0.4860


Epoch 4:  31%|███▏      | 264/844 [03:40<08:03,  1.20it/s]

batch loss: 0.4429


Epoch 4:  31%|███▏      | 265/844 [03:40<08:02,  1.20it/s]

batch loss: 0.4231


Epoch 4:  32%|███▏      | 266/844 [03:41<08:01,  1.20it/s]

batch loss: 0.4084


Epoch 4:  32%|███▏      | 267/844 [03:42<08:02,  1.20it/s]

batch loss: 0.4127


Epoch 4:  32%|███▏      | 268/844 [03:43<08:00,  1.20it/s]

batch loss: 0.4772


Epoch 4:  32%|███▏      | 269/844 [03:44<08:01,  1.19it/s]

batch loss: 0.4535


Epoch 4:  32%|███▏      | 270/844 [03:45<08:00,  1.20it/s]

batch loss: 0.4424


Epoch 4:  32%|███▏      | 271/844 [03:45<07:58,  1.20it/s]

batch loss: 0.4305


Epoch 4:  32%|███▏      | 272/844 [03:46<07:57,  1.20it/s]

batch loss: 0.4470


Epoch 4:  32%|███▏      | 273/844 [03:47<07:56,  1.20it/s]

batch loss: 0.4290


Epoch 4:  32%|███▏      | 274/844 [03:48<07:55,  1.20it/s]

batch loss: 0.4820


Epoch 4:  33%|███▎      | 275/844 [03:49<07:53,  1.20it/s]

batch loss: 0.4149


Epoch 4:  33%|███▎      | 276/844 [03:50<07:51,  1.20it/s]

batch loss: 0.4843


Epoch 4:  33%|███▎      | 277/844 [03:50<07:51,  1.20it/s]

batch loss: 0.4122


Epoch 4:  33%|███▎      | 278/844 [03:51<07:50,  1.20it/s]

batch loss: 0.4708


Epoch 4:  33%|███▎      | 279/844 [03:52<07:49,  1.20it/s]

batch loss: 0.4497


Epoch 4:  33%|███▎      | 280/844 [03:53<07:50,  1.20it/s]

batch loss: 0.5055


Epoch 4:  33%|███▎      | 281/844 [03:54<07:49,  1.20it/s]

batch loss: 0.4512


Epoch 4:  33%|███▎      | 282/844 [03:55<07:48,  1.20it/s]

batch loss: 0.4106


Epoch 4:  34%|███▎      | 283/844 [03:55<07:49,  1.20it/s]

batch loss: 0.4194


Epoch 4:  34%|███▎      | 284/844 [03:56<07:48,  1.19it/s]

batch loss: 0.5143


Epoch 4:  34%|███▍      | 285/844 [03:57<07:48,  1.19it/s]

batch loss: 0.4392


Epoch 4:  34%|███▍      | 286/844 [03:58<07:47,  1.19it/s]

batch loss: 0.5377


Epoch 4:  34%|███▍      | 287/844 [03:59<07:45,  1.20it/s]

batch loss: 0.4319


Epoch 4:  34%|███▍      | 288/844 [04:00<07:45,  1.19it/s]

batch loss: 0.4846


Epoch 4:  34%|███▍      | 289/844 [04:00<07:43,  1.20it/s]

batch loss: 0.4119


Epoch 4:  34%|███▍      | 290/844 [04:01<07:42,  1.20it/s]

batch loss: 0.5548


Epoch 4:  34%|███▍      | 291/844 [04:02<07:40,  1.20it/s]

batch loss: 0.3816


Epoch 4:  35%|███▍      | 292/844 [04:03<07:40,  1.20it/s]

batch loss: 0.3997


Epoch 4:  35%|███▍      | 293/844 [04:04<07:40,  1.20it/s]

batch loss: 0.5334


Epoch 4:  35%|███▍      | 294/844 [04:05<07:38,  1.20it/s]

batch loss: 0.5016


Epoch 4:  35%|███▍      | 295/844 [04:05<07:37,  1.20it/s]

batch loss: 0.4502


Epoch 4:  35%|███▌      | 296/844 [04:06<07:37,  1.20it/s]

batch loss: 0.4606


Epoch 4:  35%|███▌      | 297/844 [04:07<07:35,  1.20it/s]

batch loss: 0.4820


Epoch 4:  35%|███▌      | 298/844 [04:08<07:34,  1.20it/s]

batch loss: 0.4733


Epoch 4:  35%|███▌      | 299/844 [04:09<07:35,  1.20it/s]

batch loss: 0.3783


Epoch 4:  36%|███▌      | 300/844 [04:10<07:33,  1.20it/s]

batch loss: 0.5010


Epoch 4:  36%|███▌      | 301/844 [04:10<07:32,  1.20it/s]

batch loss: 0.4064


Epoch 4:  36%|███▌      | 302/844 [04:11<07:33,  1.19it/s]

batch loss: 0.4104


Epoch 4:  36%|███▌      | 303/844 [04:12<07:32,  1.19it/s]

batch loss: 0.4507


Epoch 4:  36%|███▌      | 304/844 [04:13<07:31,  1.19it/s]

batch loss: 0.4532


Epoch 4:  36%|███▌      | 305/844 [04:14<07:30,  1.20it/s]

batch loss: 0.3971


Epoch 4:  36%|███▋      | 306/844 [04:15<07:27,  1.20it/s]

batch loss: 0.5095


Epoch 4:  36%|███▋      | 307/844 [04:15<07:27,  1.20it/s]

batch loss: 0.3835


Epoch 4:  36%|███▋      | 308/844 [04:16<07:26,  1.20it/s]

batch loss: 0.4464


Epoch 4:  37%|███▋      | 309/844 [04:17<07:25,  1.20it/s]

batch loss: 0.5157


Epoch 4:  37%|███▋      | 310/844 [04:18<07:24,  1.20it/s]

batch loss: 0.4035


Epoch 4:  37%|███▋      | 311/844 [04:19<07:23,  1.20it/s]

batch loss: 0.3988


Epoch 4:  37%|███▋      | 312/844 [04:20<07:22,  1.20it/s]

batch loss: 0.6404


Epoch 4:  37%|███▋      | 313/844 [04:20<07:22,  1.20it/s]

batch loss: 0.4822


Epoch 4:  37%|███▋      | 314/844 [04:21<07:21,  1.20it/s]

batch loss: 0.5210


Epoch 4:  37%|███▋      | 315/844 [04:22<07:21,  1.20it/s]

batch loss: 0.4761


Epoch 4:  37%|███▋      | 316/844 [04:23<07:21,  1.20it/s]

batch loss: 0.4460


Epoch 4:  38%|███▊      | 317/844 [04:24<07:20,  1.20it/s]

batch loss: 0.4488


Epoch 4:  38%|███▊      | 318/844 [04:25<07:20,  1.19it/s]

batch loss: 0.4560


Epoch 4:  38%|███▊      | 319/844 [04:25<07:20,  1.19it/s]

batch loss: 0.4359


Epoch 4:  38%|███▊      | 320/844 [04:26<07:21,  1.19it/s]

batch loss: 0.3378


Epoch 4:  38%|███▊      | 321/844 [04:27<07:20,  1.19it/s]

batch loss: 0.4722


Epoch 4:  38%|███▊      | 322/844 [04:28<07:17,  1.19it/s]

batch loss: 0.4392


Epoch 4:  38%|███▊      | 323/844 [04:29<07:18,  1.19it/s]

batch loss: 0.5576


Epoch 4:  38%|███▊      | 324/844 [04:30<07:16,  1.19it/s]

batch loss: 0.4746


Epoch 4:  39%|███▊      | 325/844 [04:30<07:13,  1.20it/s]

batch loss: 0.4796


Epoch 4:  39%|███▊      | 326/844 [04:31<07:12,  1.20it/s]

batch loss: 0.4814


Epoch 4:  39%|███▊      | 327/844 [04:32<07:10,  1.20it/s]

batch loss: 0.3916


Epoch 4:  39%|███▉      | 328/844 [04:33<07:09,  1.20it/s]

batch loss: 0.3952


Epoch 4:  39%|███▉      | 329/844 [04:34<07:08,  1.20it/s]

batch loss: 0.4228


Epoch 4:  39%|███▉      | 330/844 [04:35<07:09,  1.20it/s]

batch loss: 0.5064


Epoch 4:  39%|███▉      | 331/844 [04:35<07:08,  1.20it/s]

batch loss: 0.4184


Epoch 4:  39%|███▉      | 332/844 [04:36<07:09,  1.19it/s]

batch loss: 0.4802


Epoch 4:  39%|███▉      | 333/844 [04:37<07:06,  1.20it/s]

batch loss: 0.4474


Epoch 4:  40%|███▉      | 334/844 [04:38<07:05,  1.20it/s]

batch loss: 0.5114


Epoch 4:  40%|███▉      | 335/844 [04:39<07:05,  1.20it/s]

batch loss: 0.4968


Epoch 4:  40%|███▉      | 336/844 [04:40<07:04,  1.20it/s]

batch loss: 0.4807


Epoch 4:  40%|███▉      | 337/844 [04:40<07:05,  1.19it/s]

batch loss: 0.4481


Epoch 4:  40%|████      | 338/844 [04:41<07:01,  1.20it/s]

batch loss: 0.4655


Epoch 4:  40%|████      | 339/844 [04:42<07:01,  1.20it/s]

batch loss: 0.4844


Epoch 4:  40%|████      | 340/844 [04:43<07:00,  1.20it/s]

batch loss: 0.4321


Epoch 4:  40%|████      | 341/844 [04:44<06:59,  1.20it/s]

batch loss: 0.4721


Epoch 4:  41%|████      | 342/844 [04:45<06:59,  1.20it/s]

batch loss: 0.4396


Epoch 4:  41%|████      | 343/844 [04:45<06:57,  1.20it/s]

batch loss: 0.4217


Epoch 4:  41%|████      | 344/844 [04:46<06:55,  1.20it/s]

batch loss: 0.4609


Epoch 4:  41%|████      | 345/844 [04:47<06:55,  1.20it/s]

batch loss: 0.5534


Epoch 4:  41%|████      | 346/844 [04:48<06:53,  1.20it/s]

batch loss: 0.4741


Epoch 4:  41%|████      | 347/844 [04:49<06:53,  1.20it/s]

batch loss: 0.4434


Epoch 4:  41%|████      | 348/844 [04:50<06:55,  1.19it/s]

batch loss: 0.4430


Epoch 4:  41%|████▏     | 349/844 [04:50<06:53,  1.20it/s]

batch loss: 0.4287


Epoch 4:  41%|████▏     | 350/844 [04:51<06:52,  1.20it/s]

batch loss: 0.4593


Epoch 4:  42%|████▏     | 351/844 [04:52<06:51,  1.20it/s]

batch loss: 0.5374


Epoch 4:  42%|████▏     | 352/844 [04:53<06:50,  1.20it/s]

batch loss: 0.4008


Epoch 4:  42%|████▏     | 353/844 [04:54<06:49,  1.20it/s]

batch loss: 0.4456


Epoch 4:  42%|████▏     | 354/844 [04:55<06:48,  1.20it/s]

batch loss: 0.4743


Epoch 4:  42%|████▏     | 355/844 [04:55<06:48,  1.20it/s]

batch loss: 0.5036


Epoch 4:  42%|████▏     | 356/844 [04:56<06:46,  1.20it/s]

batch loss: 0.4652


Epoch 4:  42%|████▏     | 357/844 [04:57<06:45,  1.20it/s]

batch loss: 0.4629


Epoch 4:  42%|████▏     | 358/844 [04:58<06:44,  1.20it/s]

batch loss: 0.5251


Epoch 4:  43%|████▎     | 359/844 [04:59<06:43,  1.20it/s]

batch loss: 0.4807


Epoch 4:  43%|████▎     | 360/844 [05:00<06:41,  1.20it/s]

batch loss: 0.4295


Epoch 4:  43%|████▎     | 361/844 [05:00<06:40,  1.20it/s]

batch loss: 0.4823


Epoch 4:  43%|████▎     | 362/844 [05:01<06:40,  1.20it/s]

batch loss: 0.4268


Epoch 4:  43%|████▎     | 363/844 [05:02<06:39,  1.20it/s]

batch loss: 0.4094


Epoch 4:  43%|████▎     | 364/844 [05:03<06:40,  1.20it/s]

batch loss: 0.5203


Epoch 4:  43%|████▎     | 365/844 [05:04<06:41,  1.19it/s]

batch loss: 0.4241


Epoch 4:  43%|████▎     | 366/844 [05:05<06:40,  1.19it/s]

batch loss: 0.4543


Epoch 4:  43%|████▎     | 367/844 [05:05<06:37,  1.20it/s]

batch loss: 0.4905


Epoch 4:  44%|████▎     | 368/844 [05:06<06:36,  1.20it/s]

batch loss: 0.6590


Epoch 4:  44%|████▎     | 369/844 [05:07<06:35,  1.20it/s]

batch loss: 0.4657


Epoch 4:  44%|████▍     | 370/844 [05:08<06:34,  1.20it/s]

batch loss: 0.3891


Epoch 4:  44%|████▍     | 371/844 [05:09<06:33,  1.20it/s]

batch loss: 0.4159


Epoch 4:  44%|████▍     | 372/844 [05:10<06:32,  1.20it/s]

batch loss: 0.4396


Epoch 4:  44%|████▍     | 373/844 [05:10<06:31,  1.20it/s]

batch loss: 0.4755


Epoch 4:  44%|████▍     | 374/844 [05:11<06:31,  1.20it/s]

batch loss: 0.4505


Epoch 4:  44%|████▍     | 375/844 [05:12<06:31,  1.20it/s]

batch loss: 0.5087


Epoch 4:  45%|████▍     | 376/844 [05:13<06:29,  1.20it/s]

batch loss: 0.5034


Epoch 4:  45%|████▍     | 377/844 [05:14<06:27,  1.20it/s]

batch loss: 0.4607


Epoch 4:  45%|████▍     | 378/844 [05:15<06:27,  1.20it/s]

batch loss: 0.4523


Epoch 4:  45%|████▍     | 379/844 [05:15<06:26,  1.20it/s]

batch loss: 0.4780


Epoch 4:  45%|████▌     | 380/844 [05:16<06:26,  1.20it/s]

batch loss: 0.5701


Epoch 4:  45%|████▌     | 381/844 [05:17<06:25,  1.20it/s]

batch loss: 0.4680


Epoch 4:  45%|████▌     | 382/844 [05:18<06:25,  1.20it/s]

batch loss: 0.5460


Epoch 4:  45%|████▌     | 383/844 [05:19<06:25,  1.20it/s]

batch loss: 0.5200


Epoch 4:  45%|████▌     | 384/844 [05:20<06:24,  1.20it/s]

batch loss: 0.4257


Epoch 4:  46%|████▌     | 385/844 [05:20<06:23,  1.20it/s]

batch loss: 0.4763


Epoch 4:  46%|████▌     | 386/844 [05:21<06:21,  1.20it/s]

batch loss: 0.4546


Epoch 4:  46%|████▌     | 387/844 [05:22<06:20,  1.20it/s]

batch loss: 0.4315


Epoch 4:  46%|████▌     | 388/844 [05:23<06:18,  1.20it/s]

batch loss: 0.3562


Epoch 4:  46%|████▌     | 389/844 [05:24<06:17,  1.20it/s]

batch loss: 0.4714


Epoch 4:  46%|████▌     | 390/844 [05:25<06:17,  1.20it/s]

batch loss: 0.4728


Epoch 4:  46%|████▋     | 391/844 [05:25<06:16,  1.20it/s]

batch loss: 0.4708


Epoch 4:  46%|████▋     | 392/844 [05:26<06:15,  1.20it/s]

batch loss: 0.4143


Epoch 4:  47%|████▋     | 393/844 [05:27<06:14,  1.20it/s]

batch loss: 0.6030


Epoch 4:  47%|████▋     | 394/844 [05:28<06:14,  1.20it/s]

batch loss: 0.3933


Epoch 4:  47%|████▋     | 395/844 [05:29<06:12,  1.20it/s]

batch loss: 0.4523


Epoch 4:  47%|████▋     | 396/844 [05:30<06:12,  1.20it/s]

batch loss: 0.3996


Epoch 4:  47%|████▋     | 397/844 [05:30<06:12,  1.20it/s]

batch loss: 0.4970


Epoch 4:  47%|████▋     | 398/844 [05:31<06:11,  1.20it/s]

batch loss: 0.4467


Epoch 4:  47%|████▋     | 399/844 [05:32<06:11,  1.20it/s]

batch loss: 0.4737


Epoch 4:  47%|████▋     | 400/844 [05:33<06:12,  1.19it/s]

batch loss: 0.5297


Epoch 4:  48%|████▊     | 401/844 [05:34<06:10,  1.20it/s]

batch loss: 0.4238


Epoch 4:  48%|████▊     | 402/844 [05:35<06:09,  1.20it/s]

batch loss: 0.4338


Epoch 4:  48%|████▊     | 403/844 [05:35<06:08,  1.20it/s]

batch loss: 0.4582


Epoch 4:  48%|████▊     | 404/844 [05:36<06:06,  1.20it/s]

batch loss: 0.5750


Epoch 4:  48%|████▊     | 405/844 [05:37<06:04,  1.20it/s]

batch loss: 0.4200


Epoch 4:  48%|████▊     | 406/844 [05:38<06:04,  1.20it/s]

batch loss: 0.3938


Epoch 4:  48%|████▊     | 407/844 [05:39<06:03,  1.20it/s]

batch loss: 0.4683


Epoch 4:  48%|████▊     | 408/844 [05:40<06:02,  1.20it/s]

batch loss: 0.4726


Epoch 4:  48%|████▊     | 409/844 [05:40<06:02,  1.20it/s]

batch loss: 0.4151


Epoch 4:  49%|████▊     | 410/844 [05:41<06:02,  1.20it/s]

batch loss: 0.4776


Epoch 4:  49%|████▊     | 411/844 [05:42<06:00,  1.20it/s]

batch loss: 0.5542


Epoch 4:  49%|████▉     | 412/844 [05:43<05:58,  1.20it/s]

batch loss: 0.5168


Epoch 4:  49%|████▉     | 413/844 [05:44<05:58,  1.20it/s]

batch loss: 0.4682


Epoch 4:  49%|████▉     | 414/844 [05:45<05:58,  1.20it/s]

batch loss: 0.5208


Epoch 4:  49%|████▉     | 415/844 [05:45<05:58,  1.20it/s]

batch loss: 0.5242


Epoch 4:  49%|████▉     | 416/844 [05:46<05:58,  1.19it/s]

batch loss: 0.4742


Epoch 4:  49%|████▉     | 417/844 [05:47<05:57,  1.19it/s]

batch loss: 0.4180


Epoch 4:  50%|████▉     | 418/844 [05:48<05:55,  1.20it/s]

batch loss: 0.4660


Epoch 4:  50%|████▉     | 419/844 [05:49<05:53,  1.20it/s]

batch loss: 0.4614


Epoch 4:  50%|████▉     | 420/844 [05:50<05:52,  1.20it/s]

batch loss: 0.4684


Epoch 4:  50%|████▉     | 421/844 [05:50<05:51,  1.20it/s]

batch loss: 0.4571


Epoch 4:  50%|█████     | 422/844 [05:51<05:51,  1.20it/s]

batch loss: 0.5250


Epoch 4:  50%|█████     | 423/844 [05:52<05:50,  1.20it/s]

batch loss: 0.4898


Epoch 4:  50%|█████     | 424/844 [05:53<05:49,  1.20it/s]

batch loss: 0.4111


Epoch 4:  50%|█████     | 425/844 [05:54<05:48,  1.20it/s]

batch loss: 0.4776


Epoch 4:  50%|█████     | 426/844 [05:55<05:48,  1.20it/s]

batch loss: 0.3768


Epoch 4:  51%|█████     | 427/844 [05:55<05:47,  1.20it/s]

batch loss: 0.4100


Epoch 4:  51%|█████     | 428/844 [05:56<05:46,  1.20it/s]

batch loss: 0.4736


Epoch 4:  51%|█████     | 429/844 [05:57<05:45,  1.20it/s]

batch loss: 0.4868


Epoch 4:  51%|█████     | 430/844 [05:58<05:45,  1.20it/s]

batch loss: 0.4490


Epoch 4:  51%|█████     | 431/844 [05:59<05:44,  1.20it/s]

batch loss: 0.5464


Epoch 4:  51%|█████     | 432/844 [06:00<05:44,  1.19it/s]

batch loss: 0.4857


Epoch 4:  51%|█████▏    | 433/844 [06:00<05:43,  1.20it/s]

batch loss: 0.4456


Epoch 4:  51%|█████▏    | 434/844 [06:01<05:42,  1.20it/s]

batch loss: 0.5435


Epoch 4:  52%|█████▏    | 435/844 [06:02<05:42,  1.19it/s]

batch loss: 0.4705


Epoch 4:  52%|█████▏    | 436/844 [06:03<05:40,  1.20it/s]

batch loss: 0.4691


Epoch 4:  52%|█████▏    | 437/844 [06:04<05:39,  1.20it/s]

batch loss: 0.4460


Epoch 4:  52%|█████▏    | 438/844 [06:05<05:38,  1.20it/s]

batch loss: 0.4979


Epoch 4:  52%|█████▏    | 439/844 [06:05<05:38,  1.19it/s]

batch loss: 0.4027


Epoch 4:  52%|█████▏    | 440/844 [06:06<05:37,  1.20it/s]

batch loss: 0.5312


Epoch 4:  52%|█████▏    | 441/844 [06:07<05:36,  1.20it/s]

batch loss: 0.4366


Epoch 4:  52%|█████▏    | 442/844 [06:08<05:35,  1.20it/s]

batch loss: 0.4848


Epoch 4:  52%|█████▏    | 443/844 [06:09<05:34,  1.20it/s]

batch loss: 0.4517


Epoch 4:  53%|█████▎    | 444/844 [06:10<05:33,  1.20it/s]

batch loss: 0.4481


Epoch 4:  53%|█████▎    | 445/844 [06:10<05:32,  1.20it/s]

batch loss: 0.4347


Epoch 4:  53%|█████▎    | 446/844 [06:11<05:31,  1.20it/s]

batch loss: 0.4976


Epoch 4:  53%|█████▎    | 447/844 [06:12<05:31,  1.20it/s]

batch loss: 0.5419


Epoch 4:  53%|█████▎    | 448/844 [06:13<05:32,  1.19it/s]

batch loss: 0.4205


Epoch 4:  53%|█████▎    | 449/844 [06:14<05:32,  1.19it/s]

batch loss: 0.4705


Epoch 4:  53%|█████▎    | 450/844 [06:15<05:31,  1.19it/s]

batch loss: 0.4811


Epoch 4:  53%|█████▎    | 451/844 [06:16<05:29,  1.19it/s]

batch loss: 0.4755


Epoch 4:  54%|█████▎    | 452/844 [06:16<05:28,  1.19it/s]

batch loss: 0.5164


Epoch 4:  54%|█████▎    | 453/844 [06:17<05:27,  1.19it/s]

batch loss: 0.4913


Epoch 4:  54%|█████▍    | 454/844 [06:18<05:25,  1.20it/s]

batch loss: 0.4446


Epoch 4:  54%|█████▍    | 455/844 [06:19<05:25,  1.20it/s]

batch loss: 0.4973


Epoch 4:  54%|█████▍    | 456/844 [06:20<05:24,  1.20it/s]

batch loss: 0.4574


Epoch 4:  54%|█████▍    | 457/844 [06:21<05:23,  1.20it/s]

batch loss: 0.4468


Epoch 4:  54%|█████▍    | 458/844 [06:21<05:22,  1.20it/s]

batch loss: 0.5638


Epoch 4:  54%|█████▍    | 459/844 [06:22<05:21,  1.20it/s]

batch loss: 0.4445


Epoch 4:  55%|█████▍    | 460/844 [06:23<05:21,  1.20it/s]

batch loss: 0.4984


Epoch 4:  55%|█████▍    | 461/844 [06:24<05:20,  1.20it/s]

batch loss: 0.4316


Epoch 4:  55%|█████▍    | 462/844 [06:25<05:19,  1.20it/s]

batch loss: 0.6054


Epoch 4:  55%|█████▍    | 463/844 [06:26<05:18,  1.20it/s]

batch loss: 0.4524


Epoch 4:  55%|█████▍    | 464/844 [06:26<05:17,  1.20it/s]

batch loss: 0.4354


Epoch 4:  55%|█████▌    | 465/844 [06:27<05:16,  1.20it/s]

batch loss: 0.4339


Epoch 4:  55%|█████▌    | 466/844 [06:28<05:17,  1.19it/s]

batch loss: 0.4432


Epoch 4:  55%|█████▌    | 467/844 [06:29<05:16,  1.19it/s]

batch loss: 0.4670


Epoch 4:  55%|█████▌    | 468/844 [06:30<05:15,  1.19it/s]

batch loss: 0.4509


Epoch 4:  56%|█████▌    | 469/844 [06:31<05:14,  1.19it/s]

batch loss: 0.4187


Epoch 4:  56%|█████▌    | 470/844 [06:31<05:13,  1.19it/s]

batch loss: 0.4814


Epoch 4:  56%|█████▌    | 471/844 [06:32<05:12,  1.20it/s]

batch loss: 0.5542


Epoch 4:  56%|█████▌    | 472/844 [06:33<05:10,  1.20it/s]

batch loss: 0.4048


Epoch 4:  56%|█████▌    | 473/844 [06:34<05:09,  1.20it/s]

batch loss: 0.5617


Epoch 4:  56%|█████▌    | 474/844 [06:35<05:08,  1.20it/s]

batch loss: 0.4996


Epoch 4:  56%|█████▋    | 475/844 [06:36<05:08,  1.20it/s]

batch loss: 0.3953


Epoch 4:  56%|█████▋    | 476/844 [06:36<05:08,  1.19it/s]

batch loss: 0.4304


Epoch 4:  57%|█████▋    | 477/844 [06:37<05:07,  1.19it/s]

batch loss: 0.4520


Epoch 4:  57%|█████▋    | 478/844 [06:38<05:06,  1.19it/s]

batch loss: 0.4914


Epoch 4:  57%|█████▋    | 479/844 [06:39<05:05,  1.20it/s]

batch loss: 0.4458


Epoch 4:  57%|█████▋    | 480/844 [06:40<05:04,  1.20it/s]

batch loss: 0.4222


Epoch 4:  57%|█████▋    | 481/844 [06:41<05:03,  1.19it/s]

batch loss: 0.4961


Epoch 4:  57%|█████▋    | 482/844 [06:41<05:03,  1.19it/s]

batch loss: 0.5185


Epoch 4:  57%|█████▋    | 483/844 [06:42<05:02,  1.19it/s]

batch loss: 0.5165


Epoch 4:  57%|█████▋    | 484/844 [06:43<05:01,  1.19it/s]

batch loss: 0.4989


Epoch 4:  57%|█████▋    | 485/844 [06:44<05:00,  1.19it/s]

batch loss: 0.4029


Epoch 4:  58%|█████▊    | 486/844 [06:45<05:00,  1.19it/s]

batch loss: 0.4636


Epoch 4:  58%|█████▊    | 487/844 [06:46<05:00,  1.19it/s]

batch loss: 0.4769


Epoch 4:  58%|█████▊    | 488/844 [06:46<04:58,  1.19it/s]

batch loss: 0.4620


Epoch 4:  58%|█████▊    | 489/844 [06:47<04:57,  1.19it/s]

batch loss: 0.4913


Epoch 4:  58%|█████▊    | 490/844 [06:48<04:56,  1.20it/s]

batch loss: 0.4429


Epoch 4:  58%|█████▊    | 491/844 [06:49<04:55,  1.19it/s]

batch loss: 0.3791


Epoch 4:  58%|█████▊    | 492/844 [06:50<04:55,  1.19it/s]

batch loss: 0.4050


Epoch 4:  58%|█████▊    | 493/844 [06:51<04:54,  1.19it/s]

batch loss: 0.5112


Epoch 4:  59%|█████▊    | 494/844 [06:51<04:53,  1.19it/s]

batch loss: 0.4684


Epoch 4:  59%|█████▊    | 495/844 [06:52<04:52,  1.19it/s]

batch loss: 0.4839


Epoch 4:  59%|█████▉    | 496/844 [06:53<04:52,  1.19it/s]

batch loss: 0.4809


Epoch 4:  59%|█████▉    | 497/844 [06:54<04:52,  1.19it/s]

batch loss: 0.4384


Epoch 4:  59%|█████▉    | 498/844 [06:55<04:50,  1.19it/s]

batch loss: 0.4719


Epoch 4:  59%|█████▉    | 499/844 [06:56<04:48,  1.20it/s]

batch loss: 0.4706


Epoch 4:  59%|█████▉    | 500/844 [06:57<04:48,  1.19it/s]

batch loss: 0.3914


Epoch 4:  59%|█████▉    | 501/844 [06:57<04:47,  1.19it/s]

batch loss: 0.4702


Epoch 4:  59%|█████▉    | 502/844 [06:58<04:45,  1.20it/s]

batch loss: 0.4375


Epoch 4:  60%|█████▉    | 503/844 [06:59<04:44,  1.20it/s]

batch loss: 0.4143


Epoch 4:  60%|█████▉    | 504/844 [07:00<04:43,  1.20it/s]

batch loss: 0.4974


Epoch 4:  60%|█████▉    | 505/844 [07:01<04:43,  1.20it/s]

batch loss: 0.4307


Epoch 4:  60%|█████▉    | 506/844 [07:02<04:42,  1.20it/s]

batch loss: 0.4609


Epoch 4:  60%|██████    | 507/844 [07:02<04:41,  1.20it/s]

batch loss: 0.4609


Epoch 4:  60%|██████    | 508/844 [07:03<04:40,  1.20it/s]

batch loss: 0.4158


Epoch 4:  60%|██████    | 509/844 [07:04<04:39,  1.20it/s]

batch loss: 0.4333


Epoch 4:  60%|██████    | 510/844 [07:05<04:39,  1.20it/s]

batch loss: 0.4664


Epoch 4:  61%|██████    | 511/844 [07:06<04:38,  1.19it/s]

batch loss: 0.4344


Epoch 4:  61%|██████    | 512/844 [07:07<04:39,  1.19it/s]

batch loss: 0.3938


Epoch 4:  61%|██████    | 513/844 [07:07<04:38,  1.19it/s]

batch loss: 0.3959


Epoch 4:  61%|██████    | 514/844 [07:08<04:37,  1.19it/s]

batch loss: 0.4469


Epoch 4:  61%|██████    | 515/844 [07:09<04:36,  1.19it/s]

batch loss: 0.4646


Epoch 4:  61%|██████    | 516/844 [07:10<04:34,  1.19it/s]

batch loss: 0.4526


Epoch 4:  61%|██████▏   | 517/844 [07:11<04:33,  1.19it/s]

batch loss: 0.4064


Epoch 4:  61%|██████▏   | 518/844 [07:12<04:32,  1.20it/s]

batch loss: 0.4730


Epoch 4:  61%|██████▏   | 519/844 [07:12<04:31,  1.20it/s]

batch loss: 0.4584


Epoch 4:  62%|██████▏   | 520/844 [07:13<04:30,  1.20it/s]

batch loss: 0.4766


Epoch 4:  62%|██████▏   | 521/844 [07:14<04:29,  1.20it/s]

batch loss: 0.4402


Epoch 4:  62%|██████▏   | 522/844 [07:15<04:29,  1.20it/s]

batch loss: 0.4355


Epoch 4:  62%|██████▏   | 523/844 [07:16<04:29,  1.19it/s]

batch loss: 0.4857


Epoch 4:  62%|██████▏   | 524/844 [07:17<04:27,  1.19it/s]

batch loss: 0.4142


Epoch 4:  62%|██████▏   | 525/844 [07:17<04:26,  1.20it/s]

batch loss: 0.4262


Epoch 4:  62%|██████▏   | 526/844 [07:18<04:25,  1.20it/s]

batch loss: 0.4341


Epoch 4:  62%|██████▏   | 527/844 [07:19<04:24,  1.20it/s]

batch loss: 0.4393


Epoch 4:  63%|██████▎   | 528/844 [07:20<04:23,  1.20it/s]

batch loss: 0.5328


Epoch 4:  63%|██████▎   | 529/844 [07:21<04:23,  1.19it/s]

batch loss: 0.3665


Epoch 4:  63%|██████▎   | 530/844 [07:22<04:22,  1.20it/s]

batch loss: 0.4624


Epoch 4:  63%|██████▎   | 531/844 [07:22<04:22,  1.19it/s]

batch loss: 0.4444


Epoch 4:  63%|██████▎   | 532/844 [07:23<04:21,  1.19it/s]

batch loss: 0.5241


Epoch 4:  63%|██████▎   | 533/844 [07:24<04:19,  1.20it/s]

batch loss: 0.5048


Epoch 4:  63%|██████▎   | 534/844 [07:25<04:18,  1.20it/s]

batch loss: 0.4709


Epoch 4:  63%|██████▎   | 535/844 [07:26<04:18,  1.19it/s]

batch loss: 0.4802


Epoch 4:  64%|██████▎   | 536/844 [07:27<04:17,  1.20it/s]

batch loss: 0.3904


Epoch 4:  64%|██████▎   | 537/844 [07:27<04:16,  1.20it/s]

batch loss: 0.4068


Epoch 4:  64%|██████▎   | 538/844 [07:28<04:14,  1.20it/s]

batch loss: 0.4553


Epoch 4:  64%|██████▍   | 539/844 [07:29<04:14,  1.20it/s]

batch loss: 0.4360


Epoch 4:  64%|██████▍   | 540/844 [07:30<04:13,  1.20it/s]

batch loss: 0.5012


Epoch 4:  64%|██████▍   | 541/844 [07:31<04:12,  1.20it/s]

batch loss: 0.4463


Epoch 4:  64%|██████▍   | 542/844 [07:32<04:11,  1.20it/s]

batch loss: 0.4349


Epoch 4:  64%|██████▍   | 543/844 [07:32<04:10,  1.20it/s]

batch loss: 0.4443


Epoch 4:  64%|██████▍   | 544/844 [07:33<04:09,  1.20it/s]

batch loss: 0.4932


Epoch 4:  65%|██████▍   | 545/844 [07:34<04:10,  1.20it/s]

batch loss: 0.4389


Epoch 4:  65%|██████▍   | 546/844 [07:35<04:10,  1.19it/s]

batch loss: 0.4754


Epoch 4:  65%|██████▍   | 547/844 [07:36<04:08,  1.19it/s]

batch loss: 0.4544


Epoch 4:  65%|██████▍   | 548/844 [07:37<04:07,  1.20it/s]

batch loss: 0.4805


Epoch 4:  65%|██████▌   | 549/844 [07:37<04:06,  1.20it/s]

batch loss: 0.4930


Epoch 4:  65%|██████▌   | 550/844 [07:38<04:05,  1.20it/s]

batch loss: 0.3825


Epoch 4:  65%|██████▌   | 551/844 [07:39<04:05,  1.19it/s]

batch loss: 0.5508


Epoch 4:  65%|██████▌   | 552/844 [07:40<04:04,  1.20it/s]

batch loss: 0.4556


Epoch 4:  66%|██████▌   | 553/844 [07:41<04:02,  1.20it/s]

batch loss: 0.4512


Epoch 4:  66%|██████▌   | 554/844 [07:42<04:01,  1.20it/s]

batch loss: 0.4664


Epoch 4:  66%|██████▌   | 555/844 [07:42<04:00,  1.20it/s]

batch loss: 0.3763


Epoch 4:  66%|██████▌   | 556/844 [07:43<03:59,  1.20it/s]

batch loss: 0.4846


Epoch 4:  66%|██████▌   | 557/844 [07:44<03:59,  1.20it/s]

batch loss: 0.4303


Epoch 4:  66%|██████▌   | 558/844 [07:45<03:58,  1.20it/s]

batch loss: 0.4532


Epoch 4:  66%|██████▌   | 559/844 [07:46<03:57,  1.20it/s]

batch loss: 0.4646


Epoch 4:  66%|██████▋   | 560/844 [07:47<03:56,  1.20it/s]

batch loss: 0.4087


Epoch 4:  66%|██████▋   | 561/844 [07:47<03:57,  1.19it/s]

batch loss: 0.4877


Epoch 4:  67%|██████▋   | 562/844 [07:48<03:55,  1.20it/s]

batch loss: 0.4977


Epoch 4:  67%|██████▋   | 563/844 [07:49<03:54,  1.20it/s]

batch loss: 0.4566


Epoch 4:  67%|██████▋   | 564/844 [07:50<03:54,  1.19it/s]

batch loss: 0.4598


Epoch 4:  67%|██████▋   | 565/844 [07:51<03:53,  1.20it/s]

batch loss: 0.5227


Epoch 4:  67%|██████▋   | 566/844 [07:52<03:52,  1.19it/s]

batch loss: 0.4109


Epoch 4:  67%|██████▋   | 567/844 [07:53<03:50,  1.20it/s]

batch loss: 0.4818


Epoch 4:  67%|██████▋   | 568/844 [07:53<03:50,  1.20it/s]

batch loss: 0.4334


Epoch 4:  67%|██████▋   | 569/844 [07:54<03:49,  1.20it/s]

batch loss: 0.3916


Epoch 4:  68%|██████▊   | 570/844 [07:55<03:48,  1.20it/s]

batch loss: 0.5062


Epoch 4:  68%|██████▊   | 571/844 [07:56<03:46,  1.20it/s]

batch loss: 0.5051


Epoch 4:  68%|██████▊   | 572/844 [07:57<03:46,  1.20it/s]

batch loss: 0.4294


Epoch 4:  68%|██████▊   | 573/844 [07:57<03:45,  1.20it/s]

batch loss: 0.4967


Epoch 4:  68%|██████▊   | 574/844 [07:58<03:44,  1.20it/s]

batch loss: 0.4154


Epoch 4:  68%|██████▊   | 575/844 [07:59<03:44,  1.20it/s]

batch loss: 0.5090


Epoch 4:  68%|██████▊   | 576/844 [08:00<03:43,  1.20it/s]

batch loss: 0.4811


Epoch 4:  68%|██████▊   | 577/844 [08:01<03:42,  1.20it/s]

batch loss: 0.4904


Epoch 4:  68%|██████▊   | 578/844 [08:02<03:41,  1.20it/s]

batch loss: 0.4336


Epoch 4:  69%|██████▊   | 579/844 [08:03<03:41,  1.20it/s]

batch loss: 0.4824


Epoch 4:  69%|██████▊   | 580/844 [08:03<03:40,  1.20it/s]

batch loss: 0.5430


Epoch 4:  69%|██████▉   | 581/844 [08:04<03:39,  1.20it/s]

batch loss: 0.3980


Epoch 4:  69%|██████▉   | 582/844 [08:05<03:37,  1.20it/s]

batch loss: 0.4006


Epoch 4:  69%|██████▉   | 583/844 [08:06<03:37,  1.20it/s]

batch loss: 0.4446


Epoch 4:  69%|██████▉   | 584/844 [08:07<03:36,  1.20it/s]

batch loss: 0.4875


Epoch 4:  69%|██████▉   | 585/844 [08:07<03:35,  1.20it/s]

batch loss: 0.4327


Epoch 4:  69%|██████▉   | 586/844 [08:08<03:34,  1.20it/s]

batch loss: 0.4358


Epoch 4:  70%|██████▉   | 587/844 [08:09<03:33,  1.20it/s]

batch loss: 0.3847


Epoch 4:  70%|██████▉   | 588/844 [08:10<03:33,  1.20it/s]

batch loss: 0.4707


Epoch 4:  70%|██████▉   | 589/844 [08:11<03:32,  1.20it/s]

batch loss: 0.4990


Epoch 4:  70%|██████▉   | 590/844 [08:12<03:31,  1.20it/s]

batch loss: 0.4759


Epoch 4:  70%|███████   | 591/844 [08:12<03:30,  1.20it/s]

batch loss: 0.4722


Epoch 4:  70%|███████   | 592/844 [08:13<03:29,  1.20it/s]

batch loss: 0.4375


Epoch 4:  70%|███████   | 593/844 [08:14<03:29,  1.20it/s]

batch loss: 0.4498


Epoch 4:  70%|███████   | 594/844 [08:15<03:28,  1.20it/s]

batch loss: 0.4150


Epoch 4:  70%|███████   | 595/844 [08:16<03:27,  1.20it/s]

batch loss: 0.4172


Epoch 4:  71%|███████   | 596/844 [08:17<03:26,  1.20it/s]

batch loss: 0.4704


Epoch 4:  71%|███████   | 597/844 [08:17<03:25,  1.20it/s]

batch loss: 0.4384


Epoch 4:  71%|███████   | 598/844 [08:18<03:25,  1.20it/s]

batch loss: 0.4967


Epoch 4:  71%|███████   | 599/844 [08:19<03:25,  1.19it/s]

batch loss: 0.4392


Epoch 4:  71%|███████   | 600/844 [08:20<03:24,  1.20it/s]

batch loss: 0.4881


Epoch 4:  71%|███████   | 601/844 [08:21<03:22,  1.20it/s]

batch loss: 0.4022


Epoch 4:  71%|███████▏  | 602/844 [08:22<03:21,  1.20it/s]

batch loss: 0.4772


Epoch 4:  71%|███████▏  | 603/844 [08:22<03:20,  1.20it/s]

batch loss: 0.5197


Epoch 4:  72%|███████▏  | 604/844 [08:23<03:20,  1.20it/s]

batch loss: 0.4379


Epoch 4:  72%|███████▏  | 605/844 [08:24<03:19,  1.20it/s]

batch loss: 0.4136


Epoch 4:  72%|███████▏  | 606/844 [08:25<03:18,  1.20it/s]

batch loss: 0.4001


Epoch 4:  72%|███████▏  | 607/844 [08:26<03:17,  1.20it/s]

batch loss: 0.5665


Epoch 4:  72%|███████▏  | 608/844 [08:27<03:16,  1.20it/s]

batch loss: 0.4323


Epoch 4:  72%|███████▏  | 609/844 [08:27<03:15,  1.20it/s]

batch loss: 0.4524


Epoch 4:  72%|███████▏  | 610/844 [08:28<03:15,  1.20it/s]

batch loss: 0.4107


Epoch 4:  72%|███████▏  | 611/844 [08:29<03:14,  1.20it/s]

batch loss: 0.4683


Epoch 4:  73%|███████▎  | 612/844 [08:30<03:14,  1.19it/s]

batch loss: 0.3998


Epoch 4:  73%|███████▎  | 613/844 [08:31<03:13,  1.20it/s]

batch loss: 0.4903


Epoch 4:  73%|███████▎  | 614/844 [08:32<03:13,  1.19it/s]

batch loss: 0.4513


Epoch 4:  73%|███████▎  | 615/844 [08:33<03:12,  1.19it/s]

batch loss: 0.5339


Epoch 4:  73%|███████▎  | 616/844 [08:33<03:11,  1.19it/s]

batch loss: 0.4886


Epoch 4:  73%|███████▎  | 617/844 [08:34<03:10,  1.19it/s]

batch loss: 0.5643


Epoch 4:  73%|███████▎  | 618/844 [08:35<03:09,  1.19it/s]

batch loss: 0.4484


Epoch 4:  73%|███████▎  | 619/844 [08:36<03:08,  1.20it/s]

batch loss: 0.5097


Epoch 4:  73%|███████▎  | 620/844 [08:37<03:07,  1.19it/s]

batch loss: 0.4319


Epoch 4:  74%|███████▎  | 621/844 [08:38<03:06,  1.20it/s]

batch loss: 0.4688


Epoch 4:  74%|███████▎  | 622/844 [08:38<03:05,  1.20it/s]

batch loss: 0.4672


Epoch 4:  74%|███████▍  | 623/844 [08:39<03:04,  1.20it/s]

batch loss: 0.3625


Epoch 4:  74%|███████▍  | 624/844 [08:40<03:03,  1.20it/s]

batch loss: 0.4310


Epoch 4:  74%|███████▍  | 625/844 [08:41<03:03,  1.20it/s]

batch loss: 0.4257


Epoch 4:  74%|███████▍  | 626/844 [08:42<03:02,  1.20it/s]

batch loss: 0.3592


Epoch 4:  74%|███████▍  | 627/844 [08:43<03:01,  1.20it/s]

batch loss: 0.5075


Epoch 4:  74%|███████▍  | 628/844 [08:43<03:00,  1.20it/s]

batch loss: 0.4645


Epoch 4:  75%|███████▍  | 629/844 [08:44<02:59,  1.20it/s]

batch loss: 0.4742


Epoch 4:  75%|███████▍  | 630/844 [08:45<02:59,  1.19it/s]

batch loss: 0.4205


Epoch 4:  75%|███████▍  | 631/844 [08:46<02:58,  1.19it/s]

batch loss: 0.4101


Epoch 4:  75%|███████▍  | 632/844 [08:47<02:58,  1.19it/s]

batch loss: 0.5139


Epoch 4:  75%|███████▌  | 633/844 [08:48<02:57,  1.19it/s]

batch loss: 0.4480


Epoch 4:  75%|███████▌  | 634/844 [08:48<02:55,  1.19it/s]

batch loss: 0.5315


Epoch 4:  75%|███████▌  | 635/844 [08:49<02:54,  1.20it/s]

batch loss: 0.5027


Epoch 4:  75%|███████▌  | 636/844 [08:50<02:53,  1.20it/s]

batch loss: 0.4087


Epoch 4:  75%|███████▌  | 637/844 [08:51<02:53,  1.19it/s]

batch loss: 0.5225


Epoch 4:  76%|███████▌  | 638/844 [08:52<02:52,  1.20it/s]

batch loss: 0.5167


Epoch 4:  76%|███████▌  | 639/844 [08:53<02:50,  1.20it/s]

batch loss: 0.4878


Epoch 4:  76%|███████▌  | 640/844 [08:53<02:49,  1.20it/s]

batch loss: 0.4959


Epoch 4:  76%|███████▌  | 641/844 [08:54<02:49,  1.20it/s]

batch loss: 0.3909


Epoch 4:  76%|███████▌  | 642/844 [08:55<02:48,  1.20it/s]

batch loss: 0.3782


Epoch 4:  76%|███████▌  | 643/844 [08:56<02:47,  1.20it/s]

batch loss: 0.4010


Epoch 4:  76%|███████▋  | 644/844 [08:57<02:46,  1.20it/s]

batch loss: 0.4663


Epoch 4:  76%|███████▋  | 645/844 [08:58<02:45,  1.20it/s]

batch loss: 0.4592


Epoch 4:  77%|███████▋  | 646/844 [08:58<02:44,  1.20it/s]

batch loss: 0.4265


Epoch 4:  77%|███████▋  | 647/844 [08:59<02:44,  1.20it/s]

batch loss: 0.4280


Epoch 4:  77%|███████▋  | 648/844 [09:00<02:44,  1.19it/s]

batch loss: 0.4979


Epoch 4:  77%|███████▋  | 649/844 [09:01<02:43,  1.19it/s]

batch loss: 0.6609


Epoch 4:  77%|███████▋  | 650/844 [09:02<02:42,  1.19it/s]

batch loss: 0.4769


Epoch 4:  77%|███████▋  | 651/844 [09:03<02:41,  1.20it/s]

batch loss: 0.4596


Epoch 4:  77%|███████▋  | 652/844 [09:03<02:40,  1.20it/s]

batch loss: 0.4679


Epoch 4:  77%|███████▋  | 653/844 [09:04<02:39,  1.20it/s]

batch loss: 0.3893


Epoch 4:  77%|███████▋  | 654/844 [09:05<02:38,  1.20it/s]

batch loss: 0.4975


Epoch 4:  78%|███████▊  | 655/844 [09:06<02:37,  1.20it/s]

batch loss: 0.4862


Epoch 4:  78%|███████▊  | 656/844 [09:07<02:36,  1.20it/s]

batch loss: 0.4829


Epoch 4:  78%|███████▊  | 657/844 [09:08<02:35,  1.20it/s]

batch loss: 0.4795


Epoch 4:  78%|███████▊  | 658/844 [09:08<02:34,  1.20it/s]

batch loss: 0.4836


Epoch 4:  78%|███████▊  | 659/844 [09:09<02:34,  1.20it/s]

batch loss: 0.4167


Epoch 4:  78%|███████▊  | 660/844 [09:10<02:33,  1.20it/s]

batch loss: 0.4606


Epoch 4:  78%|███████▊  | 661/844 [09:11<02:32,  1.20it/s]

batch loss: 0.4175


Epoch 4:  78%|███████▊  | 662/844 [09:12<02:31,  1.20it/s]

batch loss: 0.4060


Epoch 4:  79%|███████▊  | 663/844 [09:13<02:31,  1.20it/s]

batch loss: 0.5136


Epoch 4:  79%|███████▊  | 664/844 [09:13<02:30,  1.20it/s]

batch loss: 0.4305


Epoch 4:  79%|███████▉  | 665/844 [09:14<02:29,  1.20it/s]

batch loss: 0.5278


Epoch 4:  79%|███████▉  | 666/844 [09:15<02:28,  1.20it/s]

batch loss: 0.5139


Epoch 4:  79%|███████▉  | 667/844 [09:16<02:27,  1.20it/s]

batch loss: 0.4446


Epoch 4:  79%|███████▉  | 668/844 [09:17<02:27,  1.19it/s]

batch loss: 0.4987


Epoch 4:  79%|███████▉  | 669/844 [09:18<02:25,  1.20it/s]

batch loss: 0.5065


Epoch 4:  79%|███████▉  | 670/844 [09:18<02:25,  1.20it/s]

batch loss: 0.4442


Epoch 4:  80%|███████▉  | 671/844 [09:19<02:24,  1.20it/s]

batch loss: 0.4475


Epoch 4:  80%|███████▉  | 672/844 [09:20<02:23,  1.20it/s]

batch loss: 0.4303


Epoch 4:  80%|███████▉  | 673/844 [09:21<02:22,  1.20it/s]

batch loss: 0.5089


Epoch 4:  80%|███████▉  | 674/844 [09:22<02:21,  1.20it/s]

batch loss: 0.4176


Epoch 4:  80%|███████▉  | 675/844 [09:23<02:20,  1.20it/s]

batch loss: 0.4356


Epoch 4:  80%|████████  | 676/844 [09:23<02:19,  1.20it/s]

batch loss: 0.4022


Epoch 4:  80%|████████  | 677/844 [09:24<02:19,  1.20it/s]

batch loss: 0.5010


Epoch 4:  80%|████████  | 678/844 [09:25<02:18,  1.20it/s]

batch loss: 0.4076


Epoch 4:  80%|████████  | 679/844 [09:26<02:17,  1.20it/s]

batch loss: 0.4326


Epoch 4:  81%|████████  | 680/844 [09:27<02:16,  1.20it/s]

batch loss: 0.5214


Epoch 4:  81%|████████  | 681/844 [09:28<02:16,  1.19it/s]

batch loss: 0.4667


Epoch 4:  81%|████████  | 682/844 [09:28<02:15,  1.19it/s]

batch loss: 0.4633


Epoch 4:  81%|████████  | 683/844 [09:29<02:14,  1.20it/s]

batch loss: 0.3973


Epoch 4:  81%|████████  | 684/844 [09:30<02:13,  1.20it/s]

batch loss: 0.4424


Epoch 4:  81%|████████  | 685/844 [09:31<02:13,  1.19it/s]

batch loss: 0.4843


Epoch 4:  81%|████████▏ | 686/844 [09:32<02:12,  1.20it/s]

batch loss: 0.4496


Epoch 4:  81%|████████▏ | 687/844 [09:33<02:11,  1.20it/s]

batch loss: 0.5384


Epoch 4:  82%|████████▏ | 688/844 [09:34<02:10,  1.19it/s]

batch loss: 0.4815


Epoch 4:  82%|████████▏ | 689/844 [09:34<02:09,  1.20it/s]

batch loss: 0.4312


Epoch 4:  82%|████████▏ | 690/844 [09:35<02:08,  1.20it/s]

batch loss: 0.4352


Epoch 4:  82%|████████▏ | 691/844 [09:36<02:07,  1.20it/s]

batch loss: 0.4268


Epoch 4:  82%|████████▏ | 692/844 [09:37<02:06,  1.20it/s]

batch loss: 0.4649


Epoch 4:  82%|████████▏ | 693/844 [09:38<02:05,  1.20it/s]

batch loss: 0.4239


Epoch 4:  82%|████████▏ | 694/844 [09:38<02:04,  1.20it/s]

batch loss: 0.4975


Epoch 4:  82%|████████▏ | 695/844 [09:39<02:03,  1.20it/s]

batch loss: 0.4652


Epoch 4:  82%|████████▏ | 696/844 [09:40<02:03,  1.20it/s]

batch loss: 0.4215


Epoch 4:  83%|████████▎ | 697/844 [09:41<02:03,  1.19it/s]

batch loss: 0.3978


Epoch 4:  83%|████████▎ | 698/844 [09:42<02:02,  1.20it/s]

batch loss: 0.4698


Epoch 4:  83%|████████▎ | 699/844 [09:43<02:01,  1.20it/s]

batch loss: 0.5780


Epoch 4:  83%|████████▎ | 700/844 [09:44<01:59,  1.20it/s]

batch loss: 0.4857


Epoch 4:  83%|████████▎ | 701/844 [09:44<01:59,  1.20it/s]

batch loss: 0.4608


Epoch 4:  83%|████████▎ | 702/844 [09:45<01:58,  1.20it/s]

batch loss: 0.4094


Epoch 4:  83%|████████▎ | 703/844 [09:46<01:57,  1.20it/s]

batch loss: 0.5227


Epoch 4:  83%|████████▎ | 704/844 [09:47<01:56,  1.20it/s]

batch loss: 0.4112


Epoch 4:  84%|████████▎ | 705/844 [09:48<01:55,  1.20it/s]

batch loss: 0.4738


Epoch 4:  84%|████████▎ | 706/844 [09:48<01:54,  1.20it/s]

batch loss: 0.4629


Epoch 4:  84%|████████▍ | 707/844 [09:49<01:53,  1.20it/s]

batch loss: 0.4932


Epoch 4:  84%|████████▍ | 708/844 [09:50<01:52,  1.20it/s]

batch loss: 0.4202


Epoch 4:  84%|████████▍ | 709/844 [09:51<01:52,  1.20it/s]

batch loss: 0.3722


Epoch 4:  84%|████████▍ | 710/844 [09:52<01:51,  1.20it/s]

batch loss: 0.4205


Epoch 4:  84%|████████▍ | 711/844 [09:53<01:50,  1.20it/s]

batch loss: 0.4979


Epoch 4:  84%|████████▍ | 712/844 [09:53<01:49,  1.20it/s]

batch loss: 0.4435


Epoch 4:  84%|████████▍ | 713/844 [09:54<01:48,  1.20it/s]

batch loss: 0.5076


Epoch 4:  85%|████████▍ | 714/844 [09:55<01:48,  1.20it/s]

batch loss: 0.4670


Epoch 4:  85%|████████▍ | 715/844 [09:56<01:47,  1.19it/s]

batch loss: 0.5358


Epoch 4:  85%|████████▍ | 716/844 [09:57<01:46,  1.20it/s]

batch loss: 0.3921


Epoch 4:  85%|████████▍ | 717/844 [09:58<01:45,  1.20it/s]

batch loss: 0.4001


Epoch 4:  85%|████████▌ | 718/844 [09:58<01:44,  1.20it/s]

batch loss: 0.4760


Epoch 4:  85%|████████▌ | 719/844 [09:59<01:44,  1.20it/s]

batch loss: 0.4656


Epoch 4:  85%|████████▌ | 720/844 [10:00<01:43,  1.20it/s]

batch loss: 0.3922


Epoch 4:  85%|████████▌ | 721/844 [10:01<01:42,  1.20it/s]

batch loss: 0.4534


Epoch 4:  86%|████████▌ | 722/844 [10:02<01:41,  1.20it/s]

batch loss: 0.3906


Epoch 4:  86%|████████▌ | 723/844 [10:03<01:40,  1.20it/s]

batch loss: 0.5066


Epoch 4:  86%|████████▌ | 724/844 [10:03<01:39,  1.20it/s]

batch loss: 0.4807


Epoch 4:  86%|████████▌ | 725/844 [10:04<01:38,  1.20it/s]

batch loss: 0.4320


Epoch 4:  86%|████████▌ | 726/844 [10:05<01:38,  1.20it/s]

batch loss: 0.5251


Epoch 4:  86%|████████▌ | 727/844 [10:06<01:37,  1.20it/s]

batch loss: 0.5308


Epoch 4:  86%|████████▋ | 728/844 [10:07<01:36,  1.20it/s]

batch loss: 0.5546


Epoch 4:  86%|████████▋ | 729/844 [10:08<01:36,  1.20it/s]

batch loss: 0.4155


Epoch 4:  86%|████████▋ | 730/844 [10:08<01:35,  1.19it/s]

batch loss: 0.5154


Epoch 4:  87%|████████▋ | 731/844 [10:09<01:34,  1.20it/s]

batch loss: 0.4411


Epoch 4:  87%|████████▋ | 732/844 [10:10<01:33,  1.20it/s]

batch loss: 0.4548


Epoch 4:  87%|████████▋ | 733/844 [10:11<01:32,  1.20it/s]

batch loss: 0.4894


Epoch 4:  87%|████████▋ | 734/844 [10:12<01:31,  1.20it/s]

batch loss: 0.4833


Epoch 4:  87%|████████▋ | 735/844 [10:13<01:31,  1.20it/s]

batch loss: 0.5182


Epoch 4:  87%|████████▋ | 736/844 [10:13<01:30,  1.20it/s]

batch loss: 0.4885


Epoch 4:  87%|████████▋ | 737/844 [10:14<01:29,  1.20it/s]

batch loss: 0.4640


Epoch 4:  87%|████████▋ | 738/844 [10:15<01:28,  1.20it/s]

batch loss: 0.5555


Epoch 4:  88%|████████▊ | 739/844 [10:16<01:27,  1.20it/s]

batch loss: 0.4531


Epoch 4:  88%|████████▊ | 740/844 [10:17<01:26,  1.20it/s]

batch loss: 0.4713


Epoch 4:  88%|████████▊ | 741/844 [10:18<01:25,  1.20it/s]

batch loss: 0.5177


Epoch 4:  88%|████████▊ | 742/844 [10:18<01:25,  1.20it/s]

batch loss: 0.4795


Epoch 4:  88%|████████▊ | 743/844 [10:19<01:24,  1.20it/s]

batch loss: 0.5221


Epoch 4:  88%|████████▊ | 744/844 [10:20<01:23,  1.20it/s]

batch loss: 0.4611


Epoch 4:  88%|████████▊ | 745/844 [10:21<01:22,  1.20it/s]

batch loss: 0.4476


Epoch 4:  88%|████████▊ | 746/844 [10:22<01:21,  1.20it/s]

batch loss: 0.4715


Epoch 4:  89%|████████▊ | 747/844 [10:23<01:21,  1.20it/s]

batch loss: 0.4535


Epoch 4:  89%|████████▊ | 748/844 [10:24<01:20,  1.20it/s]

batch loss: 0.4873


Epoch 4:  89%|████████▊ | 749/844 [10:24<01:19,  1.20it/s]

batch loss: 0.4555


Epoch 4:  89%|████████▉ | 750/844 [10:25<01:18,  1.20it/s]

batch loss: 0.4736


Epoch 4:  89%|████████▉ | 751/844 [10:26<01:17,  1.20it/s]

batch loss: 0.4243


Epoch 4:  89%|████████▉ | 752/844 [10:27<01:16,  1.20it/s]

batch loss: 0.4091


Epoch 4:  89%|████████▉ | 753/844 [10:28<01:15,  1.20it/s]

batch loss: 0.4581


Epoch 4:  89%|████████▉ | 754/844 [10:29<01:15,  1.20it/s]

batch loss: 0.4971


Epoch 4:  89%|████████▉ | 755/844 [10:29<01:14,  1.20it/s]

batch loss: 0.4740


Epoch 4:  90%|████████▉ | 756/844 [10:30<01:13,  1.20it/s]

batch loss: 0.4866


Epoch 4:  90%|████████▉ | 757/844 [10:31<01:12,  1.20it/s]

batch loss: 0.4224


Epoch 4:  90%|████████▉ | 758/844 [10:32<01:11,  1.20it/s]

batch loss: 0.4735


Epoch 4:  90%|████████▉ | 759/844 [10:33<01:10,  1.20it/s]

batch loss: 0.4625


Epoch 4:  90%|█████████ | 760/844 [10:33<01:09,  1.20it/s]

batch loss: 0.4575


Epoch 4:  90%|█████████ | 761/844 [10:34<01:09,  1.20it/s]

batch loss: 0.4302


Epoch 4:  90%|█████████ | 762/844 [10:35<01:08,  1.20it/s]

batch loss: 0.3917


Epoch 4:  90%|█████████ | 763/844 [10:36<01:07,  1.20it/s]

batch loss: 0.4878


Epoch 4:  91%|█████████ | 764/844 [10:37<01:06,  1.20it/s]

batch loss: 0.4627


Epoch 4:  91%|█████████ | 765/844 [10:38<01:06,  1.19it/s]

batch loss: 0.4365


Epoch 4:  91%|█████████ | 766/844 [10:39<01:05,  1.20it/s]

batch loss: 0.4532


Epoch 4:  91%|█████████ | 767/844 [10:39<01:04,  1.20it/s]

batch loss: 0.4007


Epoch 4:  91%|█████████ | 768/844 [10:40<01:03,  1.20it/s]

batch loss: 0.4818


Epoch 4:  91%|█████████ | 769/844 [10:41<01:02,  1.20it/s]

batch loss: 0.4813


Epoch 4:  91%|█████████ | 770/844 [10:42<01:01,  1.20it/s]

batch loss: 0.4209


Epoch 4:  91%|█████████▏| 771/844 [10:43<01:00,  1.20it/s]

batch loss: 0.3783


Epoch 4:  91%|█████████▏| 772/844 [10:44<00:59,  1.20it/s]

batch loss: 0.5400


Epoch 4:  92%|█████████▏| 773/844 [10:44<00:59,  1.20it/s]

batch loss: 0.4439


Epoch 4:  92%|█████████▏| 774/844 [10:45<00:58,  1.20it/s]

batch loss: 0.4470


Epoch 4:  92%|█████████▏| 775/844 [10:46<00:57,  1.20it/s]

batch loss: 0.4212


Epoch 4:  92%|█████████▏| 776/844 [10:47<00:56,  1.20it/s]

batch loss: 0.4714


Epoch 4:  92%|█████████▏| 777/844 [10:48<00:55,  1.20it/s]

batch loss: 0.4233


Epoch 4:  92%|█████████▏| 778/844 [10:49<00:55,  1.20it/s]

batch loss: 0.5149


Epoch 4:  92%|█████████▏| 779/844 [10:49<00:54,  1.20it/s]

batch loss: 0.3933


Epoch 4:  92%|█████████▏| 780/844 [10:50<00:53,  1.19it/s]

batch loss: 0.4971


Epoch 4:  93%|█████████▎| 781/844 [10:51<00:52,  1.19it/s]

batch loss: 0.4176


Epoch 4:  93%|█████████▎| 782/844 [10:52<00:51,  1.19it/s]

batch loss: 0.3820


Epoch 4:  93%|█████████▎| 783/844 [10:53<00:50,  1.20it/s]

batch loss: 0.4559


Epoch 4:  93%|█████████▎| 784/844 [10:54<00:50,  1.19it/s]

batch loss: 0.4518


Epoch 4:  93%|█████████▎| 785/844 [10:54<00:49,  1.19it/s]

batch loss: 0.4744


Epoch 4:  93%|█████████▎| 786/844 [10:55<00:48,  1.20it/s]

batch loss: 0.3674


Epoch 4:  93%|█████████▎| 787/844 [10:56<00:47,  1.20it/s]

batch loss: 0.5093


Epoch 4:  93%|█████████▎| 788/844 [10:57<00:46,  1.20it/s]

batch loss: 0.4525


Epoch 4:  93%|█████████▎| 789/844 [10:58<00:46,  1.19it/s]

batch loss: 0.4173


Epoch 4:  94%|█████████▎| 790/844 [10:59<00:45,  1.20it/s]

batch loss: 0.5254


Epoch 4:  94%|█████████▎| 791/844 [10:59<00:44,  1.20it/s]

batch loss: 0.3999


Epoch 4:  94%|█████████▍| 792/844 [11:00<00:43,  1.20it/s]

batch loss: 0.4926


Epoch 4:  94%|█████████▍| 793/844 [11:01<00:42,  1.20it/s]

batch loss: 0.4699


Epoch 4:  94%|█████████▍| 794/844 [11:02<00:41,  1.20it/s]

batch loss: 0.5282


Epoch 4:  94%|█████████▍| 795/844 [11:03<00:41,  1.19it/s]

batch loss: 0.4566


Epoch 4:  94%|█████████▍| 796/844 [11:04<00:40,  1.20it/s]

batch loss: 0.4901


Epoch 4:  94%|█████████▍| 797/844 [11:04<00:39,  1.19it/s]

batch loss: 0.4490


Epoch 4:  95%|█████████▍| 798/844 [11:05<00:38,  1.19it/s]

batch loss: 0.4574


Epoch 4:  95%|█████████▍| 799/844 [11:06<00:37,  1.19it/s]

batch loss: 0.5384


Epoch 4:  95%|█████████▍| 800/844 [11:07<00:36,  1.19it/s]

batch loss: 0.4405


Epoch 4:  95%|█████████▍| 801/844 [11:08<00:35,  1.20it/s]

batch loss: 0.5354


Epoch 4:  95%|█████████▌| 802/844 [11:09<00:35,  1.20it/s]

batch loss: 0.5185


Epoch 4:  95%|█████████▌| 803/844 [11:09<00:34,  1.19it/s]

batch loss: 0.4674


Epoch 4:  95%|█████████▌| 804/844 [11:10<00:33,  1.20it/s]

batch loss: 0.4653


Epoch 4:  95%|█████████▌| 805/844 [11:11<00:32,  1.20it/s]

batch loss: 0.5032


Epoch 4:  95%|█████████▌| 806/844 [11:12<00:31,  1.20it/s]

batch loss: 0.5819


Epoch 4:  96%|█████████▌| 807/844 [11:13<00:30,  1.20it/s]

batch loss: 0.5046


Epoch 4:  96%|█████████▌| 808/844 [11:14<00:29,  1.20it/s]

batch loss: 0.4322


Epoch 4:  96%|█████████▌| 809/844 [11:14<00:29,  1.20it/s]

batch loss: 0.4958


Epoch 4:  96%|█████████▌| 810/844 [11:15<00:28,  1.20it/s]

batch loss: 0.4947


Epoch 4:  96%|█████████▌| 811/844 [11:16<00:27,  1.20it/s]

batch loss: 0.5293


Epoch 4:  96%|█████████▌| 812/844 [11:17<00:26,  1.19it/s]

batch loss: 0.4773


Epoch 4:  96%|█████████▋| 813/844 [11:18<00:25,  1.20it/s]

batch loss: 0.4760


Epoch 4:  96%|█████████▋| 814/844 [11:19<00:25,  1.20it/s]

batch loss: 0.3975


Epoch 4:  97%|█████████▋| 815/844 [11:19<00:24,  1.20it/s]

batch loss: 0.4451


Epoch 4:  97%|█████████▋| 816/844 [11:20<00:23,  1.20it/s]

batch loss: 0.4978


Epoch 4:  97%|█████████▋| 817/844 [11:21<00:22,  1.20it/s]

batch loss: 0.3875


Epoch 4:  97%|█████████▋| 818/844 [11:22<00:21,  1.20it/s]

batch loss: 0.4928


Epoch 4:  97%|█████████▋| 819/844 [11:23<00:20,  1.20it/s]

batch loss: 0.4394


Epoch 4:  97%|█████████▋| 820/844 [11:24<00:20,  1.20it/s]

batch loss: 0.4562


Epoch 4:  97%|█████████▋| 821/844 [11:24<00:19,  1.20it/s]

batch loss: 0.3897


Epoch 4:  97%|█████████▋| 822/844 [11:25<00:18,  1.20it/s]

batch loss: 0.4523


Epoch 4:  98%|█████████▊| 823/844 [11:26<00:17,  1.20it/s]

batch loss: 0.4786


Epoch 4:  98%|█████████▊| 824/844 [11:27<00:16,  1.20it/s]

batch loss: 0.4550


Epoch 4:  98%|█████████▊| 825/844 [11:28<00:15,  1.20it/s]

batch loss: 0.3845


Epoch 4:  98%|█████████▊| 826/844 [11:29<00:15,  1.20it/s]

batch loss: 0.4843


Epoch 4:  98%|█████████▊| 827/844 [11:29<00:14,  1.20it/s]

batch loss: 0.4503


Epoch 4:  98%|█████████▊| 828/844 [11:30<00:13,  1.20it/s]

batch loss: 0.3934


Epoch 4:  98%|█████████▊| 829/844 [11:31<00:12,  1.20it/s]

batch loss: 0.4323


Epoch 4:  98%|█████████▊| 830/844 [11:32<00:11,  1.19it/s]

batch loss: 0.4697


Epoch 4:  98%|█████████▊| 831/844 [11:33<00:10,  1.19it/s]

batch loss: 0.4881


Epoch 4:  99%|█████████▊| 832/844 [11:34<00:10,  1.19it/s]

batch loss: 0.4908


Epoch 4:  99%|█████████▊| 833/844 [11:34<00:09,  1.20it/s]

batch loss: 0.5252


Epoch 4:  99%|█████████▉| 834/844 [11:35<00:08,  1.20it/s]

batch loss: 0.4744


Epoch 4:  99%|█████████▉| 835/844 [11:36<00:07,  1.20it/s]

batch loss: 0.4079


Epoch 4:  99%|█████████▉| 836/844 [11:37<00:06,  1.20it/s]

batch loss: 0.4458


Epoch 4:  99%|█████████▉| 837/844 [11:38<00:05,  1.20it/s]

batch loss: 0.4501


Epoch 4:  99%|█████████▉| 838/844 [11:39<00:05,  1.20it/s]

batch loss: 0.4715


Epoch 4:  99%|█████████▉| 839/844 [11:39<00:04,  1.20it/s]

batch loss: 0.4450


Epoch 4: 100%|█████████▉| 840/844 [11:40<00:03,  1.20it/s]

batch loss: 0.5407


Epoch 4: 100%|█████████▉| 841/844 [11:41<00:02,  1.20it/s]

batch loss: 0.4293


Epoch 4: 100%|█████████▉| 842/844 [11:42<00:01,  1.20it/s]

batch loss: 0.4877


Epoch 4: 100%|█████████▉| 843/844 [11:43<00:00,  1.20it/s]

batch loss: 0.5104


Epoch 4: 100%|██████████| 844/844 [11:43<00:00,  1.20it/s]


batch loss: 0.4687
--------------------------------------------------------------------------------
Epoch 4 - Average Loss: 0.4599


Epoch 5:   0%|          | 1/844 [00:00<11:39,  1.20it/s]

batch loss: 0.4794


Epoch 5:   0%|          | 2/844 [00:01<11:41,  1.20it/s]

batch loss: 0.3624


Epoch 5:   0%|          | 3/844 [00:02<11:41,  1.20it/s]

batch loss: 0.4987


Epoch 5:   0%|          | 4/844 [00:03<11:43,  1.19it/s]

batch loss: 0.5146


Epoch 5:   1%|          | 5/844 [00:04<11:42,  1.19it/s]

batch loss: 0.4024


Epoch 5:   1%|          | 6/844 [00:05<11:38,  1.20it/s]

batch loss: 0.5349


Epoch 5:   1%|          | 7/844 [00:05<11:37,  1.20it/s]

batch loss: 0.4491


Epoch 5:   1%|          | 8/844 [00:06<11:37,  1.20it/s]

batch loss: 0.4782


Epoch 5:   1%|          | 9/844 [00:07<11:38,  1.19it/s]

batch loss: 0.4167


Epoch 5:   1%|          | 10/844 [00:08<11:36,  1.20it/s]

batch loss: 0.4619


Epoch 5:   1%|▏         | 11/844 [00:09<11:34,  1.20it/s]

batch loss: 0.4134


Epoch 5:   1%|▏         | 12/844 [00:10<11:34,  1.20it/s]

batch loss: 0.4009


Epoch 5:   2%|▏         | 13/844 [00:10<11:34,  1.20it/s]

batch loss: 0.4381


Epoch 5:   2%|▏         | 14/844 [00:11<11:32,  1.20it/s]

batch loss: 0.4334


Epoch 5:   2%|▏         | 15/844 [00:12<11:31,  1.20it/s]

batch loss: 0.3848


Epoch 5:   2%|▏         | 16/844 [00:13<11:32,  1.19it/s]

batch loss: 0.4520


Epoch 5:   2%|▏         | 17/844 [00:14<11:32,  1.19it/s]

batch loss: 0.4141


Epoch 5:   2%|▏         | 18/844 [00:15<11:31,  1.19it/s]

batch loss: 0.4759


Epoch 5:   2%|▏         | 19/844 [00:15<11:29,  1.20it/s]

batch loss: 0.4433


Epoch 5:   2%|▏         | 20/844 [00:16<11:29,  1.20it/s]

batch loss: 0.4181


Epoch 5:   2%|▏         | 21/844 [00:17<11:30,  1.19it/s]

batch loss: 0.4807


Epoch 5:   3%|▎         | 22/844 [00:18<11:27,  1.20it/s]

batch loss: 0.4709


Epoch 5:   3%|▎         | 23/844 [00:19<11:25,  1.20it/s]

batch loss: 0.5123


Epoch 5:   3%|▎         | 24/844 [00:20<11:25,  1.20it/s]

batch loss: 0.5400


Epoch 5:   3%|▎         | 25/844 [00:20<11:23,  1.20it/s]

batch loss: 0.4706


Epoch 5:   3%|▎         | 26/844 [00:21<11:23,  1.20it/s]

batch loss: 0.3806


Epoch 5:   3%|▎         | 27/844 [00:22<11:20,  1.20it/s]

batch loss: 0.4324


Epoch 5:   3%|▎         | 28/844 [00:23<11:19,  1.20it/s]

batch loss: 0.5569


Epoch 5:   3%|▎         | 29/844 [00:24<11:19,  1.20it/s]

batch loss: 0.3945


Epoch 5:   4%|▎         | 30/844 [00:25<11:18,  1.20it/s]

batch loss: 0.4270


Epoch 5:   4%|▎         | 31/844 [00:25<11:18,  1.20it/s]

batch loss: 0.4291


Epoch 5:   4%|▍         | 32/844 [00:26<11:17,  1.20it/s]

batch loss: 0.4228


Epoch 5:   4%|▍         | 33/844 [00:27<11:15,  1.20it/s]

batch loss: 0.4916


Epoch 5:   4%|▍         | 34/844 [00:28<11:17,  1.20it/s]

batch loss: 0.5374


Epoch 5:   4%|▍         | 35/844 [00:29<11:15,  1.20it/s]

batch loss: 0.3745


Epoch 5:   4%|▍         | 36/844 [00:30<11:14,  1.20it/s]

batch loss: 0.4148


Epoch 5:   4%|▍         | 37/844 [00:30<11:14,  1.20it/s]

batch loss: 0.3776


Epoch 5:   5%|▍         | 38/844 [00:31<11:13,  1.20it/s]

batch loss: 0.4947


Epoch 5:   5%|▍         | 39/844 [00:32<11:12,  1.20it/s]

batch loss: 0.4750


Epoch 5:   5%|▍         | 40/844 [00:33<11:11,  1.20it/s]

batch loss: 0.3994


Epoch 5:   5%|▍         | 41/844 [00:34<11:10,  1.20it/s]

batch loss: 0.4435


Epoch 5:   5%|▍         | 42/844 [00:35<11:09,  1.20it/s]

batch loss: 0.5090


Epoch 5:   5%|▌         | 43/844 [00:35<11:06,  1.20it/s]

batch loss: 0.5279


Epoch 5:   5%|▌         | 44/844 [00:36<11:04,  1.20it/s]

batch loss: 0.4715


Epoch 5:   5%|▌         | 45/844 [00:37<11:04,  1.20it/s]

batch loss: 0.3858


Epoch 5:   5%|▌         | 46/844 [00:38<11:03,  1.20it/s]

batch loss: 0.4831


Epoch 5:   6%|▌         | 47/844 [00:39<11:02,  1.20it/s]

batch loss: 0.4060


Epoch 5:   6%|▌         | 48/844 [00:40<11:01,  1.20it/s]

batch loss: 0.3391


Epoch 5:   6%|▌         | 49/844 [00:40<11:01,  1.20it/s]

batch loss: 0.4593


Epoch 5:   6%|▌         | 50/844 [00:41<11:01,  1.20it/s]

batch loss: 0.4483


Epoch 5:   6%|▌         | 51/844 [00:42<11:00,  1.20it/s]

batch loss: 0.4105


Epoch 5:   6%|▌         | 52/844 [00:43<10:58,  1.20it/s]

batch loss: 0.4462


Epoch 5:   6%|▋         | 53/844 [00:44<10:58,  1.20it/s]

batch loss: 0.4372


Epoch 5:   6%|▋         | 54/844 [00:45<10:57,  1.20it/s]

batch loss: 0.4442


Epoch 5:   7%|▋         | 55/844 [00:45<10:57,  1.20it/s]

batch loss: 0.4143


Epoch 5:   7%|▋         | 56/844 [00:46<10:57,  1.20it/s]

batch loss: 0.4683


Epoch 5:   7%|▋         | 57/844 [00:47<10:55,  1.20it/s]

batch loss: 0.4472


Epoch 5:   7%|▋         | 58/844 [00:48<10:55,  1.20it/s]

batch loss: 0.5108


Epoch 5:   7%|▋         | 59/844 [00:49<10:54,  1.20it/s]

batch loss: 0.4302


Epoch 5:   7%|▋         | 60/844 [00:50<10:53,  1.20it/s]

batch loss: 0.5099


Epoch 5:   7%|▋         | 61/844 [00:50<10:53,  1.20it/s]

batch loss: 0.4143


Epoch 5:   7%|▋         | 62/844 [00:51<10:51,  1.20it/s]

batch loss: 0.4882


Epoch 5:   7%|▋         | 63/844 [00:52<10:49,  1.20it/s]

batch loss: 0.3183


Epoch 5:   8%|▊         | 64/844 [00:53<10:49,  1.20it/s]

batch loss: 0.4733


Epoch 5:   8%|▊         | 65/844 [00:54<10:52,  1.19it/s]

batch loss: 0.4729


Epoch 5:   8%|▊         | 66/844 [00:55<10:55,  1.19it/s]

batch loss: 0.4332


Epoch 5:   8%|▊         | 67/844 [00:55<10:54,  1.19it/s]

batch loss: 0.4900


Epoch 5:   8%|▊         | 68/844 [00:56<10:52,  1.19it/s]

batch loss: 0.5705


Epoch 5:   8%|▊         | 69/844 [00:57<10:50,  1.19it/s]

batch loss: 0.4023


Epoch 5:   8%|▊         | 70/844 [00:58<10:47,  1.20it/s]

batch loss: 0.4213


Epoch 5:   8%|▊         | 71/844 [00:59<10:49,  1.19it/s]

batch loss: 0.4198


Epoch 5:   9%|▊         | 72/844 [01:00<10:48,  1.19it/s]

batch loss: 0.4231


Epoch 5:   9%|▊         | 73/844 [01:00<10:47,  1.19it/s]

batch loss: 0.4211


Epoch 5:   9%|▉         | 74/844 [01:01<10:43,  1.20it/s]

batch loss: 0.4903


Epoch 5:   9%|▉         | 75/844 [01:02<10:42,  1.20it/s]

batch loss: 0.5284


Epoch 5:   9%|▉         | 76/844 [01:03<10:40,  1.20it/s]

batch loss: 0.4365


Epoch 5:   9%|▉         | 77/844 [01:04<10:40,  1.20it/s]

batch loss: 0.4639


Epoch 5:   9%|▉         | 78/844 [01:05<10:40,  1.20it/s]

batch loss: 0.4649


Epoch 5:   9%|▉         | 79/844 [01:05<10:38,  1.20it/s]

batch loss: 0.5443


Epoch 5:   9%|▉         | 80/844 [01:06<10:36,  1.20it/s]

batch loss: 0.4490


Epoch 5:  10%|▉         | 81/844 [01:07<10:35,  1.20it/s]

batch loss: 0.4741


Epoch 5:  10%|▉         | 82/844 [01:08<10:34,  1.20it/s]

batch loss: 0.4920


Epoch 5:  10%|▉         | 83/844 [01:09<10:32,  1.20it/s]

batch loss: 0.4186


Epoch 5:  10%|▉         | 84/844 [01:10<10:33,  1.20it/s]

batch loss: 0.4599


Epoch 5:  10%|█         | 85/844 [01:10<10:31,  1.20it/s]

batch loss: 0.4570


Epoch 5:  10%|█         | 86/844 [01:11<10:31,  1.20it/s]

batch loss: 0.4368


Epoch 5:  10%|█         | 87/844 [01:12<10:33,  1.20it/s]

batch loss: 0.3248


Epoch 5:  10%|█         | 88/844 [01:13<10:32,  1.20it/s]

batch loss: 0.4486


Epoch 5:  11%|█         | 89/844 [01:14<10:32,  1.19it/s]

batch loss: 0.4397


Epoch 5:  11%|█         | 90/844 [01:15<10:31,  1.19it/s]

batch loss: 0.4370


Epoch 5:  11%|█         | 91/844 [01:15<10:28,  1.20it/s]

batch loss: 0.4094


Epoch 5:  11%|█         | 92/844 [01:16<10:29,  1.19it/s]

batch loss: 0.4319


Epoch 5:  11%|█         | 93/844 [01:17<10:27,  1.20it/s]

batch loss: 0.4415


Epoch 5:  11%|█         | 94/844 [01:18<10:24,  1.20it/s]

batch loss: 0.4510


Epoch 5:  11%|█▏        | 95/844 [01:19<10:25,  1.20it/s]

batch loss: 0.4012


Epoch 5:  11%|█▏        | 96/844 [01:20<10:23,  1.20it/s]

batch loss: 0.4253


Epoch 5:  11%|█▏        | 97/844 [01:20<10:24,  1.20it/s]

batch loss: 0.5006


Epoch 5:  12%|█▏        | 98/844 [01:21<10:21,  1.20it/s]

batch loss: 0.4900


Epoch 5:  12%|█▏        | 99/844 [01:22<10:21,  1.20it/s]

batch loss: 0.4970


Epoch 5:  12%|█▏        | 100/844 [01:23<10:20,  1.20it/s]

batch loss: 0.4778


Epoch 5:  12%|█▏        | 101/844 [01:24<10:20,  1.20it/s]

batch loss: 0.4755


Epoch 5:  12%|█▏        | 102/844 [01:25<10:19,  1.20it/s]

batch loss: 0.4853


Epoch 5:  12%|█▏        | 103/844 [01:25<10:18,  1.20it/s]

batch loss: 0.4177


Epoch 5:  12%|█▏        | 104/844 [01:26<10:17,  1.20it/s]

batch loss: 0.4560


Epoch 5:  12%|█▏        | 105/844 [01:27<10:19,  1.19it/s]

batch loss: 0.4353


Epoch 5:  13%|█▎        | 106/844 [01:28<10:20,  1.19it/s]

batch loss: 0.5048


Epoch 5:  13%|█▎        | 107/844 [01:29<10:17,  1.19it/s]

batch loss: 0.4496


Epoch 5:  13%|█▎        | 108/844 [01:30<10:16,  1.19it/s]

batch loss: 0.4195


Epoch 5:  13%|█▎        | 109/844 [01:31<10:15,  1.19it/s]

batch loss: 0.4219


Epoch 5:  13%|█▎        | 110/844 [01:31<10:14,  1.19it/s]

batch loss: 0.5663


Epoch 5:  13%|█▎        | 111/844 [01:32<10:12,  1.20it/s]

batch loss: 0.5181


Epoch 5:  13%|█▎        | 112/844 [01:33<10:10,  1.20it/s]

batch loss: 0.4271


Epoch 5:  13%|█▎        | 113/844 [01:34<10:07,  1.20it/s]

batch loss: 0.4568


Epoch 5:  14%|█▎        | 114/844 [01:35<10:06,  1.20it/s]

batch loss: 0.3865


Epoch 5:  14%|█▎        | 115/844 [01:36<10:06,  1.20it/s]

batch loss: 0.4533


Epoch 5:  14%|█▎        | 116/844 [01:36<10:05,  1.20it/s]

batch loss: 0.4010


Epoch 5:  14%|█▍        | 117/844 [01:37<10:03,  1.20it/s]

batch loss: 0.4623


Epoch 5:  14%|█▍        | 118/844 [01:38<10:02,  1.21it/s]

batch loss: 0.5218


Epoch 5:  14%|█▍        | 119/844 [01:39<10:01,  1.20it/s]

batch loss: 0.4323


Epoch 5:  14%|█▍        | 120/844 [01:40<10:01,  1.20it/s]

batch loss: 0.4118


Epoch 5:  14%|█▍        | 121/844 [01:41<10:02,  1.20it/s]

batch loss: 0.4874


Epoch 5:  14%|█▍        | 122/844 [01:41<10:02,  1.20it/s]

batch loss: 0.4144


Epoch 5:  15%|█▍        | 123/844 [01:42<09:59,  1.20it/s]

batch loss: 0.3971


Epoch 5:  15%|█▍        | 124/844 [01:43<09:59,  1.20it/s]

batch loss: 0.4184


Epoch 5:  15%|█▍        | 125/844 [01:44<09:58,  1.20it/s]

batch loss: 0.4204


Epoch 5:  15%|█▍        | 126/844 [01:45<09:57,  1.20it/s]

batch loss: 0.4631


Epoch 5:  15%|█▌        | 127/844 [01:45<09:55,  1.20it/s]

batch loss: 0.4320


Epoch 5:  15%|█▌        | 128/844 [01:46<09:54,  1.20it/s]

batch loss: 0.4278


Epoch 5:  15%|█▌        | 129/844 [01:47<09:54,  1.20it/s]

batch loss: 0.4464


Epoch 5:  15%|█▌        | 130/844 [01:48<09:55,  1.20it/s]

batch loss: 0.5592


Epoch 5:  16%|█▌        | 131/844 [01:49<09:53,  1.20it/s]

batch loss: 0.4183


Epoch 5:  16%|█▌        | 132/844 [01:50<09:52,  1.20it/s]

batch loss: 0.4072


Epoch 5:  16%|█▌        | 133/844 [01:50<09:49,  1.21it/s]

batch loss: 0.4822


Epoch 5:  16%|█▌        | 134/844 [01:51<09:49,  1.20it/s]

batch loss: 0.4923


Epoch 5:  16%|█▌        | 135/844 [01:52<09:48,  1.20it/s]

batch loss: 0.4342


Epoch 5:  16%|█▌        | 136/844 [01:53<09:48,  1.20it/s]

batch loss: 0.4640


Epoch 5:  16%|█▌        | 137/844 [01:54<09:51,  1.20it/s]

batch loss: 0.4372


Epoch 5:  16%|█▋        | 138/844 [01:55<09:48,  1.20it/s]

batch loss: 0.4526


Epoch 5:  16%|█▋        | 139/844 [01:55<09:47,  1.20it/s]

batch loss: 0.5047


Epoch 5:  17%|█▋        | 140/844 [01:56<09:48,  1.20it/s]

batch loss: 0.5557


Epoch 5:  17%|█▋        | 141/844 [01:57<09:46,  1.20it/s]

batch loss: 0.4095


Epoch 5:  17%|█▋        | 142/844 [01:58<09:45,  1.20it/s]

batch loss: 0.4301


Epoch 5:  17%|█▋        | 143/844 [01:59<09:44,  1.20it/s]

batch loss: 0.3716


Epoch 5:  17%|█▋        | 144/844 [02:00<09:41,  1.20it/s]

batch loss: 0.4763


Epoch 5:  17%|█▋        | 145/844 [02:00<09:41,  1.20it/s]

batch loss: 0.5027


Epoch 5:  17%|█▋        | 146/844 [02:01<09:40,  1.20it/s]

batch loss: 0.5274


Epoch 5:  17%|█▋        | 147/844 [02:02<09:40,  1.20it/s]

batch loss: 0.4643


Epoch 5:  18%|█▊        | 148/844 [02:03<09:38,  1.20it/s]

batch loss: 0.4285


Epoch 5:  18%|█▊        | 149/844 [02:04<09:37,  1.20it/s]

batch loss: 0.4508


Epoch 5:  18%|█▊        | 150/844 [02:05<09:36,  1.20it/s]

batch loss: 0.4282


Epoch 5:  18%|█▊        | 151/844 [02:05<09:36,  1.20it/s]

batch loss: 0.4284


Epoch 5:  18%|█▊        | 152/844 [02:06<09:37,  1.20it/s]

batch loss: 0.4803


Epoch 5:  18%|█▊        | 153/844 [02:07<09:36,  1.20it/s]

batch loss: 0.4642


Epoch 5:  18%|█▊        | 154/844 [02:08<09:37,  1.19it/s]

batch loss: 0.4355


Epoch 5:  18%|█▊        | 155/844 [02:09<09:35,  1.20it/s]

batch loss: 0.4530


Epoch 5:  18%|█▊        | 156/844 [02:10<09:34,  1.20it/s]

batch loss: 0.5131


Epoch 5:  19%|█▊        | 157/844 [02:11<09:36,  1.19it/s]

batch loss: 0.4466


Epoch 5:  19%|█▊        | 158/844 [02:11<09:33,  1.20it/s]

batch loss: 0.5470


Epoch 5:  19%|█▉        | 159/844 [02:12<09:32,  1.20it/s]

batch loss: 0.4507


Epoch 5:  19%|█▉        | 160/844 [02:13<09:30,  1.20it/s]

batch loss: 0.4685


Epoch 5:  19%|█▉        | 161/844 [02:14<09:29,  1.20it/s]

batch loss: 0.4193


Epoch 5:  19%|█▉        | 162/844 [02:15<09:29,  1.20it/s]

batch loss: 0.4780


Epoch 5:  19%|█▉        | 163/844 [02:15<09:27,  1.20it/s]

batch loss: 0.4559


Epoch 5:  19%|█▉        | 164/844 [02:16<09:27,  1.20it/s]

batch loss: 0.4348


Epoch 5:  20%|█▉        | 165/844 [02:17<09:24,  1.20it/s]

batch loss: 0.4183


Epoch 5:  20%|█▉        | 166/844 [02:18<09:23,  1.20it/s]

batch loss: 0.4149


Epoch 5:  20%|█▉        | 167/844 [02:19<09:24,  1.20it/s]

batch loss: 0.4534


Epoch 5:  20%|█▉        | 168/844 [02:20<09:22,  1.20it/s]

batch loss: 0.4429


Epoch 5:  20%|██        | 169/844 [02:20<09:22,  1.20it/s]

batch loss: 0.3906


Epoch 5:  20%|██        | 170/844 [02:21<09:24,  1.19it/s]

batch loss: 0.4299


Epoch 5:  20%|██        | 171/844 [02:22<09:22,  1.20it/s]

batch loss: 0.4984


Epoch 5:  20%|██        | 172/844 [02:23<09:21,  1.20it/s]

batch loss: 0.4936


Epoch 5:  20%|██        | 173/844 [02:24<09:22,  1.19it/s]

batch loss: 0.4559


Epoch 5:  21%|██        | 174/844 [02:25<09:20,  1.20it/s]

batch loss: 0.4095


Epoch 5:  21%|██        | 175/844 [02:26<09:20,  1.19it/s]

batch loss: 0.4342


Epoch 5:  21%|██        | 176/844 [02:26<09:18,  1.20it/s]

batch loss: 0.5234


Epoch 5:  21%|██        | 177/844 [02:27<09:16,  1.20it/s]

batch loss: 0.4575


Epoch 5:  21%|██        | 178/844 [02:28<09:14,  1.20it/s]

batch loss: 0.5133


Epoch 5:  21%|██        | 179/844 [02:29<09:13,  1.20it/s]

batch loss: 0.3932


Epoch 5:  21%|██▏       | 180/844 [02:30<09:13,  1.20it/s]

batch loss: 0.4584


Epoch 5:  21%|██▏       | 181/844 [02:31<09:14,  1.20it/s]

batch loss: 0.4551


Epoch 5:  22%|██▏       | 182/844 [02:31<09:11,  1.20it/s]

batch loss: 0.4526


Epoch 5:  22%|██▏       | 183/844 [02:32<09:09,  1.20it/s]

batch loss: 0.4297


Epoch 5:  22%|██▏       | 184/844 [02:33<09:08,  1.20it/s]

batch loss: 0.4193


Epoch 5:  22%|██▏       | 185/844 [02:34<09:08,  1.20it/s]

batch loss: 0.5290


Epoch 5:  22%|██▏       | 186/844 [02:35<09:08,  1.20it/s]

batch loss: 0.4522


Epoch 5:  22%|██▏       | 187/844 [02:36<09:10,  1.19it/s]

batch loss: 0.4305


Epoch 5:  22%|██▏       | 188/844 [02:36<09:11,  1.19it/s]

batch loss: 0.4894


Epoch 5:  22%|██▏       | 189/844 [02:37<09:08,  1.19it/s]

batch loss: 0.4463


Epoch 5:  23%|██▎       | 190/844 [02:38<09:08,  1.19it/s]

batch loss: 0.4765


Epoch 5:  23%|██▎       | 191/844 [02:39<09:06,  1.20it/s]

batch loss: 0.5265


Epoch 5:  23%|██▎       | 192/844 [02:40<09:05,  1.19it/s]

batch loss: 0.4057


Epoch 5:  23%|██▎       | 193/844 [02:41<09:05,  1.19it/s]

batch loss: 0.5275


Epoch 5:  23%|██▎       | 194/844 [02:41<09:04,  1.19it/s]

batch loss: 0.3961


Epoch 5:  23%|██▎       | 195/844 [02:42<09:02,  1.20it/s]

batch loss: 0.4523


Epoch 5:  23%|██▎       | 196/844 [02:43<08:59,  1.20it/s]

batch loss: 0.4522


Epoch 5:  23%|██▎       | 197/844 [02:44<08:58,  1.20it/s]

batch loss: 0.4397


Epoch 5:  23%|██▎       | 198/844 [02:45<08:58,  1.20it/s]

batch loss: 0.3968


Epoch 5:  24%|██▎       | 199/844 [02:46<08:57,  1.20it/s]

batch loss: 0.4768


Epoch 5:  24%|██▎       | 200/844 [02:46<08:57,  1.20it/s]

batch loss: 0.4837


Epoch 5:  24%|██▍       | 201/844 [02:47<08:57,  1.20it/s]

batch loss: 0.4576


Epoch 5:  24%|██▍       | 202/844 [02:48<08:56,  1.20it/s]

batch loss: 0.4386


Epoch 5:  24%|██▍       | 203/844 [02:49<08:56,  1.20it/s]

batch loss: 0.4671


Epoch 5:  24%|██▍       | 204/844 [02:50<08:54,  1.20it/s]

batch loss: 0.4750


Epoch 5:  24%|██▍       | 205/844 [02:51<08:54,  1.19it/s]

batch loss: 0.4966


Epoch 5:  24%|██▍       | 206/844 [02:51<08:53,  1.20it/s]

batch loss: 0.4290


Epoch 5:  25%|██▍       | 207/844 [02:52<08:51,  1.20it/s]

batch loss: 0.4153


Epoch 5:  25%|██▍       | 208/844 [02:53<08:50,  1.20it/s]

batch loss: 0.4059


Epoch 5:  25%|██▍       | 209/844 [02:54<08:49,  1.20it/s]

batch loss: 0.5265


Epoch 5:  25%|██▍       | 210/844 [02:55<08:50,  1.20it/s]

batch loss: 0.4566


Epoch 5:  25%|██▌       | 211/844 [02:56<08:48,  1.20it/s]

batch loss: 0.4958


Epoch 5:  25%|██▌       | 212/844 [02:56<08:46,  1.20it/s]

batch loss: 0.4909


Epoch 5:  25%|██▌       | 213/844 [02:57<08:44,  1.20it/s]

batch loss: 0.4235


Epoch 5:  25%|██▌       | 214/844 [02:58<08:43,  1.20it/s]

batch loss: 0.3993


Epoch 5:  25%|██▌       | 215/844 [02:59<08:43,  1.20it/s]

batch loss: 0.5352


Epoch 5:  26%|██▌       | 216/844 [03:00<08:44,  1.20it/s]

batch loss: 0.3909


Epoch 5:  26%|██▌       | 217/844 [03:01<08:43,  1.20it/s]

batch loss: 0.5683


Epoch 5:  26%|██▌       | 218/844 [03:01<08:42,  1.20it/s]

batch loss: 0.4106


Epoch 5:  26%|██▌       | 219/844 [03:02<08:42,  1.20it/s]

batch loss: 0.4143


Epoch 5:  26%|██▌       | 220/844 [03:03<08:41,  1.20it/s]

batch loss: 0.4855


Epoch 5:  26%|██▌       | 221/844 [03:04<08:42,  1.19it/s]

batch loss: 0.4140


Epoch 5:  26%|██▋       | 222/844 [03:05<08:41,  1.19it/s]

batch loss: 0.4822


Epoch 5:  26%|██▋       | 223/844 [03:06<08:41,  1.19it/s]

batch loss: 0.4560


Epoch 5:  27%|██▋       | 224/844 [03:06<08:40,  1.19it/s]

batch loss: 0.4653


Epoch 5:  27%|██▋       | 225/844 [03:07<08:40,  1.19it/s]

batch loss: 0.5684


Epoch 5:  27%|██▋       | 226/844 [03:08<08:38,  1.19it/s]

batch loss: 0.4984


Epoch 5:  27%|██▋       | 227/844 [03:09<08:37,  1.19it/s]

batch loss: 0.4690


Epoch 5:  27%|██▋       | 228/844 [03:10<08:34,  1.20it/s]

batch loss: 0.4386


Epoch 5:  27%|██▋       | 229/844 [03:11<08:33,  1.20it/s]

batch loss: 0.4537


Epoch 5:  27%|██▋       | 230/844 [03:11<08:32,  1.20it/s]

batch loss: 0.4173


Epoch 5:  27%|██▋       | 231/844 [03:12<08:32,  1.20it/s]

batch loss: 0.3960


Epoch 5:  27%|██▋       | 232/844 [03:13<08:30,  1.20it/s]

batch loss: 0.4193


Epoch 5:  28%|██▊       | 233/844 [03:14<08:29,  1.20it/s]

batch loss: 0.4710


Epoch 5:  28%|██▊       | 234/844 [03:15<08:28,  1.20it/s]

batch loss: 0.4653


Epoch 5:  28%|██▊       | 235/844 [03:16<08:27,  1.20it/s]

batch loss: 0.4792


Epoch 5:  28%|██▊       | 236/844 [03:16<08:29,  1.19it/s]

batch loss: 0.4512


Epoch 5:  28%|██▊       | 237/844 [03:17<08:28,  1.19it/s]

batch loss: 0.4587


Epoch 5:  28%|██▊       | 238/844 [03:18<08:25,  1.20it/s]

batch loss: 0.3860


Epoch 5:  28%|██▊       | 239/844 [03:19<08:24,  1.20it/s]

batch loss: 0.4771


Epoch 5:  28%|██▊       | 240/844 [03:20<08:23,  1.20it/s]

batch loss: 0.4999


Epoch 5:  29%|██▊       | 241/844 [03:21<08:22,  1.20it/s]

batch loss: 0.5184


Epoch 5:  29%|██▊       | 242/844 [03:21<08:23,  1.20it/s]

batch loss: 0.4135


Epoch 5:  29%|██▉       | 243/844 [03:22<08:22,  1.20it/s]

batch loss: 0.4833


Epoch 5:  29%|██▉       | 244/844 [03:23<08:20,  1.20it/s]

batch loss: 0.4273


Epoch 5:  29%|██▉       | 245/844 [03:24<08:19,  1.20it/s]

batch loss: 0.4676


Epoch 5:  29%|██▉       | 246/844 [03:25<08:20,  1.20it/s]

batch loss: 0.4701


Epoch 5:  29%|██▉       | 247/844 [03:26<08:18,  1.20it/s]

batch loss: 0.5021


Epoch 5:  29%|██▉       | 248/844 [03:26<08:16,  1.20it/s]

batch loss: 0.5123


Epoch 5:  30%|██▉       | 249/844 [03:27<08:15,  1.20it/s]

batch loss: 0.3683


Epoch 5:  30%|██▉       | 250/844 [03:28<08:13,  1.20it/s]

batch loss: 0.5846


Epoch 5:  30%|██▉       | 251/844 [03:29<08:12,  1.20it/s]

batch loss: 0.3896


Epoch 5:  30%|██▉       | 252/844 [03:30<08:12,  1.20it/s]

batch loss: 0.4246


Epoch 5:  30%|██▉       | 253/844 [03:31<08:12,  1.20it/s]

batch loss: 0.4863


Epoch 5:  30%|███       | 254/844 [03:31<08:11,  1.20it/s]

batch loss: 0.4938


Epoch 5:  30%|███       | 255/844 [03:32<08:11,  1.20it/s]

batch loss: 0.4486


Epoch 5:  30%|███       | 256/844 [03:33<08:11,  1.20it/s]

batch loss: 0.4824


Epoch 5:  30%|███       | 257/844 [03:34<08:09,  1.20it/s]

batch loss: 0.4499


Epoch 5:  31%|███       | 258/844 [03:35<08:08,  1.20it/s]

batch loss: 0.4380


Epoch 5:  31%|███       | 259/844 [03:36<08:07,  1.20it/s]

batch loss: 0.4879


Epoch 5:  31%|███       | 260/844 [03:36<08:07,  1.20it/s]

batch loss: 0.4348


Epoch 5:  31%|███       | 261/844 [03:37<08:06,  1.20it/s]

batch loss: 0.4541


Epoch 5:  31%|███       | 262/844 [03:38<08:05,  1.20it/s]

batch loss: 0.4548


Epoch 5:  31%|███       | 263/844 [03:39<08:03,  1.20it/s]

batch loss: 0.4173


Epoch 5:  31%|███▏      | 264/844 [03:40<08:02,  1.20it/s]

batch loss: 0.4806


Epoch 5:  31%|███▏      | 265/844 [03:41<08:02,  1.20it/s]

batch loss: 0.4827


Epoch 5:  32%|███▏      | 266/844 [03:41<08:02,  1.20it/s]

batch loss: 0.4549


Epoch 5:  32%|███▏      | 267/844 [03:42<08:01,  1.20it/s]

batch loss: 0.4493


Epoch 5:  32%|███▏      | 268/844 [03:43<07:59,  1.20it/s]

batch loss: 0.5066


Epoch 5:  32%|███▏      | 269/844 [03:44<07:58,  1.20it/s]

batch loss: 0.5134


Epoch 5:  32%|███▏      | 270/844 [03:45<07:57,  1.20it/s]

batch loss: 0.4849


Epoch 5:  32%|███▏      | 271/844 [03:46<07:58,  1.20it/s]

batch loss: 0.3935


Epoch 5:  32%|███▏      | 272/844 [03:47<07:58,  1.19it/s]

batch loss: 0.4384


Epoch 5:  32%|███▏      | 273/844 [03:47<07:56,  1.20it/s]

batch loss: 0.5268


Epoch 5:  32%|███▏      | 274/844 [03:48<07:55,  1.20it/s]

batch loss: 0.5516


Epoch 5:  33%|███▎      | 275/844 [03:49<07:54,  1.20it/s]

batch loss: 0.4423


Epoch 5:  33%|███▎      | 276/844 [03:50<07:53,  1.20it/s]

batch loss: 0.3739


Epoch 5:  33%|███▎      | 277/844 [03:51<07:53,  1.20it/s]

batch loss: 0.4508


Epoch 5:  33%|███▎      | 278/844 [03:52<07:51,  1.20it/s]

batch loss: 0.4311


Epoch 5:  33%|███▎      | 279/844 [03:52<07:50,  1.20it/s]

batch loss: 0.4527


Epoch 5:  33%|███▎      | 280/844 [03:53<07:48,  1.20it/s]

batch loss: 0.4125


Epoch 5:  33%|███▎      | 281/844 [03:54<07:47,  1.20it/s]

batch loss: 0.4621


Epoch 5:  33%|███▎      | 282/844 [03:55<07:47,  1.20it/s]

batch loss: 0.4610


Epoch 5:  34%|███▎      | 283/844 [03:56<07:46,  1.20it/s]

batch loss: 0.4246


Epoch 5:  34%|███▎      | 284/844 [03:56<07:45,  1.20it/s]

batch loss: 0.4610


Epoch 5:  34%|███▍      | 285/844 [03:57<07:45,  1.20it/s]

batch loss: 0.4243


Epoch 5:  34%|███▍      | 286/844 [03:58<07:45,  1.20it/s]

batch loss: 0.3986


Epoch 5:  34%|███▍      | 287/844 [03:59<07:44,  1.20it/s]

batch loss: 0.4710


Epoch 5:  34%|███▍      | 288/844 [04:00<07:45,  1.19it/s]

batch loss: 0.4147


Epoch 5:  34%|███▍      | 289/844 [04:01<07:43,  1.20it/s]

batch loss: 0.4003


Epoch 5:  34%|███▍      | 290/844 [04:01<07:41,  1.20it/s]

batch loss: 0.5072


Epoch 5:  34%|███▍      | 291/844 [04:02<07:41,  1.20it/s]

batch loss: 0.4318


Epoch 5:  35%|███▍      | 292/844 [04:03<07:39,  1.20it/s]

batch loss: 0.5550


Epoch 5:  35%|███▍      | 293/844 [04:04<07:39,  1.20it/s]

batch loss: 0.4649


Epoch 5:  35%|███▍      | 294/844 [04:05<07:39,  1.20it/s]

batch loss: 0.4702


Epoch 5:  35%|███▍      | 295/844 [04:06<07:37,  1.20it/s]

batch loss: 0.4875


Epoch 5:  35%|███▌      | 296/844 [04:06<07:35,  1.20it/s]

batch loss: 0.4488


Epoch 5:  35%|███▌      | 297/844 [04:07<07:34,  1.20it/s]

batch loss: 0.4532


Epoch 5:  35%|███▌      | 298/844 [04:08<07:33,  1.20it/s]

batch loss: 0.4108


Epoch 5:  35%|███▌      | 299/844 [04:09<07:33,  1.20it/s]

batch loss: 0.4476


Epoch 5:  36%|███▌      | 300/844 [04:10<07:32,  1.20it/s]

batch loss: 0.4587


Epoch 5:  36%|███▌      | 301/844 [04:11<07:31,  1.20it/s]

batch loss: 0.4995


Epoch 5:  36%|███▌      | 302/844 [04:11<07:31,  1.20it/s]

batch loss: 0.4740


Epoch 5:  36%|███▌      | 303/844 [04:12<07:30,  1.20it/s]

batch loss: 0.5146


Epoch 5:  36%|███▌      | 304/844 [04:13<07:30,  1.20it/s]

batch loss: 0.3990


Epoch 5:  36%|███▌      | 305/844 [04:14<07:31,  1.19it/s]

batch loss: 0.4838


Epoch 5:  36%|███▋      | 306/844 [04:15<07:29,  1.20it/s]

batch loss: 0.4541


Epoch 5:  36%|███▋      | 307/844 [04:16<07:27,  1.20it/s]

batch loss: 0.4613


Epoch 5:  36%|███▋      | 308/844 [04:16<07:25,  1.20it/s]

batch loss: 0.5197


Epoch 5:  37%|███▋      | 309/844 [04:17<07:25,  1.20it/s]

batch loss: 0.4570


Epoch 5:  37%|███▋      | 310/844 [04:18<07:24,  1.20it/s]

batch loss: 0.4977


Epoch 5:  37%|███▋      | 311/844 [04:19<07:23,  1.20it/s]

batch loss: 0.4710


Epoch 5:  37%|███▋      | 312/844 [04:20<07:22,  1.20it/s]

batch loss: 0.4671


Epoch 5:  37%|███▋      | 313/844 [04:21<07:21,  1.20it/s]

batch loss: 0.5292


Epoch 5:  37%|███▋      | 314/844 [04:21<07:19,  1.21it/s]

batch loss: 0.4225


Epoch 5:  37%|███▋      | 315/844 [04:22<07:19,  1.20it/s]

batch loss: 0.4435


Epoch 5:  37%|███▋      | 316/844 [04:23<07:18,  1.21it/s]

batch loss: 0.4744


Epoch 5:  38%|███▊      | 317/844 [04:24<07:16,  1.21it/s]

batch loss: 0.4769


Epoch 5:  38%|███▊      | 318/844 [04:25<07:16,  1.20it/s]

batch loss: 0.5048


Epoch 5:  38%|███▊      | 319/844 [04:26<07:17,  1.20it/s]

batch loss: 0.4985


Epoch 5:  38%|███▊      | 320/844 [04:26<07:17,  1.20it/s]

batch loss: 0.5279


Epoch 5:  38%|███▊      | 321/844 [04:27<07:15,  1.20it/s]

batch loss: 0.4663


Epoch 5:  38%|███▊      | 322/844 [04:28<07:13,  1.20it/s]

batch loss: 0.4554


Epoch 5:  38%|███▊      | 323/844 [04:29<07:13,  1.20it/s]

batch loss: 0.4216


Epoch 5:  38%|███▊      | 324/844 [04:30<07:12,  1.20it/s]

batch loss: 0.5185


Epoch 5:  39%|███▊      | 325/844 [04:31<07:11,  1.20it/s]

batch loss: 0.4407


Epoch 5:  39%|███▊      | 326/844 [04:31<07:10,  1.20it/s]

batch loss: 0.4215


Epoch 5:  39%|███▊      | 327/844 [04:32<07:10,  1.20it/s]

batch loss: 0.4432


Epoch 5:  39%|███▉      | 328/844 [04:33<07:09,  1.20it/s]

batch loss: 0.4506


Epoch 5:  39%|███▉      | 329/844 [04:34<07:08,  1.20it/s]

batch loss: 0.4819


Epoch 5:  39%|███▉      | 330/844 [04:35<07:07,  1.20it/s]

batch loss: 0.5562


Epoch 5:  39%|███▉      | 331/844 [04:36<07:06,  1.20it/s]

batch loss: 0.5152


Epoch 5:  39%|███▉      | 332/844 [04:36<07:05,  1.20it/s]

batch loss: 0.4807


Epoch 5:  39%|███▉      | 333/844 [04:37<07:05,  1.20it/s]

batch loss: 0.4580


Epoch 5:  40%|███▉      | 334/844 [04:38<07:05,  1.20it/s]

batch loss: 0.4531


Epoch 5:  40%|███▉      | 335/844 [04:39<07:04,  1.20it/s]

batch loss: 0.4647


Epoch 5:  40%|███▉      | 336/844 [04:40<07:04,  1.20it/s]

batch loss: 0.3796


Epoch 5:  40%|███▉      | 337/844 [04:41<07:03,  1.20it/s]

batch loss: 0.3867


Epoch 5:  40%|████      | 338/844 [04:41<07:03,  1.19it/s]

batch loss: 0.4513


Epoch 5:  40%|████      | 339/844 [04:42<07:02,  1.20it/s]

batch loss: 0.4252


Epoch 5:  40%|████      | 340/844 [04:43<07:00,  1.20it/s]

batch loss: 0.4046


Epoch 5:  40%|████      | 341/844 [04:44<06:59,  1.20it/s]

batch loss: 0.4912


Epoch 5:  41%|████      | 342/844 [04:45<06:57,  1.20it/s]

batch loss: 0.4099


Epoch 5:  41%|████      | 343/844 [04:46<06:57,  1.20it/s]

batch loss: 0.4486


Epoch 5:  41%|████      | 344/844 [04:46<06:56,  1.20it/s]

batch loss: 0.4688


Epoch 5:  41%|████      | 345/844 [04:47<06:53,  1.21it/s]

batch loss: 0.4165


Epoch 5:  41%|████      | 346/844 [04:48<06:54,  1.20it/s]

batch loss: 0.4794


Epoch 5:  41%|████      | 347/844 [04:49<06:53,  1.20it/s]

batch loss: 0.4384


Epoch 5:  41%|████      | 348/844 [04:50<06:54,  1.20it/s]

batch loss: 0.4799


Epoch 5:  41%|████▏     | 349/844 [04:51<06:52,  1.20it/s]

batch loss: 0.4258


Epoch 5:  41%|████▏     | 350/844 [04:51<06:50,  1.20it/s]

batch loss: 0.5398


Epoch 5:  42%|████▏     | 351/844 [04:52<06:50,  1.20it/s]

batch loss: 0.4313


Epoch 5:  42%|████▏     | 352/844 [04:53<06:51,  1.20it/s]

batch loss: 0.4890


Epoch 5:  42%|████▏     | 353/844 [04:54<06:50,  1.20it/s]

batch loss: 0.4804


Epoch 5:  42%|████▏     | 354/844 [04:55<06:51,  1.19it/s]

batch loss: 0.4362


Epoch 5:  42%|████▏     | 355/844 [04:56<06:48,  1.20it/s]

batch loss: 0.4401


Epoch 5:  42%|████▏     | 356/844 [04:56<06:47,  1.20it/s]

batch loss: 0.5104


Epoch 5:  42%|████▏     | 357/844 [04:57<06:46,  1.20it/s]

batch loss: 0.4749


Epoch 5:  42%|████▏     | 358/844 [04:58<06:45,  1.20it/s]

batch loss: 0.4170


Epoch 5:  43%|████▎     | 359/844 [04:59<06:46,  1.19it/s]

batch loss: 0.4829


Epoch 5:  43%|████▎     | 360/844 [05:00<06:43,  1.20it/s]

batch loss: 0.4078


Epoch 5:  43%|████▎     | 361/844 [05:01<06:42,  1.20it/s]

batch loss: 0.4709


Epoch 5:  43%|████▎     | 362/844 [05:01<06:42,  1.20it/s]

batch loss: 0.5027


Epoch 5:  43%|████▎     | 363/844 [05:02<06:42,  1.19it/s]

batch loss: 0.4482


Epoch 5:  43%|████▎     | 364/844 [05:03<06:41,  1.19it/s]

batch loss: 0.5110


Epoch 5:  43%|████▎     | 365/844 [05:04<06:41,  1.19it/s]

batch loss: 0.4620


Epoch 5:  43%|████▎     | 366/844 [05:05<06:40,  1.19it/s]

batch loss: 0.4153


Epoch 5:  43%|████▎     | 367/844 [05:06<06:39,  1.19it/s]

batch loss: 0.4903


Epoch 5:  44%|████▎     | 368/844 [05:07<06:38,  1.20it/s]

batch loss: 0.4319


Epoch 5:  44%|████▎     | 369/844 [05:07<06:36,  1.20it/s]

batch loss: 0.4273


Epoch 5:  44%|████▍     | 370/844 [05:08<06:37,  1.19it/s]

batch loss: 0.4062


Epoch 5:  44%|████▍     | 371/844 [05:09<06:35,  1.19it/s]

batch loss: 0.4710


Epoch 5:  44%|████▍     | 372/844 [05:10<06:35,  1.19it/s]

batch loss: 0.4758


Epoch 5:  44%|████▍     | 373/844 [05:11<06:34,  1.19it/s]

batch loss: 0.4488


Epoch 5:  44%|████▍     | 374/844 [05:12<06:33,  1.20it/s]

batch loss: 0.4705


Epoch 5:  44%|████▍     | 375/844 [05:12<06:31,  1.20it/s]

batch loss: 0.4194


Epoch 5:  45%|████▍     | 376/844 [05:13<06:30,  1.20it/s]

batch loss: 0.4764


Epoch 5:  45%|████▍     | 377/844 [05:14<06:29,  1.20it/s]

batch loss: 0.3943


Epoch 5:  45%|████▍     | 378/844 [05:15<06:29,  1.20it/s]

batch loss: 0.3768


Epoch 5:  45%|████▍     | 379/844 [05:16<06:27,  1.20it/s]

batch loss: 0.4842


Epoch 5:  45%|████▌     | 380/844 [05:17<06:26,  1.20it/s]

batch loss: 0.5347


Epoch 5:  45%|████▌     | 381/844 [05:17<06:27,  1.20it/s]

batch loss: 0.4811


Epoch 5:  45%|████▌     | 382/844 [05:18<06:25,  1.20it/s]

batch loss: 0.4370


Epoch 5:  45%|████▌     | 383/844 [05:19<06:24,  1.20it/s]

batch loss: 0.5055


Epoch 5:  45%|████▌     | 384/844 [05:20<06:23,  1.20it/s]

batch loss: 0.4595


Epoch 5:  46%|████▌     | 385/844 [05:21<06:24,  1.19it/s]

batch loss: 0.5044


Epoch 5:  46%|████▌     | 386/844 [05:22<06:22,  1.20it/s]

batch loss: 0.4340


Epoch 5:  46%|████▌     | 387/844 [05:22<06:23,  1.19it/s]

batch loss: 0.4820


Epoch 5:  46%|████▌     | 388/844 [05:23<06:21,  1.19it/s]

batch loss: 0.4857


Epoch 5:  46%|████▌     | 389/844 [05:24<06:21,  1.19it/s]

batch loss: 0.4452


Epoch 5:  46%|████▌     | 390/844 [05:25<06:20,  1.19it/s]

batch loss: 0.4962


Epoch 5:  46%|████▋     | 391/844 [05:26<06:19,  1.19it/s]

batch loss: 0.4880


Epoch 5:  46%|████▋     | 392/844 [05:27<06:17,  1.20it/s]

batch loss: 0.4471


Epoch 5:  47%|████▋     | 393/844 [05:27<06:17,  1.19it/s]

batch loss: 0.4110


Epoch 5:  47%|████▋     | 394/844 [05:28<06:15,  1.20it/s]

batch loss: 0.5510


Epoch 5:  47%|████▋     | 395/844 [05:29<06:14,  1.20it/s]

batch loss: 0.4837


Epoch 5:  47%|████▋     | 396/844 [05:30<06:12,  1.20it/s]

batch loss: 0.4391


Epoch 5:  47%|████▋     | 397/844 [05:31<06:11,  1.20it/s]

batch loss: 0.3854


Epoch 5:  47%|████▋     | 398/844 [05:32<06:11,  1.20it/s]

batch loss: 0.4810


Epoch 5:  47%|████▋     | 399/844 [05:32<06:11,  1.20it/s]

batch loss: 0.3551


Epoch 5:  47%|████▋     | 400/844 [05:33<06:10,  1.20it/s]

batch loss: 0.4035


Epoch 5:  48%|████▊     | 401/844 [05:34<06:09,  1.20it/s]

batch loss: 0.4975


Epoch 5:  48%|████▊     | 402/844 [05:35<06:09,  1.20it/s]

batch loss: 0.4761


Epoch 5:  48%|████▊     | 403/844 [05:36<06:10,  1.19it/s]

batch loss: 0.4236


Epoch 5:  48%|████▊     | 404/844 [05:37<06:08,  1.19it/s]

batch loss: 0.3866


Epoch 5:  48%|████▊     | 405/844 [05:37<06:07,  1.19it/s]

batch loss: 0.3532


Epoch 5:  48%|████▊     | 406/844 [05:38<06:06,  1.19it/s]

batch loss: 0.4270


Epoch 5:  48%|████▊     | 407/844 [05:39<06:05,  1.20it/s]

batch loss: 0.4510


Epoch 5:  48%|████▊     | 408/844 [05:40<06:04,  1.20it/s]

batch loss: 0.5512


Epoch 5:  48%|████▊     | 409/844 [05:41<06:03,  1.20it/s]

batch loss: 0.4245


Epoch 5:  49%|████▊     | 410/844 [05:42<06:02,  1.20it/s]

batch loss: 0.3887


Epoch 5:  49%|████▊     | 411/844 [05:42<06:01,  1.20it/s]

batch loss: 0.4943


Epoch 5:  49%|████▉     | 412/844 [05:43<06:00,  1.20it/s]

batch loss: 0.3989


Epoch 5:  49%|████▉     | 413/844 [05:44<05:58,  1.20it/s]

batch loss: 0.4108


Epoch 5:  49%|████▉     | 414/844 [05:45<05:58,  1.20it/s]

batch loss: 0.3826


Epoch 5:  49%|████▉     | 415/844 [05:46<05:58,  1.20it/s]

batch loss: 0.5572


Epoch 5:  49%|████▉     | 416/844 [05:47<05:56,  1.20it/s]

batch loss: 0.4820


Epoch 5:  49%|████▉     | 417/844 [05:47<05:56,  1.20it/s]

batch loss: 0.3874


Epoch 5:  50%|████▉     | 418/844 [05:48<05:55,  1.20it/s]

batch loss: 0.5087


Epoch 5:  50%|████▉     | 419/844 [05:49<05:54,  1.20it/s]

batch loss: 0.4351


Epoch 5:  50%|████▉     | 420/844 [05:50<05:54,  1.20it/s]

batch loss: 0.4189


Epoch 5:  50%|████▉     | 421/844 [05:51<05:53,  1.20it/s]

batch loss: 0.4558


Epoch 5:  50%|█████     | 422/844 [05:52<05:52,  1.20it/s]

batch loss: 0.4676


Epoch 5:  50%|█████     | 423/844 [05:52<05:50,  1.20it/s]

batch loss: 0.4716


Epoch 5:  50%|█████     | 424/844 [05:53<05:49,  1.20it/s]

batch loss: 0.4376


Epoch 5:  50%|█████     | 425/844 [05:54<05:48,  1.20it/s]

batch loss: 0.4675


Epoch 5:  50%|█████     | 426/844 [05:55<05:47,  1.20it/s]

batch loss: 0.4182


Epoch 5:  51%|█████     | 427/844 [05:56<05:46,  1.20it/s]

batch loss: 0.3538


Epoch 5:  51%|█████     | 428/844 [05:57<05:46,  1.20it/s]

batch loss: 0.4408


Epoch 5:  51%|█████     | 429/844 [05:57<05:45,  1.20it/s]

batch loss: 0.4552


Epoch 5:  51%|█████     | 430/844 [05:58<05:45,  1.20it/s]

batch loss: 0.4594


Epoch 5:  51%|█████     | 431/844 [05:59<05:44,  1.20it/s]

batch loss: 0.4205


Epoch 5:  51%|█████     | 432/844 [06:00<05:42,  1.20it/s]

batch loss: 0.4637


Epoch 5:  51%|█████▏    | 433/844 [06:01<05:42,  1.20it/s]

batch loss: 0.4108


Epoch 5:  51%|█████▏    | 434/844 [06:02<05:42,  1.20it/s]

batch loss: 0.4764


Epoch 5:  52%|█████▏    | 435/844 [06:02<05:42,  1.19it/s]

batch loss: 0.4648


Epoch 5:  52%|█████▏    | 436/844 [06:03<05:42,  1.19it/s]

batch loss: 0.4253


Epoch 5:  52%|█████▏    | 437/844 [06:04<05:40,  1.20it/s]

batch loss: 0.4814


Epoch 5:  52%|█████▏    | 438/844 [06:05<05:38,  1.20it/s]

batch loss: 0.4573


Epoch 5:  52%|█████▏    | 439/844 [06:06<05:37,  1.20it/s]

batch loss: 0.4747


Epoch 5:  52%|█████▏    | 440/844 [06:07<05:36,  1.20it/s]

batch loss: 0.4296


Epoch 5:  52%|█████▏    | 441/844 [06:07<05:35,  1.20it/s]

batch loss: 0.5977


Epoch 5:  52%|█████▏    | 442/844 [06:08<05:34,  1.20it/s]

batch loss: 0.5181


Epoch 5:  52%|█████▏    | 443/844 [06:09<05:34,  1.20it/s]

batch loss: 0.3613


Epoch 5:  53%|█████▎    | 444/844 [06:10<05:34,  1.20it/s]

batch loss: 0.4937


Epoch 5:  53%|█████▎    | 445/844 [06:11<05:34,  1.19it/s]

batch loss: 0.4214


Epoch 5:  53%|█████▎    | 446/844 [06:12<05:34,  1.19it/s]

batch loss: 0.5247


Epoch 5:  53%|█████▎    | 447/844 [06:12<05:32,  1.19it/s]

batch loss: 0.4287


Epoch 5:  53%|█████▎    | 448/844 [06:13<05:30,  1.20it/s]

batch loss: 0.4820


Epoch 5:  53%|█████▎    | 449/844 [06:14<05:29,  1.20it/s]

batch loss: 0.4977


Epoch 5:  53%|█████▎    | 450/844 [06:15<05:29,  1.20it/s]

batch loss: 0.5255


Epoch 5:  53%|█████▎    | 451/844 [06:16<05:28,  1.20it/s]

batch loss: 0.5194


Epoch 5:  54%|█████▎    | 452/844 [06:17<05:27,  1.20it/s]

batch loss: 0.5094


Epoch 5:  54%|█████▎    | 453/844 [06:17<05:26,  1.20it/s]

batch loss: 0.4147


Epoch 5:  54%|█████▍    | 454/844 [06:18<05:25,  1.20it/s]

batch loss: 0.4378


Epoch 5:  54%|█████▍    | 455/844 [06:19<05:24,  1.20it/s]

batch loss: 0.4975


Epoch 5:  54%|█████▍    | 456/844 [06:20<05:23,  1.20it/s]

batch loss: 0.4240


Epoch 5:  54%|█████▍    | 457/844 [06:21<05:22,  1.20it/s]

batch loss: 0.4054


Epoch 5:  54%|█████▍    | 458/844 [06:22<05:21,  1.20it/s]

batch loss: 0.4328


Epoch 5:  54%|█████▍    | 459/844 [06:22<05:21,  1.20it/s]

batch loss: 0.4705


Epoch 5:  55%|█████▍    | 460/844 [06:23<05:19,  1.20it/s]

batch loss: 0.4615


Epoch 5:  55%|█████▍    | 461/844 [06:24<05:18,  1.20it/s]

batch loss: 0.4071


Epoch 5:  55%|█████▍    | 462/844 [06:25<05:17,  1.20it/s]

batch loss: 0.4660


Epoch 5:  55%|█████▍    | 463/844 [06:26<05:16,  1.20it/s]

batch loss: 0.4926


Epoch 5:  55%|█████▍    | 464/844 [06:27<05:15,  1.20it/s]

batch loss: 0.4436


Epoch 5:  55%|█████▌    | 465/844 [06:27<05:15,  1.20it/s]

batch loss: 0.4753


Epoch 5:  55%|█████▌    | 466/844 [06:28<05:14,  1.20it/s]

batch loss: 0.4167


Epoch 5:  55%|█████▌    | 467/844 [06:29<05:13,  1.20it/s]

batch loss: 0.4535


Epoch 5:  55%|█████▌    | 468/844 [06:30<05:13,  1.20it/s]

batch loss: 0.3915


Epoch 5:  56%|█████▌    | 469/844 [06:31<05:12,  1.20it/s]

batch loss: 0.4309


Epoch 5:  56%|█████▌    | 470/844 [06:32<05:11,  1.20it/s]

batch loss: 0.4674


Epoch 5:  56%|█████▌    | 471/844 [06:32<05:10,  1.20it/s]

batch loss: 0.4176


Epoch 5:  56%|█████▌    | 472/844 [06:33<05:10,  1.20it/s]

batch loss: 0.3875


Epoch 5:  56%|█████▌    | 473/844 [06:34<05:08,  1.20it/s]

batch loss: 0.4836


Epoch 5:  56%|█████▌    | 474/844 [06:35<05:07,  1.20it/s]

batch loss: 0.4324


Epoch 5:  56%|█████▋    | 475/844 [06:36<05:07,  1.20it/s]

batch loss: 0.3931


Epoch 5:  56%|█████▋    | 476/844 [06:37<05:06,  1.20it/s]

batch loss: 0.5666


Epoch 5:  57%|█████▋    | 477/844 [06:37<05:05,  1.20it/s]

batch loss: 0.4065


Epoch 5:  57%|█████▋    | 478/844 [06:38<05:04,  1.20it/s]

batch loss: 0.4870


Epoch 5:  57%|█████▋    | 479/844 [06:39<05:04,  1.20it/s]

batch loss: 0.4822


Epoch 5:  57%|█████▋    | 480/844 [06:40<05:02,  1.20it/s]

batch loss: 0.5017


Epoch 5:  57%|█████▋    | 481/844 [06:41<05:01,  1.20it/s]

batch loss: 0.3733


Epoch 5:  57%|█████▋    | 482/844 [06:42<05:01,  1.20it/s]

batch loss: 0.5226


Epoch 5:  57%|█████▋    | 483/844 [06:42<05:00,  1.20it/s]

batch loss: 0.4433


Epoch 5:  57%|█████▋    | 484/844 [06:43<05:00,  1.20it/s]

batch loss: 0.4381


Epoch 5:  57%|█████▋    | 485/844 [06:44<04:59,  1.20it/s]

batch loss: 0.4762


Epoch 5:  58%|█████▊    | 486/844 [06:45<04:59,  1.20it/s]

batch loss: 0.4214


Epoch 5:  58%|█████▊    | 487/844 [06:46<04:58,  1.20it/s]

batch loss: 0.4763


Epoch 5:  58%|█████▊    | 488/844 [06:47<04:57,  1.20it/s]

batch loss: 0.3858


Epoch 5:  58%|█████▊    | 489/844 [06:47<04:55,  1.20it/s]

batch loss: 0.5171


Epoch 5:  58%|█████▊    | 490/844 [06:48<04:55,  1.20it/s]

batch loss: 0.4540


Epoch 5:  58%|█████▊    | 491/844 [06:49<04:54,  1.20it/s]

batch loss: 0.5004


Epoch 5:  58%|█████▊    | 492/844 [06:50<04:53,  1.20it/s]

batch loss: 0.5709


Epoch 5:  58%|█████▊    | 493/844 [06:51<04:53,  1.20it/s]

batch loss: 0.3890


Epoch 5:  59%|█████▊    | 494/844 [06:52<04:51,  1.20it/s]

batch loss: 0.4784


Epoch 5:  59%|█████▊    | 495/844 [06:52<04:50,  1.20it/s]

batch loss: 0.3777


Epoch 5:  59%|█████▉    | 496/844 [06:53<04:49,  1.20it/s]

batch loss: 0.4413


Epoch 5:  59%|█████▉    | 497/844 [06:54<04:49,  1.20it/s]

batch loss: 0.4296


Epoch 5:  59%|█████▉    | 498/844 [06:55<04:48,  1.20it/s]

batch loss: 0.3847


Epoch 5:  59%|█████▉    | 499/844 [06:56<04:48,  1.20it/s]

batch loss: 0.4086


Epoch 5:  59%|█████▉    | 500/844 [06:57<04:46,  1.20it/s]

batch loss: 0.4262


Epoch 5:  59%|█████▉    | 501/844 [06:57<04:46,  1.20it/s]

batch loss: 0.4895


Epoch 5:  59%|█████▉    | 502/844 [06:58<04:47,  1.19it/s]

batch loss: 0.4829


Epoch 5:  60%|█████▉    | 503/844 [06:59<04:45,  1.20it/s]

batch loss: 0.4533


Epoch 5:  60%|█████▉    | 504/844 [07:00<04:44,  1.20it/s]

batch loss: 0.4686


Epoch 5:  60%|█████▉    | 505/844 [07:01<04:42,  1.20it/s]

batch loss: 0.5447


Epoch 5:  60%|█████▉    | 506/844 [07:02<04:42,  1.20it/s]

batch loss: 0.4218


Epoch 5:  60%|██████    | 507/844 [07:03<04:41,  1.20it/s]

batch loss: 0.5223


Epoch 5:  60%|██████    | 508/844 [07:03<04:40,  1.20it/s]

batch loss: 0.3747


Epoch 5:  60%|██████    | 509/844 [07:04<04:39,  1.20it/s]

batch loss: 0.3898


Epoch 5:  60%|██████    | 510/844 [07:05<04:37,  1.20it/s]

batch loss: 0.5136


Epoch 5:  61%|██████    | 511/844 [07:06<04:37,  1.20it/s]

batch loss: 0.4877


Epoch 5:  61%|██████    | 512/844 [07:07<04:36,  1.20it/s]

batch loss: 0.5007


Epoch 5:  61%|██████    | 513/844 [07:07<04:35,  1.20it/s]

batch loss: 0.5123


Epoch 5:  61%|██████    | 514/844 [07:08<04:34,  1.20it/s]

batch loss: 0.4428


Epoch 5:  61%|██████    | 515/844 [07:09<04:33,  1.20it/s]

batch loss: 0.4142


Epoch 5:  61%|██████    | 516/844 [07:10<04:33,  1.20it/s]

batch loss: 0.4403


Epoch 5:  61%|██████▏   | 517/844 [07:11<04:33,  1.20it/s]

batch loss: 0.5004


Epoch 5:  61%|██████▏   | 518/844 [07:12<04:31,  1.20it/s]

batch loss: 0.4935


Epoch 5:  61%|██████▏   | 519/844 [07:13<04:31,  1.20it/s]

batch loss: 0.4668


Epoch 5:  62%|██████▏   | 520/844 [07:13<04:30,  1.20it/s]

batch loss: 0.4373


Epoch 5:  62%|██████▏   | 521/844 [07:14<04:30,  1.20it/s]

batch loss: 0.5557


Epoch 5:  62%|██████▏   | 522/844 [07:15<04:28,  1.20it/s]

batch loss: 0.4172


Epoch 5:  62%|██████▏   | 523/844 [07:16<04:28,  1.20it/s]

batch loss: 0.4586


Epoch 5:  62%|██████▏   | 524/844 [07:17<04:27,  1.20it/s]

batch loss: 0.4765


Epoch 5:  62%|██████▏   | 525/844 [07:18<04:26,  1.20it/s]

batch loss: 0.4969


Epoch 5:  62%|██████▏   | 526/844 [07:18<04:24,  1.20it/s]

batch loss: 0.4469


Epoch 5:  62%|██████▏   | 527/844 [07:19<04:23,  1.20it/s]

batch loss: 0.5325


Epoch 5:  63%|██████▎   | 528/844 [07:20<04:22,  1.20it/s]

batch loss: 0.4201


Epoch 5:  63%|██████▎   | 529/844 [07:21<04:22,  1.20it/s]

batch loss: 0.4035


Epoch 5:  63%|██████▎   | 530/844 [07:22<04:21,  1.20it/s]

batch loss: 0.4341


Epoch 5:  63%|██████▎   | 531/844 [07:22<04:20,  1.20it/s]

batch loss: 0.4091


Epoch 5:  63%|██████▎   | 532/844 [07:23<04:18,  1.20it/s]

batch loss: 0.4871


Epoch 5:  63%|██████▎   | 533/844 [07:24<04:18,  1.20it/s]

batch loss: 0.5120


Epoch 5:  63%|██████▎   | 534/844 [07:25<04:18,  1.20it/s]

batch loss: 0.4837


Epoch 5:  63%|██████▎   | 535/844 [07:26<04:18,  1.20it/s]

batch loss: 0.4821


Epoch 5:  64%|██████▎   | 536/844 [07:27<04:17,  1.20it/s]

batch loss: 0.4833


Epoch 5:  64%|██████▎   | 537/844 [07:28<04:15,  1.20it/s]

batch loss: 0.4662


Epoch 5:  64%|██████▎   | 538/844 [07:28<04:15,  1.20it/s]

batch loss: 0.4769


Epoch 5:  64%|██████▍   | 539/844 [07:29<04:14,  1.20it/s]

batch loss: 0.4938


Epoch 5:  64%|██████▍   | 540/844 [07:30<04:13,  1.20it/s]

batch loss: 0.4979


Epoch 5:  64%|██████▍   | 541/844 [07:31<04:12,  1.20it/s]

batch loss: 0.5561


Epoch 5:  64%|██████▍   | 542/844 [07:32<04:11,  1.20it/s]

batch loss: 0.4481


Epoch 5:  64%|██████▍   | 543/844 [07:32<04:10,  1.20it/s]

batch loss: 0.4091


Epoch 5:  64%|██████▍   | 544/844 [07:33<04:09,  1.20it/s]

batch loss: 0.4594


Epoch 5:  65%|██████▍   | 545/844 [07:34<04:08,  1.20it/s]

batch loss: 0.4424


Epoch 5:  65%|██████▍   | 546/844 [07:35<04:07,  1.20it/s]

batch loss: 0.3931


Epoch 5:  65%|██████▍   | 547/844 [07:36<04:06,  1.20it/s]

batch loss: 0.5043


Epoch 5:  65%|██████▍   | 548/844 [07:37<04:05,  1.21it/s]

batch loss: 0.5242


Epoch 5:  65%|██████▌   | 549/844 [07:37<04:04,  1.20it/s]

batch loss: 0.4608


Epoch 5:  65%|██████▌   | 550/844 [07:38<04:04,  1.20it/s]

batch loss: 0.3848


Epoch 5:  65%|██████▌   | 551/844 [07:39<04:04,  1.20it/s]

batch loss: 0.3822


Epoch 5:  65%|██████▌   | 552/844 [07:40<04:02,  1.20it/s]

batch loss: 0.4405


Epoch 5:  66%|██████▌   | 553/844 [07:41<04:02,  1.20it/s]

batch loss: 0.4646


Epoch 5:  66%|██████▌   | 554/844 [07:42<04:02,  1.19it/s]

batch loss: 0.4872


Epoch 5:  66%|██████▌   | 555/844 [07:42<04:01,  1.20it/s]

batch loss: 0.4799


Epoch 5:  66%|██████▌   | 556/844 [07:43<04:00,  1.20it/s]

batch loss: 0.4309


Epoch 5:  66%|██████▌   | 557/844 [07:44<03:59,  1.20it/s]

batch loss: 0.4430


Epoch 5:  66%|██████▌   | 558/844 [07:45<03:58,  1.20it/s]

batch loss: 0.5000


Epoch 5:  66%|██████▌   | 559/844 [07:46<03:56,  1.20it/s]

batch loss: 0.4683


Epoch 5:  66%|██████▋   | 560/844 [07:47<03:56,  1.20it/s]

batch loss: 0.5313


Epoch 5:  66%|██████▋   | 561/844 [07:47<03:55,  1.20it/s]

batch loss: 0.3799


Epoch 5:  67%|██████▋   | 562/844 [07:48<03:55,  1.20it/s]

batch loss: 0.3939


Epoch 5:  67%|██████▋   | 563/844 [07:49<03:53,  1.20it/s]

batch loss: 0.4841


Epoch 5:  67%|██████▋   | 564/844 [07:50<03:53,  1.20it/s]

batch loss: 0.5175


Epoch 5:  67%|██████▋   | 565/844 [07:51<03:52,  1.20it/s]

batch loss: 0.4007


Epoch 5:  67%|██████▋   | 566/844 [07:52<03:51,  1.20it/s]

batch loss: 0.4551


Epoch 5:  67%|██████▋   | 567/844 [07:52<03:51,  1.20it/s]

batch loss: 0.4876


Epoch 5:  67%|██████▋   | 568/844 [07:53<03:51,  1.19it/s]

batch loss: 0.4347


Epoch 5:  67%|██████▋   | 569/844 [07:54<03:49,  1.20it/s]

batch loss: 0.4291


Epoch 5:  68%|██████▊   | 570/844 [07:55<03:49,  1.19it/s]

batch loss: 0.4287


Epoch 5:  68%|██████▊   | 571/844 [07:56<03:48,  1.19it/s]

batch loss: 0.4607


Epoch 5:  68%|██████▊   | 572/844 [07:57<03:48,  1.19it/s]

batch loss: 0.4415


Epoch 5:  68%|██████▊   | 573/844 [07:58<03:46,  1.19it/s]

batch loss: 0.4609


Epoch 5:  68%|██████▊   | 574/844 [07:58<03:45,  1.20it/s]

batch loss: 0.4330


Epoch 5:  68%|██████▊   | 575/844 [07:59<03:43,  1.20it/s]

batch loss: 0.4956


Epoch 5:  68%|██████▊   | 576/844 [08:00<03:42,  1.20it/s]

batch loss: 0.4412


Epoch 5:  68%|██████▊   | 577/844 [08:01<03:41,  1.20it/s]

batch loss: 0.4323


Epoch 5:  68%|██████▊   | 578/844 [08:02<03:40,  1.20it/s]

batch loss: 0.5122


Epoch 5:  69%|██████▊   | 579/844 [08:02<03:40,  1.20it/s]

batch loss: 0.4667


Epoch 5:  69%|██████▊   | 580/844 [08:03<03:39,  1.20it/s]

batch loss: 0.4287


Epoch 5:  69%|██████▉   | 581/844 [08:04<03:39,  1.20it/s]

batch loss: 0.5063


Epoch 5:  69%|██████▉   | 582/844 [08:05<03:38,  1.20it/s]

batch loss: 0.4394


Epoch 5:  69%|██████▉   | 583/844 [08:06<03:37,  1.20it/s]

batch loss: 0.4437


Epoch 5:  69%|██████▉   | 584/844 [08:07<03:37,  1.19it/s]

batch loss: 0.4424


Epoch 5:  69%|██████▉   | 585/844 [08:08<03:37,  1.19it/s]

batch loss: 0.5352


Epoch 5:  69%|██████▉   | 586/844 [08:08<03:36,  1.19it/s]

batch loss: 0.4567


Epoch 5:  70%|██████▉   | 587/844 [08:09<03:36,  1.19it/s]

batch loss: 0.4433


Epoch 5:  70%|██████▉   | 588/844 [08:10<03:34,  1.19it/s]

batch loss: 0.4653


Epoch 5:  70%|██████▉   | 589/844 [08:11<03:33,  1.19it/s]

batch loss: 0.4755


Epoch 5:  70%|██████▉   | 590/844 [08:12<03:32,  1.20it/s]

batch loss: 0.4583


Epoch 5:  70%|███████   | 591/844 [08:13<03:31,  1.19it/s]

batch loss: 0.4838


Epoch 5:  70%|███████   | 592/844 [08:13<03:30,  1.20it/s]

batch loss: 0.4772


Epoch 5:  70%|███████   | 593/844 [08:14<03:29,  1.20it/s]

batch loss: 0.4776


Epoch 5:  70%|███████   | 594/844 [08:15<03:28,  1.20it/s]

batch loss: 0.4361


Epoch 5:  70%|███████   | 595/844 [08:16<03:28,  1.19it/s]

batch loss: 0.4724


Epoch 5:  71%|███████   | 596/844 [08:17<03:27,  1.20it/s]

batch loss: 0.5331


Epoch 5:  71%|███████   | 597/844 [08:18<03:25,  1.20it/s]

batch loss: 0.4309


Epoch 5:  71%|███████   | 598/844 [08:18<03:25,  1.20it/s]

batch loss: 0.4139


Epoch 5:  71%|███████   | 599/844 [08:19<03:24,  1.20it/s]

batch loss: 0.4587


Epoch 5:  71%|███████   | 600/844 [08:20<03:23,  1.20it/s]

batch loss: 0.4303


Epoch 5:  71%|███████   | 601/844 [08:21<03:23,  1.19it/s]

batch loss: 0.4655


Epoch 5:  71%|███████▏  | 602/844 [08:22<03:23,  1.19it/s]

batch loss: 0.4232


Epoch 5:  71%|███████▏  | 603/844 [08:23<03:21,  1.19it/s]

batch loss: 0.4082


Epoch 5:  72%|███████▏  | 604/844 [08:23<03:21,  1.19it/s]

batch loss: 0.4281


Epoch 5:  72%|███████▏  | 605/844 [08:24<03:20,  1.19it/s]

batch loss: 0.4252


Epoch 5:  72%|███████▏  | 606/844 [08:25<03:18,  1.20it/s]

batch loss: 0.4803


Epoch 5:  72%|███████▏  | 607/844 [08:26<03:17,  1.20it/s]

batch loss: 0.4647


Epoch 5:  72%|███████▏  | 608/844 [08:27<03:16,  1.20it/s]

batch loss: 0.3891


Epoch 5:  72%|███████▏  | 609/844 [08:28<03:16,  1.20it/s]

batch loss: 0.4318


Epoch 5:  72%|███████▏  | 610/844 [08:28<03:15,  1.20it/s]

batch loss: 0.3828


Epoch 5:  72%|███████▏  | 611/844 [08:29<03:14,  1.20it/s]

batch loss: 0.4915


Epoch 5:  73%|███████▎  | 612/844 [08:30<03:13,  1.20it/s]

batch loss: 0.4678


Epoch 5:  73%|███████▎  | 613/844 [08:31<03:12,  1.20it/s]

batch loss: 0.4787


Epoch 5:  73%|███████▎  | 614/844 [08:32<03:11,  1.20it/s]

batch loss: 0.4533


Epoch 5:  73%|███████▎  | 615/844 [08:33<03:11,  1.20it/s]

batch loss: 0.4817


Epoch 5:  73%|███████▎  | 616/844 [08:33<03:10,  1.20it/s]

batch loss: 0.4394


Epoch 5:  73%|███████▎  | 617/844 [08:34<03:10,  1.19it/s]

batch loss: 0.4641


Epoch 5:  73%|███████▎  | 618/844 [08:35<03:09,  1.19it/s]

batch loss: 0.4599


Epoch 5:  73%|███████▎  | 619/844 [08:36<03:08,  1.19it/s]

batch loss: 0.4303


Epoch 5:  73%|███████▎  | 620/844 [08:37<03:07,  1.19it/s]

batch loss: 0.4934


Epoch 5:  74%|███████▎  | 621/844 [08:38<03:06,  1.19it/s]

batch loss: 0.4587


Epoch 5:  74%|███████▎  | 622/844 [08:38<03:05,  1.20it/s]

batch loss: 0.4630


Epoch 5:  74%|███████▍  | 623/844 [08:39<03:04,  1.20it/s]

batch loss: 0.4550


Epoch 5:  74%|███████▍  | 624/844 [08:40<03:03,  1.20it/s]

batch loss: 0.4656


Epoch 5:  74%|███████▍  | 625/844 [08:41<03:03,  1.19it/s]

batch loss: 0.5025


Epoch 5:  74%|███████▍  | 626/844 [08:42<03:02,  1.20it/s]

batch loss: 0.4867


Epoch 5:  74%|███████▍  | 627/844 [08:43<03:01,  1.19it/s]

batch loss: 0.4350


Epoch 5:  74%|███████▍  | 628/844 [08:43<03:00,  1.20it/s]

batch loss: 0.4321


Epoch 5:  75%|███████▍  | 629/844 [08:44<02:58,  1.20it/s]

batch loss: 0.4706


Epoch 5:  75%|███████▍  | 630/844 [08:45<02:58,  1.20it/s]

batch loss: 0.4576


Epoch 5:  75%|███████▍  | 631/844 [08:46<02:57,  1.20it/s]

batch loss: 0.4315


Epoch 5:  75%|███████▍  | 632/844 [08:47<02:57,  1.19it/s]

batch loss: 0.4519


Epoch 5:  75%|███████▌  | 633/844 [08:48<02:57,  1.19it/s]

batch loss: 0.4698


Epoch 5:  75%|███████▌  | 634/844 [08:49<02:56,  1.19it/s]

batch loss: 0.4387


Epoch 5:  75%|███████▌  | 635/844 [08:49<02:56,  1.19it/s]

batch loss: 0.4838


Epoch 5:  75%|███████▌  | 636/844 [08:50<02:54,  1.19it/s]

batch loss: 0.4636


Epoch 5:  75%|███████▌  | 637/844 [08:51<02:53,  1.19it/s]

batch loss: 0.4962


Epoch 5:  76%|███████▌  | 638/844 [08:52<02:52,  1.19it/s]

batch loss: 0.4315


Epoch 5:  76%|███████▌  | 639/844 [08:53<02:51,  1.19it/s]

batch loss: 0.4974


Epoch 5:  76%|███████▌  | 640/844 [08:54<02:50,  1.19it/s]

batch loss: 0.4731


Epoch 5:  76%|███████▌  | 641/844 [08:54<02:49,  1.19it/s]

batch loss: 0.4783


Epoch 5:  76%|███████▌  | 642/844 [08:55<02:48,  1.20it/s]

batch loss: 0.4014


Epoch 5:  76%|███████▌  | 643/844 [08:56<02:47,  1.20it/s]

batch loss: 0.5009


Epoch 5:  76%|███████▋  | 644/844 [08:57<02:46,  1.20it/s]

batch loss: 0.4276


Epoch 5:  76%|███████▋  | 645/844 [08:58<02:45,  1.20it/s]

batch loss: 0.4440


Epoch 5:  77%|███████▋  | 646/844 [08:59<02:45,  1.19it/s]

batch loss: 0.4550


Epoch 5:  77%|███████▋  | 647/844 [08:59<02:44,  1.20it/s]

batch loss: 0.5132


Epoch 5:  77%|███████▋  | 648/844 [09:00<02:44,  1.19it/s]

batch loss: 0.4759


Epoch 5:  77%|███████▋  | 649/844 [09:01<02:43,  1.20it/s]

batch loss: 0.5540


Epoch 5:  77%|███████▋  | 650/844 [09:02<02:42,  1.19it/s]

batch loss: 0.4381


Epoch 5:  77%|███████▋  | 651/844 [09:03<02:41,  1.19it/s]

batch loss: 0.3607


Epoch 5:  77%|███████▋  | 652/844 [09:04<02:41,  1.19it/s]

batch loss: 0.5522


Epoch 5:  77%|███████▋  | 653/844 [09:04<02:40,  1.19it/s]

batch loss: 0.4478


Epoch 5:  77%|███████▋  | 654/844 [09:05<02:39,  1.19it/s]

batch loss: 0.5865


Epoch 5:  78%|███████▊  | 655/844 [09:06<02:38,  1.19it/s]

batch loss: 0.4591


Epoch 5:  78%|███████▊  | 656/844 [09:07<02:37,  1.19it/s]

batch loss: 0.4423


Epoch 5:  78%|███████▊  | 657/844 [09:08<02:37,  1.19it/s]

batch loss: 0.4398


Epoch 5:  78%|███████▊  | 658/844 [09:09<02:36,  1.19it/s]

batch loss: 0.4010


Epoch 5:  78%|███████▊  | 659/844 [09:09<02:35,  1.19it/s]

batch loss: 0.5056


Epoch 5:  78%|███████▊  | 660/844 [09:10<02:34,  1.19it/s]

batch loss: 0.4425


Epoch 5:  78%|███████▊  | 661/844 [09:11<02:33,  1.19it/s]

batch loss: 0.4499


Epoch 5:  78%|███████▊  | 662/844 [09:12<02:32,  1.19it/s]

batch loss: 0.4550


Epoch 5:  79%|███████▊  | 663/844 [09:13<02:31,  1.20it/s]

batch loss: 0.4031


Epoch 5:  79%|███████▊  | 664/844 [09:14<02:31,  1.19it/s]

batch loss: 0.4219


Epoch 5:  79%|███████▉  | 665/844 [09:15<02:30,  1.19it/s]

batch loss: 0.4860


Epoch 5:  79%|███████▉  | 666/844 [09:15<02:29,  1.19it/s]

batch loss: 0.4942


Epoch 5:  79%|███████▉  | 667/844 [09:16<02:28,  1.19it/s]

batch loss: 0.5129


Epoch 5:  79%|███████▉  | 668/844 [09:17<02:28,  1.18it/s]

batch loss: 0.5026


Epoch 5:  79%|███████▉  | 669/844 [09:18<02:27,  1.19it/s]

batch loss: 0.4082


Epoch 5:  79%|███████▉  | 670/844 [09:19<02:25,  1.20it/s]

batch loss: 0.4366


Epoch 5:  80%|███████▉  | 671/844 [09:20<02:25,  1.19it/s]

batch loss: 0.4809


Epoch 5:  80%|███████▉  | 672/844 [09:20<02:24,  1.19it/s]

batch loss: 0.4623


Epoch 5:  80%|███████▉  | 673/844 [09:21<02:23,  1.19it/s]

batch loss: 0.4271


Epoch 5:  80%|███████▉  | 674/844 [09:22<02:22,  1.20it/s]

batch loss: 0.4679


Epoch 5:  80%|███████▉  | 675/844 [09:23<02:21,  1.20it/s]

batch loss: 0.4937


Epoch 5:  80%|████████  | 676/844 [09:24<02:20,  1.20it/s]

batch loss: 0.4017


Epoch 5:  80%|████████  | 677/844 [09:25<02:19,  1.20it/s]

batch loss: 0.3866


Epoch 5:  80%|████████  | 678/844 [09:25<02:19,  1.19it/s]

batch loss: 0.4838


Epoch 5:  80%|████████  | 679/844 [09:26<02:18,  1.19it/s]

batch loss: 0.4664


Epoch 5:  81%|████████  | 680/844 [09:27<02:16,  1.20it/s]

batch loss: 0.4741


Epoch 5:  81%|████████  | 681/844 [09:28<02:15,  1.20it/s]

batch loss: 0.4500


Epoch 5:  81%|████████  | 682/844 [09:29<02:15,  1.20it/s]

batch loss: 0.3705


Epoch 5:  81%|████████  | 683/844 [09:30<02:14,  1.20it/s]

batch loss: 0.4687


Epoch 5:  81%|████████  | 684/844 [09:30<02:13,  1.20it/s]

batch loss: 0.4228


Epoch 5:  81%|████████  | 685/844 [09:31<02:13,  1.19it/s]

batch loss: 0.4772


Epoch 5:  81%|████████▏ | 686/844 [09:32<02:12,  1.20it/s]

batch loss: 0.3863


Epoch 5:  81%|████████▏ | 687/844 [09:33<02:11,  1.19it/s]

batch loss: 0.4477


Epoch 5:  82%|████████▏ | 688/844 [09:34<02:10,  1.19it/s]

batch loss: 0.5041


Epoch 5:  82%|████████▏ | 689/844 [09:35<02:09,  1.20it/s]

batch loss: 0.4812


Epoch 5:  82%|████████▏ | 690/844 [09:35<02:09,  1.19it/s]

batch loss: 0.4491


Epoch 5:  82%|████████▏ | 691/844 [09:36<02:07,  1.20it/s]

batch loss: 0.4414


Epoch 5:  82%|████████▏ | 692/844 [09:37<02:06,  1.20it/s]

batch loss: 0.5343


Epoch 5:  82%|████████▏ | 693/844 [09:38<02:06,  1.20it/s]

batch loss: 0.3958


Epoch 5:  82%|████████▏ | 694/844 [09:39<02:05,  1.20it/s]

batch loss: 0.5321


Epoch 5:  82%|████████▏ | 695/844 [09:40<02:04,  1.20it/s]

batch loss: 0.4610


Epoch 5:  82%|████████▏ | 696/844 [09:40<02:03,  1.19it/s]

batch loss: 0.4492


Epoch 5:  83%|████████▎ | 697/844 [09:41<02:02,  1.20it/s]

batch loss: 0.4122


Epoch 5:  83%|████████▎ | 698/844 [09:42<02:01,  1.20it/s]

batch loss: 0.4567


Epoch 5:  83%|████████▎ | 699/844 [09:43<02:01,  1.19it/s]

batch loss: 0.5162


Epoch 5:  83%|████████▎ | 700/844 [09:44<02:00,  1.19it/s]

batch loss: 0.4393


Epoch 5:  83%|████████▎ | 701/844 [09:45<02:00,  1.19it/s]

batch loss: 0.4509


Epoch 5:  83%|████████▎ | 702/844 [09:45<01:58,  1.19it/s]

batch loss: 0.4919


Epoch 5:  83%|████████▎ | 703/844 [09:46<01:57,  1.20it/s]

batch loss: 0.4235


Epoch 5:  83%|████████▎ | 704/844 [09:47<01:56,  1.20it/s]

batch loss: 0.4637


Epoch 5:  84%|████████▎ | 705/844 [09:48<01:55,  1.20it/s]

batch loss: 0.4857


Epoch 5:  84%|████████▎ | 706/844 [09:49<01:54,  1.20it/s]

batch loss: 0.4121


Epoch 5:  84%|████████▍ | 707/844 [09:50<01:54,  1.20it/s]

batch loss: 0.5044


Epoch 5:  84%|████████▍ | 708/844 [09:50<01:53,  1.20it/s]

batch loss: 0.4481


Epoch 5:  84%|████████▍ | 709/844 [09:51<01:52,  1.20it/s]

batch loss: 0.3796


Epoch 5:  84%|████████▍ | 710/844 [09:52<01:51,  1.20it/s]

batch loss: 0.4341


Epoch 5:  84%|████████▍ | 711/844 [09:53<01:50,  1.20it/s]

batch loss: 0.3938


Epoch 5:  84%|████████▍ | 712/844 [09:54<01:49,  1.20it/s]

batch loss: 0.4940


Epoch 5:  84%|████████▍ | 713/844 [09:55<01:49,  1.20it/s]

batch loss: 0.4339


Epoch 5:  85%|████████▍ | 714/844 [09:55<01:48,  1.20it/s]

batch loss: 0.4134


Epoch 5:  85%|████████▍ | 715/844 [09:56<01:48,  1.19it/s]

batch loss: 0.4625


Epoch 5:  85%|████████▍ | 716/844 [09:57<01:47,  1.19it/s]

batch loss: 0.4602


Epoch 5:  85%|████████▍ | 717/844 [09:58<01:46,  1.19it/s]

batch loss: 0.4492


Epoch 5:  85%|████████▌ | 718/844 [09:59<01:45,  1.20it/s]

batch loss: 0.3796


Epoch 5:  85%|████████▌ | 719/844 [10:00<01:44,  1.19it/s]

batch loss: 0.5103


Epoch 5:  85%|████████▌ | 720/844 [10:01<01:43,  1.19it/s]

batch loss: 0.5033


Epoch 5:  85%|████████▌ | 721/844 [10:01<01:42,  1.20it/s]

batch loss: 0.3763


Epoch 5:  86%|████████▌ | 722/844 [10:02<01:42,  1.20it/s]

batch loss: 0.4227


Epoch 5:  86%|████████▌ | 723/844 [10:03<01:41,  1.20it/s]

batch loss: 0.5526


Epoch 5:  86%|████████▌ | 724/844 [10:04<01:40,  1.20it/s]

batch loss: 0.4558


Epoch 5:  86%|████████▌ | 725/844 [10:05<01:38,  1.20it/s]

batch loss: 0.4125


Epoch 5:  86%|████████▌ | 726/844 [10:05<01:37,  1.20it/s]

batch loss: 0.4635


Epoch 5:  86%|████████▌ | 727/844 [10:06<01:37,  1.20it/s]

batch loss: 0.4927


Epoch 5:  86%|████████▋ | 728/844 [10:07<01:36,  1.20it/s]

batch loss: 0.5460


Epoch 5:  86%|████████▋ | 729/844 [10:08<01:35,  1.20it/s]

batch loss: 0.5330


Epoch 5:  86%|████████▋ | 730/844 [10:09<01:34,  1.20it/s]

batch loss: 0.4323


Epoch 5:  87%|████████▋ | 731/844 [10:10<01:33,  1.20it/s]

batch loss: 0.4334


Epoch 5:  87%|████████▋ | 732/844 [10:10<01:33,  1.20it/s]

batch loss: 0.4205


Epoch 5:  87%|████████▋ | 733/844 [10:11<01:32,  1.20it/s]

batch loss: 0.4330


Epoch 5:  87%|████████▋ | 734/844 [10:12<01:31,  1.20it/s]

batch loss: 0.4465


Epoch 5:  87%|████████▋ | 735/844 [10:13<01:31,  1.20it/s]

batch loss: 0.4661


Epoch 5:  87%|████████▋ | 736/844 [10:14<01:30,  1.20it/s]

batch loss: 0.5539


Epoch 5:  87%|████████▋ | 737/844 [10:15<01:29,  1.20it/s]

batch loss: 0.4512


Epoch 5:  87%|████████▋ | 738/844 [10:16<01:28,  1.20it/s]

batch loss: 0.4377


Epoch 5:  88%|████████▊ | 739/844 [10:16<01:27,  1.20it/s]

batch loss: 0.4965


Epoch 5:  88%|████████▊ | 740/844 [10:17<01:26,  1.20it/s]

batch loss: 0.4532


Epoch 5:  88%|████████▊ | 741/844 [10:18<01:25,  1.20it/s]

batch loss: 0.4779


Epoch 5:  88%|████████▊ | 742/844 [10:19<01:24,  1.20it/s]

batch loss: 0.5245


Epoch 5:  88%|████████▊ | 743/844 [10:20<01:24,  1.20it/s]

batch loss: 0.4495


Epoch 5:  88%|████████▊ | 744/844 [10:20<01:23,  1.20it/s]

batch loss: 0.3872


Epoch 5:  88%|████████▊ | 745/844 [10:21<01:22,  1.20it/s]

batch loss: 0.5560


Epoch 5:  88%|████████▊ | 746/844 [10:22<01:21,  1.20it/s]

batch loss: 0.4157


Epoch 5:  89%|████████▊ | 747/844 [10:23<01:20,  1.20it/s]

batch loss: 0.4300


Epoch 5:  89%|████████▊ | 748/844 [10:24<01:19,  1.20it/s]

batch loss: 0.4056


Epoch 5:  89%|████████▊ | 749/844 [10:25<01:19,  1.20it/s]

batch loss: 0.4216


Epoch 5:  89%|████████▉ | 750/844 [10:25<01:18,  1.20it/s]

batch loss: 0.4383


Epoch 5:  89%|████████▉ | 751/844 [10:26<01:17,  1.20it/s]

batch loss: 0.4987


Epoch 5:  89%|████████▉ | 752/844 [10:27<01:16,  1.20it/s]

batch loss: 0.4798


Epoch 5:  89%|████████▉ | 753/844 [10:28<01:15,  1.20it/s]

batch loss: 0.4288


Epoch 5:  89%|████████▉ | 754/844 [10:29<01:15,  1.20it/s]

batch loss: 0.4500


Epoch 5:  89%|████████▉ | 755/844 [10:30<01:14,  1.20it/s]

batch loss: 0.3517


Epoch 5:  90%|████████▉ | 756/844 [10:30<01:13,  1.20it/s]

batch loss: 0.5068


Epoch 5:  90%|████████▉ | 757/844 [10:31<01:12,  1.20it/s]

batch loss: 0.4343


Epoch 5:  90%|████████▉ | 758/844 [10:32<01:11,  1.20it/s]

batch loss: 0.4549


Epoch 5:  90%|████████▉ | 759/844 [10:33<01:10,  1.20it/s]

batch loss: 0.4645


Epoch 5:  90%|█████████ | 760/844 [10:34<01:10,  1.20it/s]

batch loss: 0.5162


Epoch 5:  90%|█████████ | 761/844 [10:35<01:09,  1.20it/s]

batch loss: 0.4685


Epoch 5:  90%|█████████ | 762/844 [10:35<01:08,  1.20it/s]

batch loss: 0.4845


Epoch 5:  90%|█████████ | 763/844 [10:36<01:07,  1.20it/s]

batch loss: 0.5581


Epoch 5:  91%|█████████ | 764/844 [10:37<01:06,  1.20it/s]

batch loss: 0.4281


Epoch 5:  91%|█████████ | 765/844 [10:38<01:06,  1.20it/s]

batch loss: 0.3609


Epoch 5:  91%|█████████ | 766/844 [10:39<01:05,  1.20it/s]

batch loss: 0.5331


Epoch 5:  91%|█████████ | 767/844 [10:40<01:04,  1.20it/s]

batch loss: 0.4530


Epoch 5:  91%|█████████ | 768/844 [10:41<01:03,  1.20it/s]

batch loss: 0.4977


Epoch 5:  91%|█████████ | 769/844 [10:41<01:02,  1.20it/s]

batch loss: 0.4395


Epoch 5:  91%|█████████ | 770/844 [10:42<01:01,  1.20it/s]

batch loss: 0.4789


Epoch 5:  91%|█████████▏| 771/844 [10:43<01:00,  1.20it/s]

batch loss: 0.4390


Epoch 5:  91%|█████████▏| 772/844 [10:44<01:00,  1.20it/s]

batch loss: 0.4497


Epoch 5:  92%|█████████▏| 773/844 [10:45<00:59,  1.20it/s]

batch loss: 0.4457


Epoch 5:  92%|█████████▏| 774/844 [10:46<00:58,  1.20it/s]

batch loss: 0.4514


Epoch 5:  92%|█████████▏| 775/844 [10:46<00:57,  1.20it/s]

batch loss: 0.4243


Epoch 5:  92%|█████████▏| 776/844 [10:47<00:56,  1.20it/s]

batch loss: 0.4447


Epoch 5:  92%|█████████▏| 777/844 [10:48<00:55,  1.20it/s]

batch loss: 0.4216


Epoch 5:  92%|█████████▏| 778/844 [10:49<00:54,  1.20it/s]

batch loss: 0.5708


Epoch 5:  92%|█████████▏| 779/844 [10:50<00:54,  1.20it/s]

batch loss: 0.4786


Epoch 5:  92%|█████████▏| 780/844 [10:51<00:53,  1.20it/s]

batch loss: 0.5085


Epoch 5:  93%|█████████▎| 781/844 [10:51<00:52,  1.20it/s]

batch loss: 0.5480


Epoch 5:  93%|█████████▎| 782/844 [10:52<00:51,  1.20it/s]

batch loss: 0.3789


Epoch 5:  93%|█████████▎| 783/844 [10:53<00:50,  1.20it/s]

batch loss: 0.4012


Epoch 5:  93%|█████████▎| 784/844 [10:54<00:50,  1.20it/s]

batch loss: 0.4785


Epoch 5:  93%|█████████▎| 785/844 [10:55<00:49,  1.19it/s]

batch loss: 0.4813


Epoch 5:  93%|█████████▎| 786/844 [10:56<00:48,  1.20it/s]

batch loss: 0.5009


Epoch 5:  93%|█████████▎| 787/844 [10:56<00:47,  1.20it/s]

batch loss: 0.4659


Epoch 5:  93%|█████████▎| 788/844 [10:57<00:46,  1.20it/s]

batch loss: 0.4372


Epoch 5:  93%|█████████▎| 789/844 [10:58<00:45,  1.20it/s]

batch loss: 0.5134


Epoch 5:  94%|█████████▎| 790/844 [10:59<00:45,  1.20it/s]

batch loss: 0.5482


Epoch 5:  94%|█████████▎| 791/844 [11:00<00:44,  1.20it/s]

batch loss: 0.4444


Epoch 5:  94%|█████████▍| 792/844 [11:01<00:43,  1.19it/s]

batch loss: 0.3996


Epoch 5:  94%|█████████▍| 793/844 [11:01<00:42,  1.19it/s]

batch loss: 0.4024


Epoch 5:  94%|█████████▍| 794/844 [11:02<00:41,  1.20it/s]

batch loss: 0.4956


Epoch 5:  94%|█████████▍| 795/844 [11:03<00:40,  1.20it/s]

batch loss: 0.5172


Epoch 5:  94%|█████████▍| 796/844 [11:04<00:40,  1.20it/s]

batch loss: 0.5024


Epoch 5:  94%|█████████▍| 797/844 [11:05<00:39,  1.20it/s]

batch loss: 0.4297


Epoch 5:  95%|█████████▍| 798/844 [11:06<00:38,  1.20it/s]

batch loss: 0.4616


Epoch 5:  95%|█████████▍| 799/844 [11:06<00:37,  1.19it/s]

batch loss: 0.5511


Epoch 5:  95%|█████████▍| 800/844 [11:07<00:36,  1.19it/s]

batch loss: 0.4409


Epoch 5:  95%|█████████▍| 801/844 [11:08<00:36,  1.19it/s]

batch loss: 0.4501


Epoch 5:  95%|█████████▌| 802/844 [11:09<00:35,  1.19it/s]

batch loss: 0.4962


Epoch 5:  95%|█████████▌| 803/844 [11:10<00:34,  1.19it/s]

batch loss: 0.3978


Epoch 5:  95%|█████████▌| 804/844 [11:11<00:33,  1.19it/s]

batch loss: 0.4739


Epoch 5:  95%|█████████▌| 805/844 [11:11<00:32,  1.19it/s]

batch loss: 0.3447


Epoch 5:  95%|█████████▌| 806/844 [11:12<00:31,  1.20it/s]

batch loss: 0.4346


Epoch 5:  96%|█████████▌| 807/844 [11:13<00:30,  1.19it/s]

batch loss: 0.4313


Epoch 5:  96%|█████████▌| 808/844 [11:14<00:30,  1.19it/s]

batch loss: 0.4487


Epoch 5:  96%|█████████▌| 809/844 [11:15<00:29,  1.19it/s]

batch loss: 0.4569


Epoch 5:  96%|█████████▌| 810/844 [11:16<00:28,  1.20it/s]

batch loss: 0.4507


Epoch 5:  96%|█████████▌| 811/844 [11:16<00:27,  1.20it/s]

batch loss: 0.4770


Epoch 5:  96%|█████████▌| 812/844 [11:17<00:26,  1.20it/s]

batch loss: 0.4450


Epoch 5:  96%|█████████▋| 813/844 [11:18<00:25,  1.20it/s]

batch loss: 0.4782


Epoch 5:  96%|█████████▋| 814/844 [11:19<00:25,  1.19it/s]

batch loss: 0.4391


Epoch 5:  97%|█████████▋| 815/844 [11:20<00:24,  1.19it/s]

batch loss: 0.4242


Epoch 5:  97%|█████████▋| 816/844 [11:21<00:23,  1.19it/s]

batch loss: 0.4932


Epoch 5:  97%|█████████▋| 817/844 [11:21<00:22,  1.20it/s]

batch loss: 0.4819


Epoch 5:  97%|█████████▋| 818/844 [11:22<00:21,  1.20it/s]

batch loss: 0.4545


Epoch 5:  97%|█████████▋| 819/844 [11:23<00:20,  1.20it/s]

batch loss: 0.3892


Epoch 5:  97%|█████████▋| 820/844 [11:24<00:20,  1.20it/s]

batch loss: 0.4094


Epoch 5:  97%|█████████▋| 821/844 [11:25<00:19,  1.20it/s]

batch loss: 0.5285


Epoch 5:  97%|█████████▋| 822/844 [11:26<00:18,  1.20it/s]

batch loss: 0.4783


Epoch 5:  98%|█████████▊| 823/844 [11:26<00:17,  1.20it/s]

batch loss: 0.4305


Epoch 5:  98%|█████████▊| 824/844 [11:27<00:16,  1.20it/s]

batch loss: 0.4688


Epoch 5:  98%|█████████▊| 825/844 [11:28<00:15,  1.20it/s]

batch loss: 0.4808


Epoch 5:  98%|█████████▊| 826/844 [11:29<00:14,  1.20it/s]

batch loss: 0.4495


Epoch 5:  98%|█████████▊| 827/844 [11:30<00:14,  1.20it/s]

batch loss: 0.4951


Epoch 5:  98%|█████████▊| 828/844 [11:31<00:13,  1.20it/s]

batch loss: 0.4498


Epoch 5:  98%|█████████▊| 829/844 [11:31<00:12,  1.20it/s]

batch loss: 0.4654


Epoch 5:  98%|█████████▊| 830/844 [11:32<00:11,  1.20it/s]

batch loss: 0.4458


Epoch 5:  98%|█████████▊| 831/844 [11:33<00:10,  1.20it/s]

batch loss: 0.3805


Epoch 5:  99%|█████████▊| 832/844 [11:34<00:09,  1.20it/s]

batch loss: 0.4467


Epoch 5:  99%|█████████▊| 833/844 [11:35<00:09,  1.20it/s]

batch loss: 0.5410


Epoch 5:  99%|█████████▉| 834/844 [11:36<00:08,  1.20it/s]

batch loss: 0.4513


Epoch 5:  99%|█████████▉| 835/844 [11:36<00:07,  1.20it/s]

batch loss: 0.4054


Epoch 5:  99%|█████████▉| 836/844 [11:37<00:06,  1.20it/s]

batch loss: 0.4171


Epoch 5:  99%|█████████▉| 837/844 [11:38<00:05,  1.20it/s]

batch loss: 0.4746


Epoch 5:  99%|█████████▉| 838/844 [11:39<00:05,  1.20it/s]

batch loss: 0.4724


Epoch 5:  99%|█████████▉| 839/844 [11:40<00:04,  1.19it/s]

batch loss: 0.4437


Epoch 5: 100%|█████████▉| 840/844 [11:41<00:03,  1.19it/s]

batch loss: 0.4218


Epoch 5: 100%|█████████▉| 841/844 [11:41<00:02,  1.19it/s]

batch loss: 0.4006


Epoch 5: 100%|█████████▉| 842/844 [11:42<00:01,  1.20it/s]

batch loss: 0.4406


Epoch 5: 100%|█████████▉| 843/844 [11:43<00:00,  1.20it/s]

batch loss: 0.4593


Epoch 5: 100%|██████████| 844/844 [11:44<00:00,  1.20it/s]

batch loss: 0.4174
--------------------------------------------------------------------------------
Epoch 5 - Average Loss: 0.4567





In [None]:
# Save your trained model
torch.save(model.state_dict(), 'xlmroberta_gru_model.pth')

In [None]:
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}"):
        input_ids, attention_mask, labels = batch
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        logits = model(input_ids, attention_mask)
        loss = criterion(logits, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Optional gradient clipping
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()
        print(f"batch loss: {loss.item():.4f}")

    average_loss = total_loss / len(train_dataloader)
    print('-'*80)
    print(f"Epoch {epoch + 1} - Average Loss: {average_loss:.4f}")

# Save your trained model
torch.save(model.state_dict(), 'xlmroberta_gru_model.pth')


In [None]:
# Define optimizer and loss function
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        logits = model(input_ids, attention_mask).logits
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

    print("Epoch:", epoch, "loss:", loss.item())




Epoch: 0 loss: 0.45309561491012573
Epoch: 1 loss: 0.5078866481781006
Epoch: 2 loss: 0.4640763998031616


In [None]:
model.save_pretrained("emotion_detection_model")

## Evaluation

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

model.eval()
with torch.no_grad():
    all_true_labels = []
    all_predicted_labels = []

    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        logits = model(input_ids, attention_mask)

        # Apply sigmoid activation to logits to get predicted probabilities
        predicted_probs = torch.sigmoid(logits)

        # Apply a threshold (e.g., 0.5) to convert probabilities to binary predictions
        threshold = 0.5
        predicted_labels = (predicted_probs > threshold).to(torch.float32)

        # Convert labels and predictions to numpy arrays
        true_labels = labels.cpu().numpy()
        predicted_labels = predicted_labels.cpu().numpy()

        # Append the true and predicted labels to the lists
        all_true_labels.extend(true_labels)
        all_predicted_labels.extend(predicted_labels)


In [None]:
true_labels = all_true_labels
predicted_labels = all_predicted_labels

In [None]:
import numpy as np

In [None]:
# Calculate accuracy for each label
label_accuracies = []
for i in range(len(true_labels)):
    label_accuracy = accuracy_score(true_labels[i], predicted_labels[i])
    label_accuracies.append(label_accuracy)

# Calculate macro F1 score for each label and take the average
label_macro_f1_scores = []
for i in range(len(true_labels)):
    label_macro_f1 = f1_score(true_labels[i], predicted_labels[i], average='macro')
    label_macro_f1_scores.append(label_macro_f1)

# Calculate the average accuracy and macro F1 score
average_accuracy = np.mean(label_accuracies)
average_macro_f1 = np.mean(label_macro_f1_scores)

print("Average Accuracy:", average_accuracy)
print("Average Macro F1 Score:", average_macro_f1)

Average Accuracy: 0.8223333333333334
Average Macro F1 Score: 0.6198228956228956


## Test and Save Model Chekpoints & Predictions

In [None]:
test_data = pd.read_csv('3. data_emotion_without_label.csv')

In [None]:
test_data.head()

Unnamed: 0,local_id,tweet
0,973588328225411072,لنگ پولیس ۱ برق شیراز دوبی ۰\nخداییش تیم...
1,983807604982996995,دوستان بارسایی. \nحالا بشینید و فوتبال خوب تما...
2,1378698455305060353,والا ۱۳ روز #عید که هیچی با این شرایط یک سال ا...
3,1380360990793953282,بدبختی ماگناه #بیگانه نبود\nپیوندمن وشما #صمیم...
4,1382562339258441728,تصور کن انقدر #عشق بدی تاهمه چیز اطرافت شاخ و ...


In [None]:
model = model.to('cpu')

In [None]:
# Tokenize and predict emotions for each tweet
results = []

for j in range(len(test_data)):
    data_point = test_data.iloc[j]
    text = data_point["tweet"]
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)

    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.sigmoid(outputs)

    # Find the emotion with the highest predicted value
    primary_emotion_index = torch.argmax(probabilities)
    primary_emotion = targets[primary_emotion_index]

    # Normalize and apply threshold
    threshold = 0.15
    predictions = (probabilities > threshold).cpu().numpy().tolist()[0]

    # Create a dictionary with the required information
    result = {
        "local_id": data_point["local_id"],
        "tweet": text,
        "primary_emotion": primary_emotion,
    }

    # Add emotion predictions to the dictionary
    for i, emotion in enumerate(targets):
        result[emotion] = predictions[i]

    results.append(result)

# Create a DataFrame
result_df = pd.DataFrame(results)


In [None]:
targets = ["Anger", "Fear", "Happiness", "Hatred", "Sadness", "Wonder"]

In [None]:
result_df.head()

Unnamed: 0,local_id,tweet,primary_emotion,Anger,Fear,Happiness,Hatred,Sadness,Wonder
0,973588328225411072,لنگ پولیس ۱ برق شیراز دوبی ۰\nخداییش تیم...,Happiness,False,False,True,False,False,False
1,983807604982996995,دوستان بارسایی. \nحالا بشینید و فوتبال خوب تما...,Happiness,False,False,True,False,False,True
2,1378698455305060353,والا ۱۳ روز #عید که هیچی با این شرایط یک سال ا...,Sadness,False,False,True,False,True,False
3,1380360990793953282,بدبختی ماگناه #بیگانه نبود\nپیوندمن وشما #صمیم...,Sadness,True,True,False,True,True,False
4,1382562339258441728,تصور کن انقدر #عشق بدی تاهمه چیز اطرافت شاخ و ...,Happiness,False,False,True,False,True,False


In [None]:
result_df.to_csv('resulr.csv')

In [None]:
# Mapping from primary emotions
primary_emotion_mapping = {"Anger":'anger', "Sadness":'sadness', "Wonder":'surprise', "Happiness":'happiness', "Fear":'fear', "Hatred":'disgust', "Other":'other'}

# Tokenize and predict emotions for each tweet
results = []

for j in range(len(test_data)):
    data_point = test_data.iloc[j]
    text = data_point["tweet"]
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.sigmoid(outputs)

    # Find the emotion with the highest predicted value
    primary_emotion_index = torch.argmax(probabilities)
    primary_emotion = primary_emotion_mapping[targets[primary_emotion_index]]

    # Normalize and apply threshold
    threshold = 0.15
    predictions = (probabilities > threshold).cpu().numpy().tolist()[0]

    # Create a dictionary with the required information
    result = {
        "local_id": data_point["local_id"],
        "tweet": text,
        "primary_emotion": primary_emotion,
    }

    # Add emotion predictions to the dictionary
    for i, emotion in enumerate(targets):
        # Convert True/False to 1/0
        result[emotion] = int(predictions[i])

    # If all predicted scores are 0, set 'other' as primary emotion
    if all(score == 0 for score in predictions):
        result["primary_emotion"] = "Other"

    results.append(result)

# Create a DataFrame
result_df = pd.DataFrame(results)

# Print the DataFrame
print(result_df)


                local_id                                              tweet  \
0     973588328225411072  لنگ پولیس  ۱      برق شیراز دوبی ۰\nخداییش تیم...   
1     983807604982996995  دوستان بارسایی. \nحالا بشینید و فوتبال خوب تما...   
2    1378698455305060353  والا ۱۳ روز #عید که هیچی با این شرایط یک سال ا...   
3    1380360990793953282  بدبختی ماگناه #بیگانه نبود\nپیوندمن وشما #صمیم...   
4    1382562339258441728  تصور کن انقدر #عشق بدی تاهمه چیز اطرافت شاخ و ...   
..                   ...                                                ...   
495  1644747933890539521  <@USERNAME> با توجه به علاقه شدید جامعه سلبرید...   
496  1644749245377904640                    یک همدم باوفا ندیدم جز درد ...    
497  1644758722047266817  جناب،#رضاکیانیان اگربدنبال قاتل احتمالی مرحوم،...   
498  1644768661633396736         مرا به میکده بَر،\n در خُمِ شراب انداز...    
499  1644770853035339780  خواجه چقدر غم‌انگیز فرمود:\n\nنسیمِ زلفِ تو گر...   

    primary_emotion  Anger  Fear  Happiness  Hatred

In [None]:
result_df.head()

Unnamed: 0,local_id,tweet,primary_emotion,Anger,Fear,Happiness,Hatred,Sadness,Wonder
0,973588328225411072,لنگ پولیس ۱ برق شیراز دوبی ۰\nخداییش تیم...,happiness,0,0,1,0,0,0
1,983807604982996995,دوستان بارسایی. \nحالا بشینید و فوتبال خوب تما...,happiness,0,0,1,0,0,1
2,1378698455305060353,والا ۱۳ روز #عید که هیچی با این شرایط یک سال ا...,sadness,0,0,1,0,1,0
3,1380360990793953282,بدبختی ماگناه #بیگانه نبود\nپیوندمن وشما #صمیم...,sadness,1,1,0,1,1,0
4,1382562339258441728,تصور کن انقدر #عشق بدی تاهمه چیز اطرافت شاخ و ...,happiness,0,0,1,0,1,0


In [None]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   local_id         500 non-null    int64 
 1   tweet            500 non-null    object
 2   primary_emotion  500 non-null    object
 3   Anger            500 non-null    int64 
 4   Fear             500 non-null    int64 
 5   Happiness        500 non-null    int64 
 6   Hatred           500 non-null    int64 
 7   Sadness          500 non-null    int64 
 8   Wonder           500 non-null    int64 
dtypes: int64(7), object(2)
memory usage: 35.3+ KB


In [None]:
# Replace 'Other' with 'other' in the 'primary_emotion' column
result_df['primary_emotion'] = result_df['primary_emotion'].replace({'Other': 'other'})

result_df.head()

Unnamed: 0,local_id,tweet,primary_emotion,Anger,Fear,Happiness,Hatred,Sadness,Wonder
0,973588328225411072,لنگ پولیس ۱ برق شیراز دوبی ۰\nخداییش تیم...,happiness,0,0,1,0,0,0
1,983807604982996995,دوستان بارسایی. \nحالا بشینید و فوتبال خوب تما...,happiness,0,0,1,0,0,1
2,1378698455305060353,والا ۱۳ روز #عید که هیچی با این شرایط یک سال ا...,sadness,0,0,1,0,1,0
3,1380360990793953282,بدبختی ماگناه #بیگانه نبود\nپیوندمن وشما #صمیم...,sadness,1,1,0,1,1,0
4,1382562339258441728,تصور کن انقدر #عشق بدی تاهمه چیز اطرافت شاخ و ...,happiness,0,0,1,0,1,0


In [None]:
# Rename columns
result_df = result_df.rename(columns={"Anger":'anger', "Sadness":'sadness', "Wonder":'surprise', "Happiness":'happiness', "Fear":'fear', "Hatred":'disgust'})

# Print the reordered DataFrame
result_df.head()


Unnamed: 0,local_id,tweet,primary_emotion,anger,fear,happiness,disgust,sadness,surprise
0,973588328225411072,لنگ پولیس ۱ برق شیراز دوبی ۰\nخداییش تیم...,happiness,0,0,1,0,0,0
1,983807604982996995,دوستان بارسایی. \nحالا بشینید و فوتبال خوب تما...,happiness,0,0,1,0,0,1
2,1378698455305060353,والا ۱۳ روز #عید که هیچی با این شرایط یک سال ا...,sadness,0,0,1,0,1,0
3,1380360990793953282,بدبختی ماگناه #بیگانه نبود\nپیوندمن وشما #صمیم...,sadness,1,1,0,1,1,0
4,1382562339258441728,تصور کن انقدر #عشق بدی تاهمه چیز اطرافت شاخ و ...,happiness,0,0,1,0,1,0


In [None]:
cols = ['local_id', 'tweet', 'primary_emotion', 'anger', 'disgust', 'fear', 'sadness', 'happiness', 'surprise']

In [None]:
# Reorder the columns
result_df1 = result_df[cols]

result_df1.head()

Unnamed: 0,local_id,tweet,primary_emotion,anger,disgust,fear,sadness,happiness,surprise
0,973588328225411072,لنگ پولیس ۱ برق شیراز دوبی ۰\nخداییش تیم...,happiness,0,0,0,0,1,0
1,983807604982996995,دوستان بارسایی. \nحالا بشینید و فوتبال خوب تما...,happiness,0,0,0,0,1,1
2,1378698455305060353,والا ۱۳ روز #عید که هیچی با این شرایط یک سال ا...,sadness,0,0,0,1,1,0
3,1380360990793953282,بدبختی ماگناه #بیگانه نبود\nپیوندمن وشما #صمیم...,sadness,1,1,1,1,0,0
4,1382562339258441728,تصور کن انقدر #عشق بدی تاهمه چیز اطرافت شاخ و ...,happiness,0,0,0,1,1,0


In [None]:
result_df1.to_csv('result_final_AE.csv')

In [None]:
PATH = "model.pt"

torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            }, PATH)