In [28]:
from huggingface_hub import login
!pip install datasets
from datasets import load_dataset, Dataset
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import matplotlib.pyplot as plt
import re
from tqdm import tqdm
import numpy as np
from transformers import XLMRobertaModel, XLMRobertaTokenizerFast
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# authentication with huggingface
# create token at https://huggingface.co/settings/tokens (create as read)
# token should be stored locally, so technically login is only needed one time
login()



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [30]:
# # download specific dataset files; entire archive is only ~250 mb
# articles = load_dataset("Cofacts/line-msg-fact-check-tw", "articles") # article contents and some meta info
# article_replies = load_dataset("Cofacts/line-msg-fact-check-tw", "article_replies") # 'join table' for articles and replies with added meta info

# open from csv
articles = pd.read_csv('/content/articles.csv',lineterminator='\n')
article_replies = pd.read_csv('/content/article_replies.csv')
art_rep_df = pd.merge(articles, article_replies, left_on="id", right_on="articleId", how="left")
art_rep_df.head()

Unnamed: 0,id,articleType,status_x,text,normalArticleReplyCount,createdAt_x,updatedAt_x,lastRequestedAt,userIdsha256_x,appId_x,...,articleId,replyId,userIdsha256_y,negativeFeedbackCount,positiveFeedbackCount,replyType,appId_y,status_y,createdAt_y,updatedAt_y
0,14h550ymn3m3u,TEXT,NORMAL,BA.5病毒引發的不只「縮短潛伏期，增加R0值」，還有易使人「再感染（Reinfecion）...,1.0,2022-09-05T05:26:33.360Z,2022-09-05T05:26:33.360Z,2022-09-05T05:26:33.381Z,3753ca22ca96f5fc1f13e62291f28e405d56b1a976b21d...,RUMORS_LINE_BOT,...,14h550ymn3m3u,6oo8DYMBv5it-Cx_8405,6892d0026181e95d034bf8781025afbf395d57cc88ac05...,0.0,3.0,RUMOR,WEBSITE,NORMAL,2022-09-05T10:40:41.789Z,2022-09-05T10:40:41.789Z
1,326xjpkjbf01i,TEXT,NORMAL,https://youtu.be/xK9NzL3PkdE,0.0,2022-09-08T12:34:27.602Z,2022-09-08T12:34:27.602Z,2022-09-08T12:34:27.634Z,243b5897c14f02fb5b92a9e4f4cc39d5fb84ff16173add...,RUMORS_LINE_BOT,...,,,,,,,,,,
2,oupi0eu9aocr,TEXT,NORMAL,被取消的航班\n\n2018年的一天，我從洛杉磯回國，乘坐凌晨一點的國航航班，馬上要登機了，...,1.0,2021-11-13T16:36:56.548Z,2021-11-13T16:36:56.548Z,2022-04-27T08:52:17.115Z,4a266a6fdefc88e59eef644402e97c7f4c8196becca686...,RUMORS_LINE_BOT,...,oupi0eu9aocr,MYotCYMBv5it-Cx_gIpH,fc9652aa205dab75e19ecff420945e95aa3909a54b0d9a...,0.0,1.0,OPINIONATED,WEBSITE,NORMAL,2022-09-04T15:45:20.464Z,2022-09-04T15:45:20.464Z
3,my5ep5z69tql,TEXT,NORMAL,你好👱🏻‍♀️\n\n簡單跟你說一下公司工作內容：\n我們是做購物系統訂單的工作，主要就是增...,1.0,2022-08-11T20:53:15.644Z,2022-08-11T20:53:15.644Z,2022-09-07T11:02:00.344Z,8a8a319bea0b3bff580d8507ae6470ba37e87791171886...,RUMORS_LINE_BOT,...,my5ep5z69tql,y22DlmsBFV14knB4ErTJ,d16417c7ce4ab67ac5a7901ce62621445db3e06da5da6f...,0.0,1.0,RUMOR,WEBSITE,NORMAL,2022-09-07T22:59:48.921Z,2022-09-07T22:59:48.921Z
4,3dnh713ikpf3c,TEXT,NORMAL,先和你簡單介紹\n我們誠徵線上打工小幫手\n工作內容有：聽寫打字、頁面排版、商品評論刷流量、...,1.0,2022-09-07T10:10:41.742Z,2022-09-07T10:10:41.742Z,2022-09-07T10:10:41.761Z,47e90b2538a5615b323f50571b77d91f7f105e55684aa1...,RUMORS_LINE_BOT,...,3dnh713ikpf3c,H8Hz4WsBqwaEkHKwtyFm,d16417c7ce4ab67ac5a7901ce62621445db3e06da5da6f...,0.0,1.0,RUMOR,WEBSITE,NORMAL,2022-09-07T23:00:19.207Z,2022-09-07T23:00:19.207Z


In [31]:
def get_max_length(texts: list):
    max_length = 0
    for t in texts:
        if t:
            max_length = max(max_length, len(t))
    return max_length

def inspect_dist(col: list, threshold = 0):
    if threshold == 0:
        plt.hist(col, bins=100)
        plt.show()
    else:
        plt.hist(col[col <= threshold], bins=100)
        plt.show()
        print(f"Remaining datapoints: {len(col[col <= threshold]) / len(col)}")

def remove_url(text: str):
    text = re.sub(r"https?://(?:www\.)?\w+(?:\.\w+)+(?:/\S*)?", "", text)
    if len(text) == 0:
        text = pd.NA
    return text

In [32]:
no_nans = art_rep_df.dropna(subset=["text", "replyType"])
filter_articles = no_nans[no_nans["replyType"] != "NOT_ARTICLE"]
no_urls = filter_articles.copy()
no_urls["text"] = no_urls["text"].apply(remove_url)
no_urls = no_urls[no_urls['text'] != "<NA>"]
#no_urls = no_urls.dropna(subset=["text"])
no_urls.head()

Unnamed: 0,id,articleType,status_x,text,normalArticleReplyCount,createdAt_x,updatedAt_x,lastRequestedAt,userIdsha256_x,appId_x,...,articleId,replyId,userIdsha256_y,negativeFeedbackCount,positiveFeedbackCount,replyType,appId_y,status_y,createdAt_y,updatedAt_y
0,14h550ymn3m3u,TEXT,NORMAL,BA.5病毒引發的不只「縮短潛伏期，增加R0值」，還有易使人「再感染（Reinfecion）...,1.0,2022-09-05T05:26:33.360Z,2022-09-05T05:26:33.360Z,2022-09-05T05:26:33.381Z,3753ca22ca96f5fc1f13e62291f28e405d56b1a976b21d...,RUMORS_LINE_BOT,...,14h550ymn3m3u,6oo8DYMBv5it-Cx_8405,6892d0026181e95d034bf8781025afbf395d57cc88ac05...,0.0,3.0,RUMOR,WEBSITE,NORMAL,2022-09-05T10:40:41.789Z,2022-09-05T10:40:41.789Z
2,oupi0eu9aocr,TEXT,NORMAL,被取消的航班\n\n2018年的一天，我從洛杉磯回國，乘坐凌晨一點的國航航班，馬上要登機了，...,1.0,2021-11-13T16:36:56.548Z,2021-11-13T16:36:56.548Z,2022-04-27T08:52:17.115Z,4a266a6fdefc88e59eef644402e97c7f4c8196becca686...,RUMORS_LINE_BOT,...,oupi0eu9aocr,MYotCYMBv5it-Cx_gIpH,fc9652aa205dab75e19ecff420945e95aa3909a54b0d9a...,0.0,1.0,OPINIONATED,WEBSITE,NORMAL,2022-09-04T15:45:20.464Z,2022-09-04T15:45:20.464Z
3,my5ep5z69tql,TEXT,NORMAL,你好👱🏻‍♀️\n\n簡單跟你說一下公司工作內容：\n我們是做購物系統訂單的工作，主要就是增...,1.0,2022-08-11T20:53:15.644Z,2022-08-11T20:53:15.644Z,2022-09-07T11:02:00.344Z,8a8a319bea0b3bff580d8507ae6470ba37e87791171886...,RUMORS_LINE_BOT,...,my5ep5z69tql,y22DlmsBFV14knB4ErTJ,d16417c7ce4ab67ac5a7901ce62621445db3e06da5da6f...,0.0,1.0,RUMOR,WEBSITE,NORMAL,2022-09-07T22:59:48.921Z,2022-09-07T22:59:48.921Z
4,3dnh713ikpf3c,TEXT,NORMAL,先和你簡單介紹\n我們誠徵線上打工小幫手\n工作內容有：聽寫打字、頁面排版、商品評論刷流量、...,1.0,2022-09-07T10:10:41.742Z,2022-09-07T10:10:41.742Z,2022-09-07T10:10:41.761Z,47e90b2538a5615b323f50571b77d91f7f105e55684aa1...,RUMORS_LINE_BOT,...,3dnh713ikpf3c,H8Hz4WsBqwaEkHKwtyFm,d16417c7ce4ab67ac5a7901ce62621445db3e06da5da6f...,0.0,1.0,RUMOR,WEBSITE,NORMAL,2022-09-07T23:00:19.207Z,2022-09-07T23:00:19.207Z
5,2m9drd702d8l1,TEXT,NORMAL,【樂天貸款】尊敬的用戶，您的借款額度已提升到50萬，當天領取當天到賬，及時添加專員提現LIN...,1.0,2022-09-07T11:56:37.690Z,2022-09-07T11:56:37.690Z,2022-09-07T11:56:37.713Z,c8d02860e8fdeea921585710dba8e3eb8da914b57f3ad4...,RUMORS_LINE_BOT,...,2m9drd702d8l1,moopGoMBv5it-Cx_FZnG,d16417c7ce4ab67ac5a7901ce62621445db3e06da5da6f...,0.0,1.0,RUMOR,WEBSITE,NORMAL,2022-09-07T22:54:03.723Z,2022-09-07T22:54:03.723Z


In [61]:
tokenizer = XLMRobertaTokenizerFast.from_pretrained("xlm-roberta-base")

listed = no_urls[['text','replyType']].dropna().copy()
listed['replyType'] = listed['replyType'].map(lambda x: x.replace("NOT_RUMOR", "0"))
listed['replyType'] = listed['replyType'].map(lambda x: x.replace("OPINIONATED", "1"))
listed['replyType'] = listed['replyType'].map(lambda x: x.replace("RUMOR", "2"))
listed['replyType'] = listed['replyType'].astype(int)
listed['tokenizer'] = [tokenizer(x[:500]) for x in listed['text']]
listed['tokens'] = [torch.tensor(x.input_ids, dtype=torch.long) for x in listed['tokenizer']]
listed['attention'] = [torch.tensor(x.attention_mask, dtype=torch.long) for x in listed['tokenizer']]
listed['replyType'] = listed['replyType'].map(lambda x: torch.tensor(x, dtype=torch.long))
#padded = torch.nn.utils.rnn.pad_sequence(listed['text'], batch_first=True)
padded_tokens = torch.nn.utils.rnn.pad_sequence([i.flip(dims=[0]) for i in listed['tokens']], batch_first=True).flip(dims=[0])
padded_attention_mask = torch.nn.utils.rnn.pad_sequence([i.flip(dims=[0]) for i in listed['attention']], batch_first=True).flip(dims=[0])
zipped = list(zip(listed['tokens'], listed['replyType'], listed['attention']))

In [34]:
# def k_fold_split_data(dataset, batch_size, k=5):
#     n = len(dataset)
#     fold_size = n // k
#     folds = []

#     def collate_fn(data):
#         tensors, targets = zip(*data)
#         features = torch.nn.utils.rnn.pad_sequence(tensors, batch_first=True)
#         targets = torch.stack(targets)
#         return features, targets

#     for i in range(k):
#         start = i * fold_size
#         end = (i + 1) * fold_size if i < k - 1 else n
#         folds.append(torch.utils.data.Subset(dataset, range(start, end)))

#     dataloaders = []
#     for i in range(k):
#         validation_dataset = folds[i]
#         train_folds = [folds[j] for j in range(k) if j != i]
#         train_dataset = torch.utils.data.ConcatDataset(train_folds)

#         y = torch.tensor([label for _, label in train_dataset], dtype=torch.long)

#         global class_weights
#         class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y), y=y.numpy())
#         class_weights = torch.tensor(class_weights, dtype=torch.float)

#         train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
#         validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
#         dataloaders.append((train_dataloader, validation_dataloader))

#     return dataloaders

In [76]:
def randomly_split_data(dataset, batch_size):


    #generator = torch.Generator().manual_seed(42)
    #train_dataset, validation_dataset, test_dataset = torch.utils.data.random_split(dataset, [0.8, 0.1, 0.1], generator=generator)
    train_dataset, validation_dataset, test_dataset = torch.utils.data.random_split(dataset, [0.8, 0.1, 0.1])

    y = torch.tensor([label for _, label, _ in dataset], dtype=torch.long)

    def collate_fn(data):
        tensors, targets, attention_mask = zip(*data)
        features = torch.nn.utils.rnn.pad_sequence(tensors, batch_first=True)
        targets = torch.stack(targets)
        attention_mask = torch.nn.utils.rnn.pad_sequence(attention_mask, batch_first=True)
        return features, targets, attention_mask

    global class_weights
    class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y), y=y.numpy())
    class_weights = torch.tensor(class_weights, dtype=torch.float)

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

    return train_dataloader, validation_dataloader, test_dataloader

In [59]:
class TuneableModel(torch.nn.Module):
    def __init__(self, input_size, layer_size, dropout_rate, n_layers):
        super(TuneableModel, self).__init__()
        self.roberta = XLMRobertaModel.from_pretrained("xlm-roberta-base")
        self.requires_grad_(False)
        self.lstm = torch.nn.LSTM(input_size=32, hidden_size=layer_size, bidirectional=False,
                                  num_layers=n_layers, batch_first=True, dropout=dropout_rate, proj_size=3)
        self.downshift = torch.nn.Linear(768, 32)
        # self.output_layer = torch.nn.Linear(1, 3)
        #self.batchnorm = torch.nn.BatchNorm1d(32)
        self.activation = torch.nn.ReLU()
        # self.linear = torch.nn.Linear(layer_size, layer_size)

    def forward(self, x, attention_mask):
        x = self.roberta(x, attention_mask=attention_mask).last_hidden_state
        x = self.downshift(x)
        #x = self.batchnorm(x)
        x = self.activation(x)
        x = self.lstm(x)
        #print(x[0].shape)
        #x = self.activation(x)
        # x = self.linear(x)
        # x = self.batchnorm(x)
        # x = self.activation(x)
        # x = self.output_layer(x[0][:,-1:])
        return x[0][:,-1,:]

In [43]:
def train_test(model, dataloader, optimizer, training="train"):

    loss_function = torch.nn.CrossEntropyLoss(weight=class_weights.to(device))

    if training == "train":
        model.train()
    elif training == "validation":
        model.eval()
    elif training == "test":
        model.eval()
    else:
        raise ValueError("training argument must be either 'train', 'validation' or 'test'")

    cumulative_loss = 0
    prediction_list = []
    label_list = []
    for sample in tqdm(dataloader):
        if training == "train":
            optimizer.zero_grad()
        input, targets, attention_mask = sample[0].to(device).long(), sample[1].to(device), sample[2].to(device)
        output = model(input, attention_mask).to(device)
        loss_value = loss_function(output, targets)
        cumulative_loss += loss_value.item()
        if training == "train":
            print("batch loss:", loss_value.sum().item())
            loss_value.sum().backward()
            optimizer.step()

        predictions = output.to('cpu').detach().numpy().argmax(axis=1)
        target_labels = sample[1]
        prediction_list.extend(predictions)
        label_list.extend(target_labels)
    #f1 = f1_score(label_list, prediction_list)
    accuracy = accuracy_score(label_list, prediction_list)
    #confusion = confusion_matrix(label_list, prediction_list)

    return cumulative_loss, accuracy#, f1, confusion

In [38]:
# Training sample
def evaluate(params, dataset):
    dropout, hidden_size, learning_rate, batch_size, n_hidden = params

    max_epochs = 1000
    max_patience = 5
    seed = 42

    accuracies = []
    f1s = []
    train_dataloader, validation_dataloader, test_dataloader = randomly_split_data(dataset, batch_size)
    #dataloaders = k_fold_split_data(dataset, batch_size, k=5)
    # train_dataloader, validation_dataloader = dataloader[0], dataloader[1]
    # test_dataloader = dataloader[1]
    PATH = "model_.pt"
    last_loss = 1000000
    torch.manual_seed(seed)
    input_size = train_dataloader.dataset[0][0].size()[0]
    model = TuneableModel(input_size, hidden_size, dropout, n_hidden)
    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

    for epoch in range(max_epochs):
        # training
        train_loss, train_accuracy = train_test(model, train_dataloader, optimizer, training="train")
        train_loss, train_accuracy = train_loss, round(train_accuracy, 4)
        # validation at end of epoch
        validation_loss, validation_accuracy = train_test(model, validation_dataloader, optimizer, training="validation")
        validation_loss, validation_accuracy = validation_loss, round(validation_accuracy, 4)
        if validation_loss < last_loss:
            last_loss = validation_loss
            current_patience = 0
        else:
            if current_patience == 0:
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': last_loss,
                    }, PATH)
            current_patience += 1
        if current_patience == max_patience:
            break
        if epoch % 1 == 0:
            print(f"Epoch {epoch} validation loss: {validation_loss} validation accuracy: {validation_accuracy*100}%")
    # Testing once patience is reached
    torch.manual_seed(seed)
    model = TuneableModel(input_size, hidden_size, dropout, n_hidden)
    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    test_loss, test_accuracy = train_test(model, test_dataloader, optimizer, training="test")
    test_loss, test_accuracy = test_loss, test_accuracy
    #print(f"Model {i} at epoch {checkpoint['epoch']} test results: accuracy: {test_accuracy*100}% f1: {test_f1}")
    # accuracies.append(test_accuracy)
    # f1s.append(test_f1)
    #print(test_confusion)

    return round(test_accuracy*100, 2)
    # print(f"Average accuracy: {round(np.mean(accuracies), 2)}%")
    # print(f"Average f1: {round(np.mean(f1s), 2)}")


In [77]:
dataset = zipped
params = (0.0, 10, 0.01, 32, 1)
accuracy= evaluate(params, dataset)
print(f"final test accuracy: {accuracy}%")

  0%|          | 1/1677 [00:00<22:18,  1.25it/s]

batch loss: 1.0905479192733765


  0%|          | 2/1677 [00:01<24:19,  1.15it/s]

batch loss: 1.156109094619751


  0%|          | 3/1677 [00:02<25:09,  1.11it/s]

batch loss: 1.1096744537353516


  0%|          | 4/1677 [00:03<23:14,  1.20it/s]

batch loss: 1.116108775138855


  0%|          | 5/1677 [00:04<22:51,  1.22it/s]

batch loss: 1.12571382522583


  0%|          | 6/1677 [00:05<23:55,  1.16it/s]

batch loss: 1.1198641061782837


  0%|          | 7/1677 [00:05<23:33,  1.18it/s]

batch loss: 1.0995190143585205


  0%|          | 8/1677 [00:06<23:24,  1.19it/s]

batch loss: 1.1019444465637207


  1%|          | 9/1677 [00:07<24:02,  1.16it/s]

batch loss: 1.1040771007537842


  1%|          | 10/1677 [00:08<25:00,  1.11it/s]

batch loss: 1.0942195653915405


  1%|          | 11/1677 [00:09<24:29,  1.13it/s]

batch loss: 1.1044460535049438


  1%|          | 12/1677 [00:10<24:19,  1.14it/s]

batch loss: 1.1015571355819702


  1%|          | 13/1677 [00:11<23:40,  1.17it/s]

batch loss: 1.0901952981948853


  1%|          | 14/1677 [00:12<23:40,  1.17it/s]

batch loss: 1.0903078317642212


  1%|          | 15/1677 [00:12<23:12,  1.19it/s]

batch loss: 1.0958393812179565


  1%|          | 16/1677 [00:13<23:27,  1.18it/s]

batch loss: 1.1010901927947998


  1%|          | 17/1677 [00:14<23:01,  1.20it/s]

batch loss: 1.0953103303909302


  1%|          | 18/1677 [00:15<22:22,  1.24it/s]

batch loss: 1.0935395956039429


  1%|          | 19/1677 [00:16<22:52,  1.21it/s]

batch loss: 1.1062893867492676


  1%|          | 20/1677 [00:16<23:08,  1.19it/s]

batch loss: 1.0942920446395874


  1%|▏         | 21/1677 [00:17<23:16,  1.19it/s]

batch loss: 1.1074516773223877


  1%|▏         | 22/1677 [00:18<24:10,  1.14it/s]

batch loss: 1.0674705505371094


  1%|▏         | 23/1677 [00:19<23:57,  1.15it/s]

batch loss: 1.1286016702651978


  1%|▏         | 24/1677 [00:20<23:52,  1.15it/s]

batch loss: 1.120485782623291


  1%|▏         | 25/1677 [00:21<23:35,  1.17it/s]

batch loss: 1.125746250152588


  2%|▏         | 26/1677 [00:22<23:17,  1.18it/s]

batch loss: 1.094120979309082


  2%|▏         | 27/1677 [00:23<23:21,  1.18it/s]

batch loss: 1.1245557069778442


  2%|▏         | 28/1677 [00:23<23:58,  1.15it/s]

batch loss: 1.1135159730911255


  2%|▏         | 29/1677 [00:24<23:08,  1.19it/s]

batch loss: 1.1071712970733643


  2%|▏         | 30/1677 [00:25<23:00,  1.19it/s]

batch loss: 1.1015541553497314


  2%|▏         | 31/1677 [00:26<22:32,  1.22it/s]

batch loss: 1.1030359268188477


  2%|▏         | 32/1677 [00:27<21:59,  1.25it/s]

batch loss: 1.105315923690796


  2%|▏         | 33/1677 [00:27<21:39,  1.27it/s]

batch loss: 1.1020523309707642


  2%|▏         | 34/1677 [00:28<21:17,  1.29it/s]

batch loss: 1.0994243621826172


  2%|▏         | 35/1677 [00:29<21:44,  1.26it/s]

batch loss: 1.0994426012039185


  2%|▏         | 36/1677 [00:30<21:55,  1.25it/s]

batch loss: 1.0992693901062012


  2%|▏         | 37/1677 [00:31<23:34,  1.16it/s]

batch loss: 1.098384141921997


  2%|▏         | 38/1677 [00:32<22:54,  1.19it/s]

batch loss: 1.0990662574768066


  2%|▏         | 39/1677 [00:32<22:15,  1.23it/s]

batch loss: 1.0983353853225708


  2%|▏         | 40/1677 [00:33<22:16,  1.22it/s]

batch loss: 1.098067283630371


  2%|▏         | 41/1677 [00:34<22:08,  1.23it/s]

batch loss: 1.0985313653945923


  3%|▎         | 42/1677 [00:35<21:45,  1.25it/s]

batch loss: 1.0986961126327515


  3%|▎         | 43/1677 [00:35<21:49,  1.25it/s]

batch loss: 1.0981221199035645


  3%|▎         | 44/1677 [00:36<21:30,  1.27it/s]

batch loss: 1.0990310907363892


  3%|▎         | 45/1677 [00:37<20:44,  1.31it/s]

batch loss: 1.0989832878112793


  3%|▎         | 46/1677 [00:38<21:03,  1.29it/s]

batch loss: 1.0984208583831787


  3%|▎         | 47/1677 [00:39<22:06,  1.23it/s]

batch loss: 1.0985664129257202


  3%|▎         | 48/1677 [00:40<22:30,  1.21it/s]

batch loss: 1.0985941886901855


  3%|▎         | 49/1677 [00:40<22:49,  1.19it/s]

batch loss: 1.0987836122512817


  3%|▎         | 50/1677 [00:41<23:04,  1.17it/s]

batch loss: 1.0985534191131592


  3%|▎         | 51/1677 [00:42<23:18,  1.16it/s]

batch loss: 1.0985993146896362


  3%|▎         | 52/1677 [00:43<22:42,  1.19it/s]

batch loss: 1.0989056825637817


  3%|▎         | 53/1677 [00:44<21:58,  1.23it/s]

batch loss: 1.0985409021377563


  3%|▎         | 54/1677 [00:45<22:35,  1.20it/s]

batch loss: 1.0984452962875366


  3%|▎         | 55/1677 [00:45<21:46,  1.24it/s]

batch loss: 1.0985515117645264


  3%|▎         | 56/1677 [00:46<22:37,  1.19it/s]

batch loss: 1.0986671447753906


  3%|▎         | 57/1677 [00:47<22:29,  1.20it/s]

batch loss: 1.098907232284546


  3%|▎         | 58/1677 [00:48<22:39,  1.19it/s]

batch loss: 1.0985915660858154


  4%|▎         | 59/1677 [00:49<21:35,  1.25it/s]

batch loss: 1.0991113185882568


  4%|▎         | 60/1677 [00:49<22:00,  1.22it/s]

batch loss: 1.098613977432251


  4%|▎         | 61/1677 [00:50<21:20,  1.26it/s]

batch loss: 1.098713755607605


  4%|▎         | 62/1677 [00:51<22:09,  1.21it/s]

batch loss: 1.0985532999038696


  4%|▍         | 63/1677 [00:52<21:32,  1.25it/s]

batch loss: 1.098606824874878


  4%|▍         | 64/1677 [00:53<20:56,  1.28it/s]

batch loss: 1.098587155342102


  4%|▍         | 65/1677 [00:53<20:30,  1.31it/s]

batch loss: 1.0986818075180054


  4%|▍         | 66/1677 [00:54<20:10,  1.33it/s]

batch loss: 1.0986828804016113


  4%|▍         | 67/1677 [00:55<19:59,  1.34it/s]

batch loss: 1.0983961820602417


  4%|▍         | 68/1677 [00:56<20:07,  1.33it/s]

batch loss: 1.098570704460144


  4%|▍         | 69/1677 [00:56<20:16,  1.32it/s]

batch loss: 1.0986796617507935


  4%|▍         | 70/1677 [00:57<20:00,  1.34it/s]

batch loss: 1.098629117012024


  4%|▍         | 71/1677 [00:58<19:54,  1.34it/s]

batch loss: 1.0988043546676636


  4%|▍         | 72/1677 [00:58<19:47,  1.35it/s]

batch loss: 1.098717451095581


  4%|▍         | 73/1677 [00:59<19:39,  1.36it/s]

batch loss: 1.0988199710845947


  4%|▍         | 74/1677 [01:00<19:25,  1.38it/s]

batch loss: 1.0986263751983643


  4%|▍         | 75/1677 [01:01<19:35,  1.36it/s]

batch loss: 1.0985639095306396


  5%|▍         | 76/1677 [01:01<19:32,  1.37it/s]

batch loss: 1.0985223054885864


  5%|▍         | 77/1677 [01:02<19:41,  1.35it/s]

batch loss: 1.09857976436615


  5%|▍         | 78/1677 [01:03<19:40,  1.35it/s]

batch loss: 1.098572850227356


  5%|▍         | 79/1677 [01:04<19:35,  1.36it/s]

batch loss: 1.098564863204956


  5%|▍         | 80/1677 [01:04<19:32,  1.36it/s]

batch loss: 1.098554015159607


  5%|▍         | 81/1677 [01:05<19:01,  1.40it/s]

batch loss: 1.0987881422042847


  5%|▍         | 82/1677 [01:06<19:56,  1.33it/s]

batch loss: 1.0986557006835938


  5%|▍         | 83/1677 [01:07<19:58,  1.33it/s]

batch loss: 1.0985467433929443


  5%|▌         | 84/1677 [01:07<20:38,  1.29it/s]

batch loss: 1.0984758138656616


  5%|▌         | 85/1677 [01:08<20:36,  1.29it/s]

batch loss: 1.0985305309295654


  5%|▌         | 86/1677 [01:09<21:08,  1.25it/s]

batch loss: 1.0985820293426514


  5%|▌         | 87/1677 [01:10<20:37,  1.28it/s]

batch loss: 1.0986769199371338


  5%|▌         | 88/1677 [01:11<20:28,  1.29it/s]

batch loss: 1.098738670349121


  5%|▌         | 89/1677 [01:11<20:33,  1.29it/s]

batch loss: 1.0987260341644287


  5%|▌         | 90/1677 [01:12<20:35,  1.28it/s]

batch loss: 1.098641276359558


  5%|▌         | 91/1677 [01:13<20:38,  1.28it/s]

batch loss: 1.0987213850021362


  5%|▌         | 92/1677 [01:14<20:39,  1.28it/s]

batch loss: 1.098498821258545


  6%|▌         | 93/1677 [01:14<20:41,  1.28it/s]

batch loss: 1.0986039638519287


  6%|▌         | 94/1677 [01:15<20:21,  1.30it/s]

batch loss: 1.098547101020813


  6%|▌         | 95/1677 [01:16<20:29,  1.29it/s]

batch loss: 1.09862220287323


  6%|▌         | 96/1677 [01:17<20:35,  1.28it/s]

batch loss: 1.0986695289611816


  6%|▌         | 97/1677 [01:18<21:09,  1.24it/s]

batch loss: 1.098613977432251


  6%|▌         | 98/1677 [01:18<21:00,  1.25it/s]

batch loss: 1.09857976436615


  6%|▌         | 99/1677 [01:19<19:32,  1.35it/s]

batch loss: 1.098626971244812


  6%|▌         | 100/1677 [01:20<19:54,  1.32it/s]

batch loss: 1.0986655950546265


  6%|▌         | 101/1677 [01:21<19:50,  1.32it/s]

batch loss: 1.0987184047698975


  6%|▌         | 102/1677 [01:21<20:07,  1.30it/s]

batch loss: 1.0985867977142334


  6%|▌         | 103/1677 [01:22<20:13,  1.30it/s]

batch loss: 1.0985994338989258


  6%|▌         | 104/1677 [01:23<20:57,  1.25it/s]

batch loss: 1.0986207723617554


  6%|▋         | 105/1677 [01:24<20:34,  1.27it/s]

batch loss: 1.0986104011535645


  6%|▋         | 106/1677 [01:25<20:39,  1.27it/s]

batch loss: 1.0986472368240356


  6%|▋         | 107/1677 [01:26<21:44,  1.20it/s]

batch loss: 1.0986193418502808


  6%|▋         | 108/1677 [01:26<21:25,  1.22it/s]

batch loss: 1.0985606908798218


  6%|▋         | 109/1677 [01:27<21:47,  1.20it/s]

batch loss: 1.0986098051071167


  7%|▋         | 110/1677 [01:28<21:10,  1.23it/s]

batch loss: 1.0986731052398682


  7%|▋         | 111/1677 [01:29<21:11,  1.23it/s]

batch loss: 1.0986366271972656


  7%|▋         | 112/1677 [01:30<21:57,  1.19it/s]

batch loss: 1.0985934734344482


  7%|▋         | 113/1677 [01:30<21:22,  1.22it/s]

batch loss: 1.098628282546997


  7%|▋         | 114/1677 [01:31<21:51,  1.19it/s]

batch loss: 1.0985815525054932


  7%|▋         | 115/1677 [01:32<21:23,  1.22it/s]

batch loss: 1.0986021757125854


  7%|▋         | 116/1677 [01:33<20:46,  1.25it/s]

batch loss: 1.0985878705978394


  7%|▋         | 117/1677 [01:34<21:16,  1.22it/s]

batch loss: 1.0985852479934692


  7%|▋         | 118/1677 [01:35<21:05,  1.23it/s]

batch loss: 1.0986319780349731


  7%|▋         | 119/1677 [01:35<21:28,  1.21it/s]

batch loss: 1.098581075668335


  7%|▋         | 120/1677 [01:36<21:30,  1.21it/s]

batch loss: 1.0985522270202637


  7%|▋         | 121/1677 [01:37<20:43,  1.25it/s]

batch loss: 1.0986316204071045


  7%|▋         | 122/1677 [01:38<21:22,  1.21it/s]

batch loss: 1.0986542701721191


  7%|▋         | 123/1677 [01:39<22:04,  1.17it/s]

batch loss: 1.0986248254776


  7%|▋         | 124/1677 [01:40<21:43,  1.19it/s]

batch loss: 1.0985668897628784


  7%|▋         | 125/1677 [01:40<21:26,  1.21it/s]

batch loss: 1.0986454486846924


  8%|▊         | 126/1677 [01:41<20:42,  1.25it/s]

batch loss: 1.0985695123672485


  8%|▊         | 127/1677 [01:42<20:36,  1.25it/s]

batch loss: 1.0986956357955933


  8%|▊         | 128/1677 [01:43<20:36,  1.25it/s]

batch loss: 1.0986390113830566


  8%|▊         | 129/1677 [01:43<20:43,  1.24it/s]

batch loss: 1.0986673831939697


  8%|▊         | 130/1677 [01:44<20:36,  1.25it/s]

batch loss: 1.0985370874404907


  8%|▊         | 131/1677 [01:45<20:52,  1.23it/s]

batch loss: 1.0985764265060425


  8%|▊         | 132/1677 [01:46<20:41,  1.24it/s]

batch loss: 1.0985482931137085


  8%|▊         | 133/1677 [01:47<20:43,  1.24it/s]

batch loss: 1.0986168384552002


  8%|▊         | 134/1677 [01:48<21:47,  1.18it/s]

batch loss: 1.0986154079437256


  8%|▊         | 135/1677 [01:48<21:44,  1.18it/s]

batch loss: 1.0985904932022095


  8%|▊         | 136/1677 [01:49<21:35,  1.19it/s]

batch loss: 1.0987132787704468


  8%|▊         | 137/1677 [01:50<21:14,  1.21it/s]

batch loss: 1.0985959768295288


  8%|▊         | 138/1677 [01:51<21:13,  1.21it/s]

batch loss: 1.0986037254333496


  8%|▊         | 139/1677 [01:52<21:07,  1.21it/s]

batch loss: 1.0986220836639404


  8%|▊         | 140/1677 [01:53<20:37,  1.24it/s]

batch loss: 1.0985256433486938


  8%|▊         | 141/1677 [01:53<20:34,  1.24it/s]

batch loss: 1.0985722541809082


  8%|▊         | 142/1677 [01:54<21:10,  1.21it/s]

batch loss: 1.0985347032546997


  9%|▊         | 143/1677 [01:55<21:30,  1.19it/s]

batch loss: 1.098708152770996


  9%|▊         | 144/1677 [01:56<20:50,  1.23it/s]

batch loss: 1.0986312627792358


  9%|▊         | 145/1677 [01:57<20:52,  1.22it/s]

batch loss: 1.0986436605453491


  9%|▊         | 146/1677 [01:58<21:37,  1.18it/s]

batch loss: 1.098482608795166


  9%|▉         | 147/1677 [01:58<21:15,  1.20it/s]

batch loss: 1.0987601280212402


  9%|▉         | 148/1677 [01:59<20:23,  1.25it/s]

batch loss: 1.098604440689087


  9%|▉         | 149/1677 [02:00<19:50,  1.28it/s]

batch loss: 1.098603367805481


  9%|▉         | 150/1677 [02:01<19:42,  1.29it/s]

batch loss: 1.098604679107666


  9%|▉         | 151/1677 [02:01<19:27,  1.31it/s]

batch loss: 1.0984888076782227


  9%|▉         | 152/1677 [02:02<19:21,  1.31it/s]

batch loss: 1.0983810424804688


  9%|▉         | 153/1677 [02:03<19:17,  1.32it/s]

batch loss: 1.098862648010254


  9%|▉         | 154/1677 [02:04<19:40,  1.29it/s]

batch loss: 1.098589301109314


  9%|▉         | 155/1677 [02:04<19:12,  1.32it/s]

batch loss: 1.0984424352645874


  9%|▉         | 156/1677 [02:05<19:17,  1.31it/s]

batch loss: 1.0986177921295166


  9%|▉         | 157/1677 [02:06<19:12,  1.32it/s]

batch loss: 1.0985866785049438


  9%|▉         | 158/1677 [02:07<19:25,  1.30it/s]

batch loss: 1.0986206531524658


  9%|▉         | 159/1677 [02:08<20:20,  1.24it/s]

batch loss: 1.0989195108413696


 10%|▉         | 160/1677 [02:08<19:52,  1.27it/s]

batch loss: 1.097816824913025


 10%|▉         | 161/1677 [02:09<19:24,  1.30it/s]

batch loss: 1.098368525505066


 10%|▉         | 162/1677 [02:10<19:41,  1.28it/s]

batch loss: 1.0984256267547607


 10%|▉         | 163/1677 [02:11<19:27,  1.30it/s]

batch loss: 1.0984137058258057


 10%|▉         | 164/1677 [02:11<19:41,  1.28it/s]

batch loss: 1.0989820957183838


 10%|▉         | 165/1677 [02:12<19:48,  1.27it/s]

batch loss: 1.0991952419281006


 10%|▉         | 166/1677 [02:13<19:25,  1.30it/s]

batch loss: 1.098039150238037


 10%|▉         | 167/1677 [02:14<20:04,  1.25it/s]

batch loss: 1.099854588508606


 10%|█         | 168/1677 [02:15<19:58,  1.26it/s]

batch loss: 1.0997463464736938


 10%|█         | 169/1677 [02:15<20:30,  1.23it/s]

batch loss: 1.0973716974258423


 10%|█         | 170/1677 [02:16<19:57,  1.26it/s]

batch loss: 1.0986049175262451


 10%|█         | 171/1677 [02:17<19:58,  1.26it/s]

batch loss: 1.0994043350219727


 10%|█         | 172/1677 [02:18<19:24,  1.29it/s]

batch loss: 1.0994746685028076


 10%|█         | 173/1677 [02:19<20:11,  1.24it/s]

batch loss: 1.0985714197158813


 10%|█         | 174/1677 [02:19<19:32,  1.28it/s]

batch loss: 1.0990896224975586


 10%|█         | 175/1677 [02:20<19:42,  1.27it/s]

batch loss: 1.0986636877059937


 10%|█         | 176/1677 [02:21<19:18,  1.30it/s]

batch loss: 1.098628044128418


 11%|█         | 177/1677 [02:22<19:27,  1.28it/s]

batch loss: 1.0985321998596191


 11%|█         | 178/1677 [02:22<19:34,  1.28it/s]

batch loss: 1.0985767841339111


 11%|█         | 179/1677 [02:23<20:16,  1.23it/s]

batch loss: 1.098570704460144


 11%|█         | 180/1677 [02:24<20:42,  1.21it/s]

batch loss: 1.0985321998596191


 11%|█         | 181/1677 [02:25<21:04,  1.18it/s]

batch loss: 1.0983753204345703


 11%|█         | 182/1677 [02:26<20:15,  1.23it/s]

batch loss: 1.0986909866333008


 11%|█         | 183/1677 [02:27<20:36,  1.21it/s]

batch loss: 1.0984606742858887


 11%|█         | 184/1677 [02:27<19:58,  1.25it/s]

batch loss: 1.0985864400863647


 11%|█         | 185/1677 [02:28<19:34,  1.27it/s]

batch loss: 1.098535180091858


 11%|█         | 186/1677 [02:29<19:38,  1.26it/s]

batch loss: 1.0984488725662231


 11%|█         | 187/1677 [02:30<19:35,  1.27it/s]

batch loss: 1.0987380743026733


 11%|█         | 188/1677 [02:30<19:09,  1.30it/s]

batch loss: 1.098289966583252


 11%|█▏        | 189/1677 [02:31<19:55,  1.24it/s]

batch loss: 1.0988249778747559


 11%|█▏        | 190/1677 [02:32<19:50,  1.25it/s]

batch loss: 1.098645567893982


 11%|█▏        | 191/1677 [02:33<19:21,  1.28it/s]

batch loss: 1.0984843969345093


 11%|█▏        | 192/1677 [02:34<19:07,  1.29it/s]

batch loss: 1.0985767841339111


 12%|█▏        | 193/1677 [02:34<18:33,  1.33it/s]

batch loss: 1.098325490951538


 12%|█▏        | 194/1677 [02:35<19:26,  1.27it/s]

batch loss: 1.0983957052230835


 12%|█▏        | 195/1677 [02:36<19:02,  1.30it/s]

batch loss: 1.0982887744903564


 12%|█▏        | 196/1677 [02:37<18:38,  1.32it/s]

batch loss: 1.097927212715149


 12%|█▏        | 197/1677 [02:37<18:27,  1.34it/s]

batch loss: 1.0979118347167969


 12%|█▏        | 198/1677 [02:38<19:38,  1.26it/s]

batch loss: 1.0998127460479736


 12%|█▏        | 199/1677 [02:39<20:06,  1.22it/s]

batch loss: 1.0965853929519653


 12%|█▏        | 200/1677 [02:40<19:58,  1.23it/s]

batch loss: 1.0972058773040771


 12%|█▏        | 201/1677 [02:41<19:28,  1.26it/s]

batch loss: 1.097211480140686


 12%|█▏        | 202/1677 [02:42<19:30,  1.26it/s]

batch loss: 1.0928837060928345


 12%|█▏        | 203/1677 [02:42<19:07,  1.28it/s]

batch loss: 1.1020647287368774


 12%|█▏        | 204/1677 [02:43<18:47,  1.31it/s]

batch loss: 1.094971776008606


 12%|█▏        | 205/1677 [02:44<18:39,  1.31it/s]

batch loss: 1.0987579822540283


 12%|█▏        | 206/1677 [02:44<18:13,  1.34it/s]

batch loss: 1.097304344177246


 12%|█▏        | 207/1677 [02:45<17:18,  1.42it/s]

batch loss: 1.08660888671875


 12%|█▏        | 208/1677 [02:46<17:59,  1.36it/s]

batch loss: 1.0845494270324707


 12%|█▏        | 209/1677 [02:47<18:18,  1.34it/s]

batch loss: 1.1260470151901245


 13%|█▎        | 210/1677 [02:47<18:23,  1.33it/s]

batch loss: 1.077903389930725


 13%|█▎        | 211/1677 [02:48<19:12,  1.27it/s]

batch loss: 1.0765317678451538


 13%|█▎        | 212/1677 [02:49<18:56,  1.29it/s]

batch loss: 1.1012896299362183


 13%|█▎        | 213/1677 [02:50<18:43,  1.30it/s]

batch loss: 1.084529161453247


 13%|█▎        | 214/1677 [02:51<18:56,  1.29it/s]

batch loss: 1.1034255027770996


 13%|█▎        | 215/1677 [02:51<19:32,  1.25it/s]

batch loss: 1.1131422519683838


 13%|█▎        | 216/1677 [02:52<19:28,  1.25it/s]

batch loss: 1.1506848335266113


 13%|█▎        | 217/1677 [02:53<20:01,  1.22it/s]

batch loss: 1.1346311569213867


 13%|█▎        | 218/1677 [02:54<19:27,  1.25it/s]

batch loss: 1.1201601028442383


 13%|█▎        | 219/1677 [02:55<19:24,  1.25it/s]

batch loss: 1.087493658065796


 13%|█▎        | 220/1677 [02:55<19:06,  1.27it/s]

batch loss: 1.095517873764038


 13%|█▎        | 221/1677 [02:56<19:11,  1.26it/s]

batch loss: 1.1143522262573242


 13%|█▎        | 222/1677 [02:57<18:45,  1.29it/s]

batch loss: 1.0917977094650269


 13%|█▎        | 223/1677 [02:58<18:35,  1.30it/s]

batch loss: 1.1009360551834106


 13%|█▎        | 224/1677 [02:58<18:25,  1.31it/s]

batch loss: 1.0978764295578003


 13%|█▎        | 225/1677 [02:59<19:37,  1.23it/s]

batch loss: 1.0966897010803223


 13%|█▎        | 226/1677 [03:00<19:30,  1.24it/s]

batch loss: 1.1015539169311523


 14%|█▎        | 227/1677 [03:01<18:44,  1.29it/s]

batch loss: 1.0987900495529175


 14%|█▎        | 228/1677 [03:02<18:33,  1.30it/s]

batch loss: 1.1003268957138062


 14%|█▎        | 229/1677 [03:02<19:14,  1.25it/s]

batch loss: 1.100588321685791


 14%|█▎        | 230/1677 [03:03<19:15,  1.25it/s]

batch loss: 1.0944300889968872


 14%|█▍        | 231/1677 [03:04<19:43,  1.22it/s]

batch loss: 1.1011900901794434


 14%|█▍        | 232/1677 [03:05<20:14,  1.19it/s]

batch loss: 1.1030211448669434


 14%|█▍        | 233/1677 [03:06<19:19,  1.25it/s]

batch loss: 1.0940704345703125


 14%|█▍        | 234/1677 [03:07<19:47,  1.21it/s]

batch loss: 1.1056865453720093


 14%|█▍        | 235/1677 [03:07<20:01,  1.20it/s]

batch loss: 1.0987327098846436


 14%|█▍        | 236/1677 [03:08<19:38,  1.22it/s]

batch loss: 1.09694242477417


 14%|█▍        | 237/1677 [03:09<20:03,  1.20it/s]

batch loss: 1.098764419555664


 14%|█▍        | 238/1677 [03:10<19:28,  1.23it/s]

batch loss: 1.0999102592468262


 14%|█▍        | 239/1677 [03:11<20:05,  1.19it/s]

batch loss: 1.0992430448532104


 14%|█▍        | 240/1677 [03:12<19:14,  1.24it/s]

batch loss: 1.1013789176940918


 14%|█▍        | 241/1677 [03:12<19:11,  1.25it/s]

batch loss: 1.1012382507324219


 14%|█▍        | 242/1677 [03:13<19:12,  1.25it/s]

batch loss: 1.0986980199813843


 14%|█▍        | 243/1677 [03:14<18:49,  1.27it/s]

batch loss: 1.0992363691329956


 15%|█▍        | 244/1677 [03:15<18:26,  1.30it/s]

batch loss: 1.0975836515426636


 15%|█▍        | 245/1677 [03:15<18:19,  1.30it/s]

batch loss: 1.0967308282852173


 15%|█▍        | 246/1677 [03:16<18:20,  1.30it/s]

batch loss: 1.101143479347229


 15%|█▍        | 247/1677 [03:17<18:39,  1.28it/s]

batch loss: 1.0994991064071655


 15%|█▍        | 248/1677 [03:18<18:34,  1.28it/s]

batch loss: 1.098724603652954


 15%|█▍        | 249/1677 [03:18<18:07,  1.31it/s]

batch loss: 1.0986703634262085


 15%|█▍        | 250/1677 [03:19<17:56,  1.33it/s]

batch loss: 1.0985043048858643


 15%|█▍        | 251/1677 [03:20<18:53,  1.26it/s]

batch loss: 1.0985255241394043


 15%|█▌        | 252/1677 [03:21<19:22,  1.23it/s]

batch loss: 1.098588466644287


 15%|█▌        | 253/1677 [03:22<19:16,  1.23it/s]

batch loss: 1.0985808372497559


 15%|█▌        | 254/1677 [03:23<19:12,  1.24it/s]

batch loss: 1.0985249280929565


 15%|█▌        | 255/1677 [03:23<18:48,  1.26it/s]

batch loss: 1.0984227657318115


 15%|█▌        | 256/1677 [03:24<18:51,  1.26it/s]

batch loss: 1.098784327507019


 15%|█▌        | 257/1677 [03:25<18:27,  1.28it/s]

batch loss: 1.0986298322677612


 15%|█▌        | 258/1677 [03:26<18:39,  1.27it/s]

batch loss: 1.0988430976867676


 15%|█▌        | 259/1677 [03:26<18:21,  1.29it/s]

batch loss: 1.0987461805343628


 16%|█▌        | 260/1677 [03:27<17:56,  1.32it/s]

batch loss: 1.0986238718032837


 16%|█▌        | 261/1677 [03:28<17:53,  1.32it/s]

batch loss: 1.0985759496688843


 16%|█▌        | 262/1677 [03:29<18:08,  1.30it/s]

batch loss: 1.0984857082366943


 16%|█▌        | 263/1677 [03:29<18:20,  1.28it/s]

batch loss: 1.0985993146896362


 16%|█▌        | 264/1677 [03:30<18:11,  1.29it/s]

batch loss: 1.098427176475525


 16%|█▌        | 265/1677 [03:31<18:03,  1.30it/s]

batch loss: 1.0987651348114014


 16%|█▌        | 266/1677 [03:32<17:56,  1.31it/s]

batch loss: 1.0985558032989502


 16%|█▌        | 267/1677 [03:32<17:47,  1.32it/s]

batch loss: 1.0986052751541138


 16%|█▌        | 268/1677 [03:33<18:01,  1.30it/s]

batch loss: 1.098503828048706


 16%|█▌        | 269/1677 [03:34<18:13,  1.29it/s]

batch loss: 1.0985236167907715


 16%|█▌        | 270/1677 [03:35<17:58,  1.30it/s]

batch loss: 1.098533034324646


 16%|█▌        | 271/1677 [03:36<17:59,  1.30it/s]

batch loss: 1.0984766483306885


 16%|█▌        | 272/1677 [03:36<17:39,  1.33it/s]

batch loss: 1.0988078117370605


 16%|█▋        | 273/1677 [03:37<17:52,  1.31it/s]

batch loss: 1.0982494354248047


 16%|█▋        | 274/1677 [03:38<18:03,  1.29it/s]

batch loss: 1.0985621213912964


 16%|█▋        | 275/1677 [03:39<19:42,  1.19it/s]

batch loss: 1.0985729694366455


 16%|█▋        | 276/1677 [03:40<19:50,  1.18it/s]

batch loss: 1.0984644889831543


 17%|█▋        | 277/1677 [03:41<19:28,  1.20it/s]

batch loss: 1.0981942415237427


 17%|█▋        | 278/1677 [03:41<18:34,  1.25it/s]

batch loss: 1.0988335609436035


 17%|█▋        | 279/1677 [03:42<18:13,  1.28it/s]

batch loss: 1.099078893661499


 17%|█▋        | 280/1677 [03:43<18:08,  1.28it/s]

batch loss: 1.098543405532837


 17%|█▋        | 281/1677 [03:44<18:16,  1.27it/s]

batch loss: 1.0977786779403687


 17%|█▋        | 282/1677 [03:44<18:24,  1.26it/s]

batch loss: 1.0986886024475098


 17%|█▋        | 283/1677 [03:45<19:00,  1.22it/s]

batch loss: 1.0990220308303833


 17%|█▋        | 284/1677 [03:46<18:30,  1.25it/s]

batch loss: 1.0984392166137695


 17%|█▋        | 285/1677 [03:47<18:08,  1.28it/s]

batch loss: 1.100679636001587


 17%|█▋        | 286/1677 [03:48<18:00,  1.29it/s]

batch loss: 1.100765585899353


 17%|█▋        | 287/1677 [03:48<18:36,  1.25it/s]

batch loss: 1.0938974618911743


 17%|█▋        | 288/1677 [03:49<18:20,  1.26it/s]

batch loss: 1.0939486026763916


 17%|█▋        | 289/1677 [03:50<17:40,  1.31it/s]

batch loss: 1.0896140336990356


 17%|█▋        | 290/1677 [03:51<17:28,  1.32it/s]

batch loss: 1.0937286615371704


 17%|█▋        | 291/1677 [03:51<17:44,  1.30it/s]

batch loss: 1.1282355785369873


 17%|█▋        | 292/1677 [03:52<17:51,  1.29it/s]

batch loss: 1.1096431016921997


 17%|█▋        | 293/1677 [03:53<18:22,  1.26it/s]

batch loss: 1.104660153388977


 18%|█▊        | 294/1677 [03:54<18:59,  1.21it/s]

batch loss: 1.0986006259918213


 18%|█▊        | 295/1677 [03:55<18:25,  1.25it/s]

batch loss: 1.0946048498153687


 18%|█▊        | 296/1677 [03:56<18:50,  1.22it/s]

batch loss: 1.0906784534454346


 18%|█▊        | 297/1677 [03:56<18:19,  1.26it/s]

batch loss: 1.0988034009933472


 18%|█▊        | 298/1677 [03:57<20:07,  1.14it/s]

batch loss: 1.0926992893218994


 18%|█▊        | 299/1677 [03:58<19:12,  1.20it/s]

batch loss: 1.1029796600341797


 18%|█▊        | 300/1677 [03:59<18:41,  1.23it/s]

batch loss: 1.0953421592712402


 18%|█▊        | 301/1677 [04:00<19:07,  1.20it/s]

batch loss: 1.100364327430725


 18%|█▊        | 302/1677 [04:01<19:17,  1.19it/s]

batch loss: 1.099592685699463


 18%|█▊        | 303/1677 [04:01<18:37,  1.23it/s]

batch loss: 1.0893579721450806


 18%|█▊        | 304/1677 [04:02<18:59,  1.20it/s]

batch loss: 1.0999802350997925


 18%|█▊        | 305/1677 [04:03<18:25,  1.24it/s]

batch loss: 1.0943498611450195


 18%|█▊        | 306/1677 [04:04<18:41,  1.22it/s]

batch loss: 1.1014219522476196


 18%|█▊        | 307/1677 [04:05<18:12,  1.25it/s]

batch loss: 1.0941284894943237


 18%|█▊        | 308/1677 [04:05<17:49,  1.28it/s]

batch loss: 1.085162878036499


 18%|█▊        | 309/1677 [04:06<17:55,  1.27it/s]

batch loss: 1.1039643287658691


 18%|█▊        | 310/1677 [04:07<17:17,  1.32it/s]

batch loss: 1.1011290550231934


 19%|█▊        | 311/1677 [04:08<17:07,  1.33it/s]

batch loss: 1.1108453273773193


 19%|█▊        | 312/1677 [04:08<17:11,  1.32it/s]

batch loss: 1.1132079362869263


 19%|█▊        | 313/1677 [04:09<18:13,  1.25it/s]

batch loss: 1.1259362697601318


 19%|█▊        | 314/1677 [04:10<17:45,  1.28it/s]

batch loss: 1.1537214517593384


 19%|█▉        | 315/1677 [04:11<17:47,  1.28it/s]

batch loss: 1.1161264181137085


 19%|█▉        | 316/1677 [04:12<18:37,  1.22it/s]

batch loss: 1.0928566455841064


 19%|█▉        | 317/1677 [04:12<18:45,  1.21it/s]

batch loss: 1.081007480621338


 19%|█▉        | 318/1677 [04:13<18:18,  1.24it/s]

batch loss: 1.1151583194732666


 19%|█▉        | 319/1677 [04:14<17:51,  1.27it/s]

batch loss: 1.0951074361801147


 19%|█▉        | 320/1677 [04:15<17:25,  1.30it/s]

batch loss: 1.12077796459198


 19%|█▉        | 321/1677 [04:15<17:15,  1.31it/s]

batch loss: 1.085971474647522


 19%|█▉        | 322/1677 [04:16<17:29,  1.29it/s]

batch loss: 1.1059678792953491


 19%|█▉        | 323/1677 [04:17<17:41,  1.28it/s]

batch loss: 1.1021065711975098


 19%|█▉        | 324/1677 [04:18<18:06,  1.25it/s]

batch loss: 1.0948917865753174


 19%|█▉        | 325/1677 [04:19<17:50,  1.26it/s]

batch loss: 1.0977075099945068


 19%|█▉        | 326/1677 [04:19<17:31,  1.28it/s]

batch loss: 1.1008741855621338


 19%|█▉        | 327/1677 [04:20<17:18,  1.30it/s]

batch loss: 1.0972918272018433


 20%|█▉        | 328/1677 [04:21<17:54,  1.26it/s]

batch loss: 1.1000449657440186


 20%|█▉        | 329/1677 [04:22<17:29,  1.28it/s]

batch loss: 1.094490647315979


 20%|█▉        | 330/1677 [04:23<18:56,  1.18it/s]

batch loss: 1.0911470651626587


 20%|█▉        | 331/1677 [04:24<19:05,  1.18it/s]

batch loss: 1.1026172637939453


 20%|█▉        | 332/1677 [04:24<18:41,  1.20it/s]

batch loss: 1.0965197086334229


 20%|█▉        | 333/1677 [04:25<18:23,  1.22it/s]

batch loss: 1.1019582748413086


 20%|█▉        | 334/1677 [04:26<17:55,  1.25it/s]

batch loss: 1.1183100938796997


 20%|█▉        | 335/1677 [04:27<18:37,  1.20it/s]

batch loss: 1.0814032554626465


 20%|██        | 336/1677 [04:28<17:55,  1.25it/s]

batch loss: 1.0906163454055786


 20%|██        | 337/1677 [04:28<18:17,  1.22it/s]

batch loss: 1.1013104915618896


 20%|██        | 338/1677 [04:29<17:45,  1.26it/s]

batch loss: 1.1083623170852661


 20%|██        | 339/1677 [04:30<18:20,  1.22it/s]

batch loss: 1.1082504987716675


 20%|██        | 340/1677 [04:31<17:55,  1.24it/s]

batch loss: 1.1016567945480347


 20%|██        | 341/1677 [04:32<17:45,  1.25it/s]

batch loss: 1.1019926071166992


 20%|██        | 342/1677 [04:33<18:29,  1.20it/s]

batch loss: 1.0966925621032715


 20%|██        | 343/1677 [04:33<17:59,  1.24it/s]

batch loss: 1.0794613361358643


 21%|██        | 344/1677 [04:34<18:16,  1.22it/s]

batch loss: 1.0909631252288818


 21%|██        | 345/1677 [04:35<17:43,  1.25it/s]

batch loss: 1.0919464826583862


 21%|██        | 346/1677 [04:36<17:49,  1.24it/s]

batch loss: 1.0828913450241089


 21%|██        | 347/1677 [04:37<17:47,  1.25it/s]

batch loss: 1.11216139793396


 21%|██        | 348/1677 [04:37<17:25,  1.27it/s]

batch loss: 1.0970710515975952


 21%|██        | 349/1677 [04:38<17:57,  1.23it/s]

batch loss: 1.0887994766235352


 21%|██        | 350/1677 [04:39<18:09,  1.22it/s]

batch loss: 1.1145188808441162


 21%|██        | 351/1677 [04:40<18:24,  1.20it/s]

batch loss: 1.1166572570800781


 21%|██        | 352/1677 [04:41<18:36,  1.19it/s]

batch loss: 1.1043384075164795


 21%|██        | 353/1677 [04:42<18:50,  1.17it/s]

batch loss: 1.0876083374023438


 21%|██        | 354/1677 [04:42<18:47,  1.17it/s]

batch loss: 1.0957127809524536


 21%|██        | 355/1677 [04:43<18:58,  1.16it/s]

batch loss: 1.0873041152954102


 21%|██        | 356/1677 [04:44<18:10,  1.21it/s]

batch loss: 1.100584626197815


 21%|██▏       | 357/1677 [04:45<17:57,  1.22it/s]

batch loss: 1.0777640342712402


 21%|██▏       | 358/1677 [04:46<18:16,  1.20it/s]

batch loss: 1.133556604385376


 21%|██▏       | 359/1677 [04:46<17:38,  1.25it/s]

batch loss: 1.1012741327285767


 21%|██▏       | 360/1677 [04:47<17:18,  1.27it/s]

batch loss: 1.0870277881622314


 22%|██▏       | 361/1677 [04:48<17:54,  1.22it/s]

batch loss: 1.1075682640075684


 22%|██▏       | 362/1677 [04:49<17:48,  1.23it/s]

batch loss: 1.077157735824585


 22%|██▏       | 363/1677 [04:50<17:45,  1.23it/s]

batch loss: 1.0990864038467407


 22%|██▏       | 364/1677 [04:50<17:42,  1.24it/s]

batch loss: 1.0888586044311523


 22%|██▏       | 365/1677 [04:51<17:22,  1.26it/s]

batch loss: 1.1037206649780273


 22%|██▏       | 366/1677 [04:52<17:20,  1.26it/s]

batch loss: 1.095893144607544


 22%|██▏       | 367/1677 [04:53<17:06,  1.28it/s]

batch loss: 1.113435983657837


 22%|██▏       | 368/1677 [04:54<17:02,  1.28it/s]

batch loss: 1.133666753768921


 22%|██▏       | 369/1677 [04:54<17:09,  1.27it/s]

batch loss: 1.0911771059036255


 22%|██▏       | 370/1677 [04:55<18:23,  1.18it/s]

batch loss: 1.0959216356277466


 22%|██▏       | 371/1677 [04:56<18:24,  1.18it/s]

batch loss: 1.079512357711792


 22%|██▏       | 372/1677 [04:57<17:41,  1.23it/s]

batch loss: 1.1061269044876099


 22%|██▏       | 373/1677 [04:58<18:18,  1.19it/s]

batch loss: 1.1041311025619507


 22%|██▏       | 374/1677 [04:59<18:25,  1.18it/s]

batch loss: 1.1074615716934204


 22%|██▏       | 375/1677 [05:00<18:36,  1.17it/s]

batch loss: 1.082438588142395


 22%|██▏       | 376/1677 [05:00<18:00,  1.20it/s]

batch loss: 1.0935896635055542


 22%|██▏       | 377/1677 [05:01<18:13,  1.19it/s]

batch loss: 1.0873104333877563


 23%|██▎       | 378/1677 [05:02<17:56,  1.21it/s]

batch loss: 1.1025047302246094


 23%|██▎       | 379/1677 [05:03<17:46,  1.22it/s]

batch loss: 1.089647889137268


 23%|██▎       | 380/1677 [05:04<17:37,  1.23it/s]

batch loss: 1.0888432264328003


 23%|██▎       | 381/1677 [05:04<17:14,  1.25it/s]

batch loss: 1.0796719789505005


 23%|██▎       | 382/1677 [05:05<17:02,  1.27it/s]

batch loss: 1.11636483669281


 23%|██▎       | 383/1677 [05:06<17:10,  1.26it/s]

batch loss: 1.1187406778335571


 23%|██▎       | 384/1677 [05:07<17:57,  1.20it/s]

batch loss: 1.0869032144546509


 23%|██▎       | 385/1677 [05:08<17:08,  1.26it/s]

batch loss: 1.1038529872894287


 23%|██▎       | 386/1677 [05:08<16:40,  1.29it/s]

batch loss: 1.119845986366272


 23%|██▎       | 387/1677 [05:09<16:38,  1.29it/s]

batch loss: 1.0731072425842285


 23%|██▎       | 388/1677 [05:10<16:48,  1.28it/s]

batch loss: 1.1033291816711426


 23%|██▎       | 389/1677 [05:11<18:06,  1.19it/s]

batch loss: 1.0831876993179321


 23%|██▎       | 390/1677 [05:12<17:41,  1.21it/s]

batch loss: 1.1032800674438477


 23%|██▎       | 391/1677 [05:13<18:13,  1.18it/s]

batch loss: 1.1133289337158203


 23%|██▎       | 392/1677 [05:13<17:55,  1.19it/s]

batch loss: 1.1280018091201782


 23%|██▎       | 393/1677 [05:14<16:11,  1.32it/s]

batch loss: 1.0715981721878052


 23%|██▎       | 394/1677 [05:15<16:02,  1.33it/s]

batch loss: 1.0971678495407104


 24%|██▎       | 395/1677 [05:15<16:21,  1.31it/s]

batch loss: 1.1003068685531616


 24%|██▎       | 396/1677 [05:16<16:37,  1.28it/s]

batch loss: 1.0933424234390259


 24%|██▎       | 397/1677 [05:17<17:19,  1.23it/s]

batch loss: 1.122389554977417


 24%|██▎       | 398/1677 [05:18<16:56,  1.26it/s]

batch loss: 1.1146868467330933


 24%|██▍       | 399/1677 [05:19<17:30,  1.22it/s]

batch loss: 1.098535180091858


 24%|██▍       | 400/1677 [05:20<17:09,  1.24it/s]

batch loss: 1.1050910949707031


 24%|██▍       | 401/1677 [05:20<16:46,  1.27it/s]

batch loss: 1.0913946628570557


 24%|██▍       | 402/1677 [05:21<16:48,  1.26it/s]

batch loss: 1.124238133430481


 24%|██▍       | 403/1677 [05:22<16:14,  1.31it/s]

batch loss: 1.1062599420547485


 24%|██▍       | 404/1677 [05:23<16:22,  1.30it/s]

batch loss: 1.089247465133667


 24%|██▍       | 405/1677 [05:24<16:55,  1.25it/s]

batch loss: 1.1077656745910645


 24%|██▍       | 406/1677 [05:24<16:59,  1.25it/s]

batch loss: 1.10028076171875


 24%|██▍       | 407/1677 [05:25<16:55,  1.25it/s]

batch loss: 1.1238397359848022


 24%|██▍       | 408/1677 [05:26<16:37,  1.27it/s]

batch loss: 1.0973975658416748


 24%|██▍       | 409/1677 [05:27<16:41,  1.27it/s]

batch loss: 1.1040918827056885


 24%|██▍       | 410/1677 [05:28<17:13,  1.23it/s]

batch loss: 1.112316370010376


 25%|██▍       | 411/1677 [05:28<17:41,  1.19it/s]

batch loss: 1.10623037815094


 25%|██▍       | 412/1677 [05:29<17:46,  1.19it/s]

batch loss: 1.1118991374969482


 25%|██▍       | 413/1677 [05:30<17:54,  1.18it/s]

batch loss: 1.0993324518203735


 25%|██▍       | 414/1677 [05:31<18:00,  1.17it/s]

batch loss: 1.1028720140457153


 25%|██▍       | 415/1677 [05:32<18:04,  1.16it/s]

batch loss: 1.1004960536956787


 25%|██▍       | 416/1677 [05:33<18:03,  1.16it/s]

batch loss: 1.0986361503601074


 25%|██▍       | 417/1677 [05:34<18:05,  1.16it/s]

batch loss: 1.0967730283737183


 25%|██▍       | 418/1677 [05:34<17:19,  1.21it/s]

batch loss: 1.0984922647476196


 25%|██▍       | 419/1677 [05:35<16:50,  1.24it/s]

batch loss: 1.1006596088409424


 25%|██▌       | 420/1677 [05:36<17:18,  1.21it/s]

batch loss: 1.1030020713806152


 25%|██▌       | 421/1677 [05:37<16:40,  1.26it/s]

batch loss: 1.088025689125061


 25%|██▌       | 422/1677 [05:37<16:23,  1.28it/s]

batch loss: 1.0912467241287231


 25%|██▌       | 423/1677 [05:38<16:24,  1.27it/s]

batch loss: 1.0839489698410034


 25%|██▌       | 424/1677 [05:39<16:11,  1.29it/s]

batch loss: 1.106469988822937


 25%|██▌       | 425/1677 [05:40<16:01,  1.30it/s]

batch loss: 1.1024237871170044


 25%|██▌       | 426/1677 [05:41<16:15,  1.28it/s]

batch loss: 1.0645751953125


 25%|██▌       | 427/1677 [05:41<16:21,  1.27it/s]

batch loss: 1.0845465660095215


 26%|██▌       | 428/1677 [05:42<16:58,  1.23it/s]

batch loss: 1.0927469730377197


 26%|██▌       | 429/1677 [05:43<16:33,  1.26it/s]

batch loss: 1.0730081796646118


 26%|██▌       | 430/1677 [05:44<17:00,  1.22it/s]

batch loss: 1.1157761812210083


 26%|██▌       | 431/1677 [05:45<16:35,  1.25it/s]

batch loss: 1.0920277833938599


 26%|██▌       | 432/1677 [05:45<16:34,  1.25it/s]

batch loss: 1.122759222984314


 26%|██▌       | 433/1677 [05:46<16:31,  1.25it/s]

batch loss: 1.0865566730499268


 26%|██▌       | 434/1677 [05:47<16:26,  1.26it/s]

batch loss: 1.094173789024353


 26%|██▌       | 435/1677 [05:48<15:49,  1.31it/s]

batch loss: 1.0371670722961426


 26%|██▌       | 436/1677 [05:49<16:29,  1.25it/s]

batch loss: 1.1119354963302612


 26%|██▌       | 437/1677 [05:49<16:55,  1.22it/s]

batch loss: 1.181602954864502


 26%|██▌       | 438/1677 [05:50<16:48,  1.23it/s]

batch loss: 1.1352328062057495


 26%|██▌       | 439/1677 [05:51<16:37,  1.24it/s]

batch loss: 1.0809799432754517


 26%|██▌       | 440/1677 [05:52<16:24,  1.26it/s]

batch loss: 1.1354775428771973


 26%|██▋       | 441/1677 [05:53<16:22,  1.26it/s]

batch loss: 1.1098711490631104


 26%|██▋       | 442/1677 [05:53<17:01,  1.21it/s]

batch loss: 1.0924170017242432


 26%|██▋       | 443/1677 [05:54<17:14,  1.19it/s]

batch loss: 1.1062374114990234


 26%|██▋       | 444/1677 [05:55<16:41,  1.23it/s]

batch loss: 1.131559133529663


 27%|██▋       | 445/1677 [05:56<17:22,  1.18it/s]

batch loss: 1.106207013130188


 27%|██▋       | 446/1677 [05:57<16:40,  1.23it/s]

batch loss: 1.0551797151565552


 27%|██▋       | 447/1677 [05:57<16:00,  1.28it/s]

batch loss: 1.1602364778518677


 27%|██▋       | 448/1677 [05:58<16:40,  1.23it/s]

batch loss: 1.1096829175949097


 27%|██▋       | 449/1677 [05:59<16:33,  1.24it/s]

batch loss: 1.0942007303237915


 27%|██▋       | 450/1677 [06:00<16:47,  1.22it/s]

batch loss: 1.1081644296646118


 27%|██▋       | 451/1677 [06:01<16:18,  1.25it/s]

batch loss: 1.0714261531829834


 27%|██▋       | 452/1677 [06:02<15:57,  1.28it/s]

batch loss: 1.1040726900100708


 27%|██▋       | 453/1677 [06:02<15:56,  1.28it/s]

batch loss: 1.0735535621643066


 27%|██▋       | 454/1677 [06:03<16:21,  1.25it/s]

batch loss: 1.0791677236557007


 27%|██▋       | 455/1677 [06:04<16:44,  1.22it/s]

batch loss: 1.1031752824783325


 27%|██▋       | 456/1677 [06:05<16:35,  1.23it/s]

batch loss: 1.10458242893219


 27%|██▋       | 457/1677 [06:06<16:29,  1.23it/s]

batch loss: 1.1092206239700317


 27%|██▋       | 458/1677 [06:06<16:25,  1.24it/s]

batch loss: 1.0771629810333252


 27%|██▋       | 459/1677 [06:07<16:06,  1.26it/s]

batch loss: 1.0960471630096436


 27%|██▋       | 460/1677 [06:08<15:37,  1.30it/s]

batch loss: 1.1114083528518677


 27%|██▋       | 461/1677 [06:09<15:31,  1.31it/s]

batch loss: 1.105304479598999


 28%|██▊       | 462/1677 [06:09<15:25,  1.31it/s]

batch loss: 1.1105928421020508


 28%|██▊       | 463/1677 [06:10<15:13,  1.33it/s]

batch loss: 1.0861124992370605


 28%|██▊       | 464/1677 [06:11<15:33,  1.30it/s]

batch loss: 1.0850555896759033


 28%|██▊       | 465/1677 [06:12<16:16,  1.24it/s]

batch loss: 1.0940579175949097


 28%|██▊       | 466/1677 [06:13<16:12,  1.25it/s]

batch loss: 1.1088556051254272


 28%|██▊       | 467/1677 [06:13<16:09,  1.25it/s]

batch loss: 1.0796425342559814


 28%|██▊       | 468/1677 [06:14<16:34,  1.22it/s]

batch loss: 1.1149755716323853


 28%|██▊       | 469/1677 [06:15<16:19,  1.23it/s]

batch loss: 1.0979907512664795


 28%|██▊       | 470/1677 [06:16<16:55,  1.19it/s]

batch loss: 1.1210484504699707


 28%|██▊       | 471/1677 [06:17<16:09,  1.24it/s]

batch loss: 1.095715880393982


 28%|██▊       | 472/1677 [06:18<16:31,  1.21it/s]

batch loss: 1.1167207956314087


 28%|██▊       | 473/1677 [06:18<16:44,  1.20it/s]

batch loss: 1.1020704507827759


 28%|██▊       | 474/1677 [06:19<16:28,  1.22it/s]

batch loss: 1.1099905967712402


 28%|██▊       | 475/1677 [06:20<16:46,  1.19it/s]

batch loss: 1.085785150527954


 28%|██▊       | 476/1677 [06:21<16:19,  1.23it/s]

batch loss: 1.096989393234253


 28%|██▊       | 477/1677 [06:22<15:57,  1.25it/s]

batch loss: 1.086509346961975


 29%|██▊       | 478/1677 [06:22<15:23,  1.30it/s]

batch loss: 1.1143684387207031


 29%|██▊       | 479/1677 [06:23<15:17,  1.31it/s]

batch loss: 1.0886937379837036


 29%|██▊       | 480/1677 [06:24<15:23,  1.30it/s]

batch loss: 1.1018447875976562


 29%|██▊       | 481/1677 [06:25<15:34,  1.28it/s]

batch loss: 1.097242832183838


 29%|██▊       | 482/1677 [06:25<15:43,  1.27it/s]

batch loss: 1.0959556102752686


 29%|██▉       | 483/1677 [06:26<16:28,  1.21it/s]

batch loss: 1.0891939401626587


 29%|██▉       | 484/1677 [06:27<16:35,  1.20it/s]

batch loss: 1.0988894701004028


 29%|██▉       | 485/1677 [06:28<16:14,  1.22it/s]

batch loss: 1.0962107181549072


 29%|██▉       | 486/1677 [06:29<15:51,  1.25it/s]

batch loss: 1.0930519104003906


 29%|██▉       | 487/1677 [06:30<16:16,  1.22it/s]

batch loss: 1.0974493026733398


 29%|██▉       | 488/1677 [06:30<15:41,  1.26it/s]

batch loss: 1.0965490341186523


 29%|██▉       | 489/1677 [06:31<15:45,  1.26it/s]

batch loss: 1.1061232089996338


 29%|██▉       | 490/1677 [06:32<15:22,  1.29it/s]

batch loss: 1.0954077243804932


 29%|██▉       | 491/1677 [06:33<14:58,  1.32it/s]

batch loss: 1.0968230962753296


 29%|██▉       | 492/1677 [06:33<15:37,  1.26it/s]

batch loss: 1.1078300476074219


 29%|██▉       | 493/1677 [06:34<13:54,  1.42it/s]

batch loss: 1.0993133783340454


 29%|██▉       | 494/1677 [06:35<14:09,  1.39it/s]

batch loss: 1.1115485429763794


 30%|██▉       | 495/1677 [06:36<15:08,  1.30it/s]

batch loss: 1.103237509727478


 30%|██▉       | 496/1677 [06:36<14:55,  1.32it/s]

batch loss: 1.094463586807251


 30%|██▉       | 497/1677 [06:37<15:08,  1.30it/s]

batch loss: 1.0984904766082764


 30%|██▉       | 498/1677 [06:38<14:55,  1.32it/s]

batch loss: 1.0953459739685059


 30%|██▉       | 499/1677 [06:39<14:51,  1.32it/s]

batch loss: 1.1032782793045044


 30%|██▉       | 500/1677 [06:39<15:05,  1.30it/s]

batch loss: 1.0916244983673096


 30%|██▉       | 501/1677 [06:40<15:09,  1.29it/s]

batch loss: 1.0865974426269531


 30%|██▉       | 502/1677 [06:41<14:17,  1.37it/s]

batch loss: 1.1009660959243774


 30%|██▉       | 503/1677 [06:42<15:08,  1.29it/s]

batch loss: 1.1018787622451782


 30%|███       | 504/1677 [06:43<15:32,  1.26it/s]

batch loss: 1.0910804271697998


 30%|███       | 505/1677 [06:43<15:35,  1.25it/s]

batch loss: 1.1042453050613403


 30%|███       | 506/1677 [06:44<16:13,  1.20it/s]

batch loss: 1.0945682525634766


 30%|███       | 507/1677 [06:45<15:42,  1.24it/s]

batch loss: 1.1172322034835815


 30%|███       | 508/1677 [06:46<16:13,  1.20it/s]

batch loss: 1.102457046508789


 30%|███       | 509/1677 [06:47<15:58,  1.22it/s]

batch loss: 1.0941972732543945


 30%|███       | 510/1677 [06:47<15:30,  1.25it/s]

batch loss: 1.095868706703186


 30%|███       | 511/1677 [06:48<15:27,  1.26it/s]

batch loss: 1.1029399633407593


 31%|███       | 512/1677 [06:49<15:31,  1.25it/s]

batch loss: 1.0993870496749878


 31%|███       | 513/1677 [06:50<15:28,  1.25it/s]

batch loss: 1.0903090238571167


 31%|███       | 514/1677 [06:51<15:57,  1.21it/s]

batch loss: 1.1046419143676758


 31%|███       | 515/1677 [06:52<16:02,  1.21it/s]

batch loss: 1.1005412340164185


 31%|███       | 516/1677 [06:52<15:40,  1.23it/s]

batch loss: 1.0981189012527466


 31%|███       | 517/1677 [06:53<15:32,  1.24it/s]

batch loss: 1.096001148223877


 31%|███       | 518/1677 [06:54<15:51,  1.22it/s]

batch loss: 1.0988465547561646


 31%|███       | 519/1677 [06:55<15:24,  1.25it/s]

batch loss: 1.1042195558547974


 31%|███       | 520/1677 [06:55<15:05,  1.28it/s]

batch loss: 1.0992249250411987


 31%|███       | 521/1677 [06:56<14:52,  1.30it/s]

batch loss: 1.099793553352356


 31%|███       | 522/1677 [06:57<14:58,  1.29it/s]

batch loss: 1.0936135053634644


 31%|███       | 523/1677 [06:58<15:07,  1.27it/s]

batch loss: 1.0946272611618042


 31%|███       | 524/1677 [06:59<15:05,  1.27it/s]

batch loss: 1.10714590549469


 31%|███▏      | 525/1677 [06:59<15:27,  1.24it/s]

batch loss: 1.0993610620498657


 31%|███▏      | 526/1677 [07:00<15:24,  1.25it/s]

batch loss: 1.0951822996139526


 31%|███▏      | 527/1677 [07:01<13:19,  1.44it/s]

batch loss: 1.0982447862625122


 31%|███▏      | 528/1677 [07:02<13:53,  1.38it/s]

batch loss: 1.089173436164856


 32%|███▏      | 529/1677 [07:02<14:45,  1.30it/s]

batch loss: 1.0943433046340942


 32%|███▏      | 530/1677 [07:03<15:31,  1.23it/s]

batch loss: 1.100288987159729


 32%|███▏      | 531/1677 [07:04<14:59,  1.27it/s]

batch loss: 1.0886462926864624


 32%|███▏      | 532/1677 [07:05<14:47,  1.29it/s]

batch loss: 1.0979338884353638


 32%|███▏      | 533/1677 [07:06<15:14,  1.25it/s]

batch loss: 1.0987221002578735


 32%|███▏      | 534/1677 [07:06<15:34,  1.22it/s]

batch loss: 1.090593934059143


 32%|███▏      | 535/1677 [07:07<15:25,  1.23it/s]

batch loss: 1.106753945350647


 32%|███▏      | 536/1677 [07:08<14:47,  1.29it/s]

batch loss: 1.1055185794830322


 32%|███▏      | 537/1677 [07:09<15:35,  1.22it/s]

batch loss: 1.1040840148925781


 32%|███▏      | 538/1677 [07:10<15:56,  1.19it/s]

batch loss: 1.0995383262634277


 32%|███▏      | 539/1677 [07:11<15:25,  1.23it/s]

batch loss: 1.0980989933013916


 32%|███▏      | 540/1677 [07:11<15:41,  1.21it/s]

batch loss: 1.0973163843154907


 32%|███▏      | 541/1677 [07:12<15:52,  1.19it/s]

batch loss: 1.1063724756240845


 32%|███▏      | 542/1677 [07:13<15:24,  1.23it/s]

batch loss: 1.094599723815918


 32%|███▏      | 543/1677 [07:14<15:02,  1.26it/s]

batch loss: 1.0950534343719482


 32%|███▏      | 544/1677 [07:15<14:44,  1.28it/s]

batch loss: 1.0935362577438354


 32%|███▏      | 545/1677 [07:15<14:53,  1.27it/s]

batch loss: 1.0996029376983643


 33%|███▎      | 546/1677 [07:16<14:57,  1.26it/s]

batch loss: 1.1057029962539673


 33%|███▎      | 547/1677 [07:17<15:00,  1.25it/s]

batch loss: 1.0955119132995605


 33%|███▎      | 548/1677 [07:18<15:24,  1.22it/s]

batch loss: 1.1014472246170044


 33%|███▎      | 549/1677 [07:19<15:39,  1.20it/s]

batch loss: 1.0959901809692383


 33%|███▎      | 550/1677 [07:19<15:27,  1.21it/s]

batch loss: 1.1014082431793213


 33%|███▎      | 551/1677 [07:20<14:59,  1.25it/s]

batch loss: 1.0981658697128296


 33%|███▎      | 552/1677 [07:21<14:42,  1.28it/s]

batch loss: 1.098595380783081


 33%|███▎      | 553/1677 [07:22<15:09,  1.24it/s]

batch loss: 1.0986270904541016


 33%|███▎      | 554/1677 [07:23<14:46,  1.27it/s]

batch loss: 1.0986112356185913


 33%|███▎      | 555/1677 [07:23<14:49,  1.26it/s]

batch loss: 1.0985658168792725


 33%|███▎      | 556/1677 [07:24<15:34,  1.20it/s]

batch loss: 1.0985987186431885


 33%|███▎      | 557/1677 [07:25<15:40,  1.19it/s]

batch loss: 1.098602056503296


 33%|███▎      | 558/1677 [07:26<15:20,  1.22it/s]

batch loss: 1.0985937118530273


 33%|███▎      | 559/1677 [07:27<14:57,  1.25it/s]

batch loss: 1.098607063293457


 33%|███▎      | 560/1677 [07:27<14:40,  1.27it/s]

batch loss: 1.0986231565475464


 33%|███▎      | 561/1677 [07:28<14:47,  1.26it/s]

batch loss: 1.098606824874878


 34%|███▎      | 562/1677 [07:29<14:21,  1.29it/s]

batch loss: 1.0985952615737915


 34%|███▎      | 563/1677 [07:30<14:14,  1.30it/s]

batch loss: 1.0986137390136719


 34%|███▎      | 564/1677 [07:31<14:47,  1.25it/s]

batch loss: 1.098605751991272


 34%|███▎      | 565/1677 [07:31<14:30,  1.28it/s]

batch loss: 1.0985902547836304


 34%|███▍      | 566/1677 [07:32<14:21,  1.29it/s]

batch loss: 1.098611831665039


 34%|███▍      | 567/1677 [07:33<14:32,  1.27it/s]

batch loss: 1.0986169576644897


 34%|███▍      | 568/1677 [07:34<14:22,  1.29it/s]

batch loss: 1.0986028909683228


 34%|███▍      | 569/1677 [07:34<14:12,  1.30it/s]

batch loss: 1.0985983610153198


 34%|███▍      | 570/1677 [07:35<14:18,  1.29it/s]

batch loss: 1.0986073017120361


 34%|███▍      | 571/1677 [07:36<14:24,  1.28it/s]

batch loss: 1.0986045598983765


 34%|███▍      | 572/1677 [07:37<14:54,  1.23it/s]

batch loss: 1.098635196685791


 34%|███▍      | 573/1677 [07:38<14:39,  1.25it/s]

batch loss: 1.0986098051071167


 34%|███▍      | 574/1677 [07:38<14:46,  1.24it/s]

batch loss: 1.0986089706420898


 34%|███▍      | 575/1677 [07:39<14:31,  1.27it/s]

batch loss: 1.098616123199463


 34%|███▍      | 576/1677 [07:40<15:04,  1.22it/s]

batch loss: 1.0986021757125854


 34%|███▍      | 577/1677 [07:41<14:24,  1.27it/s]

batch loss: 1.0985995531082153


 34%|███▍      | 578/1677 [07:42<14:12,  1.29it/s]

batch loss: 1.0985984802246094


 35%|███▍      | 579/1677 [07:42<14:46,  1.24it/s]

batch loss: 1.0986088514328003


 35%|███▍      | 580/1677 [07:43<14:42,  1.24it/s]

batch loss: 1.0986168384552002


 35%|███▍      | 581/1677 [07:44<14:35,  1.25it/s]

batch loss: 1.098612904548645


 35%|███▍      | 582/1677 [07:45<14:55,  1.22it/s]

batch loss: 1.0986621379852295


 35%|███▍      | 583/1677 [07:46<15:21,  1.19it/s]

batch loss: 1.0986316204071045


 35%|███▍      | 584/1677 [07:47<14:48,  1.23it/s]

batch loss: 1.0986098051071167


 35%|███▍      | 585/1677 [07:47<14:28,  1.26it/s]

batch loss: 1.0986015796661377


 35%|███▍      | 586/1677 [07:48<14:47,  1.23it/s]

batch loss: 1.098609209060669


 35%|███▌      | 587/1677 [07:49<14:38,  1.24it/s]

batch loss: 1.0986109972000122


 35%|███▌      | 588/1677 [07:50<14:20,  1.27it/s]

batch loss: 1.098607063293457


 35%|███▌      | 589/1677 [07:50<14:06,  1.29it/s]

batch loss: 1.098624587059021


 35%|███▌      | 590/1677 [07:51<14:04,  1.29it/s]

batch loss: 1.0986155271530151


 35%|███▌      | 591/1677 [07:52<13:51,  1.31it/s]

batch loss: 1.0986186265945435


 35%|███▌      | 592/1677 [07:53<15:05,  1.20it/s]

batch loss: 1.0986192226409912


 35%|███▌      | 593/1677 [07:54<14:29,  1.25it/s]

batch loss: 1.0986086130142212


 35%|███▌      | 594/1677 [07:55<14:51,  1.21it/s]

batch loss: 1.0986169576644897


 35%|███▌      | 595/1677 [07:55<14:47,  1.22it/s]

batch loss: 1.0986136198043823


 36%|███▌      | 596/1677 [07:56<15:56,  1.13it/s]

batch loss: 1.0986117124557495


 36%|███▌      | 597/1677 [07:57<15:06,  1.19it/s]

batch loss: 1.0986216068267822


 36%|███▌      | 598/1677 [07:58<14:28,  1.24it/s]

batch loss: 1.0986093282699585


 36%|███▌      | 599/1677 [07:59<14:46,  1.22it/s]

batch loss: 1.0986112356185913


 36%|███▌      | 600/1677 [07:59<14:19,  1.25it/s]

batch loss: 1.098596453666687


 36%|███▌      | 601/1677 [08:00<14:00,  1.28it/s]

batch loss: 1.0986119508743286


 36%|███▌      | 602/1677 [08:01<13:50,  1.30it/s]

batch loss: 1.0985972881317139


 36%|███▌      | 603/1677 [08:02<13:39,  1.31it/s]

batch loss: 1.0986093282699585


 36%|███▌      | 604/1677 [08:03<14:24,  1.24it/s]

batch loss: 1.0986015796661377


 36%|███▌      | 605/1677 [08:03<14:45,  1.21it/s]

batch loss: 1.0986220836639404


 36%|███▌      | 606/1677 [08:04<14:31,  1.23it/s]

batch loss: 1.0985918045043945


 36%|███▌      | 607/1677 [08:05<14:51,  1.20it/s]

batch loss: 1.0986013412475586


 36%|███▋      | 608/1677 [08:06<15:14,  1.17it/s]

batch loss: 1.0985935926437378


 36%|███▋      | 609/1677 [08:07<14:55,  1.19it/s]

batch loss: 1.0986193418502808


 36%|███▋      | 610/1677 [08:08<14:28,  1.23it/s]

batch loss: 1.0986045598983765


 36%|███▋      | 611/1677 [08:08<14:42,  1.21it/s]

batch loss: 1.098597526550293


 36%|███▋      | 612/1677 [08:09<14:16,  1.24it/s]

batch loss: 1.0986087322235107


 37%|███▋      | 613/1677 [08:10<13:51,  1.28it/s]

batch loss: 1.0985875129699707


 37%|███▋      | 614/1677 [08:11<13:58,  1.27it/s]

batch loss: 1.0986000299453735


 37%|███▋      | 615/1677 [08:12<13:55,  1.27it/s]

batch loss: 1.0986127853393555


 37%|███▋      | 616/1677 [08:12<13:29,  1.31it/s]

batch loss: 1.0986098051071167


 37%|███▋      | 617/1677 [08:13<13:43,  1.29it/s]

batch loss: 1.0986422300338745


 37%|███▋      | 618/1677 [08:14<13:48,  1.28it/s]

batch loss: 1.09856116771698


 37%|███▋      | 619/1677 [08:15<14:15,  1.24it/s]

batch loss: 1.0986038446426392


 37%|███▋      | 620/1677 [08:15<13:55,  1.27it/s]

batch loss: 1.098612666130066


 37%|███▋      | 621/1677 [08:16<13:42,  1.28it/s]

batch loss: 1.098617434501648


 37%|███▋      | 622/1677 [08:17<13:31,  1.30it/s]

batch loss: 1.0986535549163818


 37%|███▋      | 623/1677 [08:18<13:22,  1.31it/s]

batch loss: 1.0985887050628662


 37%|███▋      | 624/1677 [08:19<13:34,  1.29it/s]

batch loss: 1.0987162590026855


 37%|███▋      | 625/1677 [08:19<13:42,  1.28it/s]

batch loss: 1.0986409187316895


 37%|███▋      | 626/1677 [08:20<13:36,  1.29it/s]

batch loss: 1.0987129211425781


 37%|███▋      | 627/1677 [08:21<13:23,  1.31it/s]

batch loss: 1.0985901355743408


 37%|███▋      | 628/1677 [08:22<13:49,  1.26it/s]

batch loss: 1.0987403392791748


 38%|███▊      | 629/1677 [08:22<13:53,  1.26it/s]

batch loss: 1.0985970497131348


 38%|███▊      | 630/1677 [08:23<13:54,  1.25it/s]

batch loss: 1.0986377000808716


 38%|███▊      | 631/1677 [08:24<13:46,  1.27it/s]

batch loss: 1.0986783504486084


 38%|███▊      | 632/1677 [08:25<14:06,  1.24it/s]

batch loss: 1.0985603332519531


 38%|███▊      | 633/1677 [08:26<14:02,  1.24it/s]

batch loss: 1.098616600036621


 38%|███▊      | 634/1677 [08:27<14:32,  1.19it/s]

batch loss: 1.0986334085464478


 38%|███▊      | 635/1677 [08:27<14:12,  1.22it/s]

batch loss: 1.0985807180404663


 38%|███▊      | 636/1677 [08:28<13:50,  1.25it/s]

batch loss: 1.0985980033874512


 38%|███▊      | 637/1677 [08:29<14:10,  1.22it/s]

batch loss: 1.098602294921875


 38%|███▊      | 638/1677 [08:30<13:48,  1.25it/s]

batch loss: 1.0985901355743408


 38%|███▊      | 639/1677 [08:31<14:10,  1.22it/s]

batch loss: 1.0986138582229614


 38%|███▊      | 640/1677 [08:31<13:44,  1.26it/s]

batch loss: 1.0986363887786865


 38%|███▊      | 641/1677 [08:32<13:29,  1.28it/s]

batch loss: 1.0986032485961914


 38%|███▊      | 642/1677 [08:33<13:36,  1.27it/s]

batch loss: 1.0986088514328003


 38%|███▊      | 643/1677 [08:34<13:42,  1.26it/s]

batch loss: 1.0985798835754395


 38%|███▊      | 644/1677 [08:35<13:59,  1.23it/s]

batch loss: 1.0985952615737915


 38%|███▊      | 645/1677 [08:35<13:41,  1.26it/s]

batch loss: 1.0985966920852661


 39%|███▊      | 646/1677 [08:36<13:44,  1.25it/s]

batch loss: 1.09860098361969


 39%|███▊      | 647/1677 [08:37<14:01,  1.22it/s]

batch loss: 1.0986192226409912


 39%|███▊      | 648/1677 [08:38<13:58,  1.23it/s]

batch loss: 1.0985901355743408


 39%|███▊      | 649/1677 [08:39<14:15,  1.20it/s]

batch loss: 1.0986146926879883


 39%|███▉      | 650/1677 [08:40<14:30,  1.18it/s]

batch loss: 1.0986140966415405


 39%|███▉      | 651/1677 [08:40<14:39,  1.17it/s]

batch loss: 1.0985698699951172


 39%|███▉      | 652/1677 [08:41<14:50,  1.15it/s]

batch loss: 1.09861159324646


 39%|███▉      | 653/1677 [08:42<15:30,  1.10it/s]

batch loss: 1.0985485315322876


 39%|███▉      | 654/1677 [08:43<15:13,  1.12it/s]

batch loss: 1.0985875129699707


 39%|███▉      | 655/1677 [08:44<14:42,  1.16it/s]

batch loss: 1.0985641479492188


 39%|███▉      | 656/1677 [08:45<14:10,  1.20it/s]

batch loss: 1.0985652208328247


 39%|███▉      | 657/1677 [08:46<14:02,  1.21it/s]

batch loss: 1.0985459089279175


 39%|███▉      | 658/1677 [08:46<14:27,  1.17it/s]

batch loss: 1.0986204147338867


 39%|███▉      | 659/1677 [08:47<13:56,  1.22it/s]

batch loss: 1.0991291999816895


 39%|███▉      | 660/1677 [08:48<13:24,  1.26it/s]

batch loss: 1.098520278930664


 39%|███▉      | 661/1677 [08:49<12:35,  1.35it/s]

batch loss: 1.0985629558563232


 39%|███▉      | 662/1677 [08:49<12:55,  1.31it/s]

batch loss: 1.098552942276001


 40%|███▉      | 663/1677 [08:50<12:47,  1.32it/s]

batch loss: 1.0985918045043945


 40%|███▉      | 664/1677 [08:51<13:00,  1.30it/s]

batch loss: 1.0986039638519287


 40%|███▉      | 665/1677 [08:52<13:07,  1.28it/s]

batch loss: 1.0985575914382935


 40%|███▉      | 666/1677 [08:53<13:13,  1.27it/s]

batch loss: 1.0986021757125854


 40%|███▉      | 667/1677 [08:53<12:49,  1.31it/s]

batch loss: 1.0986034870147705


 40%|███▉      | 668/1677 [08:54<12:46,  1.32it/s]

batch loss: 1.098649024963379


 40%|███▉      | 669/1677 [08:55<12:44,  1.32it/s]

batch loss: 1.0985922813415527


 40%|███▉      | 670/1677 [08:56<12:40,  1.32it/s]

batch loss: 1.0986148118972778


 40%|████      | 671/1677 [08:56<12:40,  1.32it/s]

batch loss: 1.098582148551941


 40%|████      | 672/1677 [08:57<12:43,  1.32it/s]

batch loss: 1.0986015796661377


 40%|████      | 673/1677 [08:58<13:16,  1.26it/s]

batch loss: 1.0986145734786987


 40%|████      | 674/1677 [08:59<13:02,  1.28it/s]

batch loss: 1.0986052751541138


 40%|████      | 675/1677 [09:00<14:24,  1.16it/s]

batch loss: 1.0986328125


 40%|████      | 676/1677 [09:01<14:33,  1.15it/s]

batch loss: 1.0985971689224243


 40%|████      | 677/1677 [09:01<13:48,  1.21it/s]

batch loss: 1.0986230373382568


 40%|████      | 678/1677 [09:02<13:38,  1.22it/s]

batch loss: 1.0986013412475586


 40%|████      | 679/1677 [09:03<13:17,  1.25it/s]

batch loss: 1.098597764968872


 41%|████      | 680/1677 [09:04<13:04,  1.27it/s]

batch loss: 1.0985833406448364


 41%|████      | 681/1677 [09:04<12:47,  1.30it/s]

batch loss: 1.0986006259918213


 41%|████      | 682/1677 [09:05<12:37,  1.31it/s]

batch loss: 1.0986665487289429


 41%|████      | 683/1677 [09:06<12:47,  1.30it/s]

batch loss: 1.0986416339874268


 41%|████      | 684/1677 [09:07<12:52,  1.29it/s]

batch loss: 1.098616600036621


 41%|████      | 685/1677 [09:08<13:03,  1.27it/s]

batch loss: 1.0985960960388184


 41%|████      | 686/1677 [09:08<12:50,  1.29it/s]

batch loss: 1.0985771417617798


 41%|████      | 687/1677 [09:09<12:53,  1.28it/s]

batch loss: 1.0985534191131592


 41%|████      | 688/1677 [09:10<13:23,  1.23it/s]

batch loss: 1.0986483097076416


 41%|████      | 689/1677 [09:11<13:17,  1.24it/s]

batch loss: 1.0986578464508057


 41%|████      | 690/1677 [09:12<13:32,  1.21it/s]

batch loss: 1.0986298322677612


 41%|████      | 691/1677 [09:12<13:44,  1.20it/s]

batch loss: 1.0985931158065796


 41%|████▏     | 692/1677 [09:13<13:27,  1.22it/s]

batch loss: 1.0985924005508423


 41%|████▏     | 693/1677 [09:14<13:23,  1.22it/s]

batch loss: 1.098615050315857


 41%|████▏     | 694/1677 [09:15<13:49,  1.19it/s]

batch loss: 1.098647952079773


 41%|████▏     | 695/1677 [09:16<13:52,  1.18it/s]

batch loss: 1.0985628366470337


 42%|████▏     | 696/1677 [09:17<13:32,  1.21it/s]

batch loss: 1.0986521244049072


 42%|████▏     | 697/1677 [09:17<13:05,  1.25it/s]

batch loss: 1.0985816717147827


 42%|████▏     | 698/1677 [09:18<13:00,  1.25it/s]

batch loss: 1.0985982418060303


 42%|████▏     | 699/1677 [09:19<12:44,  1.28it/s]

batch loss: 1.0986405611038208


 42%|████▏     | 700/1677 [09:20<12:32,  1.30it/s]

batch loss: 1.0986192226409912


 42%|████▏     | 701/1677 [09:20<12:25,  1.31it/s]

batch loss: 1.0986146926879883


 42%|████▏     | 702/1677 [09:21<12:50,  1.27it/s]

batch loss: 1.0985758304595947


 42%|████▏     | 703/1677 [09:22<12:54,  1.26it/s]

batch loss: 1.0985971689224243


 42%|████▏     | 704/1677 [09:23<12:39,  1.28it/s]

batch loss: 1.098641037940979


 42%|████▏     | 705/1677 [09:24<12:29,  1.30it/s]

batch loss: 1.0985840559005737


 42%|████▏     | 706/1677 [09:24<12:21,  1.31it/s]

batch loss: 1.098618745803833


 42%|████▏     | 707/1677 [09:25<12:26,  1.30it/s]

batch loss: 1.0986109972000122


 42%|████▏     | 708/1677 [09:26<13:00,  1.24it/s]

batch loss: 1.0986301898956299


 42%|████▏     | 709/1677 [09:27<12:24,  1.30it/s]

batch loss: 1.0986241102218628


 42%|████▏     | 710/1677 [09:27<12:30,  1.29it/s]

batch loss: 1.0985654592514038


 42%|████▏     | 711/1677 [09:28<12:21,  1.30it/s]

batch loss: 1.0986214876174927


 42%|████▏     | 712/1677 [09:29<12:30,  1.29it/s]

batch loss: 1.0986357927322388


 43%|████▎     | 713/1677 [09:30<12:36,  1.27it/s]

batch loss: 1.0986576080322266


 43%|████▎     | 714/1677 [09:31<12:22,  1.30it/s]

batch loss: 1.0986242294311523


 43%|████▎     | 715/1677 [09:31<12:13,  1.31it/s]

batch loss: 1.098638653755188


 43%|████▎     | 716/1677 [09:32<12:03,  1.33it/s]

batch loss: 1.0985713005065918


 43%|████▎     | 717/1677 [09:33<11:49,  1.35it/s]

batch loss: 1.0985751152038574


 43%|████▎     | 718/1677 [09:34<12:23,  1.29it/s]

batch loss: 1.0986053943634033


 43%|████▎     | 719/1677 [09:34<12:13,  1.31it/s]

batch loss: 1.0985660552978516


 43%|████▎     | 720/1677 [09:35<12:06,  1.32it/s]

batch loss: 1.0986229181289673


 43%|████▎     | 721/1677 [09:36<12:00,  1.33it/s]

batch loss: 1.0986530780792236


 43%|████▎     | 722/1677 [09:37<11:58,  1.33it/s]

batch loss: 1.0986328125


 43%|████▎     | 723/1677 [09:37<12:06,  1.31it/s]

batch loss: 1.0985747575759888


 43%|████▎     | 724/1677 [09:38<12:17,  1.29it/s]

batch loss: 1.098572015762329


 43%|████▎     | 725/1677 [09:39<12:03,  1.32it/s]

batch loss: 1.0985617637634277


 43%|████▎     | 726/1677 [09:40<12:12,  1.30it/s]

batch loss: 1.0985426902770996


 43%|████▎     | 727/1677 [09:40<11:54,  1.33it/s]

batch loss: 1.0985949039459229


 43%|████▎     | 728/1677 [09:41<11:53,  1.33it/s]

batch loss: 1.0985690355300903


 43%|████▎     | 729/1677 [09:42<12:23,  1.28it/s]

batch loss: 1.098607063293457


 44%|████▎     | 730/1677 [09:43<12:23,  1.27it/s]

batch loss: 1.098645567893982


 44%|████▎     | 731/1677 [09:44<12:26,  1.27it/s]

batch loss: 1.0986073017120361


 44%|████▎     | 732/1677 [09:44<12:15,  1.28it/s]

batch loss: 1.0985889434814453


 44%|████▎     | 733/1677 [09:45<12:14,  1.29it/s]

batch loss: 1.098598837852478


 44%|████▍     | 734/1677 [09:46<12:20,  1.27it/s]

batch loss: 1.0986194610595703


 44%|████▍     | 735/1677 [09:47<12:44,  1.23it/s]

batch loss: 1.0985620021820068


 44%|████▍     | 736/1677 [09:47<12:13,  1.28it/s]

batch loss: 1.0985994338989258


 44%|████▍     | 737/1677 [09:48<11:49,  1.33it/s]

batch loss: 1.0985482931137085


 44%|████▍     | 738/1677 [09:49<12:03,  1.30it/s]

batch loss: 1.0985825061798096


 44%|████▍     | 739/1677 [09:50<12:12,  1.28it/s]

batch loss: 1.0984975099563599


 44%|████▍     | 740/1677 [09:51<12:11,  1.28it/s]

batch loss: 1.0985534191131592


 44%|████▍     | 741/1677 [09:51<12:04,  1.29it/s]

batch loss: 1.0985840559005737


 44%|████▍     | 742/1677 [09:52<11:56,  1.30it/s]

batch loss: 1.098496913909912


 44%|████▍     | 743/1677 [09:53<12:05,  1.29it/s]

batch loss: 1.0985289812088013


 44%|████▍     | 744/1677 [09:54<12:11,  1.28it/s]

batch loss: 1.0984878540039062


 44%|████▍     | 745/1677 [09:54<12:04,  1.29it/s]

batch loss: 1.0983917713165283


 44%|████▍     | 746/1677 [09:55<12:29,  1.24it/s]

batch loss: 1.0981910228729248


 45%|████▍     | 747/1677 [09:56<12:29,  1.24it/s]

batch loss: 1.0983707904815674


 45%|████▍     | 748/1677 [09:57<12:13,  1.27it/s]

batch loss: 1.0982507467269897


 45%|████▍     | 749/1677 [09:57<11:31,  1.34it/s]

batch loss: 1.0989404916763306


 45%|████▍     | 750/1677 [09:58<11:40,  1.32it/s]

batch loss: 1.0986615419387817


 45%|████▍     | 751/1677 [09:59<11:49,  1.31it/s]

batch loss: 1.0941119194030762


 45%|████▍     | 752/1677 [10:00<11:45,  1.31it/s]

batch loss: 1.0964741706848145


 45%|████▍     | 753/1677 [10:01<11:59,  1.28it/s]

batch loss: 1.1027867794036865


 45%|████▍     | 754/1677 [10:01<12:06,  1.27it/s]

batch loss: 1.1068698167800903


 45%|████▌     | 755/1677 [10:02<12:10,  1.26it/s]

batch loss: 1.1024452447891235


 45%|████▌     | 756/1677 [10:03<11:58,  1.28it/s]

batch loss: 1.096476435661316


 45%|████▌     | 757/1677 [10:04<12:30,  1.23it/s]

batch loss: 1.0993752479553223


 45%|████▌     | 758/1677 [10:05<12:44,  1.20it/s]

batch loss: 1.0988606214523315


 45%|████▌     | 759/1677 [10:06<12:30,  1.22it/s]

batch loss: 1.0979024171829224


 45%|████▌     | 760/1677 [10:06<12:42,  1.20it/s]

batch loss: 1.096118450164795


 45%|████▌     | 761/1677 [10:07<12:29,  1.22it/s]

batch loss: 1.0974042415618896


 45%|████▌     | 762/1677 [10:08<12:36,  1.21it/s]

batch loss: 1.0983846187591553


 45%|████▌     | 763/1677 [10:09<12:42,  1.20it/s]

batch loss: 1.0987008810043335


 46%|████▌     | 764/1677 [10:10<12:51,  1.18it/s]

batch loss: 1.0971497297286987


 46%|████▌     | 765/1677 [10:11<12:57,  1.17it/s]

batch loss: 1.0983084440231323


 46%|████▌     | 766/1677 [10:11<12:46,  1.19it/s]

batch loss: 1.0917203426361084


 46%|████▌     | 767/1677 [10:12<12:19,  1.23it/s]

batch loss: 1.0985556840896606


 46%|████▌     | 768/1677 [10:13<12:37,  1.20it/s]

batch loss: 1.1095023155212402


 46%|████▌     | 769/1677 [10:14<12:22,  1.22it/s]

batch loss: 1.1002860069274902


 46%|████▌     | 770/1677 [10:15<12:33,  1.20it/s]

batch loss: 1.1160818338394165


 46%|████▌     | 771/1677 [10:15<12:05,  1.25it/s]

batch loss: 1.0922261476516724


 46%|████▌     | 772/1677 [10:16<12:28,  1.21it/s]

batch loss: 1.1127212047576904


 46%|████▌     | 773/1677 [10:17<12:01,  1.25it/s]

batch loss: 1.086082100868225


 46%|████▌     | 774/1677 [10:18<12:15,  1.23it/s]

batch loss: 1.1077475547790527


 46%|████▌     | 775/1677 [10:19<12:11,  1.23it/s]

batch loss: 1.1099209785461426


 46%|████▋     | 776/1677 [10:20<12:27,  1.21it/s]

batch loss: 1.0989859104156494


 46%|████▋     | 777/1677 [10:20<12:05,  1.24it/s]

batch loss: 1.0973817110061646


 46%|████▋     | 778/1677 [10:21<11:55,  1.26it/s]

batch loss: 1.1007524728775024


 46%|████▋     | 779/1677 [10:22<12:29,  1.20it/s]

batch loss: 1.0985723733901978


 47%|████▋     | 780/1677 [10:23<12:41,  1.18it/s]

batch loss: 1.1011124849319458


 47%|████▋     | 781/1677 [10:24<12:13,  1.22it/s]

batch loss: 1.1042827367782593


 47%|████▋     | 782/1677 [10:24<11:49,  1.26it/s]

batch loss: 1.0960948467254639


 47%|████▋     | 783/1677 [10:25<12:07,  1.23it/s]

batch loss: 1.0996242761611938


 47%|████▋     | 784/1677 [10:26<11:38,  1.28it/s]

batch loss: 1.0985331535339355


 47%|████▋     | 785/1677 [10:27<11:30,  1.29it/s]

batch loss: 1.104796290397644


 47%|████▋     | 786/1677 [10:27<11:22,  1.31it/s]

batch loss: 1.0959880352020264


 47%|████▋     | 787/1677 [10:28<11:17,  1.31it/s]

batch loss: 1.0985723733901978


 47%|████▋     | 788/1677 [10:29<12:00,  1.23it/s]

batch loss: 1.1049187183380127


 47%|████▋     | 789/1677 [10:30<11:41,  1.27it/s]

batch loss: 1.1041412353515625


 47%|████▋     | 790/1677 [10:31<12:02,  1.23it/s]

batch loss: 1.1015610694885254


 47%|████▋     | 791/1677 [10:32<11:44,  1.26it/s]

batch loss: 1.0988222360610962


 47%|████▋     | 792/1677 [10:32<11:31,  1.28it/s]

batch loss: 1.1012530326843262


 47%|████▋     | 793/1677 [10:33<11:26,  1.29it/s]

batch loss: 1.0962789058685303


 47%|████▋     | 794/1677 [10:34<11:30,  1.28it/s]

batch loss: 1.0958198308944702


 47%|████▋     | 795/1677 [10:35<11:54,  1.23it/s]

batch loss: 1.1006911993026733


 47%|████▋     | 796/1677 [10:35<11:49,  1.24it/s]

batch loss: 1.102673053741455


 48%|████▊     | 797/1677 [10:36<11:34,  1.27it/s]

batch loss: 1.0946993827819824


 48%|████▊     | 798/1677 [10:37<11:31,  1.27it/s]

batch loss: 1.0842870473861694


 48%|████▊     | 799/1677 [10:38<11:17,  1.30it/s]

batch loss: 1.0922945737838745


 48%|████▊     | 800/1677 [10:39<11:23,  1.28it/s]

batch loss: 1.1060810089111328


 48%|████▊     | 801/1677 [10:39<11:18,  1.29it/s]

batch loss: 1.076705813407898


 48%|████▊     | 802/1677 [10:40<11:39,  1.25it/s]

batch loss: 1.0674444437026978


 48%|████▊     | 803/1677 [10:41<11:25,  1.28it/s]

batch loss: 1.0797045230865479


 48%|████▊     | 804/1677 [10:42<11:16,  1.29it/s]

batch loss: 1.1095691919326782


 48%|████▊     | 805/1677 [10:42<11:24,  1.27it/s]

batch loss: 1.0915786027908325


 48%|████▊     | 806/1677 [10:43<11:30,  1.26it/s]

batch loss: 1.0994495153427124


 48%|████▊     | 807/1677 [10:44<11:19,  1.28it/s]

batch loss: 1.0749163627624512


 48%|████▊     | 808/1677 [10:45<11:12,  1.29it/s]

batch loss: 1.1209503412246704


 48%|████▊     | 809/1677 [10:46<11:22,  1.27it/s]

batch loss: 1.105878472328186


 48%|████▊     | 810/1677 [10:47<11:46,  1.23it/s]

batch loss: 1.1011689901351929


 48%|████▊     | 811/1677 [10:47<11:25,  1.26it/s]

batch loss: 1.0963212251663208


 48%|████▊     | 812/1677 [10:48<11:24,  1.26it/s]

batch loss: 1.0208303928375244


 48%|████▊     | 813/1677 [10:49<11:39,  1.24it/s]

batch loss: 1.1459550857543945


 49%|████▊     | 814/1677 [10:50<11:41,  1.23it/s]

batch loss: 1.1014217138290405


 49%|████▊     | 815/1677 [10:50<11:20,  1.27it/s]

batch loss: 1.1436717510223389


 49%|████▊     | 816/1677 [10:51<11:41,  1.23it/s]

batch loss: 1.0436949729919434


 49%|████▊     | 817/1677 [10:52<11:37,  1.23it/s]

batch loss: 1.1200287342071533


 49%|████▉     | 818/1677 [10:53<11:21,  1.26it/s]

batch loss: 1.132768988609314


 49%|████▉     | 819/1677 [10:54<11:20,  1.26it/s]

batch loss: 1.1240979433059692


 49%|████▉     | 820/1677 [10:55<11:39,  1.22it/s]

batch loss: 1.1504539251327515


 49%|████▉     | 821/1677 [10:55<11:15,  1.27it/s]

batch loss: 1.1004194021224976


 49%|████▉     | 822/1677 [10:56<11:05,  1.29it/s]

batch loss: 1.0945875644683838


 49%|████▉     | 823/1677 [10:57<11:34,  1.23it/s]

batch loss: 1.0787529945373535


 49%|████▉     | 824/1677 [10:58<11:15,  1.26it/s]

batch loss: 1.0803319215774536


 49%|████▉     | 825/1677 [10:59<11:33,  1.23it/s]

batch loss: 1.095548391342163


 49%|████▉     | 826/1677 [10:59<11:46,  1.21it/s]

batch loss: 1.068529725074768


 49%|████▉     | 827/1677 [11:00<11:38,  1.22it/s]

batch loss: 1.0847549438476562


 49%|████▉     | 828/1677 [11:01<11:57,  1.18it/s]

batch loss: 1.1079556941986084


 49%|████▉     | 829/1677 [11:02<11:22,  1.24it/s]

batch loss: 1.0894749164581299


 49%|████▉     | 830/1677 [11:03<11:37,  1.21it/s]

batch loss: 1.0683729648590088


 50%|████▉     | 831/1677 [11:03<11:29,  1.23it/s]

batch loss: 1.1298733949661255


 50%|████▉     | 832/1677 [11:04<11:22,  1.24it/s]

batch loss: 1.1376922130584717


 50%|████▉     | 833/1677 [11:05<11:45,  1.20it/s]

batch loss: 1.0676931142807007


 50%|████▉     | 834/1677 [11:06<11:22,  1.23it/s]

batch loss: 1.1254688501358032


 50%|████▉     | 835/1677 [11:07<11:20,  1.24it/s]

batch loss: 1.0930730104446411


 50%|████▉     | 836/1677 [11:07<11:10,  1.25it/s]

batch loss: 1.123514175415039


 50%|████▉     | 837/1677 [11:08<10:58,  1.28it/s]

batch loss: 1.0795578956604004


 50%|████▉     | 838/1677 [11:09<11:19,  1.23it/s]

batch loss: 1.0494588613510132


 50%|█████     | 839/1677 [11:10<11:03,  1.26it/s]

batch loss: 1.118410348892212


 50%|█████     | 840/1677 [11:11<10:52,  1.28it/s]

batch loss: 1.0800034999847412


 50%|█████     | 841/1677 [11:11<10:36,  1.31it/s]

batch loss: 1.0898350477218628


 50%|█████     | 842/1677 [11:12<10:47,  1.29it/s]

batch loss: 1.08030366897583


 50%|█████     | 843/1677 [11:13<10:51,  1.28it/s]

batch loss: 1.1021965742111206


 50%|█████     | 844/1677 [11:14<10:44,  1.29it/s]

batch loss: 1.1013325452804565


 50%|█████     | 845/1677 [11:15<11:22,  1.22it/s]

batch loss: 1.1045596599578857


 50%|█████     | 846/1677 [11:15<11:33,  1.20it/s]

batch loss: 1.0879369974136353


 51%|█████     | 847/1677 [11:16<11:24,  1.21it/s]

batch loss: 1.083070993423462


 51%|█████     | 848/1677 [11:17<11:15,  1.23it/s]

batch loss: 1.1051628589630127


 51%|█████     | 849/1677 [11:18<12:04,  1.14it/s]

batch loss: 1.0525349378585815


 51%|█████     | 850/1677 [11:19<11:58,  1.15it/s]

batch loss: 1.1063079833984375


 51%|█████     | 851/1677 [11:20<12:06,  1.14it/s]

batch loss: 1.1063334941864014


 51%|█████     | 852/1677 [11:21<11:43,  1.17it/s]

batch loss: 1.0670766830444336


 51%|█████     | 853/1677 [11:22<11:51,  1.16it/s]

batch loss: 1.1175564527511597


 51%|█████     | 854/1677 [11:22<11:33,  1.19it/s]

batch loss: 1.1151295900344849


 51%|█████     | 855/1677 [11:23<11:36,  1.18it/s]

batch loss: 1.0922791957855225


 51%|█████     | 856/1677 [11:24<11:08,  1.23it/s]

batch loss: 1.1373019218444824


 51%|█████     | 857/1677 [11:25<11:32,  1.18it/s]

batch loss: 1.074430227279663


 51%|█████     | 858/1677 [11:26<11:14,  1.21it/s]

batch loss: 1.1233583688735962


 51%|█████     | 859/1677 [11:26<10:53,  1.25it/s]

batch loss: 1.0861274003982544


 51%|█████▏    | 860/1677 [11:27<10:49,  1.26it/s]

batch loss: 1.1161569356918335


 51%|█████▏    | 861/1677 [11:28<10:37,  1.28it/s]

batch loss: 1.100122094154358


 51%|█████▏    | 862/1677 [11:29<10:30,  1.29it/s]

batch loss: 1.0900375843048096


 51%|█████▏    | 863/1677 [11:29<10:37,  1.28it/s]

batch loss: 1.0709712505340576


 52%|█████▏    | 864/1677 [11:30<11:07,  1.22it/s]

batch loss: 1.0917892456054688


 52%|█████▏    | 865/1677 [11:31<10:48,  1.25it/s]

batch loss: 1.1178423166275024


 52%|█████▏    | 866/1677 [11:32<11:06,  1.22it/s]

batch loss: 1.094707727432251


 52%|█████▏    | 867/1677 [11:33<10:57,  1.23it/s]

batch loss: 1.100531816482544


 52%|█████▏    | 868/1677 [11:34<11:08,  1.21it/s]

batch loss: 1.1406205892562866


 52%|█████▏    | 869/1677 [11:35<11:26,  1.18it/s]

batch loss: 1.1400457620620728


 52%|█████▏    | 870/1677 [11:35<10:57,  1.23it/s]

batch loss: 1.100570797920227


 52%|█████▏    | 871/1677 [11:36<10:39,  1.26it/s]

batch loss: 1.1163852214813232


 52%|█████▏    | 872/1677 [11:37<10:40,  1.26it/s]

batch loss: 1.0713520050048828


 52%|█████▏    | 873/1677 [11:38<10:34,  1.27it/s]

batch loss: 1.0860942602157593


 52%|█████▏    | 874/1677 [11:38<10:31,  1.27it/s]

batch loss: 1.112301230430603


 52%|█████▏    | 875/1677 [11:39<10:10,  1.31it/s]

batch loss: 1.096624732017517


 52%|█████▏    | 876/1677 [11:40<10:20,  1.29it/s]

batch loss: 1.1088294982910156


 52%|█████▏    | 877/1677 [11:41<10:14,  1.30it/s]

batch loss: 1.1180506944656372


 52%|█████▏    | 878/1677 [11:41<10:13,  1.30it/s]

batch loss: 1.0901459455490112


 52%|█████▏    | 879/1677 [11:42<10:22,  1.28it/s]

batch loss: 1.0865498781204224


 52%|█████▏    | 880/1677 [11:43<10:23,  1.28it/s]

batch loss: 1.113925814628601


 53%|█████▎    | 881/1677 [11:44<10:19,  1.28it/s]

batch loss: 1.0939308404922485


 53%|█████▎    | 882/1677 [11:44<10:04,  1.32it/s]

batch loss: 1.10252046585083


 53%|█████▎    | 883/1677 [11:45<10:01,  1.32it/s]

batch loss: 1.093708872795105


 53%|█████▎    | 884/1677 [11:46<10:25,  1.27it/s]

batch loss: 1.094078779220581


 53%|█████▎    | 885/1677 [11:47<11:55,  1.11it/s]

batch loss: 1.095104694366455


 53%|█████▎    | 886/1677 [11:48<12:22,  1.06it/s]

batch loss: 1.0851680040359497


 53%|█████▎    | 887/1677 [11:49<11:27,  1.15it/s]

batch loss: 1.106126308441162


 53%|█████▎    | 888/1677 [11:50<11:01,  1.19it/s]

batch loss: 1.0903687477111816


 53%|█████▎    | 889/1677 [11:51<11:10,  1.17it/s]

batch loss: 1.096354603767395


 53%|█████▎    | 890/1677 [11:51<11:01,  1.19it/s]

batch loss: 1.1023913621902466


 53%|█████▎    | 891/1677 [11:52<11:04,  1.18it/s]

batch loss: 1.1015173196792603


 53%|█████▎    | 892/1677 [11:53<10:36,  1.23it/s]

batch loss: 1.0964239835739136


 53%|█████▎    | 893/1677 [11:54<10:45,  1.22it/s]

batch loss: 1.113281011581421


 53%|█████▎    | 894/1677 [11:55<11:09,  1.17it/s]

batch loss: 1.1096951961517334


 53%|█████▎    | 895/1677 [11:56<10:52,  1.20it/s]

batch loss: 1.0982407331466675


 53%|█████▎    | 896/1677 [11:56<10:48,  1.20it/s]

batch loss: 1.1050398349761963


 53%|█████▎    | 897/1677 [11:57<10:56,  1.19it/s]

batch loss: 1.10683274269104


 54%|█████▎    | 898/1677 [11:58<10:38,  1.22it/s]

batch loss: 1.0957386493682861


 54%|█████▎    | 899/1677 [11:59<10:34,  1.23it/s]

batch loss: 1.1007072925567627


 54%|█████▎    | 900/1677 [12:00<10:28,  1.24it/s]

batch loss: 1.0946340560913086


 54%|█████▎    | 901/1677 [12:00<10:25,  1.24it/s]

batch loss: 1.1012625694274902


 54%|█████▍    | 902/1677 [12:01<10:17,  1.25it/s]

batch loss: 1.0926798582077026


 54%|█████▍    | 903/1677 [12:02<10:16,  1.26it/s]

batch loss: 1.099265694618225


 54%|█████▍    | 904/1677 [12:03<10:08,  1.27it/s]

batch loss: 1.0941998958587646


 54%|█████▍    | 905/1677 [12:04<10:10,  1.26it/s]

batch loss: 1.1068251132965088


 54%|█████▍    | 906/1677 [12:04<10:29,  1.23it/s]

batch loss: 1.0970979928970337


 54%|█████▍    | 907/1677 [12:05<10:23,  1.23it/s]

batch loss: 1.1026661396026611


 54%|█████▍    | 908/1677 [12:06<10:09,  1.26it/s]

batch loss: 1.0943483114242554


 54%|█████▍    | 909/1677 [12:07<10:36,  1.21it/s]

batch loss: 1.096777081489563


 54%|█████▍    | 910/1677 [12:08<10:28,  1.22it/s]

batch loss: 1.098656415939331


 54%|█████▍    | 911/1677 [12:09<10:22,  1.23it/s]

batch loss: 1.0887027978897095


 54%|█████▍    | 912/1677 [12:09<10:16,  1.24it/s]

batch loss: 1.1006675958633423


 54%|█████▍    | 913/1677 [12:10<10:43,  1.19it/s]

batch loss: 1.0950976610183716


 55%|█████▍    | 914/1677 [12:11<10:08,  1.25it/s]

batch loss: 1.112142562866211


 55%|█████▍    | 915/1677 [12:12<10:09,  1.25it/s]

batch loss: 1.0990029573440552


 55%|█████▍    | 916/1677 [12:12<09:54,  1.28it/s]

batch loss: 1.09930419921875


 55%|█████▍    | 917/1677 [12:13<09:55,  1.28it/s]

batch loss: 1.0983147621154785


 55%|█████▍    | 918/1677 [12:14<10:19,  1.23it/s]

batch loss: 1.0895229578018188


 55%|█████▍    | 919/1677 [12:15<11:01,  1.15it/s]

batch loss: 1.0983718633651733


 55%|█████▍    | 920/1677 [12:16<10:56,  1.15it/s]

batch loss: 1.1001907587051392


 55%|█████▍    | 921/1677 [12:17<10:36,  1.19it/s]

batch loss: 1.100328803062439


 55%|█████▍    | 922/1677 [12:18<10:27,  1.20it/s]

batch loss: 1.1056898832321167


 55%|█████▌    | 923/1677 [12:18<10:38,  1.18it/s]

batch loss: 1.1016006469726562


 55%|█████▌    | 924/1677 [12:19<10:12,  1.23it/s]

batch loss: 1.0859756469726562


 55%|█████▌    | 925/1677 [12:20<10:08,  1.24it/s]

batch loss: 1.0983858108520508


 55%|█████▌    | 926/1677 [12:21<09:53,  1.26it/s]

batch loss: 1.1017634868621826


 55%|█████▌    | 927/1677 [12:22<09:42,  1.29it/s]

batch loss: 1.1061819791793823


 55%|█████▌    | 928/1677 [12:22<09:49,  1.27it/s]

batch loss: 1.1065796613693237


 55%|█████▌    | 929/1677 [12:23<09:40,  1.29it/s]

batch loss: 1.0982574224472046


 55%|█████▌    | 930/1677 [12:24<09:35,  1.30it/s]

batch loss: 1.0939067602157593


 56%|█████▌    | 931/1677 [12:25<09:28,  1.31it/s]

batch loss: 1.0988266468048096


 56%|█████▌    | 932/1677 [12:25<09:17,  1.34it/s]

batch loss: 1.0955976247787476


 56%|█████▌    | 933/1677 [12:26<09:42,  1.28it/s]

batch loss: 1.0959422588348389


 56%|█████▌    | 934/1677 [12:27<10:05,  1.23it/s]

batch loss: 1.1064656972885132


 56%|█████▌    | 935/1677 [12:28<09:53,  1.25it/s]

batch loss: 1.097615122795105


 56%|█████▌    | 936/1677 [12:29<09:53,  1.25it/s]

batch loss: 1.1034046411514282


 56%|█████▌    | 937/1677 [12:30<10:16,  1.20it/s]

batch loss: 1.10890793800354


 56%|█████▌    | 938/1677 [12:30<09:53,  1.25it/s]

batch loss: 1.091099739074707


 56%|█████▌    | 939/1677 [12:31<09:40,  1.27it/s]

batch loss: 1.0987498760223389


 56%|█████▌    | 940/1677 [12:32<09:04,  1.35it/s]

batch loss: 1.0943416357040405


 56%|█████▌    | 941/1677 [12:32<09:05,  1.35it/s]

batch loss: 1.1091077327728271


 56%|█████▌    | 942/1677 [12:33<09:17,  1.32it/s]

batch loss: 1.0990246534347534


 56%|█████▌    | 943/1677 [12:34<09:27,  1.29it/s]

batch loss: 1.0951265096664429


 56%|█████▋    | 944/1677 [12:35<09:45,  1.25it/s]

batch loss: 1.1039046049118042


 56%|█████▋    | 945/1677 [12:36<09:58,  1.22it/s]

batch loss: 1.1015632152557373


 56%|█████▋    | 946/1677 [12:36<09:46,  1.25it/s]

batch loss: 1.0934146642684937


 56%|█████▋    | 947/1677 [12:37<09:31,  1.28it/s]

batch loss: 1.090104341506958


 57%|█████▋    | 948/1677 [12:38<09:35,  1.27it/s]

batch loss: 1.09981369972229


 57%|█████▋    | 949/1677 [12:39<09:28,  1.28it/s]

batch loss: 1.0996265411376953


 57%|█████▋    | 950/1677 [12:40<09:52,  1.23it/s]

batch loss: 1.1019635200500488


 57%|█████▋    | 951/1677 [12:41<10:02,  1.20it/s]

batch loss: 1.1133414506912231


 57%|█████▋    | 952/1677 [12:41<10:22,  1.16it/s]

batch loss: 1.0977733135223389


 57%|█████▋    | 953/1677 [12:42<10:07,  1.19it/s]

batch loss: 1.09428071975708


 57%|█████▋    | 954/1677 [12:43<10:13,  1.18it/s]

batch loss: 1.0984193086624146


 57%|█████▋    | 955/1677 [12:44<09:42,  1.24it/s]

batch loss: 1.0994118452072144


 57%|█████▋    | 956/1677 [12:45<09:29,  1.27it/s]

batch loss: 1.0980623960494995


 57%|█████▋    | 957/1677 [12:45<09:46,  1.23it/s]

batch loss: 1.0964034795761108


 57%|█████▋    | 958/1677 [12:46<09:43,  1.23it/s]

batch loss: 1.0953655242919922


 57%|█████▋    | 959/1677 [12:47<09:50,  1.22it/s]

batch loss: 1.1031248569488525


 57%|█████▋    | 960/1677 [12:48<09:57,  1.20it/s]

batch loss: 1.0926271677017212


 57%|█████▋    | 961/1677 [12:49<09:50,  1.21it/s]

batch loss: 1.08797025680542


 57%|█████▋    | 962/1677 [12:50<09:44,  1.22it/s]

batch loss: 1.1066261529922485


 57%|█████▋    | 963/1677 [12:50<09:28,  1.26it/s]

batch loss: 1.0930603742599487


 57%|█████▋    | 964/1677 [12:51<10:11,  1.17it/s]

batch loss: 1.1000590324401855


 58%|█████▊    | 965/1677 [12:52<09:43,  1.22it/s]

batch loss: 1.0941663980484009


 58%|█████▊    | 966/1677 [12:53<09:36,  1.23it/s]

batch loss: 1.0960309505462646


 58%|█████▊    | 967/1677 [12:54<09:22,  1.26it/s]

batch loss: 1.1081130504608154


 58%|█████▊    | 968/1677 [12:54<09:36,  1.23it/s]

batch loss: 1.0944157838821411


 58%|█████▊    | 969/1677 [12:55<10:18,  1.15it/s]

batch loss: 1.1147676706314087


 58%|█████▊    | 970/1677 [12:56<09:58,  1.18it/s]

batch loss: 1.0978593826293945


 58%|█████▊    | 971/1677 [12:57<10:04,  1.17it/s]

batch loss: 1.0902409553527832


 58%|█████▊    | 972/1677 [12:58<09:53,  1.19it/s]

batch loss: 1.0996695756912231


 58%|█████▊    | 973/1677 [12:59<09:57,  1.18it/s]

batch loss: 1.1017736196517944


 58%|█████▊    | 974/1677 [13:00<10:29,  1.12it/s]

batch loss: 1.091021180152893


 58%|█████▊    | 975/1677 [13:01<10:23,  1.13it/s]

batch loss: 1.0979753732681274


 58%|█████▊    | 976/1677 [13:01<10:00,  1.17it/s]

batch loss: 1.0879778861999512


 58%|█████▊    | 977/1677 [13:02<09:39,  1.21it/s]

batch loss: 1.0978775024414062


 58%|█████▊    | 978/1677 [13:03<09:43,  1.20it/s]

batch loss: 1.0959563255310059


 58%|█████▊    | 979/1677 [13:04<09:47,  1.19it/s]

batch loss: 1.0992909669876099


 58%|█████▊    | 980/1677 [13:05<09:38,  1.21it/s]

batch loss: 1.0999197959899902


 58%|█████▊    | 981/1677 [13:06<09:43,  1.19it/s]

batch loss: 1.112428069114685


 59%|█████▊    | 982/1677 [13:06<09:20,  1.24it/s]

batch loss: 1.092475175857544


 59%|█████▊    | 983/1677 [13:07<09:08,  1.26it/s]

batch loss: 1.102609395980835


 59%|█████▊    | 984/1677 [13:08<09:22,  1.23it/s]

batch loss: 1.0985678434371948


 59%|█████▊    | 985/1677 [13:09<09:07,  1.26it/s]

batch loss: 1.0917617082595825


 59%|█████▉    | 986/1677 [13:09<08:56,  1.29it/s]

batch loss: 1.0799989700317383


 59%|█████▉    | 987/1677 [13:10<08:54,  1.29it/s]

batch loss: 1.0950450897216797


 59%|█████▉    | 988/1677 [13:11<08:49,  1.30it/s]

batch loss: 1.0871614217758179


 59%|█████▉    | 989/1677 [13:12<08:44,  1.31it/s]

batch loss: 1.1020313501358032


 59%|█████▉    | 990/1677 [13:12<08:40,  1.32it/s]

batch loss: 1.092406153678894


 59%|█████▉    | 991/1677 [13:13<08:36,  1.33it/s]

batch loss: 1.098685383796692


 59%|█████▉    | 992/1677 [13:14<08:36,  1.33it/s]

batch loss: 1.1209865808486938


 59%|█████▉    | 993/1677 [13:15<09:01,  1.26it/s]

batch loss: 1.0924580097198486


 59%|█████▉    | 994/1677 [13:16<09:13,  1.23it/s]

batch loss: 1.1076565980911255


 59%|█████▉    | 995/1677 [13:16<09:09,  1.24it/s]

batch loss: 1.096933364868164


 59%|█████▉    | 996/1677 [13:17<09:22,  1.21it/s]

batch loss: 1.0945605039596558


 59%|█████▉    | 997/1677 [13:18<09:02,  1.25it/s]

batch loss: 1.1061638593673706


 60%|█████▉    | 998/1677 [13:19<09:21,  1.21it/s]

batch loss: 1.0980861186981201


 60%|█████▉    | 999/1677 [13:20<09:00,  1.25it/s]

batch loss: 1.1004687547683716


 60%|█████▉    | 1000/1677 [13:20<08:55,  1.27it/s]

batch loss: 1.0945407152175903


 60%|█████▉    | 1001/1677 [13:21<09:16,  1.22it/s]

batch loss: 1.0904929637908936


 60%|█████▉    | 1002/1677 [13:22<09:31,  1.18it/s]

batch loss: 1.1153355836868286


 60%|█████▉    | 1003/1677 [13:23<09:13,  1.22it/s]

batch loss: 1.0916167497634888


 60%|█████▉    | 1004/1677 [13:24<09:10,  1.22it/s]

batch loss: 1.1032111644744873


 60%|█████▉    | 1005/1677 [13:25<08:59,  1.25it/s]

batch loss: 1.0828306674957275


 60%|█████▉    | 1006/1677 [13:25<08:47,  1.27it/s]

batch loss: 1.125604510307312


 60%|██████    | 1007/1677 [13:26<09:04,  1.23it/s]

batch loss: 1.0990217924118042


 60%|██████    | 1008/1677 [13:27<09:16,  1.20it/s]

batch loss: 1.0867458581924438


 60%|██████    | 1009/1677 [13:28<08:55,  1.25it/s]

batch loss: 1.0925036668777466


 60%|██████    | 1010/1677 [13:29<08:44,  1.27it/s]

batch loss: 1.097933053970337


 60%|██████    | 1011/1677 [13:29<08:34,  1.29it/s]

batch loss: 1.0887634754180908


 60%|██████    | 1012/1677 [13:30<09:03,  1.22it/s]

batch loss: 1.1331548690795898


 60%|██████    | 1013/1677 [13:31<08:46,  1.26it/s]

batch loss: 1.0870883464813232


 60%|██████    | 1014/1677 [13:32<08:56,  1.24it/s]

batch loss: 1.0945073366165161


 61%|██████    | 1015/1677 [13:33<08:39,  1.27it/s]

batch loss: 1.0985361337661743


 61%|██████    | 1016/1677 [13:33<08:54,  1.24it/s]

batch loss: 1.0942442417144775


 61%|██████    | 1017/1677 [13:34<08:42,  1.26it/s]

batch loss: 1.0885695219039917


 61%|██████    | 1018/1677 [13:35<08:32,  1.29it/s]

batch loss: 1.1076139211654663


 61%|██████    | 1019/1677 [13:36<08:26,  1.30it/s]

batch loss: 1.1072252988815308


 61%|██████    | 1020/1677 [13:37<08:44,  1.25it/s]

batch loss: 1.0934886932373047


 61%|██████    | 1021/1677 [13:37<08:34,  1.28it/s]

batch loss: 1.0977565050125122


 61%|██████    | 1022/1677 [13:38<08:48,  1.24it/s]

batch loss: 1.0863465070724487


 61%|██████    | 1023/1677 [13:39<08:31,  1.28it/s]

batch loss: 1.0917158126831055


 61%|██████    | 1024/1677 [13:40<08:26,  1.29it/s]

batch loss: 1.0961735248565674


 61%|██████    | 1025/1677 [13:40<08:23,  1.29it/s]

batch loss: 1.1234029531478882


 61%|██████    | 1026/1677 [13:41<08:44,  1.24it/s]

batch loss: 1.0881428718566895


 61%|██████    | 1027/1677 [13:42<08:41,  1.25it/s]

batch loss: 1.1107089519500732


 61%|██████▏   | 1028/1677 [13:43<08:36,  1.26it/s]

batch loss: 1.1009907722473145


 61%|██████▏   | 1029/1677 [13:44<08:45,  1.23it/s]

batch loss: 1.0945186614990234


 61%|██████▏   | 1030/1677 [13:45<09:02,  1.19it/s]

batch loss: 1.104849100112915


 61%|██████▏   | 1031/1677 [13:45<08:53,  1.21it/s]

batch loss: 1.0765641927719116


 62%|██████▏   | 1032/1677 [13:46<08:56,  1.20it/s]

batch loss: 1.088654637336731


 62%|██████▏   | 1033/1677 [13:47<08:49,  1.22it/s]

batch loss: 1.1047555208206177


 62%|██████▏   | 1034/1677 [13:48<08:43,  1.23it/s]

batch loss: 1.0886282920837402


 62%|██████▏   | 1035/1677 [13:49<08:32,  1.25it/s]

batch loss: 1.1097503900527954


 62%|██████▏   | 1036/1677 [13:49<08:15,  1.29it/s]

batch loss: 1.0836865901947021


 62%|██████▏   | 1037/1677 [13:50<08:12,  1.30it/s]

batch loss: 1.0984444618225098


 62%|██████▏   | 1038/1677 [13:51<08:33,  1.24it/s]

batch loss: 1.1015048027038574


 62%|██████▏   | 1039/1677 [13:52<08:31,  1.25it/s]

batch loss: 1.1011370420455933


 62%|██████▏   | 1040/1677 [13:53<08:52,  1.20it/s]

batch loss: 1.100131869316101


 62%|██████▏   | 1041/1677 [13:53<08:32,  1.24it/s]

batch loss: 1.1067099571228027


 62%|██████▏   | 1042/1677 [13:54<08:42,  1.22it/s]

batch loss: 1.0988346338272095


 62%|██████▏   | 1043/1677 [13:55<08:22,  1.26it/s]

batch loss: 1.0928857326507568


 62%|██████▏   | 1044/1677 [13:56<08:17,  1.27it/s]

batch loss: 1.111005187034607


 62%|██████▏   | 1045/1677 [13:57<08:09,  1.29it/s]

batch loss: 1.1031453609466553


 62%|██████▏   | 1046/1677 [13:57<08:13,  1.28it/s]

batch loss: 1.085022211074829


 62%|██████▏   | 1047/1677 [13:58<08:08,  1.29it/s]

batch loss: 1.110790729522705


 62%|██████▏   | 1048/1677 [13:59<08:37,  1.22it/s]

batch loss: 1.0943775177001953


 63%|██████▎   | 1049/1677 [14:00<08:21,  1.25it/s]

batch loss: 1.0956963300704956


 63%|██████▎   | 1050/1677 [14:01<08:40,  1.20it/s]

batch loss: 1.0954638719558716


 63%|██████▎   | 1051/1677 [14:01<08:20,  1.25it/s]

batch loss: 1.1154553890228271


 63%|██████▎   | 1052/1677 [14:02<08:08,  1.28it/s]

batch loss: 1.0981382131576538


 63%|██████▎   | 1053/1677 [14:03<08:01,  1.30it/s]

batch loss: 1.1037219762802124


 63%|██████▎   | 1054/1677 [14:04<07:58,  1.30it/s]

batch loss: 1.1117857694625854


 63%|██████▎   | 1055/1677 [14:04<08:00,  1.29it/s]

batch loss: 1.077475905418396


 63%|██████▎   | 1056/1677 [14:05<08:03,  1.28it/s]

batch loss: 1.0904077291488647


 63%|██████▎   | 1057/1677 [14:06<08:09,  1.27it/s]

batch loss: 1.1005642414093018


 63%|██████▎   | 1058/1677 [14:07<08:21,  1.23it/s]

batch loss: 1.108586311340332


 63%|██████▎   | 1059/1677 [14:08<08:18,  1.24it/s]

batch loss: 1.1093337535858154


 63%|██████▎   | 1060/1677 [14:08<08:07,  1.27it/s]

batch loss: 1.074855089187622


 63%|██████▎   | 1061/1677 [14:09<08:08,  1.26it/s]

batch loss: 1.1103876829147339


 63%|██████▎   | 1062/1677 [14:10<07:57,  1.29it/s]

batch loss: 1.0821439027786255


 63%|██████▎   | 1063/1677 [14:11<07:51,  1.30it/s]

batch loss: 1.103788137435913


 63%|██████▎   | 1064/1677 [14:12<07:57,  1.28it/s]

batch loss: 1.0938036441802979


 64%|██████▎   | 1065/1677 [14:12<07:50,  1.30it/s]

batch loss: 1.0798707008361816


 64%|██████▎   | 1066/1677 [14:13<07:46,  1.31it/s]

batch loss: 1.093519926071167


 64%|██████▎   | 1067/1677 [14:14<08:11,  1.24it/s]

batch loss: 1.0987632274627686


 64%|██████▎   | 1068/1677 [14:15<08:23,  1.21it/s]

batch loss: 1.104737639427185


 64%|██████▎   | 1069/1677 [14:16<08:08,  1.24it/s]

batch loss: 1.1071512699127197


 64%|██████▍   | 1070/1677 [14:16<08:05,  1.25it/s]

batch loss: 1.0905048847198486


 64%|██████▍   | 1071/1677 [14:17<08:18,  1.22it/s]

batch loss: 1.1058692932128906


 64%|██████▍   | 1072/1677 [14:18<08:02,  1.25it/s]

batch loss: 1.10886812210083


 64%|██████▍   | 1073/1677 [14:19<07:49,  1.29it/s]

batch loss: 1.110095500946045


 64%|██████▍   | 1074/1677 [14:20<08:05,  1.24it/s]

batch loss: 1.1046444177627563


 64%|██████▍   | 1075/1677 [14:20<08:03,  1.24it/s]

batch loss: 1.1074646711349487


 64%|██████▍   | 1076/1677 [14:21<08:00,  1.25it/s]

batch loss: 1.0937557220458984


 64%|██████▍   | 1077/1677 [14:22<08:17,  1.21it/s]

batch loss: 1.0968278646469116


 64%|██████▍   | 1078/1677 [14:23<08:32,  1.17it/s]

batch loss: 1.1028858423233032


 64%|██████▍   | 1079/1677 [14:24<08:08,  1.22it/s]

batch loss: 1.0760159492492676


 64%|██████▍   | 1080/1677 [14:25<08:16,  1.20it/s]

batch loss: 1.0942809581756592


 64%|██████▍   | 1081/1677 [14:25<08:10,  1.22it/s]

batch loss: 1.1173725128173828


 65%|██████▍   | 1082/1677 [14:26<07:58,  1.24it/s]

batch loss: 1.1115880012512207


 65%|██████▍   | 1083/1677 [14:27<07:47,  1.27it/s]

batch loss: 1.1016108989715576


 65%|██████▍   | 1084/1677 [14:28<07:47,  1.27it/s]

batch loss: 1.0766042470932007


 65%|██████▍   | 1085/1677 [14:29<08:01,  1.23it/s]

batch loss: 1.0811268091201782


 65%|██████▍   | 1086/1677 [14:29<07:58,  1.23it/s]

batch loss: 1.0988825559616089


 65%|██████▍   | 1087/1677 [14:30<07:53,  1.25it/s]

batch loss: 1.1091629266738892


 65%|██████▍   | 1088/1677 [14:31<08:04,  1.22it/s]

batch loss: 1.0984233617782593


 65%|██████▍   | 1089/1677 [14:32<08:00,  1.22it/s]

batch loss: 1.106325626373291


 65%|██████▍   | 1090/1677 [14:33<08:10,  1.20it/s]

batch loss: 1.0903033018112183


 65%|██████▌   | 1091/1677 [14:33<07:54,  1.24it/s]

batch loss: 1.0965962409973145


 65%|██████▌   | 1092/1677 [14:34<07:49,  1.25it/s]

batch loss: 1.1154491901397705


 65%|██████▌   | 1093/1677 [14:35<07:39,  1.27it/s]

batch loss: 1.0992647409439087


 65%|██████▌   | 1094/1677 [14:36<07:42,  1.26it/s]

batch loss: 1.0996627807617188


 65%|██████▌   | 1095/1677 [14:37<07:54,  1.23it/s]

batch loss: 1.0837500095367432


 65%|██████▌   | 1096/1677 [14:38<08:04,  1.20it/s]

batch loss: 1.0837386846542358


 65%|██████▌   | 1097/1677 [14:38<08:13,  1.18it/s]

batch loss: 1.1050456762313843


 65%|██████▌   | 1098/1677 [14:39<08:02,  1.20it/s]

batch loss: 1.1056550741195679


 66%|██████▌   | 1099/1677 [14:40<07:42,  1.25it/s]

batch loss: 1.1105307340621948


 66%|██████▌   | 1100/1677 [14:41<07:33,  1.27it/s]

batch loss: 1.104630470275879


 66%|██████▌   | 1101/1677 [14:42<07:46,  1.24it/s]

batch loss: 1.105669617652893


 66%|██████▌   | 1102/1677 [14:42<07:55,  1.21it/s]

batch loss: 1.0864266157150269


 66%|██████▌   | 1103/1677 [14:43<07:47,  1.23it/s]

batch loss: 1.0976052284240723


 66%|██████▌   | 1104/1677 [14:44<07:42,  1.24it/s]

batch loss: 1.1073836088180542


 66%|██████▌   | 1105/1677 [14:45<07:42,  1.24it/s]

batch loss: 1.0958006381988525


 66%|██████▌   | 1106/1677 [14:46<07:29,  1.27it/s]

batch loss: 1.113103985786438


 66%|██████▌   | 1107/1677 [14:46<06:56,  1.37it/s]

batch loss: 1.094673752784729


 66%|██████▌   | 1108/1677 [14:47<06:59,  1.36it/s]

batch loss: 1.1021095514297485


 66%|██████▌   | 1109/1677 [14:48<07:01,  1.35it/s]

batch loss: 1.0887943506240845


 66%|██████▌   | 1110/1677 [14:48<07:23,  1.28it/s]

batch loss: 1.1088449954986572


 66%|██████▌   | 1111/1677 [14:49<07:09,  1.32it/s]

batch loss: 1.0985888242721558


 66%|██████▋   | 1112/1677 [14:50<07:15,  1.30it/s]

batch loss: 1.1085714101791382


 66%|██████▋   | 1113/1677 [14:51<07:12,  1.30it/s]

batch loss: 1.116927146911621


 66%|██████▋   | 1114/1677 [14:52<07:17,  1.29it/s]

batch loss: 1.0996770858764648


 66%|██████▋   | 1115/1677 [14:52<07:20,  1.28it/s]

batch loss: 1.097235918045044


 67%|██████▋   | 1116/1677 [14:53<07:22,  1.27it/s]

batch loss: 1.102286458015442


 67%|██████▋   | 1117/1677 [14:54<07:20,  1.27it/s]

batch loss: 1.0976892709732056


 67%|██████▋   | 1118/1677 [14:55<07:42,  1.21it/s]

batch loss: 1.0940091609954834


 67%|██████▋   | 1119/1677 [14:56<07:33,  1.23it/s]

batch loss: 1.0964525938034058


 67%|██████▋   | 1120/1677 [14:57<07:41,  1.21it/s]

batch loss: 1.105988621711731


 67%|██████▋   | 1121/1677 [14:57<07:48,  1.19it/s]

batch loss: 1.0982438325881958


 67%|██████▋   | 1122/1677 [14:58<07:32,  1.23it/s]

batch loss: 1.0977329015731812


 67%|██████▋   | 1123/1677 [14:59<07:26,  1.24it/s]

batch loss: 1.0982861518859863


 67%|██████▋   | 1124/1677 [15:00<07:16,  1.27it/s]

batch loss: 1.0993757247924805


 67%|██████▋   | 1125/1677 [15:00<07:11,  1.28it/s]

batch loss: 1.097312331199646


 67%|██████▋   | 1126/1677 [15:01<07:04,  1.30it/s]

batch loss: 1.0997326374053955


 67%|██████▋   | 1127/1677 [15:02<07:07,  1.29it/s]

batch loss: 1.0996626615524292


 67%|██████▋   | 1128/1677 [15:03<07:08,  1.28it/s]

batch loss: 1.0983010530471802


 67%|██████▋   | 1129/1677 [15:04<07:02,  1.30it/s]

batch loss: 1.0962982177734375


 67%|██████▋   | 1130/1677 [15:04<07:18,  1.25it/s]

batch loss: 1.0947128534317017


 67%|██████▋   | 1131/1677 [15:05<07:07,  1.28it/s]

batch loss: 1.1008720397949219


 68%|██████▊   | 1132/1677 [15:06<07:20,  1.24it/s]

batch loss: 1.1013823747634888


 68%|██████▊   | 1133/1677 [15:07<07:17,  1.24it/s]

batch loss: 1.0965068340301514


 68%|██████▊   | 1134/1677 [15:08<07:11,  1.26it/s]

batch loss: 1.1035393476486206


 68%|██████▊   | 1135/1677 [15:08<07:08,  1.27it/s]

batch loss: 1.1010358333587646


 68%|██████▊   | 1136/1677 [15:09<07:10,  1.26it/s]

batch loss: 1.092958688735962


 68%|██████▊   | 1137/1677 [15:10<07:25,  1.21it/s]

batch loss: 1.1000638008117676


 68%|██████▊   | 1138/1677 [15:11<07:31,  1.19it/s]

batch loss: 1.1050920486450195


 68%|██████▊   | 1139/1677 [15:12<07:11,  1.25it/s]

batch loss: 1.1006065607070923


 68%|██████▊   | 1140/1677 [15:12<07:01,  1.27it/s]

batch loss: 1.10020911693573


 68%|██████▊   | 1141/1677 [15:13<07:02,  1.27it/s]

batch loss: 1.1035072803497314


 68%|██████▊   | 1142/1677 [15:14<06:56,  1.28it/s]

batch loss: 1.0998622179031372


 68%|██████▊   | 1143/1677 [15:15<07:13,  1.23it/s]

batch loss: 1.0980453491210938


 68%|██████▊   | 1144/1677 [15:16<07:11,  1.24it/s]

batch loss: 1.0981308221817017


 68%|██████▊   | 1145/1677 [15:16<06:59,  1.27it/s]

batch loss: 1.096386194229126


 68%|██████▊   | 1146/1677 [15:17<07:10,  1.23it/s]

batch loss: 1.1022354364395142


 68%|██████▊   | 1147/1677 [15:18<07:02,  1.25it/s]

batch loss: 1.0979381799697876


 68%|██████▊   | 1148/1677 [15:19<07:03,  1.25it/s]

batch loss: 1.0984731912612915


 69%|██████▊   | 1149/1677 [15:20<07:22,  1.19it/s]

batch loss: 1.0994234085083008


 69%|██████▊   | 1150/1677 [15:20<07:05,  1.24it/s]

batch loss: 1.0989484786987305


 69%|██████▊   | 1151/1677 [15:21<06:56,  1.26it/s]

batch loss: 1.100656270980835


 69%|██████▊   | 1152/1677 [15:22<06:49,  1.28it/s]

batch loss: 1.1005686521530151


 69%|██████▉   | 1153/1677 [15:23<06:43,  1.30it/s]

batch loss: 1.0977590084075928


 69%|██████▉   | 1154/1677 [15:23<06:40,  1.31it/s]

batch loss: 1.0993571281433105


 69%|██████▉   | 1155/1677 [15:24<06:55,  1.26it/s]

batch loss: 1.097773790359497


 69%|██████▉   | 1156/1677 [15:25<07:04,  1.23it/s]

batch loss: 1.0981892347335815


 69%|██████▉   | 1157/1677 [15:26<06:54,  1.26it/s]

batch loss: 1.0975757837295532


 69%|██████▉   | 1158/1677 [15:27<06:45,  1.28it/s]

batch loss: 1.0969152450561523


 69%|██████▉   | 1159/1677 [15:28<06:57,  1.24it/s]

batch loss: 1.098402500152588


 69%|██████▉   | 1160/1677 [15:28<07:05,  1.21it/s]

batch loss: 1.1006443500518799


 69%|██████▉   | 1161/1677 [15:29<06:52,  1.25it/s]

batch loss: 1.1016579866409302


 69%|██████▉   | 1162/1677 [15:30<07:26,  1.15it/s]

batch loss: 1.0970526933670044


 69%|██████▉   | 1163/1677 [15:31<07:08,  1.20it/s]

batch loss: 1.0983480215072632


 69%|██████▉   | 1164/1677 [15:32<07:11,  1.19it/s]

batch loss: 1.1059328317642212


 69%|██████▉   | 1165/1677 [15:33<06:51,  1.25it/s]

batch loss: 1.1033285856246948


 70%|██████▉   | 1166/1677 [15:33<06:43,  1.27it/s]

batch loss: 1.098753571510315


 70%|██████▉   | 1167/1677 [15:34<06:39,  1.28it/s]

batch loss: 1.0961986780166626


 70%|██████▉   | 1168/1677 [15:35<06:20,  1.34it/s]

batch loss: 1.1019480228424072


 70%|██████▉   | 1169/1677 [15:35<06:20,  1.34it/s]

batch loss: 1.0971112251281738


 70%|██████▉   | 1170/1677 [15:36<06:23,  1.32it/s]

batch loss: 1.094584584236145


 70%|██████▉   | 1171/1677 [15:37<06:22,  1.32it/s]

batch loss: 1.095668077468872


 70%|██████▉   | 1172/1677 [15:38<06:29,  1.30it/s]

batch loss: 1.0947329998016357


 70%|██████▉   | 1173/1677 [15:39<06:31,  1.29it/s]

batch loss: 1.1034694910049438


 70%|███████   | 1174/1677 [15:39<06:25,  1.30it/s]

batch loss: 1.0987080335617065


 70%|███████   | 1175/1677 [15:40<06:22,  1.31it/s]

batch loss: 1.0941059589385986


 70%|███████   | 1176/1677 [15:41<06:36,  1.26it/s]

batch loss: 1.100780963897705


 70%|███████   | 1177/1677 [15:42<06:38,  1.25it/s]

batch loss: 1.0938674211502075


 70%|███████   | 1178/1677 [15:42<06:24,  1.30it/s]

batch loss: 1.0896621942520142


 70%|███████   | 1179/1677 [15:43<06:29,  1.28it/s]

batch loss: 1.0830003023147583


 70%|███████   | 1180/1677 [15:44<06:44,  1.23it/s]

batch loss: 1.105413794517517


 70%|███████   | 1181/1677 [15:45<06:34,  1.26it/s]

batch loss: 1.1043425798416138


 70%|███████   | 1182/1677 [15:46<06:30,  1.27it/s]

batch loss: 1.0990819931030273


 71%|███████   | 1183/1677 [15:46<06:22,  1.29it/s]

batch loss: 1.1146023273468018


 71%|███████   | 1184/1677 [15:47<06:24,  1.28it/s]

batch loss: 1.097956895828247


 71%|███████   | 1185/1677 [15:48<06:45,  1.21it/s]

batch loss: 1.1130858659744263


 71%|███████   | 1186/1677 [15:49<06:31,  1.25it/s]

batch loss: 1.0986464023590088


 71%|███████   | 1187/1677 [15:50<06:33,  1.24it/s]

batch loss: 1.096468210220337


 71%|███████   | 1188/1677 [15:50<06:20,  1.29it/s]

batch loss: 1.1048243045806885


 71%|███████   | 1189/1677 [15:51<06:24,  1.27it/s]

batch loss: 1.1068212985992432


 71%|███████   | 1190/1677 [15:52<06:26,  1.26it/s]

batch loss: 1.0846726894378662


 71%|███████   | 1191/1677 [15:53<06:20,  1.28it/s]

batch loss: 1.1122103929519653


 71%|███████   | 1192/1677 [15:54<06:22,  1.27it/s]

batch loss: 1.0964971780776978


 71%|███████   | 1193/1677 [15:54<06:16,  1.29it/s]

batch loss: 1.1160156726837158


 71%|███████   | 1194/1677 [15:55<06:16,  1.28it/s]

batch loss: 1.1093841791152954


 71%|███████▏  | 1195/1677 [15:56<06:14,  1.29it/s]

batch loss: 1.0955873727798462


 71%|███████▏  | 1196/1677 [15:57<06:37,  1.21it/s]

batch loss: 1.0982921123504639


 71%|███████▏  | 1197/1677 [15:58<06:31,  1.23it/s]

batch loss: 1.1190624237060547


 71%|███████▏  | 1198/1677 [15:58<06:24,  1.25it/s]

batch loss: 1.101423740386963


 71%|███████▏  | 1199/1677 [15:59<07:00,  1.14it/s]

batch loss: 1.0942045450210571


 72%|███████▏  | 1200/1677 [16:00<06:36,  1.20it/s]

batch loss: 1.1060518026351929


 72%|███████▏  | 1201/1677 [16:01<06:27,  1.23it/s]

batch loss: 1.0859886407852173


 72%|███████▏  | 1202/1677 [16:02<06:34,  1.20it/s]

batch loss: 1.094758152961731


 72%|███████▏  | 1203/1677 [16:03<06:21,  1.24it/s]

batch loss: 1.1016478538513184


 72%|███████▏  | 1204/1677 [16:03<06:32,  1.20it/s]

batch loss: 1.0952191352844238


 72%|███████▏  | 1205/1677 [16:04<06:38,  1.18it/s]

batch loss: 1.0959792137145996


 72%|███████▏  | 1206/1677 [16:05<06:23,  1.23it/s]

batch loss: 1.0897051095962524


 72%|███████▏  | 1207/1677 [16:06<06:33,  1.19it/s]

batch loss: 1.0992109775543213


 72%|███████▏  | 1208/1677 [16:07<06:26,  1.21it/s]

batch loss: 1.0955718755722046


 72%|███████▏  | 1209/1677 [16:08<06:22,  1.22it/s]

batch loss: 1.0936988592147827


 72%|███████▏  | 1210/1677 [16:08<06:05,  1.28it/s]

batch loss: 1.104731559753418


 72%|███████▏  | 1211/1677 [16:09<06:06,  1.27it/s]

batch loss: 1.0907772779464722


 72%|███████▏  | 1212/1677 [16:10<06:22,  1.21it/s]

batch loss: 1.0968352556228638


 72%|███████▏  | 1213/1677 [16:11<06:17,  1.23it/s]

batch loss: 1.0952214002609253


 72%|███████▏  | 1214/1677 [16:12<06:07,  1.26it/s]

batch loss: 1.0935378074645996


 72%|███████▏  | 1215/1677 [16:12<06:26,  1.20it/s]

batch loss: 1.10313880443573


 73%|███████▎  | 1216/1677 [16:13<06:30,  1.18it/s]

batch loss: 1.1182278394699097


 73%|███████▎  | 1217/1677 [16:14<06:18,  1.21it/s]

batch loss: 1.079819679260254


 73%|███████▎  | 1218/1677 [16:15<05:42,  1.34it/s]

batch loss: 1.0925023555755615


 73%|███████▎  | 1219/1677 [16:15<05:47,  1.32it/s]

batch loss: 1.0924584865570068


 73%|███████▎  | 1220/1677 [16:16<05:46,  1.32it/s]

batch loss: 1.0927608013153076


 73%|███████▎  | 1221/1677 [16:17<05:44,  1.32it/s]

batch loss: 1.0901672840118408


 73%|███████▎  | 1222/1677 [16:18<05:39,  1.34it/s]

batch loss: 1.0885272026062012


 73%|███████▎  | 1223/1677 [16:18<05:44,  1.32it/s]

batch loss: 1.1174052953720093


 73%|███████▎  | 1224/1677 [16:19<05:57,  1.27it/s]

batch loss: 1.0771149396896362


 73%|███████▎  | 1225/1677 [16:20<06:07,  1.23it/s]

batch loss: 1.0973620414733887


 73%|███████▎  | 1226/1677 [16:21<05:56,  1.27it/s]

batch loss: 1.1082208156585693


 73%|███████▎  | 1227/1677 [16:22<05:49,  1.29it/s]

batch loss: 1.0922951698303223


 73%|███████▎  | 1228/1677 [16:23<06:04,  1.23it/s]

batch loss: 1.099459171295166


 73%|███████▎  | 1229/1677 [16:23<06:08,  1.21it/s]

batch loss: 1.0933589935302734


 73%|███████▎  | 1230/1677 [16:24<05:50,  1.28it/s]

batch loss: 1.0979481935501099


 73%|███████▎  | 1231/1677 [16:25<05:46,  1.29it/s]

batch loss: 1.092090368270874


 73%|███████▎  | 1232/1677 [16:26<05:56,  1.25it/s]

batch loss: 1.1126370429992676


 74%|███████▎  | 1233/1677 [16:27<06:11,  1.20it/s]

batch loss: 1.0975191593170166


 74%|███████▎  | 1234/1677 [16:27<05:58,  1.24it/s]

batch loss: 1.0980823040008545


 74%|███████▎  | 1235/1677 [16:28<05:49,  1.26it/s]

batch loss: 1.0935256481170654


 74%|███████▎  | 1236/1677 [16:29<05:50,  1.26it/s]

batch loss: 1.097949504852295


 74%|███████▍  | 1237/1677 [16:30<05:58,  1.23it/s]

batch loss: 1.1032665967941284


 74%|███████▍  | 1238/1677 [16:30<05:23,  1.36it/s]

batch loss: 1.0957231521606445


 74%|███████▍  | 1239/1677 [16:31<05:39,  1.29it/s]

batch loss: 1.102182149887085


 74%|███████▍  | 1240/1677 [16:32<05:41,  1.28it/s]

batch loss: 1.094907522201538


 74%|███████▍  | 1241/1677 [16:33<05:53,  1.23it/s]

batch loss: 1.0970606803894043


 74%|███████▍  | 1242/1677 [16:34<05:30,  1.32it/s]

batch loss: 1.087917447090149


 74%|███████▍  | 1243/1677 [16:34<05:29,  1.32it/s]

batch loss: 1.0924876928329468


 74%|███████▍  | 1244/1677 [16:35<05:42,  1.27it/s]

batch loss: 1.092186450958252


 74%|███████▍  | 1245/1677 [16:36<05:34,  1.29it/s]

batch loss: 1.0930495262145996


 74%|███████▍  | 1246/1677 [16:37<05:47,  1.24it/s]

batch loss: 1.099456548690796


 74%|███████▍  | 1247/1677 [16:38<05:55,  1.21it/s]

batch loss: 1.111379861831665


 74%|███████▍  | 1248/1677 [16:39<06:02,  1.18it/s]

batch loss: 1.0920463800430298


 74%|███████▍  | 1249/1677 [16:39<05:48,  1.23it/s]

batch loss: 1.0947502851486206


 75%|███████▍  | 1250/1677 [16:40<05:33,  1.28it/s]

batch loss: 1.0936925411224365


 75%|███████▍  | 1251/1677 [16:41<05:29,  1.29it/s]

batch loss: 1.0939263105392456


 75%|███████▍  | 1252/1677 [16:42<05:46,  1.23it/s]

batch loss: 1.0928596258163452


 75%|███████▍  | 1253/1677 [16:42<05:35,  1.26it/s]

batch loss: 1.0934321880340576


 75%|███████▍  | 1254/1677 [16:43<05:48,  1.21it/s]

batch loss: 1.1114616394042969


 75%|███████▍  | 1255/1677 [16:44<05:14,  1.34it/s]

batch loss: 1.0953006744384766


 75%|███████▍  | 1256/1677 [16:45<05:22,  1.31it/s]

batch loss: 1.0974692106246948


 75%|███████▍  | 1257/1677 [16:45<05:00,  1.40it/s]

batch loss: 1.0985994338989258


 75%|███████▌  | 1258/1677 [16:46<05:05,  1.37it/s]

batch loss: 1.0985010862350464


 75%|███████▌  | 1259/1677 [16:47<05:05,  1.37it/s]

batch loss: 1.0986019372940063


 75%|███████▌  | 1260/1677 [16:47<04:59,  1.39it/s]

batch loss: 1.098647952079773


 75%|███████▌  | 1261/1677 [16:48<05:16,  1.32it/s]

batch loss: 1.0986599922180176


 75%|███████▌  | 1262/1677 [16:49<05:17,  1.31it/s]

batch loss: 1.0986238718032837


 75%|███████▌  | 1263/1677 [16:50<05:15,  1.31it/s]

batch loss: 1.0986379384994507


 75%|███████▌  | 1264/1677 [16:51<05:08,  1.34it/s]

batch loss: 1.0985833406448364


 75%|███████▌  | 1265/1677 [16:51<05:20,  1.29it/s]

batch loss: 1.0986626148223877


 75%|███████▌  | 1266/1677 [16:52<05:21,  1.28it/s]

batch loss: 1.0986030101776123


 76%|███████▌  | 1267/1677 [16:53<05:16,  1.30it/s]

batch loss: 1.0986167192459106


 76%|███████▌  | 1268/1677 [16:54<05:13,  1.31it/s]

batch loss: 1.0986173152923584


 76%|███████▌  | 1269/1677 [16:54<05:05,  1.33it/s]

batch loss: 1.0986101627349854


 76%|███████▌  | 1270/1677 [16:55<05:23,  1.26it/s]

batch loss: 1.0986125469207764


 76%|███████▌  | 1271/1677 [16:56<05:21,  1.26it/s]

batch loss: 1.0986292362213135


 76%|███████▌  | 1272/1677 [16:57<05:29,  1.23it/s]

batch loss: 1.09861159324646


 76%|███████▌  | 1273/1677 [16:58<05:56,  1.13it/s]

batch loss: 1.0986355543136597


 76%|███████▌  | 1274/1677 [16:59<05:37,  1.20it/s]

batch loss: 1.098630428314209


 76%|███████▌  | 1275/1677 [17:00<05:42,  1.17it/s]

batch loss: 1.0986043214797974


 76%|███████▌  | 1276/1677 [17:00<05:26,  1.23it/s]

batch loss: 1.0986135005950928


 76%|███████▌  | 1277/1677 [17:01<05:32,  1.20it/s]

batch loss: 1.0986135005950928


 76%|███████▌  | 1278/1677 [17:02<05:20,  1.24it/s]

batch loss: 1.0986053943634033


 76%|███████▋  | 1279/1677 [17:03<05:14,  1.26it/s]

batch loss: 1.0986238718032837


 76%|███████▋  | 1280/1677 [17:03<05:02,  1.31it/s]

batch loss: 1.0986181497573853


 76%|███████▋  | 1281/1677 [17:04<04:59,  1.32it/s]

batch loss: 1.098605990409851


 76%|███████▋  | 1282/1677 [17:05<05:02,  1.30it/s]

batch loss: 1.09861159324646


 77%|███████▋  | 1283/1677 [17:06<04:59,  1.32it/s]

batch loss: 1.0986067056655884


 77%|███████▋  | 1284/1677 [17:07<05:11,  1.26it/s]

batch loss: 1.09861421585083


 77%|███████▋  | 1285/1677 [17:07<05:12,  1.25it/s]

batch loss: 1.0986181497573853


 77%|███████▋  | 1286/1677 [17:08<05:05,  1.28it/s]

batch loss: 1.0986146926879883


 77%|███████▋  | 1287/1677 [17:09<05:12,  1.25it/s]

batch loss: 1.0986111164093018


 77%|███████▋  | 1288/1677 [17:10<05:31,  1.17it/s]

batch loss: 1.0986093282699585


 77%|███████▋  | 1289/1677 [17:11<05:15,  1.23it/s]

batch loss: 1.0986117124557495


 77%|███████▋  | 1290/1677 [17:11<05:13,  1.23it/s]

batch loss: 1.0986113548278809


 77%|███████▋  | 1291/1677 [17:12<05:06,  1.26it/s]

batch loss: 1.098608136177063


 77%|███████▋  | 1292/1677 [17:13<05:16,  1.22it/s]

batch loss: 1.0986151695251465


 77%|███████▋  | 1293/1677 [17:14<05:06,  1.25it/s]

batch loss: 1.0986114740371704


 77%|███████▋  | 1294/1677 [17:15<05:14,  1.22it/s]

batch loss: 1.0986233949661255


 77%|███████▋  | 1295/1677 [17:15<05:11,  1.23it/s]

batch loss: 1.0986143350601196


 77%|███████▋  | 1296/1677 [17:16<05:16,  1.20it/s]

batch loss: 1.098605990409851


 77%|███████▋  | 1297/1677 [17:17<05:21,  1.18it/s]

batch loss: 1.098608136177063


 77%|███████▋  | 1298/1677 [17:18<05:25,  1.17it/s]

batch loss: 1.0986137390136719


 77%|███████▋  | 1299/1677 [17:19<05:24,  1.17it/s]

batch loss: 1.0986058712005615


 78%|███████▊  | 1300/1677 [17:20<05:11,  1.21it/s]

batch loss: 1.098616123199463


 78%|███████▊  | 1301/1677 [17:20<05:02,  1.24it/s]

batch loss: 1.0986030101776123


 78%|███████▊  | 1302/1677 [17:21<04:55,  1.27it/s]

batch loss: 1.0986157655715942


 78%|███████▊  | 1303/1677 [17:22<04:49,  1.29it/s]

batch loss: 1.0986124277114868


 78%|███████▊  | 1304/1677 [17:23<04:51,  1.28it/s]

batch loss: 1.0986113548278809


 78%|███████▊  | 1305/1677 [17:24<05:01,  1.23it/s]

batch loss: 1.0986058712005615


 78%|███████▊  | 1306/1677 [17:24<04:53,  1.26it/s]

batch loss: 1.098608374595642


 78%|███████▊  | 1307/1677 [17:25<04:50,  1.27it/s]

batch loss: 1.0986100435256958


 78%|███████▊  | 1308/1677 [17:26<04:52,  1.26it/s]

batch loss: 1.0986171960830688


 78%|███████▊  | 1309/1677 [17:27<05:02,  1.21it/s]

batch loss: 1.098612666130066


 78%|███████▊  | 1310/1677 [17:28<04:53,  1.25it/s]

batch loss: 1.0986130237579346


 78%|███████▊  | 1311/1677 [17:28<04:49,  1.26it/s]

batch loss: 1.098616361618042


 78%|███████▊  | 1312/1677 [17:29<04:49,  1.26it/s]

batch loss: 1.098609447479248


 78%|███████▊  | 1313/1677 [17:30<04:56,  1.23it/s]

batch loss: 1.0986149311065674


 78%|███████▊  | 1314/1677 [17:31<04:48,  1.26it/s]

batch loss: 1.0986143350601196


 78%|███████▊  | 1315/1677 [17:32<04:59,  1.21it/s]

batch loss: 1.0986034870147705


 78%|███████▊  | 1316/1677 [17:32<04:48,  1.25it/s]

batch loss: 1.0986082553863525


 79%|███████▊  | 1317/1677 [17:33<04:44,  1.27it/s]

batch loss: 1.098616361618042


 79%|███████▊  | 1318/1677 [17:34<04:55,  1.22it/s]

batch loss: 1.0986074209213257


 79%|███████▊  | 1319/1677 [17:35<04:46,  1.25it/s]

batch loss: 1.098612666130066


 79%|███████▊  | 1320/1677 [17:36<04:40,  1.27it/s]

batch loss: 1.098610520362854


 79%|███████▉  | 1321/1677 [17:36<04:35,  1.29it/s]

batch loss: 1.0986098051071167


 79%|███████▉  | 1322/1677 [17:37<04:33,  1.30it/s]

batch loss: 1.0986170768737793


 79%|███████▉  | 1323/1677 [17:38<04:35,  1.28it/s]

batch loss: 1.0986030101776123


 79%|███████▉  | 1324/1677 [17:39<04:37,  1.27it/s]

batch loss: 1.0986101627349854


 79%|███████▉  | 1325/1677 [17:39<04:35,  1.28it/s]

batch loss: 1.0986199378967285


 79%|███████▉  | 1326/1677 [17:40<04:31,  1.29it/s]

batch loss: 1.0986132621765137


 79%|███████▉  | 1327/1677 [17:41<04:42,  1.24it/s]

batch loss: 1.0986056327819824


 79%|███████▉  | 1328/1677 [17:42<04:41,  1.24it/s]

batch loss: 1.0986143350601196


 79%|███████▉  | 1329/1677 [17:43<04:47,  1.21it/s]

batch loss: 1.0986157655715942


 79%|███████▉  | 1330/1677 [17:44<04:44,  1.22it/s]

batch loss: 1.0986008644104004


 79%|███████▉  | 1331/1677 [17:44<04:50,  1.19it/s]

batch loss: 1.0986108779907227


 79%|███████▉  | 1332/1677 [17:45<04:52,  1.18it/s]

batch loss: 1.0986096858978271


 79%|███████▉  | 1333/1677 [17:46<04:54,  1.17it/s]

batch loss: 1.0986148118972778


 80%|███████▉  | 1334/1677 [17:47<04:47,  1.19it/s]

batch loss: 1.0986207723617554


 80%|███████▉  | 1335/1677 [17:48<04:42,  1.21it/s]

batch loss: 1.0986143350601196


 80%|███████▉  | 1336/1677 [17:49<04:51,  1.17it/s]

batch loss: 1.0986138582229614


 80%|███████▉  | 1337/1677 [17:50<04:45,  1.19it/s]

batch loss: 1.098605990409851


 80%|███████▉  | 1338/1677 [17:50<04:46,  1.18it/s]

batch loss: 1.0986149311065674


 80%|███████▉  | 1339/1677 [17:51<04:41,  1.20it/s]

batch loss: 1.0986169576644897


 80%|███████▉  | 1340/1677 [17:52<04:45,  1.18it/s]

batch loss: 1.0986177921295166


 80%|███████▉  | 1341/1677 [17:53<04:39,  1.20it/s]

batch loss: 1.0986123085021973


 80%|████████  | 1342/1677 [17:54<04:30,  1.24it/s]

batch loss: 1.0986038446426392


 80%|████████  | 1343/1677 [17:54<04:28,  1.24it/s]

batch loss: 1.0986167192459106


 80%|████████  | 1344/1677 [17:55<04:27,  1.25it/s]

batch loss: 1.0986148118972778


 80%|████████  | 1345/1677 [17:56<04:33,  1.21it/s]

batch loss: 1.0986173152923584


 80%|████████  | 1346/1677 [17:57<04:31,  1.22it/s]

batch loss: 1.098609209060669


 80%|████████  | 1347/1677 [17:58<04:22,  1.26it/s]

batch loss: 1.0986127853393555


 80%|████████  | 1348/1677 [17:59<04:29,  1.22it/s]

batch loss: 1.0986148118972778


 80%|████████  | 1349/1677 [17:59<04:21,  1.25it/s]

batch loss: 1.098610758781433


 81%|████████  | 1350/1677 [18:00<04:28,  1.22it/s]

batch loss: 1.0986050367355347


 81%|████████  | 1351/1677 [18:01<04:19,  1.26it/s]

batch loss: 1.0986095666885376


 81%|████████  | 1352/1677 [18:02<04:16,  1.27it/s]

batch loss: 1.098617672920227


 81%|████████  | 1353/1677 [18:02<04:10,  1.29it/s]

batch loss: 1.098620891571045


 81%|████████  | 1354/1677 [18:03<03:55,  1.37it/s]

batch loss: 1.0986138582229614


 81%|████████  | 1355/1677 [18:04<04:01,  1.33it/s]

batch loss: 1.098602294921875


 81%|████████  | 1356/1677 [18:05<04:06,  1.30it/s]

batch loss: 1.098620057106018


 81%|████████  | 1357/1677 [18:06<04:16,  1.25it/s]

batch loss: 1.0986063480377197


 81%|████████  | 1358/1677 [18:06<04:10,  1.28it/s]

batch loss: 1.098613977432251


 81%|████████  | 1359/1677 [18:07<04:06,  1.29it/s]

batch loss: 1.0986160039901733


 81%|████████  | 1360/1677 [18:08<03:51,  1.37it/s]

batch loss: 1.0986071825027466


 81%|████████  | 1361/1677 [18:08<03:57,  1.33it/s]

batch loss: 1.0986205339431763


 81%|████████  | 1362/1677 [18:09<03:59,  1.31it/s]

batch loss: 1.098610758781433


 81%|████████▏ | 1363/1677 [18:10<03:58,  1.32it/s]

batch loss: 1.0986175537109375


 81%|████████▏ | 1364/1677 [18:11<04:16,  1.22it/s]

batch loss: 1.0986175537109375


 81%|████████▏ | 1365/1677 [18:12<04:20,  1.20it/s]

batch loss: 1.0986186265945435


 81%|████████▏ | 1366/1677 [18:13<04:16,  1.21it/s]

batch loss: 1.0986171960830688


 82%|████████▏ | 1367/1677 [18:13<04:07,  1.25it/s]

batch loss: 1.0986162424087524


 82%|████████▏ | 1368/1677 [18:14<04:02,  1.28it/s]

batch loss: 1.0986120700836182


 82%|████████▏ | 1369/1677 [18:15<04:02,  1.27it/s]

batch loss: 1.0986089706420898


 82%|████████▏ | 1370/1677 [18:16<04:03,  1.26it/s]

batch loss: 1.0986167192459106


 82%|████████▏ | 1371/1677 [18:16<03:57,  1.29it/s]

batch loss: 1.0986202955245972


 82%|████████▏ | 1372/1677 [18:17<03:43,  1.36it/s]

batch loss: 1.0986135005950928


 82%|████████▏ | 1373/1677 [18:18<03:43,  1.36it/s]

batch loss: 1.0986193418502808


 82%|████████▏ | 1374/1677 [18:19<03:56,  1.28it/s]

batch loss: 1.0986167192459106


 82%|████████▏ | 1375/1677 [18:19<03:56,  1.28it/s]

batch loss: 1.0986127853393555


 82%|████████▏ | 1376/1677 [18:20<03:56,  1.27it/s]

batch loss: 1.0986144542694092


 82%|████████▏ | 1377/1677 [18:21<03:52,  1.29it/s]

batch loss: 1.0986089706420898


 82%|████████▏ | 1378/1677 [18:22<03:54,  1.27it/s]

batch loss: 1.098610520362854


 82%|████████▏ | 1379/1677 [18:23<03:51,  1.29it/s]

batch loss: 1.0986106395721436


 82%|████████▏ | 1380/1677 [18:23<03:51,  1.28it/s]

batch loss: 1.0986112356185913


 82%|████████▏ | 1381/1677 [18:24<03:53,  1.27it/s]

batch loss: 1.0986090898513794


 82%|████████▏ | 1382/1677 [18:25<03:47,  1.30it/s]

batch loss: 1.0986151695251465


 82%|████████▏ | 1383/1677 [18:26<03:44,  1.31it/s]

batch loss: 1.0986151695251465


 83%|████████▎ | 1384/1677 [18:27<03:55,  1.25it/s]

batch loss: 1.098610758781433


 83%|████████▎ | 1385/1677 [18:27<03:54,  1.25it/s]

batch loss: 1.0986087322235107


 83%|████████▎ | 1386/1677 [18:28<03:59,  1.21it/s]

batch loss: 1.0986158847808838


 83%|████████▎ | 1387/1677 [18:29<03:52,  1.25it/s]

batch loss: 1.098617434501648


 83%|████████▎ | 1388/1677 [18:30<03:47,  1.27it/s]

batch loss: 1.098610281944275


 83%|████████▎ | 1389/1677 [18:31<04:00,  1.20it/s]

batch loss: 1.0986175537109375


 83%|████████▎ | 1390/1677 [18:31<03:51,  1.24it/s]

batch loss: 1.0986073017120361


 83%|████████▎ | 1391/1677 [18:32<03:48,  1.25it/s]

batch loss: 1.0986119508743286


 83%|████████▎ | 1392/1677 [18:33<03:54,  1.21it/s]

batch loss: 1.098616361618042


 83%|████████▎ | 1393/1677 [18:34<03:57,  1.20it/s]

batch loss: 1.098609447479248


 83%|████████▎ | 1394/1677 [18:35<03:49,  1.23it/s]

batch loss: 1.0986114740371704


 83%|████████▎ | 1395/1677 [18:35<03:41,  1.27it/s]

batch loss: 1.0986111164093018


 83%|████████▎ | 1396/1677 [18:36<03:43,  1.26it/s]

batch loss: 1.098612904548645


 83%|████████▎ | 1397/1677 [18:37<03:44,  1.25it/s]

batch loss: 1.0986112356185913


 83%|████████▎ | 1398/1677 [18:38<03:38,  1.28it/s]

batch loss: 1.098610758781433


 83%|████████▎ | 1399/1677 [18:39<03:49,  1.21it/s]

batch loss: 1.0986112356185913


 83%|████████▎ | 1400/1677 [18:40<03:53,  1.19it/s]

batch loss: 1.098610758781433


 84%|████████▎ | 1401/1677 [18:40<03:44,  1.23it/s]

batch loss: 1.0986124277114868


 84%|████████▎ | 1402/1677 [18:41<03:46,  1.21it/s]

batch loss: 1.0986099243164062


 84%|████████▎ | 1403/1677 [18:42<03:41,  1.24it/s]

batch loss: 1.098612904548645


 84%|████████▎ | 1404/1677 [18:43<03:40,  1.24it/s]

batch loss: 1.098610281944275


 84%|████████▍ | 1405/1677 [18:44<03:44,  1.21it/s]

batch loss: 1.0986096858978271


 84%|████████▍ | 1406/1677 [18:44<03:37,  1.24it/s]

batch loss: 1.098609209060669


 84%|████████▍ | 1407/1677 [18:45<03:34,  1.26it/s]

batch loss: 1.0986133813858032


 84%|████████▍ | 1408/1677 [18:46<03:33,  1.26it/s]

batch loss: 1.0986151695251465


 84%|████████▍ | 1409/1677 [18:47<03:42,  1.21it/s]

batch loss: 1.098615050315857


 84%|████████▍ | 1410/1677 [18:48<03:34,  1.24it/s]

batch loss: 1.0986155271530151


 84%|████████▍ | 1411/1677 [18:49<03:39,  1.21it/s]

batch loss: 1.098616600036621


 84%|████████▍ | 1412/1677 [18:49<03:37,  1.22it/s]

batch loss: 1.0986124277114868


 84%|████████▍ | 1413/1677 [18:50<03:35,  1.23it/s]

batch loss: 1.0986124277114868


 84%|████████▍ | 1414/1677 [18:51<03:34,  1.23it/s]

batch loss: 1.0986137390136719


 84%|████████▍ | 1415/1677 [18:52<03:32,  1.23it/s]

batch loss: 1.0986120700836182


 84%|████████▍ | 1416/1677 [18:53<03:36,  1.21it/s]

batch loss: 1.098616123199463


 84%|████████▍ | 1417/1677 [18:53<03:29,  1.24it/s]

batch loss: 1.0986133813858032


 85%|████████▍ | 1418/1677 [18:54<03:24,  1.27it/s]

batch loss: 1.0986123085021973


 85%|████████▍ | 1419/1677 [18:55<03:20,  1.29it/s]

batch loss: 1.0986109972000122


 85%|████████▍ | 1420/1677 [18:56<03:25,  1.25it/s]

batch loss: 1.098612904548645


 85%|████████▍ | 1421/1677 [18:56<03:21,  1.27it/s]

batch loss: 1.0986120700836182


 85%|████████▍ | 1422/1677 [18:57<03:29,  1.22it/s]

batch loss: 1.0986135005950928


 85%|████████▍ | 1423/1677 [18:58<03:22,  1.25it/s]

batch loss: 1.0986132621765137


 85%|████████▍ | 1424/1677 [18:59<03:20,  1.26it/s]

batch loss: 1.0986135005950928


 85%|████████▍ | 1425/1677 [19:00<03:24,  1.23it/s]

batch loss: 1.0986119508743286


 85%|████████▌ | 1426/1677 [19:00<03:18,  1.26it/s]

batch loss: 1.098611831665039


 85%|████████▌ | 1427/1677 [19:01<03:14,  1.28it/s]

batch loss: 1.098611831665039


 85%|████████▌ | 1428/1677 [19:02<03:11,  1.30it/s]

batch loss: 1.0986117124557495


 85%|████████▌ | 1429/1677 [19:03<03:05,  1.34it/s]

batch loss: 1.0986090898513794


 85%|████████▌ | 1430/1677 [19:03<03:07,  1.31it/s]

batch loss: 1.098609209060669


 85%|████████▌ | 1431/1677 [19:04<03:09,  1.30it/s]

batch loss: 1.098612666130066


 85%|████████▌ | 1432/1677 [19:05<03:17,  1.24it/s]

batch loss: 1.098612904548645


 85%|████████▌ | 1433/1677 [19:06<03:12,  1.27it/s]

batch loss: 1.0986132621765137


 86%|████████▌ | 1434/1677 [19:07<03:12,  1.26it/s]

batch loss: 1.0986117124557495


 86%|████████▌ | 1435/1677 [19:08<03:12,  1.26it/s]

batch loss: 1.0986098051071167


 86%|████████▌ | 1436/1677 [19:08<03:07,  1.29it/s]

batch loss: 1.0986117124557495


 86%|████████▌ | 1437/1677 [19:09<03:13,  1.24it/s]

batch loss: 1.0986120700836182


 86%|████████▌ | 1438/1677 [19:10<03:11,  1.25it/s]

batch loss: 1.0986132621765137


 86%|████████▌ | 1439/1677 [19:11<03:15,  1.22it/s]

batch loss: 1.0986100435256958


 86%|████████▌ | 1440/1677 [19:12<03:11,  1.24it/s]

batch loss: 1.0986136198043823


 86%|████████▌ | 1441/1677 [19:12<03:15,  1.21it/s]

batch loss: 1.0986109972000122


 86%|████████▌ | 1442/1677 [19:13<03:12,  1.22it/s]

batch loss: 1.0986112356185913


 86%|████████▌ | 1443/1677 [19:14<03:07,  1.25it/s]

batch loss: 1.0986127853393555


 86%|████████▌ | 1444/1677 [19:15<03:06,  1.25it/s]

batch loss: 1.0986136198043823


 86%|████████▌ | 1445/1677 [19:16<03:09,  1.22it/s]

batch loss: 1.0986087322235107


 86%|████████▌ | 1446/1677 [19:17<03:12,  1.20it/s]

batch loss: 1.0986121892929077


 86%|████████▋ | 1447/1677 [19:17<03:06,  1.24it/s]

batch loss: 1.098612666130066


 86%|████████▋ | 1448/1677 [19:18<03:01,  1.26it/s]

batch loss: 1.0986145734786987


 86%|████████▋ | 1449/1677 [19:19<03:00,  1.26it/s]

batch loss: 1.098612904548645


 86%|████████▋ | 1450/1677 [19:20<03:05,  1.22it/s]

batch loss: 1.0986121892929077


 87%|████████▋ | 1451/1677 [19:21<03:09,  1.19it/s]

batch loss: 1.0986123085021973


 87%|████████▋ | 1452/1677 [19:21<03:11,  1.17it/s]

batch loss: 1.098609209060669


 87%|████████▋ | 1453/1677 [19:22<03:11,  1.17it/s]

batch loss: 1.0986124277114868


 87%|████████▋ | 1454/1677 [19:23<03:06,  1.19it/s]

batch loss: 1.0986114740371704


 87%|████████▋ | 1455/1677 [19:24<02:58,  1.24it/s]

batch loss: 1.098613977432251


 87%|████████▋ | 1456/1677 [19:25<02:56,  1.25it/s]

batch loss: 1.098610758781433


 87%|████████▋ | 1457/1677 [19:25<02:55,  1.25it/s]

batch loss: 1.0986144542694092


 87%|████████▋ | 1458/1677 [19:26<02:59,  1.22it/s]

batch loss: 1.0986117124557495


 87%|████████▋ | 1459/1677 [19:27<02:53,  1.26it/s]

batch loss: 1.0986101627349854


 87%|████████▋ | 1460/1677 [19:28<02:50,  1.27it/s]

batch loss: 1.09861421585083


 87%|████████▋ | 1461/1677 [19:29<02:55,  1.23it/s]

batch loss: 1.0986099243164062


 87%|████████▋ | 1462/1677 [19:30<02:57,  1.21it/s]

batch loss: 1.0986138582229614


 87%|████████▋ | 1463/1677 [19:30<02:58,  1.20it/s]

batch loss: 1.0986164808273315


 87%|████████▋ | 1464/1677 [19:31<03:00,  1.18it/s]

batch loss: 1.098609447479248


 87%|████████▋ | 1465/1677 [19:32<02:53,  1.23it/s]

batch loss: 1.0986148118972778


 87%|████████▋ | 1466/1677 [19:33<02:54,  1.21it/s]

batch loss: 1.098616361618042


 87%|████████▋ | 1467/1677 [19:34<02:48,  1.24it/s]

batch loss: 1.098613977432251


 88%|████████▊ | 1468/1677 [19:34<02:52,  1.21it/s]

batch loss: 1.0986101627349854


 88%|████████▊ | 1469/1677 [19:35<02:52,  1.20it/s]

batch loss: 1.0986104011535645


 88%|████████▊ | 1470/1677 [19:36<02:47,  1.24it/s]

batch loss: 1.0986090898513794


 88%|████████▊ | 1471/1677 [19:37<02:43,  1.26it/s]

batch loss: 1.0986125469207764


 88%|████████▊ | 1472/1677 [19:38<02:39,  1.29it/s]

batch loss: 1.0986111164093018


 88%|████████▊ | 1473/1677 [19:38<02:36,  1.30it/s]

batch loss: 1.098619818687439


 88%|████████▊ | 1474/1677 [19:39<02:34,  1.31it/s]

batch loss: 1.0986113548278809


 88%|████████▊ | 1475/1677 [19:40<02:34,  1.31it/s]

batch loss: 1.098610520362854


 88%|████████▊ | 1476/1677 [19:41<02:39,  1.26it/s]

batch loss: 1.0986117124557495


 88%|████████▊ | 1477/1677 [19:41<02:35,  1.29it/s]

batch loss: 1.0986131429672241


 88%|████████▊ | 1478/1677 [19:42<02:33,  1.30it/s]

batch loss: 1.098610281944275


 88%|████████▊ | 1479/1677 [19:43<02:38,  1.25it/s]

batch loss: 1.0986099243164062


 88%|████████▊ | 1480/1677 [19:44<02:36,  1.26it/s]

batch loss: 1.098610520362854


 88%|████████▊ | 1481/1677 [19:45<02:43,  1.20it/s]

batch loss: 1.0986125469207764


 88%|████████▊ | 1482/1677 [19:46<02:36,  1.25it/s]

batch loss: 1.0986095666885376


 88%|████████▊ | 1483/1677 [19:46<02:32,  1.27it/s]

batch loss: 1.0986096858978271


 88%|████████▊ | 1484/1677 [19:47<02:31,  1.27it/s]

batch loss: 1.098610758781433


 89%|████████▊ | 1485/1677 [19:48<02:27,  1.30it/s]

batch loss: 1.0986109972000122


 89%|████████▊ | 1486/1677 [19:49<02:28,  1.28it/s]

batch loss: 1.0986114740371704


 89%|████████▊ | 1487/1677 [19:49<02:29,  1.27it/s]

batch loss: 1.0986114740371704


 89%|████████▊ | 1488/1677 [19:50<02:33,  1.23it/s]

batch loss: 1.0986125469207764


 89%|████████▉ | 1489/1677 [19:51<02:34,  1.22it/s]

batch loss: 1.0986131429672241


 89%|████████▉ | 1490/1677 [19:52<02:35,  1.21it/s]

batch loss: 1.0986117124557495


 89%|████████▉ | 1491/1677 [19:53<02:29,  1.25it/s]

batch loss: 1.0986096858978271


 89%|████████▉ | 1492/1677 [19:53<02:24,  1.28it/s]

batch loss: 1.0986125469207764


 89%|████████▉ | 1493/1677 [19:54<02:32,  1.21it/s]

batch loss: 1.0986096858978271


 89%|████████▉ | 1494/1677 [19:55<02:28,  1.23it/s]

batch loss: 1.0986073017120361


 89%|████████▉ | 1495/1677 [19:56<02:26,  1.24it/s]

batch loss: 1.0986088514328003


 89%|████████▉ | 1496/1677 [19:57<02:35,  1.17it/s]

batch loss: 1.0986136198043823


 89%|████████▉ | 1497/1677 [19:58<02:28,  1.22it/s]

batch loss: 1.0986062288284302


 89%|████████▉ | 1498/1677 [19:59<02:30,  1.19it/s]

batch loss: 1.0986149311065674


 89%|████████▉ | 1499/1677 [19:59<02:24,  1.24it/s]

batch loss: 1.0986078977584839


 89%|████████▉ | 1500/1677 [20:00<02:25,  1.22it/s]

batch loss: 1.0986136198043823


 90%|████████▉ | 1501/1677 [20:01<02:26,  1.20it/s]

batch loss: 1.0986108779907227


 90%|████████▉ | 1502/1677 [20:02<02:21,  1.24it/s]

batch loss: 1.0986157655715942


 90%|████████▉ | 1503/1677 [20:02<02:18,  1.26it/s]

batch loss: 1.0986109972000122


 90%|████████▉ | 1504/1677 [20:03<02:15,  1.28it/s]

batch loss: 1.0986076593399048


 90%|████████▉ | 1505/1677 [20:04<02:10,  1.32it/s]

batch loss: 1.0986123085021973


 90%|████████▉ | 1506/1677 [20:05<02:09,  1.32it/s]

batch loss: 1.0986137390136719


 90%|████████▉ | 1507/1677 [20:05<02:10,  1.30it/s]

batch loss: 1.0986045598983765


 90%|████████▉ | 1508/1677 [20:06<02:14,  1.25it/s]

batch loss: 1.0986052751541138


 90%|████████▉ | 1509/1677 [20:07<02:14,  1.25it/s]

batch loss: 1.0986113548278809


 90%|█████████ | 1510/1677 [20:08<02:10,  1.28it/s]

batch loss: 1.0986229181289673


 90%|█████████ | 1511/1677 [20:09<02:07,  1.30it/s]

batch loss: 1.0986042022705078


 90%|█████████ | 1512/1677 [20:09<02:08,  1.29it/s]

batch loss: 1.0986175537109375


 90%|█████████ | 1513/1677 [20:10<02:08,  1.27it/s]

batch loss: 1.0985982418060303


 90%|█████████ | 1514/1677 [20:11<02:16,  1.19it/s]

batch loss: 1.0986056327819824


 90%|█████████ | 1515/1677 [20:12<02:11,  1.24it/s]

batch loss: 1.0985925197601318


 90%|█████████ | 1516/1677 [20:13<02:13,  1.21it/s]

batch loss: 1.0986038446426392


 90%|█████████ | 1517/1677 [20:14<02:11,  1.22it/s]

batch loss: 1.0986006259918213


 91%|█████████ | 1518/1677 [20:14<02:12,  1.20it/s]

batch loss: 1.0986407995224


 91%|█████████ | 1519/1677 [20:15<02:08,  1.23it/s]

batch loss: 1.0986038446426392


 91%|█████████ | 1520/1677 [20:16<02:02,  1.28it/s]

batch loss: 1.0986101627349854


 91%|█████████ | 1521/1677 [20:17<02:02,  1.28it/s]

batch loss: 1.09861159324646


 91%|█████████ | 1522/1677 [20:18<02:05,  1.23it/s]

batch loss: 1.0985909700393677


 91%|█████████ | 1523/1677 [20:19<02:09,  1.19it/s]

batch loss: 1.0986071825027466


 91%|█████████ | 1524/1677 [20:19<02:08,  1.19it/s]

batch loss: 1.098578929901123


 91%|█████████ | 1525/1677 [20:20<02:14,  1.13it/s]

batch loss: 1.0985957384109497


 91%|█████████ | 1526/1677 [20:21<02:12,  1.14it/s]

batch loss: 1.098619818687439


 91%|█████████ | 1527/1677 [20:22<02:05,  1.19it/s]

batch loss: 1.0986075401306152


 91%|█████████ | 1528/1677 [20:23<02:00,  1.24it/s]

batch loss: 1.0985774993896484


 91%|█████████ | 1529/1677 [20:24<02:02,  1.21it/s]

batch loss: 1.0986847877502441


 91%|█████████ | 1530/1677 [20:24<01:57,  1.25it/s]

batch loss: 1.0986193418502808


 91%|█████████▏| 1531/1677 [20:25<02:00,  1.21it/s]

batch loss: 1.0986045598983765


 91%|█████████▏| 1532/1677 [20:26<02:00,  1.20it/s]

batch loss: 1.0985567569732666


 91%|█████████▏| 1533/1677 [20:27<02:01,  1.18it/s]

batch loss: 1.098707914352417


 91%|█████████▏| 1534/1677 [20:28<01:56,  1.23it/s]

batch loss: 1.098652720451355


 92%|█████████▏| 1535/1677 [20:28<01:55,  1.23it/s]

batch loss: 1.0986324548721313


 92%|█████████▏| 1536/1677 [20:29<01:52,  1.25it/s]

batch loss: 1.098610281944275


 92%|█████████▏| 1537/1677 [20:30<01:49,  1.28it/s]

batch loss: 1.098572015762329


 92%|█████████▏| 1538/1677 [20:31<01:47,  1.30it/s]

batch loss: 1.0985853672027588


 92%|█████████▏| 1539/1677 [20:32<01:55,  1.20it/s]

batch loss: 1.098616123199463


 92%|█████████▏| 1540/1677 [20:32<01:52,  1.22it/s]

batch loss: 1.0986992120742798


 92%|█████████▏| 1541/1677 [20:33<01:53,  1.20it/s]

batch loss: 1.0986151695251465


 92%|█████████▏| 1542/1677 [20:34<01:54,  1.18it/s]

batch loss: 1.0985910892486572


 92%|█████████▏| 1543/1677 [20:35<01:48,  1.23it/s]

batch loss: 1.098598599433899


 92%|█████████▏| 1544/1677 [20:36<01:44,  1.27it/s]

batch loss: 1.0986249446868896


 92%|█████████▏| 1545/1677 [20:37<01:48,  1.22it/s]

batch loss: 1.0986138582229614


 92%|█████████▏| 1546/1677 [20:37<01:46,  1.23it/s]

batch loss: 1.0986180305480957


 92%|█████████▏| 1547/1677 [20:38<01:44,  1.24it/s]

batch loss: 1.0986196994781494


 92%|█████████▏| 1548/1677 [20:39<01:43,  1.25it/s]

batch loss: 1.0986131429672241


 92%|█████████▏| 1549/1677 [20:40<01:45,  1.21it/s]

batch loss: 1.0986168384552002


 92%|█████████▏| 1550/1677 [20:41<01:42,  1.24it/s]

batch loss: 1.0986217260360718


 92%|█████████▏| 1551/1677 [20:41<01:39,  1.26it/s]

batch loss: 1.098606824874878


 93%|█████████▎| 1552/1677 [20:42<01:37,  1.29it/s]

batch loss: 1.0986262559890747


 93%|█████████▎| 1553/1677 [20:43<01:39,  1.24it/s]

batch loss: 1.0986127853393555


 93%|█████████▎| 1554/1677 [20:44<01:41,  1.22it/s]

batch loss: 1.0986123085021973


 93%|█████████▎| 1555/1677 [20:45<01:41,  1.20it/s]

batch loss: 1.0986026525497437


 93%|█████████▎| 1556/1677 [20:46<01:42,  1.18it/s]

batch loss: 1.0986144542694092


 93%|█████████▎| 1557/1677 [20:46<01:37,  1.23it/s]

batch loss: 1.0986199378967285


 93%|█████████▎| 1558/1677 [20:47<01:36,  1.23it/s]

batch loss: 1.0986149311065674


 93%|█████████▎| 1559/1677 [20:48<01:33,  1.26it/s]

batch loss: 1.0986080169677734


 93%|█████████▎| 1560/1677 [20:49<01:35,  1.23it/s]

batch loss: 1.0986136198043823


 93%|█████████▎| 1561/1677 [20:50<01:37,  1.19it/s]

batch loss: 1.098607063293457


 93%|█████████▎| 1562/1677 [20:50<01:33,  1.24it/s]

batch loss: 1.0986111164093018


 93%|█████████▎| 1563/1677 [20:51<01:31,  1.24it/s]

batch loss: 1.0986101627349854


 93%|█████████▎| 1564/1677 [20:52<01:28,  1.28it/s]

batch loss: 1.098609447479248


 93%|█████████▎| 1565/1677 [20:53<01:26,  1.30it/s]

batch loss: 1.0986123085021973


 93%|█████████▎| 1566/1677 [20:53<01:26,  1.29it/s]

batch loss: 1.0986146926879883


 93%|█████████▎| 1567/1677 [20:54<01:25,  1.28it/s]

batch loss: 1.0986124277114868


 94%|█████████▎| 1568/1677 [20:55<01:28,  1.23it/s]

batch loss: 1.0986109972000122


 94%|█████████▎| 1569/1677 [20:56<01:27,  1.24it/s]

batch loss: 1.0986138582229614


 94%|█████████▎| 1570/1677 [20:57<01:32,  1.16it/s]

batch loss: 1.0986111164093018


 94%|█████████▎| 1571/1677 [20:58<01:27,  1.21it/s]

batch loss: 1.0986146926879883


 94%|█████████▎| 1572/1677 [20:58<01:23,  1.26it/s]

batch loss: 1.098610758781433


 94%|█████████▍| 1573/1677 [20:59<01:21,  1.28it/s]

batch loss: 1.0986123085021973


 94%|█████████▍| 1574/1677 [21:00<01:20,  1.27it/s]

batch loss: 1.0986111164093018


 94%|█████████▍| 1575/1677 [21:01<01:19,  1.29it/s]

batch loss: 1.0986117124557495


 94%|█████████▍| 1576/1677 [21:02<01:21,  1.23it/s]

batch loss: 1.0986111164093018


 94%|█████████▍| 1577/1677 [21:02<01:19,  1.26it/s]

batch loss: 1.0986111164093018


 94%|█████████▍| 1578/1677 [21:03<01:14,  1.33it/s]

batch loss: 1.0986071825027466


 94%|█████████▍| 1579/1677 [21:04<01:15,  1.31it/s]

batch loss: 1.0986144542694092


 94%|█████████▍| 1580/1677 [21:05<01:17,  1.26it/s]

batch loss: 1.0986117124557495


 94%|█████████▍| 1581/1677 [21:05<01:15,  1.28it/s]

batch loss: 1.0986096858978271


 94%|█████████▍| 1582/1677 [21:06<01:12,  1.31it/s]

batch loss: 1.098612666130066


 94%|█████████▍| 1583/1677 [21:07<01:11,  1.31it/s]

batch loss: 1.0986162424087524


 94%|█████████▍| 1584/1677 [21:08<01:11,  1.29it/s]

batch loss: 1.0986108779907227


 95%|█████████▍| 1585/1677 [21:08<01:10,  1.30it/s]

batch loss: 1.098615288734436


 95%|█████████▍| 1586/1677 [21:09<01:09,  1.30it/s]

batch loss: 1.0986064672470093


 95%|█████████▍| 1587/1677 [21:10<01:08,  1.32it/s]

batch loss: 1.0986084938049316


 95%|█████████▍| 1588/1677 [21:11<01:10,  1.26it/s]

batch loss: 1.0986119508743286


 95%|█████████▍| 1589/1677 [21:12<01:08,  1.28it/s]

batch loss: 1.0986113548278809


 95%|█████████▍| 1590/1677 [21:12<01:07,  1.29it/s]

batch loss: 1.0986077785491943


 95%|█████████▍| 1591/1677 [21:13<01:09,  1.24it/s]

batch loss: 1.098607063293457


 95%|█████████▍| 1592/1677 [21:14<01:09,  1.22it/s]

batch loss: 1.0986157655715942


 95%|█████████▍| 1593/1677 [21:15<01:07,  1.25it/s]

batch loss: 1.0986095666885376


 95%|█████████▌| 1594/1677 [21:16<01:08,  1.21it/s]

batch loss: 1.0986127853393555


 95%|█████████▌| 1595/1677 [21:17<01:08,  1.19it/s]

batch loss: 1.0986119508743286


 95%|█████████▌| 1596/1677 [21:17<01:06,  1.21it/s]

batch loss: 1.0986113548278809


 95%|█████████▌| 1597/1677 [21:18<01:11,  1.11it/s]

batch loss: 1.0986237525939941


 95%|█████████▌| 1598/1677 [21:19<01:10,  1.12it/s]

batch loss: 1.0986078977584839


 95%|█████████▌| 1599/1677 [21:20<01:07,  1.16it/s]

batch loss: 1.0986111164093018


 95%|█████████▌| 1600/1677 [21:21<01:06,  1.15it/s]

batch loss: 1.0986039638519287


 95%|█████████▌| 1601/1677 [21:22<01:04,  1.19it/s]

batch loss: 1.0986112356185913


 96%|█████████▌| 1602/1677 [21:22<01:01,  1.23it/s]

batch loss: 1.098608374595642


 96%|█████████▌| 1603/1677 [21:24<01:07,  1.09it/s]

batch loss: 1.0986024141311646


 96%|█████████▌| 1604/1677 [21:25<01:05,  1.11it/s]

batch loss: 1.0986140966415405


 96%|█████████▌| 1605/1677 [21:25<01:01,  1.17it/s]

batch loss: 1.098606824874878


 96%|█████████▌| 1606/1677 [21:26<00:58,  1.22it/s]

batch loss: 1.0986127853393555


 96%|█████████▌| 1607/1677 [21:27<00:56,  1.24it/s]

batch loss: 1.0986045598983765


 96%|█████████▌| 1608/1677 [21:27<00:48,  1.44it/s]

batch loss: 1.0986050367355347


 96%|█████████▌| 1609/1677 [21:28<00:47,  1.43it/s]

batch loss: 1.0986099243164062


 96%|█████████▌| 1610/1677 [21:29<00:50,  1.33it/s]

batch loss: 1.098603367805481


 96%|█████████▌| 1611/1677 [21:30<00:51,  1.27it/s]

batch loss: 1.0986123085021973


 96%|█████████▌| 1612/1677 [21:30<00:50,  1.29it/s]

batch loss: 1.0986043214797974


 96%|█████████▌| 1613/1677 [21:31<00:52,  1.23it/s]

batch loss: 1.0986357927322388


 96%|█████████▌| 1614/1677 [21:32<00:52,  1.20it/s]

batch loss: 1.098599910736084


 96%|█████████▋| 1615/1677 [21:33<00:50,  1.24it/s]

batch loss: 1.098596453666687


 96%|█████████▋| 1616/1677 [21:34<00:48,  1.27it/s]

batch loss: 1.0986173152923584


 96%|█████████▋| 1617/1677 [21:34<00:46,  1.30it/s]

batch loss: 1.0986101627349854


 96%|█████████▋| 1618/1677 [21:35<00:47,  1.25it/s]

batch loss: 1.0986143350601196


 97%|█████████▋| 1619/1677 [21:36<00:46,  1.25it/s]

batch loss: 1.0985994338989258


 97%|█████████▋| 1620/1677 [21:37<00:44,  1.28it/s]

batch loss: 1.0986106395721436


 97%|█████████▋| 1621/1677 [21:38<00:43,  1.27it/s]

batch loss: 1.0986040830612183


 97%|█████████▋| 1622/1677 [21:38<00:43,  1.27it/s]

batch loss: 1.098612666130066


 97%|█████████▋| 1623/1677 [21:39<00:44,  1.22it/s]

batch loss: 1.0986063480377197


 97%|█████████▋| 1624/1677 [21:40<00:46,  1.15it/s]

batch loss: 1.098600149154663


 97%|█████████▋| 1625/1677 [21:41<00:43,  1.18it/s]

batch loss: 1.0986101627349854


 97%|█████████▋| 1626/1677 [21:42<00:41,  1.22it/s]

batch loss: 1.0986106395721436


 97%|█████████▋| 1627/1677 [21:43<00:39,  1.26it/s]

batch loss: 1.0986051559448242


 97%|█████████▋| 1628/1677 [21:43<00:38,  1.27it/s]

batch loss: 1.098591923713684


 97%|█████████▋| 1629/1677 [21:44<00:37,  1.29it/s]

batch loss: 1.0986204147338867


 97%|█████████▋| 1630/1677 [21:45<00:36,  1.31it/s]

batch loss: 1.0986227989196777


 97%|█████████▋| 1631/1677 [21:46<00:35,  1.29it/s]

batch loss: 1.0985785722732544


 97%|█████████▋| 1632/1677 [21:47<00:36,  1.24it/s]

batch loss: 1.0986194610595703


 97%|█████████▋| 1633/1677 [21:47<00:36,  1.21it/s]

batch loss: 1.0986419916152954


 97%|█████████▋| 1634/1677 [21:48<00:33,  1.27it/s]

batch loss: 1.0985803604125977


 97%|█████████▋| 1635/1677 [21:49<00:33,  1.26it/s]

batch loss: 1.0985742807388306


 98%|█████████▊| 1636/1677 [21:50<00:32,  1.28it/s]

batch loss: 1.0986162424087524


 98%|█████████▊| 1637/1677 [21:50<00:30,  1.29it/s]

batch loss: 1.0985817909240723


 98%|█████████▊| 1638/1677 [21:51<00:30,  1.28it/s]

batch loss: 1.0985857248306274


 98%|█████████▊| 1639/1677 [21:52<00:29,  1.28it/s]

batch loss: 1.0985876321792603


 98%|█████████▊| 1640/1677 [21:53<00:29,  1.27it/s]

batch loss: 1.0986664295196533


 98%|█████████▊| 1641/1677 [21:54<00:28,  1.26it/s]

batch loss: 1.0986301898956299


 98%|█████████▊| 1642/1677 [21:54<00:27,  1.28it/s]

batch loss: 1.098559856414795


 98%|█████████▊| 1643/1677 [21:55<00:26,  1.30it/s]

batch loss: 1.0985503196716309


 98%|█████████▊| 1644/1677 [21:56<00:25,  1.31it/s]

batch loss: 1.0986844301223755


 98%|█████████▊| 1645/1677 [21:57<00:24,  1.30it/s]

batch loss: 1.0985749959945679


 98%|█████████▊| 1646/1677 [21:57<00:23,  1.31it/s]

batch loss: 1.0986188650131226


 98%|█████████▊| 1647/1677 [21:58<00:22,  1.32it/s]

batch loss: 1.0986133813858032


 98%|█████████▊| 1648/1677 [21:59<00:21,  1.32it/s]

batch loss: 1.0985643863677979


 98%|█████████▊| 1649/1677 [22:00<00:21,  1.30it/s]

batch loss: 1.0986146926879883


 98%|█████████▊| 1650/1677 [22:01<00:21,  1.23it/s]

batch loss: 1.098654866218567


 98%|█████████▊| 1651/1677 [22:01<00:21,  1.21it/s]

batch loss: 1.0985474586486816


 99%|█████████▊| 1652/1677 [22:02<00:20,  1.23it/s]

batch loss: 1.098512887954712


 99%|█████████▊| 1653/1677 [22:03<00:18,  1.27it/s]

batch loss: 1.0987123250961304


 99%|█████████▊| 1654/1677 [22:04<00:17,  1.30it/s]

batch loss: 1.0986039638519287


 99%|█████████▊| 1655/1677 [22:04<00:16,  1.31it/s]

batch loss: 1.0976448059082031


 99%|█████████▊| 1656/1677 [22:05<00:16,  1.31it/s]

batch loss: 1.0985971689224243


 99%|█████████▉| 1657/1677 [22:06<00:15,  1.30it/s]

batch loss: 1.0982381105422974


 99%|█████████▉| 1658/1677 [22:07<00:14,  1.31it/s]

batch loss: 1.1097378730773926


 99%|█████████▉| 1659/1677 [22:08<00:13,  1.29it/s]

batch loss: 1.1078463792800903


 99%|█████████▉| 1660/1677 [22:08<00:13,  1.29it/s]

batch loss: 1.0986484289169312


 99%|█████████▉| 1661/1677 [22:09<00:12,  1.24it/s]

batch loss: 1.0985690355300903


 99%|█████████▉| 1662/1677 [22:10<00:12,  1.24it/s]

batch loss: 1.0986049175262451


 99%|█████████▉| 1663/1677 [22:11<00:11,  1.22it/s]

batch loss: 1.0986294746398926


 99%|█████████▉| 1664/1677 [22:12<00:10,  1.25it/s]

batch loss: 1.0986409187316895


 99%|█████████▉| 1665/1677 [22:12<00:09,  1.22it/s]

batch loss: 1.098610520362854


 99%|█████████▉| 1666/1677 [22:13<00:08,  1.23it/s]

batch loss: 1.0986164808273315


 99%|█████████▉| 1667/1677 [22:14<00:08,  1.21it/s]

batch loss: 1.0986158847808838


 99%|█████████▉| 1668/1677 [22:15<00:07,  1.24it/s]

batch loss: 1.098615288734436


100%|█████████▉| 1669/1677 [22:16<00:06,  1.26it/s]

batch loss: 1.0986135005950928


100%|█████████▉| 1670/1677 [22:16<00:05,  1.27it/s]

batch loss: 1.0986123085021973


100%|█████████▉| 1671/1677 [22:17<00:04,  1.23it/s]

batch loss: 1.0986082553863525


100%|█████████▉| 1672/1677 [22:18<00:04,  1.23it/s]

batch loss: 1.098605751991272


100%|█████████▉| 1673/1677 [22:19<00:03,  1.24it/s]

batch loss: 1.0986192226409912


100%|█████████▉| 1674/1677 [22:20<00:02,  1.24it/s]

batch loss: 1.0986130237579346


100%|█████████▉| 1675/1677 [22:20<00:01,  1.27it/s]

batch loss: 1.0986144542694092


100%|██████████| 1677/1677 [22:21<00:00,  1.25it/s]

batch loss: 1.0986076593399048
batch loss: 1.098608136177063



100%|██████████| 210/210 [02:37<00:00,  1.33it/s]


Epoch 0 validation loss: 230.70818209648132 validation accuracy: 21.39%


  0%|          | 1/1677 [00:00<23:11,  1.20it/s]

batch loss: 1.098612666130066


  0%|          | 2/1677 [00:01<21:46,  1.28it/s]

batch loss: 1.0986042022705078


  0%|          | 3/1677 [00:02<21:37,  1.29it/s]

batch loss: 1.0986214876174927


  0%|          | 4/1677 [00:03<22:40,  1.23it/s]

batch loss: 1.0985950231552124


  0%|          | 5/1677 [00:03<22:00,  1.27it/s]

batch loss: 1.0986030101776123


  0%|          | 6/1677 [00:04<21:31,  1.29it/s]

batch loss: 1.0985920429229736


  0%|          | 7/1677 [00:05<21:34,  1.29it/s]

batch loss: 1.0985996723175049


  0%|          | 8/1677 [00:06<21:45,  1.28it/s]

batch loss: 1.0985987186431885


  1%|          | 9/1677 [00:07<23:00,  1.21it/s]

batch loss: 1.0985957384109497


  1%|          | 10/1677 [00:08<23:23,  1.19it/s]

batch loss: 1.0986038446426392


  1%|          | 11/1677 [00:08<23:29,  1.18it/s]

batch loss: 1.098649501800537


  1%|          | 12/1677 [00:09<23:36,  1.18it/s]

batch loss: 1.0986435413360596


  1%|          | 13/1677 [00:10<23:03,  1.20it/s]

batch loss: 1.0985674858093262


  1%|          | 14/1677 [00:11<22:48,  1.22it/s]

batch loss: 1.0985653400421143


  1%|          | 15/1677 [00:12<22:11,  1.25it/s]

batch loss: 1.0986292362213135


  1%|          | 16/1677 [00:13<22:43,  1.22it/s]

batch loss: 1.0986361503601074


  1%|          | 17/1677 [00:13<22:07,  1.25it/s]

batch loss: 1.0986202955245972


  1%|          | 18/1677 [00:14<22:02,  1.25it/s]

batch loss: 1.0985774993896484


  1%|          | 19/1677 [00:15<21:17,  1.30it/s]

batch loss: 1.0986658334732056


  1%|          | 20/1677 [00:16<22:40,  1.22it/s]

batch loss: 1.0985909700393677


  1%|▏         | 21/1677 [00:16<21:38,  1.28it/s]

batch loss: 1.0985628366470337


  1%|▏         | 22/1677 [00:17<22:11,  1.24it/s]

batch loss: 1.0985822677612305


  1%|▏         | 23/1677 [00:18<22:44,  1.21it/s]

batch loss: 1.0985620021820068


  1%|▏         | 24/1677 [00:19<23:25,  1.18it/s]

batch loss: 1.0985459089279175


  1%|▏         | 25/1677 [00:20<22:33,  1.22it/s]

batch loss: 1.098656415939331


  2%|▏         | 26/1677 [00:21<22:58,  1.20it/s]

batch loss: 1.0987430810928345


  2%|▏         | 27/1677 [00:21<22:12,  1.24it/s]

batch loss: 1.0986814498901367


  2%|▏         | 28/1677 [00:22<22:38,  1.21it/s]

batch loss: 1.0987579822540283


  2%|▏         | 29/1677 [00:23<22:27,  1.22it/s]

batch loss: 1.0986076593399048


  2%|▏         | 30/1677 [00:24<22:14,  1.23it/s]

batch loss: 1.0985760688781738


  2%|▏         | 31/1677 [00:25<22:39,  1.21it/s]

batch loss: 1.0986331701278687


  2%|▏         | 32/1677 [00:25<22:00,  1.25it/s]

batch loss: 1.0986303091049194


  2%|▏         | 33/1677 [00:26<21:53,  1.25it/s]

batch loss: 1.0986806154251099


  2%|▏         | 34/1677 [00:27<21:51,  1.25it/s]

batch loss: 1.0985915660858154


  2%|▏         | 35/1677 [00:28<21:35,  1.27it/s]

batch loss: 1.0985760688781738


  2%|▏         | 36/1677 [00:29<21:17,  1.28it/s]

batch loss: 1.0986665487289429


  2%|▏         | 37/1677 [00:29<21:27,  1.27it/s]

batch loss: 1.0986095666885376


  2%|▏         | 38/1677 [00:30<22:23,  1.22it/s]

batch loss: 1.098610281944275


  2%|▏         | 39/1677 [00:31<22:14,  1.23it/s]

batch loss: 1.0986195802688599


  2%|▏         | 40/1677 [00:32<23:04,  1.18it/s]

batch loss: 1.0986037254333496


  2%|▏         | 41/1677 [00:33<22:29,  1.21it/s]

batch loss: 1.0986149311065674


  3%|▎         | 42/1677 [00:34<21:47,  1.25it/s]

batch loss: 1.0986074209213257


  3%|▎         | 43/1677 [00:34<21:22,  1.27it/s]

batch loss: 1.0986076593399048


  3%|▎         | 44/1677 [00:35<20:50,  1.31it/s]

batch loss: 1.0986095666885376


  3%|▎         | 45/1677 [00:36<21:04,  1.29it/s]

batch loss: 1.0986076593399048


  3%|▎         | 46/1677 [00:37<21:51,  1.24it/s]

batch loss: 1.0986096858978271


  3%|▎         | 47/1677 [00:37<21:44,  1.25it/s]

batch loss: 1.098610758781433


  3%|▎         | 48/1677 [00:38<22:27,  1.21it/s]

batch loss: 1.098610520362854


  3%|▎         | 49/1677 [00:39<22:58,  1.18it/s]

batch loss: 1.0986127853393555


  3%|▎         | 50/1677 [00:40<22:03,  1.23it/s]

batch loss: 1.0986135005950928


  3%|▎         | 51/1677 [00:41<21:57,  1.23it/s]

batch loss: 1.0986120700836182


  3%|▎         | 52/1677 [00:42<22:31,  1.20it/s]

batch loss: 1.0986140966415405


  3%|▎         | 53/1677 [00:42<21:48,  1.24it/s]

batch loss: 1.0986133813858032


  3%|▎         | 54/1677 [00:43<21:02,  1.29it/s]

batch loss: 1.098610520362854


  3%|▎         | 55/1677 [00:44<20:37,  1.31it/s]

batch loss: 1.098613977432251


  3%|▎         | 56/1677 [00:45<21:18,  1.27it/s]

batch loss: 1.0986121892929077


  3%|▎         | 57/1677 [00:45<20:50,  1.30it/s]

batch loss: 1.0986132621765137


  3%|▎         | 58/1677 [00:46<21:32,  1.25it/s]

batch loss: 1.0986104011535645


  4%|▎         | 59/1677 [00:47<20:58,  1.29it/s]

batch loss: 1.0986098051071167


  4%|▎         | 60/1677 [00:48<20:35,  1.31it/s]

batch loss: 1.0986113548278809


  4%|▎         | 61/1677 [00:48<20:29,  1.31it/s]

batch loss: 1.0986119508743286


  4%|▎         | 62/1677 [00:49<20:24,  1.32it/s]

batch loss: 1.0986120700836182


  4%|▍         | 63/1677 [00:50<20:16,  1.33it/s]

batch loss: 1.09861159324646


  4%|▍         | 64/1677 [00:51<20:14,  1.33it/s]

batch loss: 1.098612666130066


  4%|▍         | 65/1677 [00:51<19:56,  1.35it/s]

batch loss: 1.0986106395721436


  4%|▍         | 66/1677 [00:52<20:51,  1.29it/s]

batch loss: 1.098611831665039


  4%|▍         | 67/1677 [00:53<21:02,  1.27it/s]

batch loss: 1.0986108779907227


  4%|▍         | 68/1677 [00:54<21:40,  1.24it/s]

batch loss: 1.098613977432251


  4%|▍         | 69/1677 [00:55<20:25,  1.31it/s]

batch loss: 1.098612904548645


  4%|▍         | 70/1677 [00:55<20:43,  1.29it/s]

batch loss: 1.0986148118972778


  4%|▍         | 71/1677 [00:56<20:40,  1.29it/s]

batch loss: 1.0986145734786987


  4%|▍         | 72/1677 [00:57<20:11,  1.32it/s]

batch loss: 1.098609209060669


  4%|▍         | 73/1677 [00:58<20:59,  1.27it/s]

batch loss: 1.09861421585083


  4%|▍         | 74/1677 [00:59<21:32,  1.24it/s]

batch loss: 1.0986131429672241


  4%|▍         | 75/1677 [00:59<21:00,  1.27it/s]

batch loss: 1.0986112356185913


  5%|▍         | 76/1677 [01:00<20:40,  1.29it/s]

batch loss: 1.0986123085021973


  5%|▍         | 77/1677 [01:01<20:26,  1.30it/s]

batch loss: 1.0986133813858032


  5%|▍         | 78/1677 [01:02<20:46,  1.28it/s]

batch loss: 1.09861421585083


  5%|▍         | 79/1677 [01:02<20:54,  1.27it/s]

batch loss: 1.0986140966415405


  5%|▍         | 80/1677 [01:03<20:37,  1.29it/s]

batch loss: 1.0986133813858032


  5%|▍         | 81/1677 [01:04<21:18,  1.25it/s]

batch loss: 1.0986136198043823


  5%|▍         | 82/1677 [01:05<20:38,  1.29it/s]

batch loss: 1.0986117124557495


  5%|▍         | 83/1677 [01:06<21:20,  1.24it/s]

batch loss: 1.0986108779907227


  5%|▍         | 83/1677 [01:06<21:25,  1.24it/s]


KeyboardInterrupt: 