In [None]:
!pip install sentencepiece
# !wget https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt

Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/e5/2d/6d4ca4bef9a67070fa1cac508606328329152b1df10bdf31fb6e4e727894/sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl (1.1MB)
[K     |▎                               | 10kB 18.0MB/s eta 0:00:01[K     |▋                               | 20kB 23.8MB/s eta 0:00:01[K     |▉                               | 30kB 14.3MB/s eta 0:00:01[K     |█▏                              | 40kB 10.3MB/s eta 0:00:01[K     |█▌                              | 51kB 7.0MB/s eta 0:00:01[K     |█▊                              | 61kB 6.8MB/s eta 0:00:01[K     |██                              | 71kB 7.2MB/s eta 0:00:01[K     |██▍                             | 81kB 7.8MB/s eta 0:00:01[K     |██▋                             | 92kB 8.1MB/s eta 0:00:01[K     |███                             | 102kB 8.5MB/s eta 0:00:01[K     |███▎                            | 112kB 8.5MB/s eta 0:00:01[K     |███▌               

In [None]:
import gc
import os
import random

import numpy as np
import sentencepiece as spm
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset
from tqdm import tqdm


class SortedBatchSampler:
    """Batch sampler that puts items with similar sort keys into the same batch.

    Every item of ``data`` is visited once, ``sort_key`` is evaluated on it, and
    the item indexes are ordered by that key. Consecutive runs of at most
    ``batch_size`` indexes then form the batches, so a batch never exceeds
    ``batch_size`` and the final batch holds whatever is left over.

    Args:
        data: Sized, iterable collection of items (supports ``len`` and iteration).
        batch_size: Maximum number of indexes per batch; must be at least 1.
        shuffle: When true, the order of the batches (not their content) is
            reshuffled in place every time the sampler is iterated.
        sort_key: Callable mapping an item to a sortable value.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self, data, batch_size, shuffle, sort_key):
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer, got " + str(batch_size))
        self.shuffle = shuffle

        print("Sorting....")
        keyed = [(i, sort_key(row)) for i, row in tqdm(enumerate(data), total=len(data))]
        # list.sort is stable, so items with equal keys keep their original order.
        keyed.sort(key=lambda r: r[1])
        indexes = [item[0] for item in keyed]

        # Slice in fixed steps instead of np.array_split(indexes, len // batch_size):
        # array_split spreads the remainder over the sections (producing batches
        # larger than batch_size) and fails outright when len(indexes) < batch_size.
        self.batches = [
            np.array(indexes[start:start + batch_size])
            for start in range(0, len(indexes), batch_size)
        ]

    def __iter__(self):
        """Yield the batches, reshuffling their order first when ``shuffle`` is set."""
        if self.shuffle:
            random.shuffle(self.batches)
        return iter(self.batches)

    def __len__(self):
        """Return the number of batches."""
        return len(self.batches)


class TextProcessor:
    """Converts between text and sub-word ids using a SentencePiece model.

    The model file is named ``<prefix>_<sub_dim>_<model_type>.model``. If that
    file is not present, a model is trained from ``training_file`` first and
    then loaded.

    Args:
        sub_dim: Vocabulary size passed to the trainer (also part of the file name).
        training_file: Path of the text file used when a model has to be trained.
        prefix: Leading part of the model file name.
        model_type: SentencePiece model type, e.g. ``"bpe"``.
    """

    def __init__(self, sub_dim, training_file="", prefix="subwords", model_type="bpe"):
        super().__init__()
        self.prefix = "_".join((prefix, str(sub_dim), model_type))
        model_file = self.prefix + ".model"
        # training_file = ""
        if not os.path.isfile(model_file):
            # No trained model on disk yet: build the trainer command line.
            trainer_flags = [
                '--input=' + training_file,
                '--model_prefix=' + self.prefix,
                '--character_coverage=1.0',
                '--vocab_size=' + str(sub_dim),
                '--model_type=' + model_type,
                '--max_sentence_length=100000',
                '--split_by_whitespace=true',
            ]
            spm.SentencePieceTrainer.Train(' '.join(trainer_flags))

        self.sp = spm.SentencePieceProcessor()
        self.sp.Load(model_file)

    def encode(self, text):
        """Return the list of sub-word ids for ``text``."""
        ids = self.sp.EncodeAsIds(text)
        return ids

    def decode(self, id_array):
        """Return the text reconstructed from the ids in ``id_array``."""
        text = self.sp.decode_ids(id_array)
        return text


class GPT2Dataset(Dataset):
    """Dataset of sub-word id sequences, one sequence per line of a text corpus.

    On first use every line of ``corpus_path`` is encoded with a
    ``TextProcessor`` and the result is cached at ``dataset_plk_path``; later
    runs load the cache instead of re-encoding.

    Args:
        corpus_path: Text file with one training sample per line.
        max_len: Maximum number of ids returned for one item.
        n_tokens: Vocabulary size handed to ``TextProcessor``.
        dataset_plk_path: Location of the ``.npy`` cache of encoded lines.
    """

    def __init__(self, corpus_path, max_len, n_tokens, dataset_plk_path="data/gpt2_dataset.npy"):

        self.max_len = max_len
        if os.path.isfile(dataset_plk_path):
            # NOTE: allow_pickle=True unpickles the file content; only load
            # cache files that were produced by this code.
            # list(...) keeps self.corpus the same type as in the branch below.
            self.corpus = list(np.load(dataset_plk_path, allow_pickle=True))
        else:
            self.corpus = []
            processor = TextProcessor(n_tokens, corpus_path)
            # int16 holds ids up to 32767; switch to int32 for larger vocabularies
            # so ids cannot wrap around.
            id_dtype = np.int16 if n_tokens <= np.iinfo(np.int16).max + 1 else np.int32
            print("Encoding texts...")
            with open(corpus_path, 'r') as file:
                for line in tqdm(file):
                    encoded = processor.encode(line)
                    self.corpus.append(np.array(encoded, dtype=id_dtype))
            gc.collect()

            cache_dir = os.path.dirname(dataset_plk_path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            # The lines have different lengths, so store them in an explicit
            # object array; recent NumPy refuses to build one implicitly.
            ragged = np.empty(len(self.corpus), dtype=object)
            for position, sequence in enumerate(self.corpus):
                ragged[position] = sequence
            np.save(dataset_plk_path, ragged)

    def __getitem__(self, index):
        """Return a window of at most ``max_len`` ids from line ``index`` as a LongTensor.

        Lines longer than ``max_len`` are cropped at a random offset, so repeated
        calls with the same index may return different windows.

        Raises:
            IndexError: If ``index`` is at or beyond the end of the corpus.
        """
        if index >= len(self.corpus):
            raise IndexError("index " + str(index) + " is out of range for a corpus of "
                             + str(len(self.corpus)) + " lines")

        encoded = self.corpus[index]
        offset = random.randint(0, max(0, len(encoded) - self.max_len))
        encoded = encoded[offset:self.max_len + offset]

        # torch.tensor copies, so the returned tensor never aliases the cached corpus.
        return torch.tensor(encoded, dtype=torch.long)

    def __len__(self):
        """Return the number of encoded lines."""
        return len(self.corpus)


def collate(batch):
    """Stack variable-length tensors into one batch-first tensor, padding with 0."""
    padded = pad_sequence(batch, padding_value=0, batch_first=True)
    return padded


# Model GPT-2


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import kaiming_normal_


class ScaledDotProductAttention(nn.Module):
    """Causal scaled dot-product attention over batched 3-D tensors.

    Scores are ``q @ k^T`` divided by ``sqrt(key_dim)``. Every position is
    prevented from attending to later positions by masking the strict upper
    triangle of the score matrix before the softmax.

    Args:
        key_dim: Value whose square root is used as the score divisor.
        drop: Dropout probability applied to the attention weights.
    """

    def __init__(self, key_dim, drop=0.1):
        super().__init__()
        self.temperature = np.power(key_dim, 0.5)
        self.dropout = nn.Dropout(drop)

    def forward(self, q, k, v):
        """Return ``(context, attention)``.

        ``q``, ``k`` and ``v`` are multiplied with ``torch.bmm`` and therefore
        must be 3-D. The mask is square with side ``q.size(1)``, so ``q`` and
        ``k`` are expected to have the same length along dimension 1.
        ``context`` keeps the 3-D layout of ``q`` even for a single position.
        """
        energies = (torch.bmm(q, k.transpose(1, 2))) / self.temperature

        seq_len = energies.size(1)
        # Build the mask directly on the target device instead of creating it
        # on the CPU and copying it over on every call.
        mask = torch.tril(torch.ones(seq_len, seq_len, device=energies.device)) == 0
        energies.masked_fill_(mask, -np.inf)

        attention = self.dropout(F.softmax(energies, dim=2))
        # No squeeze(1) here: it silently removed the sequence axis whenever
        # seq_len == 1, handing callers a 2-D tensor they cannot unpack.
        context = torch.bmm(attention, v)
        return context, attention


class MultiHeadAttention(nn.Module):
    """Multi-head self-attention with separate query/key/value/output projections.

    The key, value and output projections are stored in half precision; the
    query projection stays in the default precision.
    NOTE(review): confirm that leaving ``query_trans`` in full precision is intended.

    Args:
        embed_dim: Size of the feature dimension of the input and the output.
        num_heads: Number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: If ``num_heads`` is smaller than 1 or does not divide ``embed_dim``.
    """

    def __init__(self, embed_dim, num_heads):
        # Fail early with a clear message; otherwise the mismatch only surfaces
        # as a view/reshape error inside split_heads during the first forward pass.
        if num_heads < 1 or embed_dim % num_heads != 0:
            raise ValueError("embed_dim (" + str(embed_dim) + ") must be divisible by num_heads ("
                             + str(num_heads) + ")")
        super(MultiHeadAttention, self).__init__()
        self.n_heads = num_heads
        # NOTE(review): scores are scaled by sqrt(embed_dim), not by the per-head
        # size sqrt(embed_dim / num_heads) — confirm this is intended before
        # changing it, since existing checkpoints were trained with this scale.
        self.attn = ScaledDotProductAttention(embed_dim)

        self.query_trans = nn.Linear(embed_dim, embed_dim)
        self.keys_trans = nn.Linear(embed_dim, embed_dim)
        self.value_trans = nn.Linear(embed_dim, embed_dim)
        self.projection = nn.Linear(embed_dim, embed_dim)
        kaiming_normal_(self.query_trans.weight, nonlinearity="linear")
        kaiming_normal_(self.keys_trans.weight, nonlinearity="linear")
        kaiming_normal_(self.value_trans.weight, nonlinearity="linear")
        kaiming_normal_(self.projection.weight, nonlinearity="linear")

        self.keys_trans = self.keys_trans.half()
        self.value_trans = self.value_trans.half()
        self.projection = self.projection.half()

    def split_heads(self, x):
        """Reshape ``(batch, seq, features)`` to ``(heads * batch, seq, features / heads)``.

        The head index is the slow-moving one in the first output dimension.
        """
        bs, seq_len, _ = x.size()
        # result: (head*batch_size) x seq_len x new features
        return x.view(bs, seq_len, self.n_heads, -1).permute(2, 0, 1, 3).reshape(self.n_heads * bs, seq_len, -1)

    def merge_heads(self, x):
        """Inverse of ``split_heads``: back to ``(batch, seq, heads * features)``."""
        _, seq_len, features_size = x.size()
        x = x.view(self.n_heads, -1, seq_len, features_size)
        bs = x.size(1)
        # batch_size x seq_len x heads x features
        return x.permute(1, 2, 0, 3).reshape(bs, seq_len, -1)

    def forward(self, x):
        """Apply self-attention to ``x`` and return the projected result in half precision."""
        q = self.split_heads(self.query_trans(x))
        # Keys and values are computed in half precision and cast back to float
        # before the attention product.
        k = self.split_heads(self.keys_trans(x.half()).float())
        v = self.split_heads(self.value_trans(x.half()).float())
        a, _ = self.attn(q, k, v)
        a = self.merge_heads(a)
        return self.projection(a.half())


class MLP(nn.Module):
    """Two-layer feed-forward network with a GELU in between, stored in half precision.

    Args:
        embed_dim: Size of the input and output feature dimension.
        factor: Multiplier giving the hidden size (``embed_dim * factor``).
    """

    def __init__(self, embed_dim, factor=4):
        super().__init__()
        hidden_dim = embed_dim * factor
        self.fc = nn.Linear(embed_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, embed_dim)

        # Initialise both weight matrices first, then convert the layers to half.
        for layer, nonlinearity in ((self.fc, "relu"), (self.fc2, "linear")):
            kaiming_normal_(layer.weight, nonlinearity=nonlinearity)

        for layer in (self.fc, self.fc2):
            layer.half()  # converts the layer's parameters in place

    def forward(self, x):
        """Expand, apply GELU in float, project back; the result is half precision."""
        expanded = self.fc(x.half())
        activated = F.gelu(expanded.float())
        return self.fc2(activated.half())

class Block(nn.Module):
    """Pre-norm residual block: attention sub-layer followed by an MLP sub-layer.

    Args:
        embed_dim: Feature size shared by both layer norms and both sub-layers.
        num_heads: Number of heads handed to the attention sub-layer.
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        self.ln_1 = nn.LayerNorm(embed_dim)
        self.attn = MultiHeadAttention(embed_dim, num_heads)
        self.ln_2 = nn.LayerNorm(embed_dim)
        self.mlp = MLP(embed_dim)

    def forward(self, x):
        """Add the attention output to ``x``, then add the MLP output to that sum."""
        after_attention = x + self.attn(self.ln_1(x))
        return after_attention + self.mlp(self.ln_2(after_attention))


def get_positional_encoding(n_positions, n_embd):
    """Build a fixed sinusoidal position table of shape ``(1, n_positions, n_embd)``.

    Column ``i`` uses the angular rate ``1 / 10000 ** (2 * (i // 2) / n_embd)``.
    The sines of the even columns come first along the last axis, followed by
    the cosines of the odd columns (the two halves are concatenated, not
    interleaved). The result is a float32 tensor.
    """
    positions = np.arange(n_positions)[:, np.newaxis]
    columns = np.arange(n_embd)[np.newaxis, :]

    rates = 1 / np.power(10000, (2 * (columns // 2)) / np.float32(n_embd))
    angles = positions * rates

    sin_half = np.sin(angles[:, 0::2])
    cos_half = np.cos(angles[:, 1::2])
    table = np.concatenate([sin_half, cos_half], axis=-1)

    # Leading axis of size 1 so the table broadcasts over the batch dimension.
    return torch.tensor(table[np.newaxis, ...], dtype=torch.float)


class GPT2Model(nn.Module):
    """Decoder-only transformer language model.

    Token embeddings (half precision) plus a fixed sinusoidal position table
    are passed through ``n_layer`` ``Block`` modules, a final layer norm and a
    bias-free linear decoder. The defaults reproduce the configuration that
    used to be hard-coded, so ``GPT2Model()`` builds the same network as before.

    Args:
        n_layer: Number of transformer blocks.
        n_embd: Embedding / hidden size.
        n_head: Number of attention heads per block.
        n_tokens: Vocabulary size; token id 0 is the embedding's padding index.
        n_positions: Longest sequence the position table supports.
    """

    def __init__(self, n_layer=12, n_embd=768 + 128, n_head=12 + 2, n_tokens=512, n_positions=512):
        super(GPT2Model, self).__init__()
        self.n_layer = n_layer
        self.n_embd = n_embd
        self.n_head = n_head
        self.n_tokens = n_tokens
        self.n_positions = n_positions  # TODO

        self.wte = nn.Embedding(self.n_tokens, self.n_embd, padding_idx=0).half()
        # Registered as a buffer so the table follows the module across devices
        # without being treated as a trainable parameter.
        self.register_buffer('positional_encoding', get_positional_encoding(self.n_positions, self.n_embd))
        self.blocks = nn.ModuleList([Block(self.n_embd, self.n_head) for _ in range(self.n_layer)])

        self.decoder_norm = nn.LayerNorm(self.n_embd)
        self.decoder = nn.Linear(self.n_embd, self.n_tokens, bias=False)
        kaiming_normal_(self.decoder.weight, nonlinearity="linear")

    def forward(self, input_ids):
        """Return log-probabilities over the vocabulary for every input position.

        Args:
            input_ids: Integer tensor of token ids, indexed as ``(batch, sequence)``.

        Returns:
            Tensor with one log-softmax distribution of size ``n_tokens`` per
            position. Because these are already log-probabilities they pair
            with a negative-log-likelihood loss.

        Raises:
            ValueError: If the sequence is longer than ``n_positions``.
        """
        seq_len = input_ids.size(1)
        if seq_len > self.n_positions:
            # Without this check the failure is an opaque broadcasting error below.
            raise ValueError("sequence length " + str(seq_len) + " exceeds n_positions="
                             + str(self.n_positions))

        inputs_embeds = self.wte(input_ids)
        position_embeds = self.positional_encoding[:, :seq_len, :]
        hidden_states = (inputs_embeds + position_embeds).float()

        for block in self.blocks:
            hidden_states = block(hidden_states)

        decoded = self.decoder(self.decoder_norm(hidden_states))
        return F.log_softmax(decoded, dim=-1)


# Train



In [None]:
! git clone https://github.com/NVIDIA/apex.git
% cd apex
% cd fp16_utils
!pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
%cd ../..

fatal: destination path 'apex' already exists and is not an empty directory.
/content/apex
[Errno 2] No such file or directory: 'fp16_utils'
/content/apex
  cmdoptions.check_install_build_global(options)
Processing /content/apex
Skipping wheel build for apex, due to binaries being disabled for it.
Installing collected packages: apex
    Running setup.py install for apex ... [?25l[?25hdone
Successfully installed apex-0.1
/


In [None]:
import torch.optim as optim
from torch.optim import SGD

In [None]:
# Mount Google Drive at /gdrive so that files stored there are reachable from
# this runtime. The google.colab module only exists on Google Colab.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [None]:
# Make the project folder on the mounted Drive the working directory, so the
# relative paths used by the training cell (data/..., models/...) resolve there.
root = "/gdrive/My Drive/CS470"
import os
os.chdir(root)
# Last expression of the cell: displays the new working directory as a check.
os.getcwd()

'/gdrive/My Drive/CS470'

In [None]:
import torch
from apex.fp16_utils import FP16_Optimizer
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm

# from lm.gpt2.data.data_loader import GPT2Dataset, collate, SortedBatchSampler
# from lm.gpt2.lamb import Lamb
# from lm.gpt2.model import GPT2Model

if __name__ == '__main__':
    # ---- Configuration ----
    # lr = 0.0001
    # NOTE(review): 1e-7 is three orders of magnitude below the commented-out
    # value above — confirm which one is intended.
    lr = 1e-7
    wd = 1.2e-6  # NOTE(review): not passed to the optimizer below — confirm whether weight decay is wanted
    batch_size = 8
    load_ver = 0  # 0 = start from scratch, otherwise the checkpoint number to resume from
    device = "cuda:0"
    corpus_path = "data/processed_lyrics.txt"

    # ---- Model, data and optimisation objects ----
    model = GPT2Model()
    model = model.to(device)
    # One token more than the model consumes, because inputs and targets are
    # the same sequence shifted by one position.
    dataset = GPT2Dataset(corpus_path, model.n_positions + 1, model.n_tokens)
    sampler = SortedBatchSampler(dataset, batch_size, True, sort_key=lambda d: d.size(0))
    train_loader = DataLoader(dataset, batch_sampler=sampler, collate_fn=collate, num_workers=1, pin_memory=True)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
            factor=0.25, patience=1, threshold=0.0001, threshold_mode='rel',
            cooldown=0, min_lr=0, eps=1e-08, verbose=False)
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    # optimizer = FP16_Optimizer(optimizerr, dynamic_loss_scale=True)

    # The model already returns log-probabilities, so the matching criterion is
    # NLLLoss. ignore_index=0 keeps the zero padding added by collate() out of
    # the loss.
    criterion = torch.nn.NLLLoss(ignore_index=0).to(device)

    print("\n\nTotal params: " + str(sum(p.numel() for p in model.parameters())))

    if load_ver != 0:
        model.load_state_dict(torch.load(f"models/model{load_ver}", map_location=device))
        optimizer.load_state_dict(torch.load(f"models/optimizer_state", map_location=device))

    # Checkpoints are written below; make sure their directory exists.
    os.makedirs("models", exist_ok=True)

    # ---- Training loop ----
    model.train()
    running_loss = 0
    # for iteration in range(1 + load_ver, 30):
    for iteration in range(1):
        print("\n\n\nIteration: " + str(iteration))

        for i, contexts in enumerate(tqdm(train_loader, smoothing=0)):
            contexts = contexts.to(device)
            input_x = contexts[:, :-1]
            targets = contexts[:, 1:]

            # Batches whose input would be a single token (or empty) are skipped.
            if input_x.shape[1] <= 1:
                continue

            optimizer.zero_grad()
            # Use the model created above. It must NOT be re-instantiated here:
            # the optimizer holds the parameters of that first instance, so a
            # fresh model per batch is never updated and the loss cannot improve.
            output = model(input_x)
            loss = criterion(output.reshape(-1, output.size(-1)), targets.reshape(-1))
            # optimizer.backward(loss)

            loss.backward()
            # optimizer.clip_master_grads(5)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()

            # Loss printer: exponential moving average, reported every 100 steps.
            loss_value = loss.item()
            running_loss = loss_value if running_loss == 0 else (running_loss * 0.99 + loss_value * 0.01)
            if i % 100 == 0:
                print(f"Loss: {running_loss} Current loss: {loss_value}")

            # Partial saver
            if i > 0 and i % 500 == 0:
                print("\n\nPartial save...")
                torch.save(model.state_dict(), f"models/model_partial")
                torch.save(optimizer.state_dict(), f"models/optimizer_state_partial")

        # Lower the learning rate when the smoothed loss stops improving.
        scheduler.step(running_loss)

        print("\n\nSave model...")
        torch.save(model.state_dict(), f"models/model" + str(iteration))
        torch.save(optimizer.state_dict(), f"models/optimizer_state")
















  0%|          | 0/15173 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A













 29%|██▉       | 4426/15173 [00:00<00:00, 44253.08it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A

Sorting....
















 58%|█████▊    | 8773/15173 [00:00<00:00, 44013.53it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A













100%|██████████| 15173/15173 [00:00<00:00, 43616.62it/s]














  0%|          | 0/1896 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A



Total params: 116664576



Iteration: 0
















  0%|          | 1/1896 [00:02<1:33:44,  2.97s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.832450866699219
Loss: 6.832450866699219 Current loss: 6.832450866699219
















  0%|          | 2/1896 [00:05<1:28:31,  2.80s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.75419282913208
















  0%|          | 3/1896 [00:08<1:27:05,  2.76s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.358262062072754
















  0%|          | 4/1896 [00:10<1:25:59,  2.73s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.883780002593994
















  0%|          | 5/1896 [00:13<1:25:21,  2.71s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.562350749969482
















  0%|          | 6/1896 [00:16<1:24:45,  2.69s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.590854644775391
















  0%|          | 7/1896 [00:18<1:24:30,  2.68s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.000006198883057
















  0%|          | 8/1896 [00:21<1:24:16,  2.68s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8091325759887695
















  0%|          | 9/1896 [00:24<1:23:58,  2.67s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.581447601318359
















  1%|          | 10/1896 [00:26<1:23:42,  2.66s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.920803070068359
















  1%|          | 11/1896 [00:29<1:23:33,  2.66s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.94904088973999
















  1%|          | 12/1896 [00:31<1:23:31,  2.66s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.679119110107422
















  1%|          | 13/1896 [00:34<1:23:24,  2.66s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6423773765563965
















  1%|          | 14/1896 [00:37<1:23:15,  2.65s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.653047561645508
















  1%|          | 15/1896 [00:39<1:23:08,  2.65s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.884103775024414
















  1%|          | 16/1896 [00:42<1:23:02,  2.65s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6534037590026855
















  1%|          | 17/1896 [00:45<1:23:01,  2.65s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.479946136474609
















  1%|          | 18/1896 [00:47<1:22:59,  2.65s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.797831058502197
















  1%|          | 19/1896 [00:50<1:22:49,  2.65s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.333127498626709
















  1%|          | 20/1896 [00:52<1:22:42,  2.65s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.171147346496582
















  1%|          | 21/1896 [00:55<1:22:35,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.69708776473999
















  1%|          | 22/1896 [00:58<1:22:32,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.980018138885498
















  1%|          | 23/1896 [01:00<1:22:27,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.964940071105957
















  1%|▏         | 24/1896 [01:03<1:22:23,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.824216365814209
















  1%|▏         | 25/1896 [01:06<1:22:19,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.881292819976807
















  1%|▏         | 26/1896 [01:08<1:22:13,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.802831172943115
















  1%|▏         | 27/1896 [01:11<1:22:09,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.583911895751953
















  1%|▏         | 28/1896 [01:13<1:22:07,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.73768424987793
















  2%|▏         | 29/1896 [01:16<1:22:03,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9283318519592285
















  2%|▏         | 30/1896 [01:19<1:22:00,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.914011478424072
















  2%|▏         | 31/1896 [01:21<1:21:58,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.796518325805664
















  2%|▏         | 32/1896 [01:24<1:21:52,  2.64s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.841530799865723
















  2%|▏         | 33/1896 [01:26<1:21:47,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.641579627990723
















  2%|▏         | 34/1896 [01:29<1:21:43,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.871303081512451
















  2%|▏         | 35/1896 [01:32<1:21:41,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.860577583312988
















  2%|▏         | 36/1896 [01:34<1:21:38,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.919724464416504
















  2%|▏         | 37/1896 [01:37<1:21:36,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.74665641784668
















  2%|▏         | 38/1896 [01:40<1:21:34,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8279500007629395
















  2%|▏         | 39/1896 [01:42<1:21:32,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.832892417907715
















  2%|▏         | 40/1896 [01:45<1:21:29,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.594130039215088
















  2%|▏         | 41/1896 [01:48<1:21:27,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.233733654022217
















  2%|▏         | 42/1896 [01:50<1:21:24,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.635280132293701
















  2%|▏         | 43/1896 [01:53<1:21:20,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.005838871002197
















  2%|▏         | 44/1896 [01:55<1:21:17,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.798622131347656
















  2%|▏         | 45/1896 [01:58<1:21:13,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.049228668212891
















  2%|▏         | 46/1896 [02:01<1:21:10,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.305850982666016
















  2%|▏         | 47/1896 [02:03<1:21:08,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.602763652801514
















  3%|▎         | 48/1896 [02:06<1:21:05,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.890411376953125
















  3%|▎         | 49/1896 [02:08<1:21:00,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.787694931030273
















  3%|▎         | 50/1896 [02:11<1:21:02,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.754730224609375
















  3%|▎         | 51/1896 [02:14<1:20:59,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.947638511657715
















  3%|▎         | 52/1896 [02:16<1:20:56,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.022204399108887
















  3%|▎         | 53/1896 [02:19<1:20:54,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.691074848175049
















  3%|▎         | 55/1896 [02:22<1:19:21,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.414587020874023
loss 6.816498279571533
















  3%|▎         | 57/1896 [02:27<1:19:17,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.651459217071533
loss 6.651821613311768
















  3%|▎         | 59/1896 [02:32<1:19:15,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5809736251831055
loss 7.219332218170166
















  3%|▎         | 61/1896 [02:37<1:19:11,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.065330505371094
loss 6.123622894287109
















  3%|▎         | 63/1896 [02:43<1:19:08,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8190083503723145
loss 6.826785087585449
















  3%|▎         | 65/1896 [02:48<1:19:05,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.727625370025635
loss 6.677080154418945
















  4%|▎         | 67/1896 [02:53<1:19:00,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.998593807220459
loss 6.658993244171143
















  4%|▎         | 69/1896 [02:58<1:18:55,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.816888809204102
loss 6.762754440307617
















  4%|▎         | 71/1896 [03:04<1:18:52,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5462775230407715
loss 6.766262054443359
















  4%|▍         | 73/1896 [03:09<1:18:49,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.644355297088623
loss 6.835740089416504
















  4%|▍         | 75/1896 [03:14<1:18:46,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.316471576690674
loss 6.502997398376465
















  4%|▍         | 77/1896 [03:19<1:18:43,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.694737434387207
loss 6.614903450012207
















  4%|▍         | 79/1896 [03:25<1:18:38,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.72227144241333
loss 6.747639179229736
















  4%|▍         | 81/1896 [03:30<1:18:33,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.693138599395752
loss 6.89222526550293
















  4%|▍         | 83/1896 [03:35<1:18:29,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.663137912750244
loss 6.637808322906494
















  4%|▍         | 85/1896 [03:40<1:18:25,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.968385219573975
loss 6.6681389808654785
















  5%|▍         | 87/1896 [03:46<1:18:20,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.589049816131592
loss 6.6745476722717285
















  5%|▍         | 89/1896 [03:51<1:18:16,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.746039390563965
loss 6.805563449859619
















  5%|▍         | 91/1896 [03:56<1:18:12,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.3847126960754395
loss 7.026041030883789
















  5%|▍         | 93/1896 [04:01<1:18:07,  2.60s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.99684476852417
















  5%|▌         | 95/1896 [04:04<1:17:13,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.634820938110352
















  5%|▌         | 97/1896 [04:07<1:16:21,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.848821640014648
loss 6.9641594886779785
















  5%|▌         | 99/1896 [04:12<1:16:19,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.673901557922363
loss 6.836423873901367
















  5%|▌         | 101/1896 [04:17<1:16:18,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.730770587921143
Loss: 6.783285720610754 Current loss: 6.730770587921143
loss 7.406824588775635
















  5%|▌         | 103/1896 [04:22<1:16:17,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5396013259887695
loss 6.641259670257568
















  6%|▌         | 105/1896 [04:28<1:16:15,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.559059143066406
loss 6.921621322631836
















  6%|▌         | 107/1896 [04:33<1:16:12,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.701081275939941
loss 6.527522563934326
















  6%|▌         | 109/1896 [04:38<1:16:09,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.899744033813477
loss 6.731227874755859
















  6%|▌         | 111/1896 [04:43<1:16:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.756067276000977
loss 6.740653991699219
















  6%|▌         | 113/1896 [04:49<1:16:03,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.838419437408447
loss 6.808045387268066
















  6%|▌         | 115/1896 [04:54<1:15:59,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.428625106811523
loss 6.7665910720825195
















  6%|▌         | 117/1896 [04:59<1:15:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.568483352661133
loss 6.796536445617676
















  6%|▋         | 119/1896 [05:04<1:15:52,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6212158203125
loss 6.626857757568359
















  6%|▋         | 121/1896 [05:10<1:15:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.229128837585449
loss 6.948502063751221
















  6%|▋         | 123/1896 [05:15<1:15:46,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.765650272369385
















  7%|▋         | 125/1896 [05:18<1:15:05,  2.54s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.735250473022461
loss 6.719325542449951
















  7%|▋         | 127/1896 [05:23<1:15:01,  2.54s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.582040786743164
loss 6.800788879394531
















  7%|▋         | 129/1896 [05:28<1:14:59,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6365814208984375
loss 6.552929878234863
















  7%|▋         | 131/1896 [05:33<1:14:56,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.993305206298828
loss 6.723988056182861
















  7%|▋         | 133/1896 [05:38<1:14:53,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.987706661224365
loss 6.76379919052124
















  7%|▋         | 135/1896 [05:44<1:14:50,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.612234115600586
loss 6.800942420959473
















  7%|▋         | 137/1896 [05:49<1:14:46,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.881843090057373
loss 6.716943264007568
















  7%|▋         | 139/1896 [05:54<1:14:43,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.783745288848877
loss 6.720621585845947
















  7%|▋         | 141/1896 [05:59<1:14:40,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.575000762939453
loss 6.745454788208008
















  8%|▊         | 143/1896 [06:05<1:14:36,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.690268039703369
loss 6.733863353729248
















  8%|▊         | 145/1896 [06:10<1:14:32,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.573740005493164
loss 6.647915363311768
















  8%|▊         | 147/1896 [06:15<1:14:29,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.787347793579102
loss 7.181148529052734
















  8%|▊         | 149/1896 [06:20<1:14:26,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.678556442260742
loss 6.484338760375977
















  8%|▊         | 151/1896 [06:26<1:14:22,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.427098274230957
loss 6.41668701171875
















  8%|▊         | 153/1896 [06:31<1:14:19,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.762680530548096
loss 6.943584442138672
















  8%|▊         | 155/1896 [06:36<1:14:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8575334548950195
loss 6.939368724822998
















  8%|▊         | 157/1896 [06:41<1:14:12,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.745162010192871
loss 6.810059547424316
















  8%|▊         | 159/1896 [06:47<1:14:08,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.571520805358887
loss 6.580671787261963
















  8%|▊         | 161/1896 [06:52<1:14:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.85538387298584
loss 6.509293556213379
















  9%|▊         | 163/1896 [06:57<1:14:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.746362686157227
loss 6.6301374435424805
















  9%|▊         | 165/1896 [07:03<1:13:57,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.869856357574463
loss 6.534698963165283
















  9%|▉         | 167/1896 [07:08<1:13:54,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9263596534729
loss 6.867238998413086
















  9%|▉         | 169/1896 [07:13<1:13:49,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.925644874572754
loss 6.812694549560547
















  9%|▉         | 171/1896 [07:18<1:13:46,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.811349868774414
loss 6.4786224365234375
















  9%|▉         | 173/1896 [07:24<1:13:42,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.756347179412842
loss 6.7212700843811035
















  9%|▉         | 175/1896 [07:29<1:13:38,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.78185510635376
loss 6.601021766662598
















  9%|▉         | 177/1896 [07:34<1:13:34,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.809401512145996
loss 6.692765712738037
















  9%|▉         | 179/1896 [07:39<1:13:30,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.755113124847412
loss 6.953767776489258
















 10%|▉         | 181/1896 [07:45<1:13:26,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.772005558013916
loss 6.233985900878906
















 10%|▉         | 183/1896 [07:50<1:13:23,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.434023857116699
loss 6.501288890838623
















 10%|▉         | 185/1896 [07:55<1:13:18,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.766810417175293
loss 6.554262638092041
















 10%|▉         | 187/1896 [08:00<1:13:14,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.816859245300293
loss 6.492237567901611
















 10%|▉         | 189/1896 [08:06<1:13:10,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.331269264221191
loss 6.906883716583252
















 10%|█         | 191/1896 [08:11<1:13:06,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.369815349578857
loss 6.7753143310546875
















 10%|█         | 193/1896 [08:16<1:13:02,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.678462028503418
loss 6.6937127113342285
















 10%|█         | 195/1896 [08:21<1:12:58,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.445566177368164
loss 6.8311448097229
















 10%|█         | 197/1896 [08:27<1:12:53,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6783857345581055
loss 6.706820487976074
















 10%|█         | 199/1896 [08:32<1:12:49,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.84489631652832
loss 6.6759514808654785
















 11%|█         | 201/1896 [08:37<1:12:45,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.283282279968262
Loss: 6.747991965364525 Current loss: 6.283282279968262
loss 6.7754364013671875
















 11%|█         | 203/1896 [08:42<1:12:40,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.871647357940674
















 11%|█         | 205/1896 [08:45<1:12:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.868836879730225
loss 6.724880695343018
















 11%|█         | 207/1896 [08:50<1:12:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.673519134521484
loss 6.8296732902526855
















 11%|█         | 209/1896 [08:55<1:12:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.590490341186523
loss 6.862362384796143
















 11%|█         | 211/1896 [09:01<1:12:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8155646324157715
loss 7.150808334350586
















 11%|█         | 213/1896 [09:06<1:11:58,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.97149133682251
loss 6.597837924957275
















 11%|█▏        | 215/1896 [09:11<1:11:54,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.690680027008057
loss 6.157214641571045
















 11%|█▏        | 217/1896 [09:17<1:11:49,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.786316394805908
loss 6.644560813903809
















 12%|█▏        | 219/1896 [09:22<1:11:46,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.47222900390625
loss 6.764277458190918
















 12%|█▏        | 221/1896 [09:27<1:11:43,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.608577728271484
loss 6.678407192230225
















 12%|█▏        | 223/1896 [09:33<1:11:38,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.400891304016113
loss 6.8675642013549805
















 12%|█▏        | 225/1896 [09:38<1:11:34,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.3065571784973145
loss 6.862884521484375
















 12%|█▏        | 227/1896 [09:43<1:11:30,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.582200527191162
loss 6.736751556396484
















 12%|█▏        | 229/1896 [09:48<1:11:26,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.896230697631836
loss 6.517513275146484
















 12%|█▏        | 231/1896 [09:54<1:11:22,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.565232753753662
loss 6.899889945983887
















 12%|█▏        | 233/1896 [09:59<1:11:18,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.683994770050049
loss 6.77739953994751
















 12%|█▏        | 235/1896 [10:04<1:11:14,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7731404304504395
loss 6.82003116607666
















 12%|█▎        | 237/1896 [10:09<1:11:09,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.822965145111084
loss 7.057733058929443
















 13%|█▎        | 239/1896 [10:15<1:11:05,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5378875732421875
loss 6.6708879470825195
















 13%|█▎        | 241/1896 [10:20<1:11:00,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.767318248748779
loss 7.068765640258789
















 13%|█▎        | 243/1896 [10:25<1:10:56,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.091821670532227
loss 6.701704502105713
















 13%|█▎        | 245/1896 [10:30<1:10:51,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.573385238647461
loss 6.579319953918457
















 13%|█▎        | 247/1896 [10:36<1:10:47,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5042033195495605
loss 6.682669162750244
















 13%|█▎        | 249/1896 [10:41<1:10:42,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.76708984375
loss 6.436801433563232
















 13%|█▎        | 251/1896 [10:46<1:10:38,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.806188106536865
loss 6.773292541503906
















 13%|█▎        | 253/1896 [10:51<1:10:33,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7886199951171875
loss 6.957498550415039
















 13%|█▎        | 255/1896 [10:57<1:10:29,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.907498359680176
loss 6.75024938583374
















 14%|█▎        | 257/1896 [11:02<1:10:24,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.411110877990723
loss 7.025724411010742
















 14%|█▎        | 259/1896 [11:07<1:10:19,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.724334716796875
loss 6.731227397918701
















 14%|█▍        | 261/1896 [11:12<1:10:15,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.66370153427124
loss 6.692409038543701
















 14%|█▍        | 263/1896 [11:18<1:10:10,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5557074546813965
loss 6.734564304351807
















 14%|█▍        | 265/1896 [11:23<1:10:06,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.855685234069824
loss 6.864461898803711
















 14%|█▍        | 267/1896 [11:28<1:10:01,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9115142822265625
loss 7.353301525115967
















 14%|█▍        | 269/1896 [11:33<1:09:57,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8586602210998535
loss 6.968280792236328
















 14%|█▍        | 271/1896 [11:39<1:09:52,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.589818477630615
loss 6.732903957366943
















 14%|█▍        | 273/1896 [11:44<1:09:48,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.623297691345215
loss 6.655998706817627
















 15%|█▍        | 275/1896 [11:49<1:09:43,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.08890438079834
loss 6.800534725189209
















 15%|█▍        | 277/1896 [11:54<1:09:38,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.627568244934082
loss 6.993329048156738
















 15%|█▍        | 279/1896 [12:00<1:09:34,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.713618278503418
loss 6.8685126304626465
















 15%|█▍        | 281/1896 [12:05<1:09:29,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7601141929626465
loss 6.739857196807861
















 15%|█▍        | 283/1896 [12:10<1:09:25,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.029162406921387
loss 6.5188775062561035
















 15%|█▌        | 285/1896 [12:15<1:09:20,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.560279369354248
loss 6.5854878425598145
















 15%|█▌        | 287/1896 [12:21<1:09:15,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.596282482147217
loss 6.557489395141602
















 15%|█▌        | 289/1896 [12:26<1:09:11,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.863758087158203
loss 6.987882137298584
















 15%|█▌        | 291/1896 [12:31<1:09:06,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.497568130493164
loss 6.83805513381958
















 15%|█▌        | 293/1896 [12:37<1:09:01,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.080794811248779
loss 6.726133823394775
















 16%|█▌        | 295/1896 [12:42<1:08:57,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.642071723937988
loss 6.902730464935303
















 16%|█▌        | 297/1896 [12:47<1:08:53,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.817373752593994
loss 6.88955020904541
















 16%|█▌        | 299/1896 [12:52<1:08:48,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.701080322265625
loss 7.147559642791748
















 16%|█▌        | 301/1896 [12:58<1:08:43,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.615890979766846
Loss: 6.760345208574875 Current loss: 6.615890979766846
loss 6.899819374084473
















 16%|█▌        | 303/1896 [13:03<1:08:39,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8852667808532715
loss 6.766878604888916
















 16%|█▌        | 305/1896 [13:08<1:08:34,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.703759670257568
loss 6.690066337585449
















 16%|█▌        | 307/1896 [13:14<1:08:30,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.462817668914795
loss 6.918043613433838
















 16%|█▋        | 309/1896 [13:19<1:08:26,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.796450138092041
loss 6.632455348968506
















 16%|█▋        | 311/1896 [13:24<1:08:21,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7006916999816895
loss 6.596015453338623
















 17%|█▋        | 313/1896 [13:30<1:08:17,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.879006862640381
loss 6.816280364990234
















 17%|█▋        | 315/1896 [13:35<1:08:12,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.961283206939697
loss 7.027836322784424
















 17%|█▋        | 317/1896 [13:40<1:08:07,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.730828762054443
loss 6.966465473175049
















 17%|█▋        | 319/1896 [13:45<1:08:03,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.846127033233643
loss 6.83314323425293
















 17%|█▋        | 321/1896 [13:51<1:07:58,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.795731067657471
loss 6.198415756225586
















 17%|█▋        | 323/1896 [13:56<1:07:54,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.780938625335693
loss 6.7898993492126465
















 17%|█▋        | 325/1896 [14:01<1:07:49,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.549360275268555
loss 6.694628715515137
















 17%|█▋        | 327/1896 [14:07<1:07:44,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.858137130737305
loss 6.760761260986328
















 17%|█▋        | 329/1896 [14:12<1:07:39,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.636585235595703
loss 6.82213830947876
















 17%|█▋        | 331/1896 [14:17<1:07:35,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.689765453338623
loss 6.854220390319824
















 18%|█▊        | 333/1896 [14:23<1:07:30,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8267083168029785
loss 6.867765426635742
















 18%|█▊        | 335/1896 [14:28<1:07:26,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7707343101501465
loss 6.9624714851379395
















 18%|█▊        | 337/1896 [14:33<1:07:21,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.361139297485352
loss 6.531378269195557
















 18%|█▊        | 339/1896 [14:39<1:07:17,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.790897369384766
loss 6.6971917152404785
















 18%|█▊        | 341/1896 [14:44<1:07:13,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.598705768585205
loss 6.887872219085693
















 18%|█▊        | 343/1896 [14:49<1:07:08,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.778981685638428
loss 6.996355056762695
















 18%|█▊        | 345/1896 [14:55<1:07:04,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7817559242248535
loss 6.735796928405762
















 18%|█▊        | 347/1896 [15:00<1:06:59,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.621992588043213
















 18%|█▊        | 349/1896 [15:03<1:06:42,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8842267990112305
loss 6.698884963989258
















 19%|█▊        | 351/1896 [15:08<1:06:38,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.49400520324707
loss 6.770130157470703
















 19%|█▊        | 353/1896 [15:13<1:06:34,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.713913440704346
loss 6.4415788650512695
















 19%|█▊        | 355/1896 [15:19<1:06:29,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5605669021606445
loss 6.794843673706055
















 19%|█▉        | 357/1896 [15:24<1:06:25,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.597776889801025
loss 6.812312602996826
















 19%|█▉        | 359/1896 [15:29<1:06:20,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.759008407592773
loss 6.81776762008667
















 19%|█▉        | 361/1896 [15:35<1:06:15,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.564139366149902
loss 6.751039981842041
















 19%|█▉        | 363/1896 [15:40<1:06:11,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.616302967071533
loss 6.814624786376953
















 19%|█▉        | 365/1896 [15:45<1:06:06,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.946691989898682
loss 6.633347034454346
















 19%|█▉        | 367/1896 [15:50<1:06:01,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.747137069702148
loss 6.605981349945068
















 19%|█▉        | 369/1896 [15:56<1:05:57,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.009578704833984
loss 6.511932849884033
















 20%|█▉        | 371/1896 [16:01<1:05:52,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.609252452850342
















 20%|█▉        | 373/1896 [16:04<1:05:36,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.377363204956055
loss 6.762178897857666
















 20%|█▉        | 375/1896 [16:09<1:05:32,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.503692626953125
loss 6.939124584197998
















 20%|█▉        | 377/1896 [16:14<1:05:27,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.732358455657959
loss 6.337446212768555
















 20%|█▉        | 379/1896 [16:20<1:05:22,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8487467765808105
loss 6.623323440551758
















 20%|██        | 381/1896 [16:25<1:05:18,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.644078254699707
loss 6.624967575073242
















 20%|██        | 383/1896 [16:30<1:05:13,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.720646858215332
loss 6.463606357574463
















 20%|██        | 385/1896 [16:35<1:05:08,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.008231163024902
loss 6.542819976806641
















 20%|██        | 387/1896 [16:41<1:05:04,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.514679431915283
loss 6.656619548797607
















 21%|██        | 389/1896 [16:46<1:04:59,  2.59s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8876519203186035
















 21%|██        | 391/1896 [16:49<1:04:44,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.677517414093018
loss 6.594677448272705
















 21%|██        | 393/1896 [16:54<1:04:39,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.398035526275635
















 21%|██        | 395/1896 [16:57<1:04:25,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.534289836883545
loss 6.178616046905518
















 21%|██        | 397/1896 [17:02<1:04:20,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.683403968811035
















 21%|██        | 399/1896 [17:05<1:04:06,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.632674217224121
loss 6.799764156341553
















 21%|██        | 401/1896 [17:10<1:04:01,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.554616451263428
Loss: 6.7182524878210685 Current loss: 6.554616451263428
loss 6.692615985870361
















 21%|██▏       | 403/1896 [17:15<1:03:57,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.661735534667969
loss 6.778732776641846
















 21%|██▏       | 405/1896 [17:21<1:03:52,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.579657077789307
loss 6.95005989074707
















 21%|██▏       | 407/1896 [17:26<1:03:47,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.850589275360107
loss 6.8078484535217285
















 22%|██▏       | 409/1896 [17:31<1:03:43,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5658087730407715
















 22%|██▏       | 411/1896 [17:34<1:03:29,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.447030067443848
loss 6.554309844970703
















 22%|██▏       | 413/1896 [17:39<1:03:25,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.688720703125
loss 6.829652309417725
















 22%|██▏       | 415/1896 [17:44<1:03:20,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7952752113342285
loss 6.771350860595703
















 22%|██▏       | 417/1896 [17:50<1:03:15,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.679940223693848
loss 6.615406036376953
















 22%|██▏       | 419/1896 [17:55<1:03:11,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.618526935577393
loss 6.704105854034424
















 22%|██▏       | 421/1896 [18:00<1:03:06,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.747057914733887
loss 6.74108362197876
















 22%|██▏       | 423/1896 [18:06<1:03:02,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.775094509124756
loss 6.850162506103516
















 22%|██▏       | 425/1896 [18:11<1:02:57,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.4404449462890625
loss 6.5047430992126465
















 23%|██▎       | 427/1896 [18:16<1:02:52,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7555131912231445
loss 6.739809036254883
















 23%|██▎       | 429/1896 [18:21<1:02:47,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.84307861328125
loss 6.581506729125977
















 23%|██▎       | 431/1896 [18:27<1:02:43,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.831370830535889
loss 6.563370227813721
















 23%|██▎       | 433/1896 [18:32<1:02:38,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.632094860076904
loss 6.575125694274902
















 23%|██▎       | 435/1896 [18:37<1:02:34,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.656517028808594
loss 6.657013416290283
















 23%|██▎       | 437/1896 [18:43<1:02:29,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.723560810089111
loss 6.757215976715088
















 23%|██▎       | 439/1896 [18:48<1:02:24,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.853684902191162
loss 6.856965065002441
















 23%|██▎       | 441/1896 [18:53<1:02:20,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.835258960723877
loss 6.9961371421813965
















 23%|██▎       | 443/1896 [18:58<1:02:15,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.786889553070068
loss 6.832712173461914
















 23%|██▎       | 445/1896 [19:04<1:02:10,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.034538269042969
loss 6.784012794494629
















 24%|██▎       | 447/1896 [19:09<1:02:05,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.786373615264893
loss 6.622804641723633
















 24%|██▎       | 449/1896 [19:14<1:02:00,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.757437705993652
loss 6.593920707702637
















 24%|██▍       | 451/1896 [19:19<1:01:56,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.932765007019043
loss 6.691247940063477
















 24%|██▍       | 453/1896 [19:25<1:01:51,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7420830726623535
loss 7.116666793823242
















 24%|██▍       | 455/1896 [19:30<1:01:46,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.718634605407715
loss 6.731106281280518
















 24%|██▍       | 457/1896 [19:35<1:01:41,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6499810218811035
loss 6.541750431060791
















 24%|██▍       | 459/1896 [19:40<1:01:37,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.582156658172607
loss 6.81773567199707
















 24%|██▍       | 461/1896 [19:46<1:01:32,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6720051765441895
loss 6.544644355773926
















 24%|██▍       | 463/1896 [19:51<1:01:28,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.655478000640869
loss 6.931271553039551
















 25%|██▍       | 465/1896 [19:56<1:01:23,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.659998416900635
loss 6.900584697723389
















 25%|██▍       | 467/1896 [20:02<1:01:18,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.612400054931641
loss 6.62531852722168
















 25%|██▍       | 469/1896 [20:07<1:01:13,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 5.962902545928955
loss 7.023195743560791
















 25%|██▍       | 471/1896 [20:12<1:01:09,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.778783321380615
loss 6.836562633514404
















 25%|██▍       | 473/1896 [20:17<1:01:04,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.703197002410889
















 25%|██▌       | 475/1896 [20:20<1:00:51,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6362409591674805
loss 6.6930084228515625
















 25%|██▌       | 477/1896 [20:25<1:00:46,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5527567863464355
loss 6.6758832931518555
















 25%|██▌       | 479/1896 [20:31<1:00:42,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.576137542724609
loss 6.881778240203857
















 25%|██▌       | 481/1896 [20:36<1:00:37,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.890051364898682
loss 6.86532735824585
















 25%|██▌       | 483/1896 [20:41<1:00:32,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6520466804504395
loss 6.671989440917969
















 26%|██▌       | 485/1896 [20:46<1:00:27,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.721738815307617
loss 6.936343669891357
















 26%|██▌       | 487/1896 [20:52<1:00:22,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.734251499176025
loss 6.507533550262451
















 26%|██▌       | 489/1896 [20:57<1:00:18,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.070952892303467
loss 6.571678638458252
















 26%|██▌       | 491/1896 [21:02<1:00:13,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.812516689300537
loss 6.8691086769104
















 26%|██▌       | 493/1896 [21:08<1:00:08,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.920847415924072
loss 6.792886734008789
















 26%|██▌       | 495/1896 [21:13<1:00:03,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.688732147216797
loss 7.08512544631958
















 26%|██▌       | 497/1896 [21:18<59:58,  2.57s/it]  [A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6325364112854
loss 6.619093418121338
















 26%|██▋       | 499/1896 [21:23<59:54,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.873802661895752
loss 6.793088912963867
loss 6.813225269317627
Loss: 6.730116325257404 Current loss: 6.813225269317627


Partial save...
















 26%|██▋       | 501/1896 [21:30<59:53,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.885926723480225
















 27%|██▋       | 503/1896 [21:35<59:49,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.138694763183594
loss 6.819051265716553
















 27%|██▋       | 505/1896 [21:41<59:44,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.786355495452881
loss 6.8054585456848145
















 27%|██▋       | 507/1896 [21:46<59:39,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.615455150604248
loss 6.7201247215271
















 27%|██▋       | 509/1896 [21:51<59:34,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.49050235748291
loss 6.713526248931885
















 27%|██▋       | 511/1896 [21:57<59:29,  2.58s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.03253698348999
















 27%|██▋       | 513/1896 [21:59<59:17,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.969977378845215
loss 6.582107067108154
















 27%|██▋       | 515/1896 [22:05<59:13,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.682041645050049
loss 6.627418518066406
















 27%|██▋       | 517/1896 [22:10<59:08,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.443435192108154
loss 7.073436260223389
















 27%|██▋       | 519/1896 [22:15<59:03,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.525626182556152
loss 6.771419048309326
















 27%|██▋       | 521/1896 [22:21<58:59,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.072315216064453
loss 6.563023090362549
















 28%|██▊       | 523/1896 [22:26<58:54,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.579776287078857
















 28%|██▊       | 525/1896 [22:28<58:42,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.904809951782227
loss 6.665196418762207
















 28%|██▊       | 527/1896 [22:34<58:38,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.4954609870910645
loss 6.728274345397949
















 28%|██▊       | 529/1896 [22:39<58:33,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.880672931671143
loss 6.366496562957764
















 28%|██▊       | 531/1896 [22:44<58:28,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.574913024902344
loss 6.8170976638793945
















 28%|██▊       | 533/1896 [22:50<58:23,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.95997953414917
loss 6.884809970855713
















 28%|██▊       | 535/1896 [22:55<58:19,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.694564342498779
loss 6.7505998611450195
















 28%|██▊       | 537/1896 [23:00<58:14,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.90882682800293
loss 6.862967014312744
















 28%|██▊       | 539/1896 [23:06<58:09,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7475385665893555
loss 6.88330078125
















 29%|██▊       | 541/1896 [23:11<58:04,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.800231456756592
















 29%|██▊       | 543/1896 [23:13<57:53,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.871837139129639
loss 6.65106201171875
















 29%|██▊       | 545/1896 [23:19<57:48,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8178863525390625
loss 7.229831218719482
















 29%|██▉       | 547/1896 [23:24<57:43,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.770654201507568
loss 6.584331035614014
















 29%|██▉       | 549/1896 [23:29<57:39,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.610013484954834
















 29%|██▉       | 551/1896 [23:32<57:28,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.843964576721191
loss 6.789112567901611
















 29%|██▉       | 553/1896 [23:38<57:23,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.830781936645508
loss 6.62916898727417
















 29%|██▉       | 555/1896 [23:43<57:19,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.670629978179932
loss 6.528668403625488
















 29%|██▉       | 557/1896 [23:48<57:14,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.719308853149414
loss 6.672570705413818
















 29%|██▉       | 559/1896 [23:53<57:09,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.47005033493042
loss 7.007637977600098
















 30%|██▉       | 561/1896 [23:59<57:04,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.707045555114746
loss 6.980431079864502
















 30%|██▉       | 563/1896 [24:04<57:00,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.596642017364502
loss 6.824103832244873
















 30%|██▉       | 565/1896 [24:09<56:55,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.540592670440674
loss 6.625619888305664
















 30%|██▉       | 567/1896 [24:15<56:50,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.32436990737915
loss 6.790439128875732
















 30%|███       | 569/1896 [24:20<56:45,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.190963268280029
loss 7.289146900177002
















 30%|███       | 571/1896 [24:25<56:41,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.783153533935547
loss 6.6385579109191895
















 30%|███       | 573/1896 [24:30<56:36,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.62044095993042
loss 6.52965784072876
















 30%|███       | 575/1896 [24:36<56:31,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.852629661560059
loss 6.794703006744385
















 30%|███       | 577/1896 [24:41<56:26,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.495345115661621
loss 6.60711669921875
















 31%|███       | 579/1896 [24:46<56:22,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.45199728012085
loss 6.690886974334717
















 31%|███       | 581/1896 [24:52<56:17,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.694023609161377
loss 6.884249687194824
















 31%|███       | 583/1896 [24:57<56:12,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9001054763793945
















 31%|███       | 585/1896 [25:00<56:02,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.710587978363037
loss 6.597055912017822
















 31%|███       | 587/1896 [25:05<55:57,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.989839553833008
loss 6.497333526611328
















 31%|███       | 589/1896 [25:10<55:52,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5544819831848145
loss 6.278311729431152
















 31%|███       | 591/1896 [25:16<55:48,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.736391067504883
loss 6.992910861968994
















 31%|███▏      | 593/1896 [25:21<55:43,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9254889488220215
loss 6.914977073669434
















 31%|███▏      | 595/1896 [25:26<55:38,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.69689416885376
loss 6.680769443511963
















 31%|███▏      | 597/1896 [25:32<55:33,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.94155216217041
loss 6.813880920410156
















 32%|███▏      | 599/1896 [25:37<55:29,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.782334804534912
loss 6.817270755767822
















 32%|███▏      | 601/1896 [25:42<55:24,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.233410835266113
Loss: 6.7371333261396975 Current loss: 7.233410835266113
loss 6.537479877471924
















 32%|███▏      | 603/1896 [25:47<55:19,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.529551982879639
















 32%|███▏      | 605/1896 [25:50<55:08,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.395094394683838
loss 6.56926965713501
















 32%|███▏      | 607/1896 [25:55<55:04,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.754428863525391
loss 6.73173713684082
















 32%|███▏      | 609/1896 [26:01<54:59,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.677380084991455
loss 6.7591166496276855
















 32%|███▏      | 611/1896 [26:06<54:54,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.786485195159912
loss 6.911238193511963
















 32%|███▏      | 613/1896 [26:11<54:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.517168045043945
loss 6.608504772186279
















 32%|███▏      | 615/1896 [26:16<54:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.778451442718506
loss 6.75984001159668
















 33%|███▎      | 617/1896 [26:22<54:40,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.918304443359375
















 33%|███▎      | 619/1896 [26:24<54:29,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.824671268463135
loss 6.674273490905762
















 33%|███▎      | 621/1896 [26:30<54:24,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5900702476501465
loss 6.508857727050781
















 33%|███▎      | 623/1896 [26:35<54:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.764314651489258
loss 7.317256927490234
















 33%|███▎      | 625/1896 [26:41<54:16,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.702477931976318
loss 6.583829879760742
















 33%|███▎      | 627/1896 [26:46<54:11,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9140448570251465
loss 6.5814385414123535
















 33%|███▎      | 629/1896 [26:51<54:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.0219340324401855
loss 6.7162957191467285
















 33%|███▎      | 631/1896 [26:56<54:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.903461456298828
loss 6.858567237854004
















 33%|███▎      | 633/1896 [27:02<53:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.502138137817383
loss 6.861700534820557
















 33%|███▎      | 635/1896 [27:07<53:52,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.743880748748779
loss 6.710012912750244
















 34%|███▎      | 637/1896 [27:12<53:47,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.749995231628418
loss 6.739627838134766
















 34%|███▎      | 639/1896 [27:18<53:42,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.757967472076416
















 34%|███▍      | 641/1896 [27:20<53:32,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.939883232116699
loss 6.766382217407227
















 34%|███▍      | 643/1896 [27:26<53:27,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.49680757522583
loss 6.777304172515869
















 34%|███▍      | 645/1896 [27:31<53:22,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.660050392150879
loss 7.035804748535156
















 34%|███▍      | 647/1896 [27:36<53:18,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.577599048614502
loss 6.701296806335449
















 34%|███▍      | 649/1896 [27:41<53:13,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.811819553375244
loss 6.5919108390808105
















 34%|███▍      | 651/1896 [27:47<53:08,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.744020462036133
loss 6.782900333404541
















 34%|███▍      | 653/1896 [27:52<53:03,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.623354911804199
loss 7.017103672027588
















 35%|███▍      | 655/1896 [27:57<52:58,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.714540958404541
loss 6.838369846343994
















 35%|███▍      | 657/1896 [28:02<52:53,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.869137287139893
loss 6.8177666664123535
















 35%|███▍      | 659/1896 [28:08<52:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.869123935699463
loss 6.843274116516113
















 35%|███▍      | 661/1896 [28:13<52:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.724938869476318
loss 6.569558143615723
















 35%|███▍      | 663/1896 [28:18<52:39,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.774560928344727
loss 6.841228485107422
















 35%|███▌      | 665/1896 [28:24<52:34,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.685888767242432
loss 6.568493366241455
















 35%|███▌      | 667/1896 [28:29<52:29,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.807355880737305
loss 6.704825401306152
















 35%|███▌      | 669/1896 [28:34<52:24,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7161054611206055
loss 6.716076374053955
















 35%|███▌      | 671/1896 [28:40<52:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.4728007316589355
loss 6.822062969207764
















 35%|███▌      | 673/1896 [28:45<52:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.878403663635254
loss 6.690945148468018
















 36%|███▌      | 675/1896 [28:50<52:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.778199672698975
loss 6.703677654266357
















 36%|███▌      | 677/1896 [28:56<52:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.536257266998291
loss 6.92629337310791
















 36%|███▌      | 679/1896 [29:01<52:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.5845232009887695
loss 6.574577808380127
















 36%|███▌      | 681/1896 [29:06<51:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.733057022094727
















 36%|███▌      | 683/1896 [29:09<51:46,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.618189334869385
loss 6.835760116577148
















 36%|███▌      | 685/1896 [29:14<51:41,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.762970924377441
loss 6.638128280639648
















 36%|███▌      | 687/1896 [29:19<51:36,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.748706817626953
loss 6.784414768218994
















 36%|███▋      | 689/1896 [29:25<51:32,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.515942573547363
















 36%|███▋      | 691/1896 [29:27<51:22,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.4301438331604
loss 6.821429252624512
















 37%|███▋      | 693/1896 [29:33<51:17,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.868841648101807
loss 6.759862422943115
















 37%|███▋      | 695/1896 [29:38<51:13,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.631631374359131
loss 6.885714530944824
















 37%|███▋      | 697/1896 [29:43<51:08,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7724080085754395
loss 6.748666286468506
















 37%|███▋      | 699/1896 [29:48<51:03,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.349121570587158
loss 6.7353668212890625
















 37%|███▋      | 701/1896 [29:54<50:58,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.728192329406738
Loss: 6.730461512690973 Current loss: 6.728192329406738
loss 6.5605597496032715
















 37%|███▋      | 703/1896 [29:59<50:53,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.667309761047363
loss 6.658866882324219
















 37%|███▋      | 705/1896 [30:04<50:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.593605041503906
loss 6.5689005851745605
















 37%|███▋      | 707/1896 [30:10<50:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6770524978637695
loss 6.578214645385742
















 37%|███▋      | 709/1896 [30:15<50:39,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.338260173797607
loss 6.716159343719482
















 38%|███▊      | 711/1896 [30:20<50:34,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8845720291137695
loss 6.763777256011963
















 38%|███▊      | 713/1896 [30:26<50:30,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.690988540649414
loss 6.751880168914795
















 38%|███▊      | 715/1896 [30:31<50:25,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.372469425201416
loss 6.447288513183594
















 38%|███▊      | 717/1896 [30:37<50:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.950891971588135
loss 6.758164882659912
















 38%|███▊      | 719/1896 [30:42<50:16,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7742695808410645
loss 7.180035591125488
















 38%|███▊      | 721/1896 [30:47<50:11,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.973608493804932
loss 6.5681962966918945
















 38%|███▊      | 723/1896 [30:53<50:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.944607734680176
loss 6.453460693359375
















 38%|███▊      | 725/1896 [30:58<50:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.0119709968566895
















 38%|███▊      | 727/1896 [31:01<49:52,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.585973262786865
loss 6.75081729888916
















 38%|███▊      | 729/1896 [31:06<49:47,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.211365222930908
loss 6.753554344177246
















 39%|███▊      | 731/1896 [31:11<49:43,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.831119060516357
loss 6.69915246963501
















 39%|███▊      | 733/1896 [31:17<49:38,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.564515590667725
loss 6.749163627624512
















 39%|███▉      | 735/1896 [31:22<49:33,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.726056098937988
loss 6.743485927581787
















 39%|███▉      | 737/1896 [31:27<49:28,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.988365173339844
loss 6.963767051696777
















 39%|███▉      | 739/1896 [31:33<49:24,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.666195869445801
loss 6.723556041717529
















 39%|███▉      | 741/1896 [31:38<49:19,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.403574466705322
loss 6.7521185874938965
















 39%|███▉      | 743/1896 [31:43<49:14,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8318986892700195
loss 6.821686744689941
















 39%|███▉      | 745/1896 [31:48<49:09,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.869832992553711
loss 6.553497791290283
















 39%|███▉      | 747/1896 [31:54<49:04,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.598193645477295
loss 7.26231050491333
















 40%|███▉      | 749/1896 [31:59<48:59,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.688327312469482
loss 6.611302375793457
















 40%|███▉      | 751/1896 [32:04<48:54,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.546408653259277
loss 6.76544713973999
















 40%|███▉      | 753/1896 [32:10<48:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7529802322387695
loss 6.839976787567139
















 40%|███▉      | 755/1896 [32:15<48:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6044721603393555
loss 6.76754093170166
















 40%|███▉      | 757/1896 [32:20<48:39,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8674821853637695
loss 6.835554599761963
















 40%|████      | 759/1896 [32:25<48:34,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.827854633331299
loss 6.733936309814453
















 40%|████      | 761/1896 [32:31<48:29,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.647414207458496
loss 6.586161136627197
















 40%|████      | 763/1896 [32:36<48:25,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.4198899269104
loss 6.594048023223877
















 40%|████      | 765/1896 [32:41<48:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.919271945953369
loss 6.643967151641846
















 40%|████      | 767/1896 [32:46<48:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.343738555908203
loss 6.600219249725342
















 41%|████      | 769/1896 [32:52<48:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.648610591888428
loss 6.845837116241455
















 41%|████      | 771/1896 [32:57<48:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.642965793609619
loss 6.677165985107422
















 41%|████      | 773/1896 [33:02<48:00,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.919713497161865
loss 6.850310802459717
















 41%|████      | 775/1896 [33:08<47:55,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.703531742095947
loss 6.695079803466797
















 41%|████      | 777/1896 [33:13<47:50,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.244154453277588
loss 6.869668006896973
















 41%|████      | 779/1896 [33:18<47:45,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.839689254760742
















 41%|████      | 781/1896 [33:21<47:36,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.878918170928955
loss 6.9293928146362305
















 41%|████▏     | 783/1896 [33:26<47:32,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.657607078552246
loss 6.882928848266602
















 41%|████▏     | 785/1896 [33:31<47:27,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.841880798339844
















 42%|████▏     | 787/1896 [33:34<47:18,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.829010963439941
loss 7.255021095275879
















 42%|████▏     | 789/1896 [33:39<47:13,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.652934551239014
loss 6.569891452789307
















 42%|████▏     | 791/1896 [33:44<47:08,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.731329441070557
loss 7.05072021484375
















 42%|████▏     | 793/1896 [33:50<47:03,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.722789764404297
loss 6.592713832855225
















 42%|████▏     | 795/1896 [33:55<46:58,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.852145195007324
loss 6.795713901519775
















 42%|████▏     | 797/1896 [34:00<46:53,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.129842758178711
loss 6.362980842590332
















 42%|████▏     | 799/1896 [34:05<46:48,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.0054755210876465
loss 6.818109035491943
















 42%|████▏     | 801/1896 [34:11<46:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.055051803588867
Loss: 6.746368376434004 Current loss: 7.055051803588867
loss 6.802062511444092
















 42%|████▏     | 803/1896 [34:16<46:39,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.825136661529541
loss 6.5156402587890625
















 42%|████▏     | 805/1896 [34:21<46:34,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.870481491088867
loss 6.743773937225342
















 43%|████▎     | 807/1896 [34:27<46:29,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.542664051055908
loss 7.0894999504089355
















 43%|████▎     | 809/1896 [34:32<46:24,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6644287109375
loss 6.873906135559082
















 43%|████▎     | 811/1896 [34:37<46:19,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.787800312042236
loss 6.781250476837158
















 43%|████▎     | 813/1896 [34:42<46:14,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8795294761657715
















 43%|████▎     | 815/1896 [34:45<46:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.703160762786865
loss 6.59480619430542
















 43%|████▎     | 817/1896 [34:50<46:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.247361183166504
loss 7.106170654296875
















 43%|████▎     | 819/1896 [34:55<45:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.043539524078369
















 43%|████▎     | 821/1896 [34:58<45:47,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.70380163192749
loss 6.583049297332764
















 43%|████▎     | 823/1896 [35:03<45:42,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.510892391204834
loss 6.932903289794922
















 44%|████▎     | 825/1896 [35:09<45:38,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.0127973556518555
loss 6.748722553253174
















 44%|████▎     | 827/1896 [35:14<45:33,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.822122097015381
loss 7.036069869995117
















 44%|████▎     | 829/1896 [35:19<45:28,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.819925308227539
loss 6.408015727996826
















 44%|████▍     | 831/1896 [35:25<45:23,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.901612758636475
loss 6.820417404174805
















 44%|████▍     | 833/1896 [35:30<45:18,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.671470642089844
loss 6.68870735168457
















 44%|████▍     | 835/1896 [35:35<45:13,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.87006139755249
loss 6.707840442657471
















 44%|████▍     | 837/1896 [35:41<45:08,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.874905586242676
loss 6.7549591064453125
















 44%|████▍     | 839/1896 [35:46<45:04,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.875611782073975
loss 6.909107685089111
















 44%|████▍     | 841/1896 [35:51<44:59,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.714659214019775
loss 6.812211036682129
















 44%|████▍     | 843/1896 [35:56<44:54,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.866317272186279
loss 6.4776763916015625
















 45%|████▍     | 845/1896 [36:02<44:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.727327823638916
loss 6.671654224395752
















 45%|████▍     | 847/1896 [36:07<44:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.845880031585693
loss 7.232278347015381
















 45%|████▍     | 849/1896 [36:12<44:39,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.660623073577881
loss 6.896068096160889
















 45%|████▍     | 851/1896 [36:17<44:34,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.84075403213501
loss 6.95808744430542
















 45%|████▍     | 853/1896 [36:23<44:29,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.867402076721191
loss 6.502252578735352
















 45%|████▌     | 855/1896 [36:28<44:24,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.577561855316162
loss 6.508018970489502
















 45%|████▌     | 857/1896 [36:33<44:19,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.851287364959717
loss 6.681912422180176
















 45%|████▌     | 859/1896 [36:38<44:14,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8570733070373535
loss 6.745950222015381
















 45%|████▌     | 861/1896 [36:44<44:09,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.57025671005249
loss 6.646055221557617
















 46%|████▌     | 863/1896 [36:49<44:04,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.807563304901123
loss 6.735720157623291
















 46%|████▌     | 865/1896 [36:54<43:59,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.724783420562744
loss 6.606352806091309
















 46%|████▌     | 867/1896 [37:00<43:54,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.672997951507568
loss 6.575932025909424
















 46%|████▌     | 869/1896 [37:05<43:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.000655174255371
loss 7.192716598510742
















 46%|████▌     | 871/1896 [37:10<43:45,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.3607916831970215
loss 6.7522501945495605
















 46%|████▌     | 873/1896 [37:15<43:40,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.71103572845459
loss 6.650002956390381
















 46%|████▌     | 875/1896 [37:21<43:35,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8345818519592285
loss 6.622895240783691
















 46%|████▋     | 877/1896 [37:26<43:30,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.482802867889404
loss 6.6915178298950195
















 46%|████▋     | 879/1896 [37:31<43:25,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.675182819366455
loss 6.594105243682861
















 46%|████▋     | 881/1896 [37:37<43:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.562978267669678
loss 6.489512920379639
















 47%|████▋     | 883/1896 [37:42<43:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.604353904724121
loss 6.801277160644531
















 47%|████▋     | 885/1896 [37:47<43:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.472081184387207
loss 6.835573673248291
















 47%|████▋     | 887/1896 [37:52<43:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.444185256958008
loss 6.917240619659424
















 47%|████▋     | 889/1896 [37:58<43:00,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.109062671661377
loss 6.9252424240112305
















 47%|████▋     | 891/1896 [38:03<42:55,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6882548332214355
loss 6.7553277015686035
















 47%|████▋     | 893/1896 [38:08<42:50,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.797468662261963
loss 6.752593517303467
















 47%|████▋     | 895/1896 [38:13<42:45,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.698757171630859
loss 6.558219909667969
















 47%|████▋     | 897/1896 [38:19<42:40,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.831154823303223
loss 6.715692520141602
















 47%|████▋     | 899/1896 [38:24<42:35,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.756074905395508
loss 6.484945297241211
















 48%|████▊     | 901/1896 [38:29<42:30,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.079782009124756
Loss: 6.745904983501196 Current loss: 7.079782009124756
loss 6.9438958168029785
















 48%|████▊     | 903/1896 [38:35<42:25,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.600635051727295
loss 7.607672214508057
















 48%|████▊     | 905/1896 [38:40<42:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.67762565612793
loss 6.720310211181641
















 48%|████▊     | 907/1896 [38:45<42:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.517805099487305
loss 6.606271743774414
















 48%|████▊     | 909/1896 [38:51<42:11,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.475869655609131
loss 6.7761712074279785
















 48%|████▊     | 911/1896 [38:56<42:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.561772346496582
loss 6.655850410461426
















 48%|████▊     | 913/1896 [39:01<42:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.652168273925781
loss 6.2690887451171875
















 48%|████▊     | 915/1896 [39:06<41:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.952677249908447
loss 6.767180442810059
















 48%|████▊     | 917/1896 [39:12<41:51,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.615932941436768
loss 6.946334362030029
















 48%|████▊     | 919/1896 [39:17<41:46,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.803224563598633
loss 6.785123348236084
















 49%|████▊     | 921/1896 [39:22<41:41,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7493486404418945
loss 6.79537296295166
















 49%|████▊     | 923/1896 [39:28<41:36,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.896261692047119
loss 6.632350444793701
















 49%|████▉     | 925/1896 [39:33<41:31,  2.57s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.653395652770996
















 49%|████▉     | 927/1896 [39:35<41:23,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.608328342437744
















 49%|████▉     | 929/1896 [39:38<41:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.760662078857422
loss 6.5597429275512695
















 49%|████▉     | 931/1896 [39:43<41:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.659377574920654
loss 6.936506748199463
















 49%|████▉     | 933/1896 [39:49<41:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.830549716949463
loss 6.736047267913818
















 49%|████▉     | 935/1896 [39:54<41:00,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.626347541809082
loss 6.63700008392334
















 49%|████▉     | 937/1896 [39:59<40:55,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9635186195373535
loss 6.53157377243042
















 50%|████▉     | 939/1896 [40:04<40:51,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.627536773681641
loss 6.690426349639893
















 50%|████▉     | 941/1896 [40:10<40:46,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.3457722663879395
loss 6.781795501708984
















 50%|████▉     | 943/1896 [40:15<40:41,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.805327892303467
loss 7.290684223175049
















 50%|████▉     | 945/1896 [40:20<40:36,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.587222099304199
loss 6.303616046905518
















 50%|████▉     | 947/1896 [40:26<40:31,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.74164342880249
loss 6.595161437988281
















 50%|█████     | 949/1896 [40:31<40:26,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.854377269744873
loss 6.853152275085449
















 50%|█████     | 951/1896 [40:36<40:21,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.073825836181641
loss 6.771288871765137
















 50%|█████     | 953/1896 [40:42<40:16,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.831238269805908
loss 6.8438520431518555
















 50%|█████     | 955/1896 [40:47<40:11,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.307191371917725
loss 6.7664642333984375
















 50%|█████     | 957/1896 [40:52<40:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8079986572265625
loss 6.614767074584961
















 51%|█████     | 959/1896 [40:57<40:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.68275260925293
loss 6.710160255432129
















 51%|█████     | 961/1896 [41:03<39:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.855928421020508
loss 6.869238376617432
















 51%|█████     | 963/1896 [41:08<39:51,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.454206466674805
loss 6.904394626617432
















 51%|█████     | 965/1896 [41:13<39:46,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.778648376464844
loss 6.758569717407227
















 51%|█████     | 967/1896 [41:18<39:41,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.094324111938477
loss 6.801214694976807
















 51%|█████     | 969/1896 [41:24<39:36,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.764475345611572
loss 6.531595230102539
















 51%|█████     | 971/1896 [41:29<39:31,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.889711380004883
loss 6.742956161499023
















 51%|█████▏    | 973/1896 [41:34<39:26,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.860416889190674
loss 6.786674499511719
















 51%|█████▏    | 975/1896 [41:39<39:21,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.802109241485596
loss 6.719846725463867
















 52%|█████▏    | 977/1896 [41:45<39:16,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.607570171356201
loss 6.68657112121582
















 52%|█████▏    | 979/1896 [41:50<39:11,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.569079399108887
loss 6.93276309967041
















 52%|█████▏    | 981/1896 [41:55<39:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.991888999938965
loss 6.525918483734131
















 52%|█████▏    | 983/1896 [42:00<39:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.893611431121826
loss 6.082049369812012
















 52%|█████▏    | 985/1896 [42:06<38:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.653340816497803
loss 6.743836402893066
















 52%|█████▏    | 987/1896 [42:11<38:51,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.682925224304199
















 52%|█████▏    | 989/1896 [42:14<38:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.784176826477051
loss 6.733542442321777
















 52%|█████▏    | 991/1896 [42:19<38:39,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.587695121765137
loss 6.66646671295166
















 52%|█████▏    | 993/1896 [42:24<38:34,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.976340293884277
loss 6.7439141273498535
















 52%|█████▏    | 995/1896 [42:30<38:29,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.844436168670654
loss 6.625515937805176
















 53%|█████▎    | 997/1896 [42:35<38:24,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.025106906890869
loss 6.617674827575684
















 53%|█████▎    | 999/1896 [42:40<38:19,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.94950532913208
loss 6.824057102203369
loss 6.697837829589844
Loss: 6.7602080441999295 Current loss: 6.697837829589844


Partial save...
















 53%|█████▎    | 1001/1896 [42:46<38:14,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.771522045135498
















 53%|█████▎    | 1003/1896 [42:51<38:09,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.0587663650512695
loss 6.924829959869385
















 53%|█████▎    | 1005/1896 [42:57<38:04,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.072213649749756
loss 6.719216346740723
















 53%|█████▎    | 1007/1896 [43:02<37:59,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.625875949859619
loss 7.330531597137451
















 53%|█████▎    | 1009/1896 [43:07<37:54,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.821964263916016
loss 6.51563024520874
















 53%|█████▎    | 1011/1896 [43:12<37:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.634193420410156
loss 6.843476295471191
















 53%|█████▎    | 1013/1896 [43:18<37:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.978486061096191
















 54%|█████▎    | 1015/1896 [43:20<37:37,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.812448501586914
loss 6.241677761077881
















 54%|█████▎    | 1017/1896 [43:25<37:32,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.335835933685303
loss 6.375019550323486
















 54%|█████▎    | 1019/1896 [43:31<37:27,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.779222011566162
loss 6.815987586975098
















 54%|█████▍    | 1021/1896 [43:36<37:22,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.641462802886963
loss 6.538413047790527
















 54%|█████▍    | 1023/1896 [43:41<37:17,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.786947727203369
















 54%|█████▍    | 1025/1896 [43:44<37:09,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.660860061645508
loss 6.869141101837158
















 54%|█████▍    | 1027/1896 [43:49<37:04,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.421042442321777
loss 6.939354419708252
















 54%|█████▍    | 1029/1896 [43:54<37:00,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.644876003265381
loss 6.378571033477783
















 54%|█████▍    | 1031/1896 [44:00<36:55,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.81121826171875
loss 6.485157012939453
















 54%|█████▍    | 1033/1896 [44:05<36:50,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.717285633087158
loss 6.898959636688232
















 55%|█████▍    | 1035/1896 [44:10<36:45,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.003030776977539
loss 6.737199783325195
















 55%|█████▍    | 1037/1896 [44:15<36:40,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.489421367645264
loss 6.6534743309021
















 55%|█████▍    | 1039/1896 [44:21<36:35,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.725004196166992
loss 6.454951763153076
















 55%|█████▍    | 1041/1896 [44:26<36:30,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.75586462020874
loss 6.8252692222595215
















 55%|█████▌    | 1043/1896 [44:32<36:25,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.492371082305908
loss 6.7061448097229
















 55%|█████▌    | 1045/1896 [44:37<36:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.740791320800781
loss 6.638707637786865
















 55%|█████▌    | 1047/1896 [44:42<36:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.723898887634277
loss 6.780794620513916
















 55%|█████▌    | 1049/1896 [44:48<36:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.757617950439453
loss 6.448061466217041
















 55%|█████▌    | 1051/1896 [44:53<36:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.514641284942627
loss 6.912931442260742
















 56%|█████▌    | 1053/1896 [44:58<36:00,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.624915599822998
loss 6.843540668487549
















 56%|█████▌    | 1055/1896 [45:03<35:55,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.83332633972168
loss 7.191131114959717
















 56%|█████▌    | 1057/1896 [45:09<35:50,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.53258752822876
loss 6.623716831207275
















 56%|█████▌    | 1059/1896 [45:14<35:45,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.617332935333252
loss 6.589517593383789
















 56%|█████▌    | 1061/1896 [45:19<35:40,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.488471508026123
loss 6.737260341644287
















 56%|█████▌    | 1063/1896 [45:24<35:35,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.762545585632324
















 56%|█████▌    | 1065/1896 [45:27<35:28,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.644532203674316
loss 6.904410362243652
















 56%|█████▋    | 1067/1896 [45:32<35:23,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.701809406280518
loss 6.904533386230469
















 56%|█████▋    | 1069/1896 [45:38<35:18,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.892547130584717
loss 6.962183475494385
















 56%|█████▋    | 1071/1896 [45:43<35:13,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.696890830993652
loss 6.476161003112793
















 57%|█████▋    | 1073/1896 [45:48<35:08,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9448628425598145
loss 6.884264945983887
















 57%|█████▋    | 1075/1896 [45:54<35:03,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.823868751525879
loss 6.882043838500977
















 57%|█████▋    | 1077/1896 [45:59<34:58,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.632997512817383
loss 6.8422088623046875
















 57%|█████▋    | 1079/1896 [46:04<34:53,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.495795726776123
loss 6.748563766479492
















 57%|█████▋    | 1081/1896 [46:09<34:48,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.678342342376709
loss 6.886967658996582
















 57%|█████▋    | 1083/1896 [46:15<34:43,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.587481498718262
















 57%|█████▋    | 1085/1896 [46:17<34:36,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.943715572357178
loss 6.380991458892822
















 57%|█████▋    | 1087/1896 [46:23<34:31,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.734856605529785
loss 6.773100852966309
















 57%|█████▋    | 1089/1896 [46:28<34:26,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.134915828704834
















 58%|█████▊    | 1091/1896 [46:31<34:19,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.860537052154541
loss 6.8216094970703125
















 58%|█████▊    | 1093/1896 [46:36<34:14,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.534459590911865
loss 6.834932804107666
















 58%|█████▊    | 1095/1896 [46:41<34:09,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.593584060668945
loss 6.623334884643555
















 58%|█████▊    | 1097/1896 [46:46<34:04,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.422830104827881
loss 6.536248207092285
















 58%|█████▊    | 1099/1896 [46:52<33:59,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.568993091583252
loss 6.825753688812256
















 58%|█████▊    | 1101/1896 [46:57<33:54,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.678194999694824
Loss: 6.740471515587246 Current loss: 6.678194999694824
loss 6.283190727233887
















 58%|█████▊    | 1103/1896 [47:02<33:49,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.831595420837402
loss 6.739559650421143
















 58%|█████▊    | 1105/1896 [47:07<33:44,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8588738441467285
loss 6.798184871673584
















 58%|█████▊    | 1107/1896 [47:13<33:39,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.752265930175781
loss 6.677001476287842
















 58%|█████▊    | 1109/1896 [47:18<33:34,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.837890625
















 59%|█████▊    | 1111/1896 [47:21<33:27,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.409584045410156
loss 6.831116676330566
















 59%|█████▊    | 1113/1896 [47:26<33:22,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.638256072998047
loss 6.672186851501465
















 59%|█████▉    | 1115/1896 [47:31<33:17,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.868044853210449
loss 6.8087239265441895
















 59%|█████▉    | 1117/1896 [47:36<33:12,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.383831024169922
















 59%|█████▉    | 1119/1896 [47:39<33:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.827035903930664
loss 6.890331268310547
















 59%|█████▉    | 1121/1896 [47:44<33:00,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.062898635864258
loss 6.789181232452393
















 59%|█████▉    | 1123/1896 [47:49<32:55,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.906522274017334
loss 6.8726091384887695
















 59%|█████▉    | 1125/1896 [47:55<32:50,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.67535400390625
loss 6.642977237701416
















 59%|█████▉    | 1127/1896 [48:00<32:45,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.363906383514404
loss 6.579457759857178
















 60%|█████▉    | 1129/1896 [48:05<32:40,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.570885181427002
loss 6.803430557250977
















 60%|█████▉    | 1131/1896 [48:11<32:35,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.808304786682129
loss 6.80991268157959
















 60%|█████▉    | 1133/1896 [48:16<32:30,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.788391590118408
loss 6.58931303024292
















 60%|█████▉    | 1135/1896 [48:21<32:25,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.730087757110596
loss 6.8378448486328125
















 60%|█████▉    | 1137/1896 [48:26<32:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.781099796295166
loss 6.420791149139404
















 60%|██████    | 1139/1896 [48:32<32:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.584994792938232
loss 6.623276233673096
















 60%|██████    | 1141/1896 [48:37<32:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.680858612060547
















 60%|██████    | 1143/1896 [48:40<32:03,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.854183673858643
loss 6.734607219696045
















 60%|██████    | 1145/1896 [48:45<31:58,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.579384803771973
loss 7.0440850257873535
















 60%|██████    | 1147/1896 [48:50<31:53,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.545103073120117
loss 6.7238006591796875
















 61%|██████    | 1149/1896 [48:55<31:48,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.541375637054443
















 61%|██████    | 1151/1896 [48:58<31:42,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.816122055053711
loss 6.961226463317871
















 61%|██████    | 1153/1896 [49:03<31:37,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.60605001449585
loss 6.790580749511719
















 61%|██████    | 1155/1896 [49:09<31:32,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9383416175842285
loss 6.655564308166504
















 61%|██████    | 1157/1896 [49:14<31:27,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.64674186706543
loss 6.691298007965088
















 61%|██████    | 1159/1896 [49:19<31:22,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.611823558807373
loss 6.8668928146362305
















 61%|██████    | 1161/1896 [49:25<31:17,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.69639778137207
loss 6.809331893920898
















 61%|██████▏   | 1163/1896 [49:30<31:12,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.474661350250244
loss 6.820438861846924
















 61%|██████▏   | 1165/1896 [49:35<31:07,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.0194621086120605
loss 5.990710735321045
















 62%|██████▏   | 1167/1896 [49:40<31:02,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6757402420043945
loss 6.810479164123535
















 62%|██████▏   | 1169/1896 [49:46<30:57,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.121500492095947
loss 6.541767597198486
















 62%|██████▏   | 1171/1896 [49:51<30:52,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.916722774505615
loss 6.568803787231445
















 62%|██████▏   | 1173/1896 [49:56<30:47,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.053678035736084
loss 6.711571216583252
















 62%|██████▏   | 1175/1896 [50:01<30:42,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.778832912445068
loss 6.883174896240234
















 62%|██████▏   | 1177/1896 [50:07<30:37,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.585730075836182
loss 6.75364351272583
















 62%|██████▏   | 1179/1896 [50:12<30:32,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.809873104095459
loss 6.6795654296875
















 62%|██████▏   | 1181/1896 [50:17<30:27,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.628881931304932
loss 7.0947699546813965
















 62%|██████▏   | 1183/1896 [50:23<30:21,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7235283851623535
loss 6.952775001525879
















 62%|██████▎   | 1185/1896 [50:28<30:16,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.601000785827637
loss 6.770770072937012
















 63%|██████▎   | 1187/1896 [50:33<30:11,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.899537563323975
loss 7.090120315551758
















 63%|██████▎   | 1189/1896 [50:38<30:06,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.454370975494385
loss 6.775978088378906
















 63%|██████▎   | 1191/1896 [50:44<30:01,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.771241188049316
loss 6.78647518157959
















 63%|██████▎   | 1193/1896 [50:49<29:56,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.639231204986572
loss 6.770610332489014
















 63%|██████▎   | 1195/1896 [50:54<29:51,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.628774166107178
loss 6.682943344116211
















 63%|██████▎   | 1197/1896 [51:00<29:46,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.764305591583252
















 63%|██████▎   | 1199/1896 [51:02<29:40,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.747827529907227
loss 6.782468318939209
















 63%|██████▎   | 1201/1896 [51:08<29:35,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.841241359710693
Loss: 6.739652895642635 Current loss: 6.841241359710693
loss 6.622834205627441
















 63%|██████▎   | 1203/1896 [51:13<29:30,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.453101634979248
loss 6.875786304473877
















 64%|██████▎   | 1205/1896 [51:18<29:25,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.616870880126953
loss 6.696485996246338
















 64%|██████▎   | 1207/1896 [51:23<29:20,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.771817207336426
loss 6.81727409362793
















 64%|██████▍   | 1209/1896 [51:29<29:15,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.946829319000244
loss 6.675985813140869
















 64%|██████▍   | 1211/1896 [51:34<29:10,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.86830997467041
loss 6.386730194091797
















 64%|██████▍   | 1213/1896 [51:39<29:05,  2.56s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.833199501037598
















 64%|██████▍   | 1215/1896 [51:42<28:58,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.499882698059082
loss 6.944706439971924
















 64%|██████▍   | 1217/1896 [51:47<28:53,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.418211460113525
loss 6.563670635223389
















 64%|██████▍   | 1219/1896 [51:53<28:48,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7412238121032715
loss 7.190188884735107
















 64%|██████▍   | 1221/1896 [51:58<28:43,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.791799545288086
loss 6.725685119628906
















 65%|██████▍   | 1223/1896 [52:03<28:38,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.546255111694336
loss 6.9016242027282715
















 65%|██████▍   | 1225/1896 [52:08<28:33,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.47598934173584
loss 6.972872257232666
















 65%|██████▍   | 1227/1896 [52:14<28:28,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.844488620758057
loss 6.761598587036133
















 65%|██████▍   | 1229/1896 [52:19<28:23,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.687443256378174
















 65%|██████▍   | 1231/1896 [52:21<28:17,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.833952903747559
loss 6.5125932693481445
















 65%|██████▌   | 1233/1896 [52:27<28:12,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.594881534576416
loss 6.683325290679932
















 65%|██████▌   | 1235/1896 [52:32<28:07,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.518764972686768
loss 6.971045017242432
















 65%|██████▌   | 1237/1896 [52:37<28:02,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.597949981689453
















 65%|██████▌   | 1239/1896 [52:40<27:55,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.8566975593566895
loss 6.681178569793701
















 65%|██████▌   | 1241/1896 [52:45<27:50,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.551823616027832
loss 6.630600452423096
















 66%|██████▌   | 1243/1896 [52:51<27:45,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.801822185516357
loss 6.565620422363281
















 66%|██████▌   | 1245/1896 [52:56<27:40,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.815032005310059
loss 6.681924343109131
















 66%|██████▌   | 1247/1896 [53:01<27:35,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.810539722442627
loss 6.813272476196289
















 66%|██████▌   | 1249/1896 [53:06<27:30,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.406102180480957
loss 6.579217910766602
















 66%|██████▌   | 1251/1896 [53:12<27:25,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.847405433654785
loss 6.820847988128662
















 66%|██████▌   | 1253/1896 [53:17<27:20,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6145853996276855
loss 6.7486982345581055
















 66%|██████▌   | 1255/1896 [53:22<27:15,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.763615608215332
loss 6.5687994956970215
















 66%|██████▋   | 1257/1896 [53:27<27:10,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.016773223876953
loss 6.948890209197998
















 66%|██████▋   | 1259/1896 [53:33<27:05,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.713716983795166
loss 6.599165916442871
















 67%|██████▋   | 1261/1896 [53:38<27:00,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.90612268447876
loss 6.723941326141357
















 67%|██████▋   | 1263/1896 [53:43<26:55,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7313032150268555
loss 6.68301248550415
















 67%|██████▋   | 1265/1896 [53:48<26:50,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.695895195007324
















 67%|██████▋   | 1267/1896 [53:51<26:44,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.710925102233887
loss 6.75652551651001
















 67%|██████▋   | 1269/1896 [53:56<26:39,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.648395538330078
loss 6.797999382019043
















 67%|██████▋   | 1271/1896 [54:02<26:34,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7890400886535645
loss 6.734351634979248
















 67%|██████▋   | 1273/1896 [54:07<26:29,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7584052085876465
loss 6.669377326965332
















 67%|██████▋   | 1275/1896 [54:12<26:24,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7149152755737305
loss 6.76779842376709
















 67%|██████▋   | 1277/1896 [54:17<26:19,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.784849643707275
loss 6.740594863891602
















 67%|██████▋   | 1279/1896 [54:23<26:14,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.044651985168457
loss 6.568620681762695
















 68%|██████▊   | 1281/1896 [54:28<26:09,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.587254524230957
















 68%|██████▊   | 1283/1896 [54:31<26:02,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.665293216705322
loss 6.570276737213135
















 68%|██████▊   | 1285/1896 [54:36<25:57,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.826075077056885
loss 6.644901275634766
















 68%|██████▊   | 1287/1896 [54:41<25:52,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.576324939727783
loss 6.763253211975098
















 68%|██████▊   | 1289/1896 [54:47<25:47,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.837714195251465
loss 6.6625590324401855
















 68%|██████▊   | 1291/1896 [54:53<25:43,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.876407623291016
loss 6.8916215896606445
















 68%|██████▊   | 1293/1896 [54:58<25:38,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.787095546722412
loss 6.644663333892822
















 68%|██████▊   | 1295/1896 [55:03<25:33,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.643476486206055
loss 6.570367813110352
















 68%|██████▊   | 1297/1896 [55:08<25:28,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.704566955566406
loss 6.560113906860352
















 69%|██████▊   | 1299/1896 [55:14<25:23,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.677190780639648
loss 7.184237003326416
















 69%|██████▊   | 1301/1896 [55:19<25:18,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.979768753051758
Loss: 6.73419103124886 Current loss: 6.979768753051758
loss 6.82116174697876
















 69%|██████▊   | 1303/1896 [55:24<25:13,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.94711971282959
loss 6.874083995819092
















 69%|██████▉   | 1305/1896 [55:29<25:08,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.704851150512695
loss 6.896214485168457
















 69%|██████▉   | 1307/1896 [55:35<25:02,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.712546348571777
loss 6.716995716094971
















 69%|██████▉   | 1309/1896 [55:40<24:57,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.895273208618164
loss 6.670273303985596
















 69%|██████▉   | 1311/1896 [55:45<24:52,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.50632905960083
loss 6.573615074157715
















 69%|██████▉   | 1313/1896 [55:50<24:47,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.079517364501953
loss 6.767770767211914
















 69%|██████▉   | 1315/1896 [55:56<24:42,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.84205436706543
loss 6.622779369354248
















 69%|██████▉   | 1317/1896 [56:01<24:37,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.893359184265137
loss 6.710233211517334
















 70%|██████▉   | 1319/1896 [56:06<24:32,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.893646240234375
loss 6.745396137237549
















 70%|██████▉   | 1321/1896 [56:12<24:27,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.717745304107666
loss 6.690431594848633
















 70%|██████▉   | 1323/1896 [56:17<24:22,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.690059185028076
loss 6.798775672912598
















 70%|██████▉   | 1325/1896 [56:22<24:17,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.730626583099365
loss 6.639875888824463
















 70%|██████▉   | 1327/1896 [56:27<24:12,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.856706142425537
loss 6.593963146209717
















 70%|███████   | 1329/1896 [56:33<24:07,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.472867965698242
loss 6.708614349365234
















 70%|███████   | 1331/1896 [56:38<24:02,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.829952716827393
loss 6.88961935043335
















 70%|███████   | 1333/1896 [56:43<23:57,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.549728870391846
loss 6.8347039222717285
















 70%|███████   | 1335/1896 [56:49<23:52,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.961947917938232
loss 6.578750133514404
















 71%|███████   | 1337/1896 [56:54<23:47,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.770939826965332
loss 6.705412864685059
















 71%|███████   | 1339/1896 [56:59<23:42,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.638667106628418
loss 6.5497517585754395
















 71%|███████   | 1341/1896 [57:04<23:37,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.9066948890686035
loss 6.699801921844482
















 71%|███████   | 1343/1896 [57:10<23:32,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.871875286102295
loss 6.501653671264648
















 71%|███████   | 1345/1896 [57:15<23:27,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.601261615753174
loss 6.778101921081543
















 71%|███████   | 1347/1896 [57:20<23:22,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.6277337074279785
















 71%|███████   | 1349/1896 [57:23<23:16,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.933446407318115
















 71%|███████▏  | 1351/1896 [57:25<23:10,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.902107238769531
loss 6.7989935874938965
















 71%|███████▏  | 1353/1896 [57:31<23:05,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.721794128417969
loss 6.622146129608154
















 71%|███████▏  | 1355/1896 [57:36<23:00,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.623356342315674
loss 6.818410873413086
















 72%|███████▏  | 1357/1896 [57:41<22:54,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.783820152282715
loss 6.840534687042236
















 72%|███████▏  | 1359/1896 [57:46<22:49,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.722026824951172
loss 6.634819507598877
















 72%|███████▏  | 1361/1896 [57:52<22:44,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.620071887969971
















 72%|███████▏  | 1363/1896 [57:54<22:38,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.719432830810547
loss 6.834283828735352
















 72%|███████▏  | 1365/1896 [58:00<22:33,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.272221088409424
loss 6.509943962097168
















 72%|███████▏  | 1367/1896 [58:05<22:28,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.875372409820557
loss 6.658797740936279
















 72%|███████▏  | 1369/1896 [58:10<22:23,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.647335529327393
loss 6.754393100738525
















 72%|███████▏  | 1371/1896 [58:15<22:18,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.966453552246094
loss 6.852232933044434
















 72%|███████▏  | 1373/1896 [58:21<22:13,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.3879265785217285
loss 6.816726207733154
















 73%|███████▎  | 1375/1896 [58:26<22:08,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.73441743850708
loss 6.666574954986572
















 73%|███████▎  | 1377/1896 [58:31<22:03,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.722996711730957
loss 6.739109516143799
















 73%|███████▎  | 1379/1896 [58:37<21:58,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.722606182098389
loss 6.689566612243652
















 73%|███████▎  | 1381/1896 [58:42<21:53,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.674482345581055
loss 6.844661235809326
















 73%|███████▎  | 1383/1896 [58:47<21:48,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.65573787689209
















 73%|███████▎  | 1385/1896 [58:50<21:42,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.391857624053955
loss 6.786366939544678
















 73%|███████▎  | 1387/1896 [58:55<21:37,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.727560043334961
loss 6.870096683502197
















 73%|███████▎  | 1389/1896 [59:00<21:32,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.676620006561279
loss 6.6751885414123535
















 73%|███████▎  | 1391/1896 [59:06<21:27,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.397993087768555
loss 6.552857875823975
















 73%|███████▎  | 1393/1896 [59:11<21:22,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.881265640258789
loss 6.790279865264893
















 74%|███████▎  | 1395/1896 [59:16<21:17,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.597940921783447
loss 6.717591762542725
















 74%|███████▎  | 1397/1896 [59:22<21:12,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.905044078826904
loss 6.663982391357422
















 74%|███████▍  | 1399/1896 [59:27<21:07,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.485172271728516
loss 6.828554153442383
















 74%|███████▍  | 1401/1896 [59:32<21:02,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.786909580230713
Loss: 6.73402343012076 Current loss: 6.786909580230713
loss 6.783779621124268
















 74%|███████▍  | 1403/1896 [59:37<20:57,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.619830131530762
loss 6.754312992095947
















 74%|███████▍  | 1405/1896 [59:43<20:52,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.895736217498779
loss 6.75919246673584
















 74%|███████▍  | 1407/1896 [59:48<20:47,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.557723522186279
loss 6.554063320159912
















 74%|███████▍  | 1409/1896 [59:53<20:42,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.450861930847168
loss 6.602735996246338
















 74%|███████▍  | 1411/1896 [59:59<20:37,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.46317720413208
loss 6.658844470977783
















 75%|███████▍  | 1413/1896 [1:00:04<20:32,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.700287818908691
loss 6.8737382888793945
















 75%|███████▍  | 1415/1896 [1:00:09<20:26,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.005450248718262
loss 6.999853134155273
















 75%|███████▍  | 1417/1896 [1:00:14<20:21,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.802064895629883
loss 6.569552898406982
















 75%|███████▍  | 1419/1896 [1:00:20<20:16,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.704771041870117
















 75%|███████▍  | 1421/1896 [1:00:22<20:10,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.799650192260742
loss 6.6837897300720215
















 75%|███████▌  | 1423/1896 [1:00:28<20:05,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.571262836456299
loss 6.845120429992676
















 75%|███████▌  | 1425/1896 [1:00:33<20:00,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.920388698577881
loss 6.6119208335876465
















 75%|███████▌  | 1427/1896 [1:00:38<19:55,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.029934883117676
loss 6.224116802215576
















 75%|███████▌  | 1429/1896 [1:00:44<19:50,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.639242172241211
loss 6.847756862640381
















 75%|███████▌  | 1431/1896 [1:00:49<19:45,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.600433349609375
loss 6.6674323081970215
















 76%|███████▌  | 1433/1896 [1:00:54<19:40,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.81435489654541
loss 6.798628330230713
















 76%|███████▌  | 1435/1896 [1:00:59<19:35,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.919374465942383
loss 6.751801013946533
















 76%|███████▌  | 1437/1896 [1:01:05<19:30,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.617591857910156
loss 6.538723945617676
















 76%|███████▌  | 1439/1896 [1:01:10<19:25,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.617763042449951
loss 6.622567653656006
















 76%|███████▌  | 1441/1896 [1:01:15<19:20,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.524595260620117
loss 6.538496494293213
















 76%|███████▌  | 1443/1896 [1:01:21<19:15,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.813316345214844
loss 6.8101983070373535
















 76%|███████▌  | 1445/1896 [1:01:26<19:10,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.766960144042969
loss 6.356053829193115
















 76%|███████▋  | 1447/1896 [1:01:31<19:05,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.491986274719238
loss 6.924163818359375
















 76%|███████▋  | 1449/1896 [1:01:36<19:00,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.735790252685547
loss 6.8063764572143555
















 77%|███████▋  | 1451/1896 [1:01:41<18:55,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.585981369018555
loss 6.962169647216797
















 77%|███████▋  | 1453/1896 [1:01:47<18:50,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.056386470794678
loss 6.568072319030762
















 77%|███████▋  | 1455/1896 [1:01:52<18:45,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.654719352722168
loss 6.548052787780762
















 77%|███████▋  | 1457/1896 [1:01:57<18:40,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.239717483520508
loss 7.080364227294922
















 77%|███████▋  | 1459/1896 [1:02:02<18:35,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.716587543487549
loss 6.623327255249023
















 77%|███████▋  | 1461/1896 [1:02:08<18:30,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.637093544006348
loss 7.048588752746582
















 77%|███████▋  | 1463/1896 [1:02:13<18:24,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.690904140472412
loss 6.68347692489624
















 77%|███████▋  | 1465/1896 [1:02:18<18:19,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.689571380615234
loss 6.863519668579102
















 77%|███████▋  | 1467/1896 [1:02:23<18:14,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.165949821472168
loss 6.760462284088135
















 77%|███████▋  | 1469/1896 [1:02:29<18:09,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.392814636230469
loss 6.536839962005615
















 78%|███████▊  | 1471/1896 [1:02:34<18:04,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.703508377075195
loss 6.974555969238281
















 78%|███████▊  | 1473/1896 [1:02:39<17:59,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.860724449157715
loss 6.625674724578857
















 78%|███████▊  | 1475/1896 [1:02:45<17:54,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.809808254241943
loss 6.461966037750244
















 78%|███████▊  | 1477/1896 [1:02:50<17:49,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.803239345550537
loss 6.713450908660889
















 78%|███████▊  | 1479/1896 [1:02:55<17:44,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.702041149139404
loss 6.9086198806762695
















 78%|███████▊  | 1481/1896 [1:03:00<17:39,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.672353744506836
















 78%|███████▊  | 1483/1896 [1:03:03<17:33,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.016271591186523
loss 6.757065773010254
















 78%|███████▊  | 1485/1896 [1:03:08<17:28,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.592325210571289
loss 6.9150710105896
















 78%|███████▊  | 1487/1896 [1:03:13<17:23,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.022873401641846
loss 6.661837100982666
















 79%|███████▊  | 1489/1896 [1:03:19<17:18,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.819560527801514
loss 6.724614143371582
















 79%|███████▊  | 1491/1896 [1:03:24<17:13,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.739170551300049
loss 6.643351078033447
















 79%|███████▊  | 1493/1896 [1:03:29<17:08,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.816518306732178
loss 6.633179187774658
















 79%|███████▉  | 1495/1896 [1:03:35<17:03,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 5.903728008270264
loss 6.724611282348633
















 79%|███████▉  | 1497/1896 [1:03:40<16:58,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.923727035522461
loss 6.8758931159973145
















 79%|███████▉  | 1499/1896 [1:03:46<16:53,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.911932945251465
















 79%|███████▉  | 1501/1896 [1:03:49<16:47,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.7124481201171875
loss 6.952678203582764
















 79%|███████▉  | 1503/1896 [1:03:54<16:42,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.73085355758667
loss 6.695918560028076
















 79%|███████▉  | 1505/1896 [1:03:59<16:37,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.839024543762207
loss 6.842416286468506
















 79%|███████▉  | 1507/1896 [1:04:04<16:32,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 7.150914669036865
















 80%|███████▉  | 1509/1896 [1:04:07<16:26,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.936000347137451
loss 6.713621616363525
















 80%|███████▉  | 1511/1896 [1:04:12<16:21,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.803598403930664
loss 6.726401329040527
















 80%|███████▉  | 1513/1896 [1:04:18<16:16,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.726373672485352
loss 6.52983283996582
















 80%|███████▉  | 1515/1896 [1:04:23<16:11,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.911994934082031
















 80%|████████  | 1517/1896 [1:04:26<16:05,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.503606796264648
loss 6.916902542114258
















 80%|████████  | 1519/1896 [1:04:31<16:00,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.721122741699219
loss 6.760342121124268
















 80%|████████  | 1521/1896 [1:04:36<15:55,  2.55s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A

loss 6.485570907592773



# Generate

In [None]:
def generate(start_sent, max_tokens=300, temperature=0.7, top_k=32):
    """Sample a continuation of ``start_sent`` and return the decoded result.

    The prompt is encoded with the module-level ``text_processor``. Then, one
    token at a time, the whole sequence so far is fed to the module-level
    ``model`` (on ``device``) and the next token is drawn from the ``top_k``
    highest-scoring candidates at the last position.

    Args:
        start_sent: Prompt passed to ``text_processor.encode``.
        max_tokens: Number of tokens to append to the encoded prompt.
        temperature: Positive divisor applied to the scores before sampling;
            smaller values concentrate probability on the best candidates.
        top_k: Maximum number of candidates to sample from. Clamped to the
            size of the model output's last dimension.

    Returns:
        The value of ``text_processor.decode`` applied to the prompt tokens
        followed by the sampled tokens.

    Raises:
        ValueError: If ``temperature`` is not positive or ``top_k`` is below 1.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Work on a copy so the list handed back by the processor is never mutated.
    token_ids = list(text_processor.encode(start_sent))

    with torch.no_grad():
        for _ in range(max_tokens):
            input_ids = torch.LongTensor(token_ids).unsqueeze(0).to(device)
            output = model(input_ids)

            # NOTE(review): assumes the model returns one score vector per
            # input position with the vocabulary on the last axis - confirm.
            # Flattening the leading axes (instead of squeeze()) keeps the
            # vocabulary axis intact even when the prompt is a single token.
            last_scores = output.reshape(-1, output.size(-1))[-1]
            scaled = last_scores.float().div(temperature).cpu()

            # Never ask topk for more candidates than exist.
            top_values, top_indices = scaled.topk(min(top_k, scaled.size(-1)))

            # Softmax over the kept scores yields the same distribution as
            # normalising exp(score), but cannot overflow to inf.
            probs = torch.softmax(top_values, dim=-1)
            choice = torch.multinomial(probs, 1).item()
            token_ids.append(top_indices[choice].item())

    return text_processor.decode(token_ids)

if __name__ == '__main__':
    # Use the first CUDA device when one is available; otherwise run on the
    # CPU instead of failing on machines without a GPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    saved_model_path = "models/model_partial"
    corpus_path = "data/processed_lyrics.txt"
    start_sentence = "opel"

    model = GPT2Model().to(device)
    text_processor = TextProcessor(model.n_tokens, corpus_path, prefix="")
    # map_location remaps the checkpoint's tensors onto the selected device.
    model.load_state_dict(torch.load(saved_model_path, map_location=device))
    model.eval()

    # Show the sampled text whenever generate() hands one back.
    generated_text = generate(start_sentence)
    if generated_text is not None:
        print(generated_text)

RuntimeError: ignored