In [1]:
import torch, os, tqdm
import numpy as np
import pandas as pd
import torch.nn.functional as F


from PIL import Image
from data import TRdataset, dataset
from utils import ocr_transformer, val_transformer, encoder
from torch.utils.data import DataLoader, Dataset

from models import VitOCR #, SeArchitecture
from utils import save_model


# Pick the compute device once: use CUDA when torch reports it as available,
# otherwise run on the CPU. Kept as a plain string.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"



In [2]:
# Annotation tables for the validation and test splits, used as read.
val = pd.read_csv("dataset/val_annotation.csv")
test = pd.read_csv("dataset/test_annotation.csv")

# Training annotations with a fixed set of rows removed. The values are index
# labels of the freshly read frame (not positions), so dropping them together
# is the same as dropping them one at a time.
# NOTE(review): the reason these four rows are excluded is not recorded in
# this notebook — document it or derive the list programmatically.
excluded_train_rows = [8, 64, 117, 213]
train = pd.read_csv("dataset/train_annotation.csv").drop(index=excluded_train_rows)

In [3]:
# Settings shared by all three splits, named once so they cannot drift apart.
MAX_SEQUENCE = 19  # forwarded to TRdataset as max_sequence
BATCH_SIZE = 64    # samples per batch for every loader


def make_loader(frame, shuffle):
    """Build a DataLoader over a TRdataset created from an annotation frame.

    Args:
        frame: annotation DataFrame for one split (train / val / test).
        shuffle: whether the DataLoader reshuffles the samples every epoch.

    Returns:
        A DataLoader yielding batches of BATCH_SIZE samples from the split.
    """
    split = TRdataset(frame, encoder, max_sequence=MAX_SEQUENCE, transformer=val_transformer)
    return DataLoader(split, batch_size=BATCH_SIZE, shuffle=shuffle)


# NOTE(review): the training split is built with val_transformer, while
# ocr_transformer is imported but never used in the visible cells — confirm
# that training without the other transform is intentional.
trainLoader = make_loader(train, shuffle=True)   # only the training data is shuffled
valLoader = make_loader(val, shuffle=False)
testLoader = make_loader(test, shuffle=False)

In [4]:
# CTC objective: class index 0 is the blank symbol, the per-batch losses are
# reduced with "mean", and infinite losses are NOT zeroed (zero_infinity=False).
loss_fn = torch.nn.CTCLoss(blank=0, reduction="mean", zero_infinity=False)

# Network under training, moved to the selected device.
# Alternative architecture kept for reference: SeArchitecture(in_channels=9).
model = VitOCR().to(device)

# Adam over every model parameter with a fixed learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [3]:
# Make sure the "models" directory exists; exist_ok=True makes re-running this
# cell a no-op instead of an error.
# Presumably this is where save_model writes checkpoints — confirm in utils.
output_dir = "models"
os.makedirs(output_dir, exist_ok=True)

In [6]:
num_epochs, prev_loss = 10000, torch.inf
train_loss, val_loss = [], []  # per-epoch mean losses, appended in epoch order


def run_epoch(loader, training):
    """Run one full pass over ``loader`` and return the mean per-batch loss.

    The same routine serves both phases so the forward/loss code exists once:
    with ``training=True`` the model is put in train mode and the optimizer
    is stepped after every batch; with ``training=False`` the model is put in
    eval mode and gradient tracking is disabled.

    Args:
        loader: iterable yielding ``(images, targets, input_lengths,
            target_lengths)`` batches.
        training: True for an optimisation pass, False for evaluation only.

    Returns:
        Unweighted mean of the per-batch loss values as a Python float.

    Raises:
        ValueError: if ``loader`` yields no batches, since the mean would be
            undefined.
    """
    model.train(training)  # train(False) is what model.eval() does
    running_total, batch_count = 0.0, 0

    with torch.set_grad_enabled(training):
        for images, targets, input_lengths, target_lengths in loader:
            images = images.to(device)
            targets = targets.to(device)

            if training:
                optimizer.zero_grad()

            # Log-probabilities over dim 2, then swap the first two axes before
            # the CTC loss — presumably (batch, time, classes) ->
            # (time, batch, classes); confirm against VitOCR's output layout.
            log_probs = F.log_softmax(model(images), dim=2)
            batch_loss = loss_fn(log_probs.permute(1, 0, 2), targets, input_lengths, target_lengths)

            if training:
                batch_loss.backward()
                optimizer.step()

            running_total += batch_loss.item()
            batch_count += 1

    if batch_count == 0:
        raise ValueError("loader produced no batches; cannot compute a mean loss")
    return running_total / batch_count


# NOTE(review): there is no stopping criterion other than num_epochs; consider
# early stopping once the validation loss stops improving.
for epoch in tqdm.trange(num_epochs):
    loss = run_epoch(trainLoader, training=True)
    train_loss.append(loss)

    v_loss = run_epoch(valLoader, training=False)
    val_loss.append(v_loss)

    # save_model's return value is carried into the next epoch as prev_loss —
    # presumably the best validation loss so far; confirm in utils.save_model.
    prev_loss = save_model("attention_model", epoch, prev_loss, v_loss, model, optimizer)

    print(f"Epoch: {epoch} | Training loss {loss} | Validation loss {v_loss}")

  0%|                                                                             | 1/10000 [00:24<67:34:48, 24.33s/it]

The best model was saved!
Epoch: 0 | Training loss 3.722171388566494 | Validation loss 3.0559626817703247



  0%|                                                                             | 2/10000 [00:32<41:35:33, 14.98s/it]

The best model was saved!
Epoch: 1 | Training loss 3.4047480821609497 | Validation loss 3.0326748192310333



  0%|                                                                             | 3/10000 [00:41<33:39:37, 12.12s/it]

Epoch: 2 | Training loss 3.374485820531845 | Validation loss 3.035908877849579



  0%|                                                                             | 4/10000 [00:50<30:08:42, 10.86s/it]

The best model was saved!
Epoch: 3 | Training loss 3.352010063827038 | Validation loss 2.981612890958786



  0%|                                                                             | 5/10000 [00:59<27:59:29, 10.08s/it]

Epoch: 4 | Training loss 3.3384325057268143 | Validation loss 3.0023130774497986



  0%|                                                                             | 6/10000 [01:07<26:27:42,  9.53s/it]

The best model was saved!
Epoch: 5 | Training loss 3.3421389758586884 | Validation loss 2.949601322412491



  0%|                                                                             | 7/10000 [01:15<25:20:06,  9.13s/it]

The best model was saved!
Epoch: 6 | Training loss 3.3040824458003044 | Validation loss 2.928759425878525



  0%|                                                                             | 8/10000 [01:24<24:36:14,  8.86s/it]

The best model was saved!
Epoch: 7 | Training loss 3.288074642419815 | Validation loss 2.9285820722579956



  0%|                                                                             | 9/10000 [01:32<24:06:00,  8.68s/it]

Epoch: 8 | Training loss 3.27336298674345 | Validation loss 2.941985547542572



  0%|                                                                            | 10/10000 [01:40<23:43:52,  8.55s/it]

The best model was saved!
Epoch: 9 | Training loss 3.257691815495491 | Validation loss 2.9095742106437683



  0%|                                                                            | 11/10000 [01:48<23:28:27,  8.46s/it]

Epoch: 10 | Training loss 3.2585645020008087 | Validation loss 2.9275936782360077



  0%|                                                                            | 12/10000 [01:57<23:25:47,  8.44s/it]

The best model was saved!
Epoch: 11 | Training loss 3.243673652410507 | Validation loss 2.8951600193977356



  0%|                                                                            | 13/10000 [02:05<23:22:00,  8.42s/it]

Epoch: 12 | Training loss 3.2257173135876656 | Validation loss 2.900058925151825



  0%|                                                                            | 14/10000 [02:14<23:39:48,  8.53s/it]

Epoch: 13 | Training loss 3.2259381711483 | Validation loss 2.918866604566574



  0%|                                                                            | 15/10000 [02:22<23:34:32,  8.50s/it]

Epoch: 14 | Training loss 3.210638888180256 | Validation loss 2.89813095331192



  0%|                                                                            | 16/10000 [02:31<23:17:29,  8.40s/it]

Epoch: 15 | Training loss 3.1983926966786385 | Validation loss 2.9006352722644806



  0%|▏                                                                           | 17/10000 [02:39<23:23:42,  8.44s/it]

The best model was saved!
Epoch: 16 | Training loss 3.174991026520729 | Validation loss 2.887450188398361



  0%|▏                                                                           | 18/10000 [02:47<23:19:13,  8.41s/it]

The best model was saved!
Epoch: 17 | Training loss 3.1764937564730644 | Validation loss 2.8874107599258423



  0%|▏                                                                           | 19/10000 [02:56<23:16:06,  8.39s/it]

Epoch: 18 | Training loss 3.1553447246551514 | Validation loss 2.9035818576812744



  0%|▏                                                                           | 20/10000 [03:04<23:13:08,  8.38s/it]

Epoch: 19 | Training loss 3.162417285144329 | Validation loss 2.899806171655655



  0%|▏                                                                           | 21/10000 [03:12<23:05:23,  8.33s/it]

Epoch: 20 | Training loss 3.1623460054397583 | Validation loss 2.890698105096817



  0%|▏                                                                           | 22/10000 [03:21<23:00:02,  8.30s/it]

Epoch: 21 | Training loss 3.1451978236436844 | Validation loss 2.893072247505188



  0%|▏                                                                           | 23/10000 [03:29<22:52:50,  8.26s/it]

Epoch: 22 | Training loss 3.1545196026563644 | Validation loss 2.89259672164917



  0%|▏                                                                           | 24/10000 [03:37<22:55:58,  8.28s/it]

The best model was saved!
Epoch: 23 | Training loss 3.13473042845726 | Validation loss 2.874520719051361



  0%|▏                                                                           | 25/10000 [03:45<22:59:07,  8.30s/it]

Epoch: 24 | Training loss 3.124086059629917 | Validation loss 2.8823920488357544



  0%|▏                                                                           | 26/10000 [03:54<23:00:05,  8.30s/it]

The best model was saved!
Epoch: 25 | Training loss 3.1078304052352905 | Validation loss 2.861311912536621



  0%|▏                                                                           | 27/10000 [04:02<23:03:52,  8.33s/it]

Epoch: 26 | Training loss 3.1204168871045113 | Validation loss 2.887830972671509



  0%|▏                                                                           | 28/10000 [04:11<23:11:43,  8.37s/it]

Epoch: 27 | Training loss 3.1026688143610954 | Validation loss 2.8673053085803986



  0%|▏                                                                           | 29/10000 [04:19<23:11:04,  8.37s/it]

Epoch: 28 | Training loss 3.1017576083540916 | Validation loss 2.8793818950653076



  0%|▏                                                                           | 30/10000 [04:27<23:08:44,  8.36s/it]

Epoch: 29 | Training loss 3.1047490388154984 | Validation loss 2.8675369322299957



  0%|▏                                                                           | 31/10000 [04:36<23:13:45,  8.39s/it]

The best model was saved!
Epoch: 30 | Training loss 3.0836775302886963 | Validation loss 2.858667403459549



  0%|▏                                                                           | 32/10000 [04:45<23:57:20,  8.65s/it]

Epoch: 31 | Training loss 3.0883063226938248 | Validation loss 2.8724268078804016



  0%|▎                                                                           | 33/10000 [04:53<23:40:23,  8.55s/it]

Epoch: 32 | Training loss 3.083708666265011 | Validation loss 2.8723995089530945



  0%|▎                                                                           | 34/10000 [05:02<23:27:05,  8.47s/it]

Epoch: 33 | Training loss 3.0775343254208565 | Validation loss 2.8606408536434174



  0%|▎                                                                           | 35/10000 [05:10<23:32:44,  8.51s/it]

Epoch: 34 | Training loss 3.0769104212522507 | Validation loss 2.8620152175426483



  0%|▎                                                                           | 36/10000 [05:19<23:26:10,  8.47s/it]

Epoch: 35 | Training loss 3.067528799176216 | Validation loss 2.8685644268989563



  0%|▎                                                                           | 37/10000 [05:27<23:20:29,  8.43s/it]

Epoch: 36 | Training loss 3.064709536731243 | Validation loss 2.8670210242271423



  0%|▎                                                                           | 38/10000 [05:35<23:15:31,  8.41s/it]

The best model was saved!
Epoch: 37 | Training loss 3.062437057495117 | Validation loss 2.850393533706665



  0%|▎                                                                           | 39/10000 [05:44<23:20:36,  8.44s/it]

Epoch: 38 | Training loss 3.047317884862423 | Validation loss 2.8769724369049072



  0%|▎                                                                           | 40/10000 [05:52<23:15:53,  8.41s/it]

Epoch: 39 | Training loss 3.0521871000528336 | Validation loss 2.864910304546356



  0%|▎                                                                           | 41/10000 [06:04<25:52:00,  9.35s/it]

Epoch: 40 | Training loss 3.047557719051838 | Validation loss 2.867438465356827



  0%|▎                                                                           | 42/10000 [06:16<28:34:10, 10.33s/it]

The best model was saved!
Epoch: 41 | Training loss 3.0376225486397743 | Validation loss 2.849816679954529



  0%|▎                                                                           | 43/10000 [06:29<30:19:04, 10.96s/it]

Epoch: 42 | Training loss 3.0395899787545204 | Validation loss 2.857745110988617



  0%|▎                                                                           | 44/10000 [06:41<31:32:58, 11.41s/it]

The best model was saved!
Epoch: 43 | Training loss 3.03837388753891 | Validation loss 2.8495912551879883



  0%|▎                                                                           | 45/10000 [06:54<32:19:37, 11.69s/it]

Epoch: 44 | Training loss 3.0356090515851974 | Validation loss 2.8621709048748016



  0%|▎                                                                           | 46/10000 [07:06<32:52:48, 11.89s/it]

Epoch: 45 | Training loss 3.026677720248699 | Validation loss 2.8620636463165283



  0%|▎                                                                           | 47/10000 [07:18<33:09:43, 11.99s/it]

Epoch: 46 | Training loss 3.0258956775069237 | Validation loss 2.8545773923397064



  0%|▎                                                                           | 48/10000 [07:31<33:30:59, 12.12s/it]

Epoch: 47 | Training loss 3.0213318839669228 | Validation loss 2.868063896894455



  0%|▎                                                                           | 49/10000 [07:42<32:44:47, 11.85s/it]

Epoch: 48 | Training loss 3.0218409076333046 | Validation loss 2.8642172813415527



  0%|▍                                                                           | 50/10000 [07:50<29:46:47, 10.77s/it]

Epoch: 49 | Training loss 3.019010826945305 | Validation loss 2.86037215590477



  1%|▍                                                                           | 51/10000 [07:58<27:49:21, 10.07s/it]

Epoch: 50 | Training loss 3.0155755653977394 | Validation loss 2.869988977909088



  1%|▍                                                                           | 52/10000 [08:07<26:22:20,  9.54s/it]

The best model was saved!
Epoch: 51 | Training loss 3.012760691344738 | Validation loss 2.848252832889557



  1%|▍                                                                           | 53/10000 [08:15<25:16:07,  9.15s/it]

Epoch: 52 | Training loss 3.0064139887690544 | Validation loss 2.8572470247745514



  1%|▍                                                                           | 54/10000 [08:23<24:32:59,  8.89s/it]

The best model was saved!
Epoch: 53 | Training loss 3.0023082569241524 | Validation loss 2.8463697731494904



  1%|▍                                                                           | 55/10000 [08:32<24:02:21,  8.70s/it]

The best model was saved!
Epoch: 54 | Training loss 3.0149958804249763 | Validation loss 2.843060702085495



  1%|▍                                                                           | 56/10000 [08:40<23:36:18,  8.55s/it]

Epoch: 55 | Training loss 2.9957740530371666 | Validation loss 2.867668718099594



  1%|▍                                                                           | 57/10000 [08:48<23:24:49,  8.48s/it]

Epoch: 56 | Training loss 3.0006550922989845 | Validation loss 2.854873299598694



  1%|▍                                                                           | 58/10000 [08:56<23:14:20,  8.41s/it]

Epoch: 57 | Training loss 3.006232962012291 | Validation loss 2.8561313152313232



  1%|▍                                                                           | 59/10000 [09:05<23:07:02,  8.37s/it]

Epoch: 58 | Training loss 2.9959266930818558 | Validation loss 2.853837013244629



  1%|▍                                                                           | 60/10000 [09:13<23:10:39,  8.39s/it]

Epoch: 59 | Training loss 2.9916026294231415 | Validation loss 2.849108338356018



  1%|▍                                                                           | 61/10000 [09:21<23:07:26,  8.38s/it]

Epoch: 60 | Training loss 2.9909398704767227 | Validation loss 2.850495457649231



  1%|▍                                                                           | 62/10000 [09:30<22:56:43,  8.31s/it]

Epoch: 61 | Training loss 2.992449700832367 | Validation loss 2.85270419716835



  1%|▍                                                                           | 63/10000 [09:38<22:51:16,  8.28s/it]

Epoch: 62 | Training loss 2.9867168217897415 | Validation loss 2.8560456037521362



  1%|▍                                                                           | 64/10000 [09:46<22:50:03,  8.27s/it]

Epoch: 63 | Training loss 2.990668460726738 | Validation loss 2.843831032514572



  1%|▍                                                                           | 65/10000 [09:54<22:52:16,  8.29s/it]

Epoch: 64 | Training loss 2.992385484278202 | Validation loss 2.853452891111374



  1%|▌                                                                           | 66/10000 [10:03<22:55:56,  8.31s/it]

Epoch: 65 | Training loss 2.9866181761026382 | Validation loss 2.8496663570404053



  1%|▌                                                                           | 67/10000 [10:11<22:57:04,  8.32s/it]

Epoch: 66 | Training loss 2.9820272102952003 | Validation loss 2.8568919897079468



  1%|▌                                                                           | 68/10000 [10:19<22:53:07,  8.30s/it]

Epoch: 67 | Training loss 2.9915463775396347 | Validation loss 2.8585093915462494



  1%|▌                                                                           | 69/10000 [10:27<22:45:36,  8.25s/it]

Epoch: 68 | Training loss 2.982325755059719 | Validation loss 2.8483135402202606



  1%|▌                                                                           | 70/10000 [10:36<22:39:32,  8.21s/it]

Epoch: 69 | Training loss 2.9790543764829636 | Validation loss 2.843168169260025



  1%|▌                                                                           | 71/10000 [10:44<22:33:02,  8.18s/it]

Epoch: 70 | Training loss 2.990106612443924 | Validation loss 2.8495143353939056



  1%|▌                                                                           | 72/10000 [10:52<22:36:25,  8.20s/it]

Epoch: 71 | Training loss 2.973267190158367 | Validation loss 2.8457979261875153



  1%|▌                                                                           | 73/10000 [11:00<22:45:01,  8.25s/it]

The best model was saved!
Epoch: 72 | Training loss 2.9785715118050575 | Validation loss 2.8380447030067444



  1%|▌                                                                           | 74/10000 [11:09<22:45:41,  8.26s/it]

The best model was saved!
Epoch: 73 | Training loss 2.9791864827275276 | Validation loss 2.836652934551239



  1%|▌                                                                           | 75/10000 [11:17<22:47:08,  8.26s/it]

Epoch: 74 | Training loss 2.9814852103590965 | Validation loss 2.8407943546772003



  1%|▌                                                                           | 76/10000 [11:25<22:44:30,  8.25s/it]

Epoch: 75 | Training loss 2.9737362414598465 | Validation loss 2.843067914247513



  1%|▌                                                                           | 77/10000 [11:33<22:47:15,  8.27s/it]

Epoch: 76 | Training loss 2.9798818081617355 | Validation loss 2.8402445018291473



  1%|▌                                                                           | 78/10000 [11:42<22:48:07,  8.27s/it]

Epoch: 77 | Training loss 2.9705209881067276 | Validation loss 2.8475311994552612



  1%|▌                                                                           | 79/10000 [11:50<22:49:56,  8.29s/it]

Epoch: 78 | Training loss 2.969278521835804 | Validation loss 2.8402768969535828



  1%|▌                                                                           | 80/10000 [11:58<22:57:21,  8.33s/it]

Epoch: 79 | Training loss 2.980130650103092 | Validation loss 2.841562122106552



  1%|▌                                                                           | 81/10000 [12:07<22:57:58,  8.34s/it]

Epoch: 80 | Training loss 2.9684500098228455 | Validation loss 2.839938133955002



  1%|▌                                                                           | 82/10000 [12:18<25:11:26,  9.14s/it]

Epoch: 81 | Training loss 2.974705196917057 | Validation loss 2.845498889684677



  1%|▋                                                                           | 83/10000 [12:42<37:56:47, 13.78s/it]

The best model was saved!
Epoch: 82 | Training loss 2.9699767008423805 | Validation loss 2.836365222930908



  1%|▋                                                                           | 84/10000 [12:51<33:51:44, 12.29s/it]

Epoch: 83 | Training loss 2.967612810432911 | Validation loss 2.8482468724250793



  1%|▋                                                                           | 85/10000 [12:59<30:29:05, 11.07s/it]

Epoch: 84 | Training loss 2.972057066857815 | Validation loss 2.8415416181087494



  1%|▋                                                                           | 86/10000 [13:08<28:10:24, 10.23s/it]

Epoch: 85 | Training loss 2.9622820615768433 | Validation loss 2.848676770925522



  1%|▋                                                                           | 87/10000 [13:16<26:33:58,  9.65s/it]

The best model was saved!
Epoch: 86 | Training loss 2.962933510541916 | Validation loss 2.835657149553299



  1%|▋                                                                           | 88/10000 [13:24<25:28:00,  9.25s/it]

The best model was saved!
Epoch: 87 | Training loss 2.960641026496887 | Validation loss 2.8317960500717163



  1%|▋                                                                           | 89/10000 [13:33<24:38:31,  8.95s/it]

Epoch: 88 | Training loss 2.9534911513328552 | Validation loss 2.839278519153595



  1%|▋                                                                           | 90/10000 [13:41<24:01:12,  8.73s/it]

Epoch: 89 | Training loss 2.9623979553580284 | Validation loss 2.837800085544586



  1%|▋                                                                           | 91/10000 [13:49<23:40:54,  8.60s/it]

Epoch: 90 | Training loss 2.9557243809103966 | Validation loss 2.842845916748047



  1%|▋                                                                           | 92/10000 [13:57<23:25:29,  8.51s/it]

The best model was saved!
Epoch: 91 | Training loss 2.9594824239611626 | Validation loss 2.8277469873428345



  1%|▋                                                                           | 93/10000 [14:06<23:08:53,  8.41s/it]

Epoch: 92 | Training loss 2.9604793712496758 | Validation loss 2.830954909324646



  1%|▋                                                                           | 94/10000 [14:14<22:56:40,  8.34s/it]

Epoch: 93 | Training loss 2.9599843323230743 | Validation loss 2.8297178149223328



  1%|▋                                                                           | 95/10000 [14:22<22:44:44,  8.27s/it]

Epoch: 94 | Training loss 2.953994408249855 | Validation loss 2.8311444222927094



  1%|▋                                                                           | 96/10000 [14:30<22:44:21,  8.27s/it]

Epoch: 95 | Training loss 2.952913746237755 | Validation loss 2.8308890759944916



  1%|▋                                                                           | 97/10000 [14:38<22:43:05,  8.26s/it]

Epoch: 96 | Training loss 2.956203453242779 | Validation loss 2.8373445570468903



  1%|▋                                                                           | 98/10000 [14:47<22:42:13,  8.25s/it]

Epoch: 97 | Training loss 2.956907741725445 | Validation loss 2.8389560282230377



  1%|▊                                                                           | 99/10000 [14:55<22:41:09,  8.25s/it]

Epoch: 98 | Training loss 2.9569308683276176 | Validation loss 2.8281836807727814



  1%|▊                                                                          | 100/10000 [15:03<22:45:32,  8.28s/it]

The best model was saved!
Epoch: 99 | Training loss 2.9515669494867325 | Validation loss 2.8225817680358887



  1%|▊                                                                          | 101/10000 [15:11<22:44:32,  8.27s/it]

Epoch: 100 | Training loss 2.95015886425972 | Validation loss 2.838306427001953



  1%|▊                                                                          | 102/10000 [15:20<22:44:28,  8.27s/it]

Epoch: 101 | Training loss 2.94572626799345 | Validation loss 2.8356404304504395



  1%|▊                                                                          | 103/10000 [15:28<22:36:46,  8.23s/it]

Epoch: 102 | Training loss 2.955796703696251 | Validation loss 2.825502634048462



  1%|▊                                                                          | 104/10000 [15:36<22:32:04,  8.20s/it]

The best model was saved!
Epoch: 103 | Training loss 2.9481299966573715 | Validation loss 2.8205684423446655



  1%|▊                                                                          | 105/10000 [15:44<22:28:34,  8.18s/it]

Epoch: 104 | Training loss 2.9534851014614105 | Validation loss 2.831050306558609



  1%|▊                                                                          | 106/10000 [15:52<22:25:33,  8.16s/it]

Epoch: 105 | Training loss 2.948189824819565 | Validation loss 2.830144852399826



  1%|▊                                                                          | 107/10000 [16:00<22:25:52,  8.16s/it]

Epoch: 106 | Training loss 2.9503616616129875 | Validation loss 2.838455855846405



  1%|▊                                                                          | 108/10000 [16:08<22:25:52,  8.16s/it]

Epoch: 107 | Training loss 2.9484555572271347 | Validation loss 2.8254909217357635



  1%|▊                                                                          | 109/10000 [16:17<22:24:34,  8.16s/it]

Epoch: 108 | Training loss 2.944209672510624 | Validation loss 2.8312444984912872



  1%|▊                                                                          | 110/10000 [16:25<22:32:53,  8.21s/it]

Epoch: 109 | Training loss 2.94931423664093 | Validation loss 2.8288612365722656



  1%|▊                                                                          | 111/10000 [16:33<22:33:44,  8.21s/it]

Epoch: 110 | Training loss 2.948140874505043 | Validation loss 2.8302328884601593



  1%|▊                                                                          | 112/10000 [16:41<22:30:47,  8.20s/it]

Epoch: 111 | Training loss 2.945414215326309 | Validation loss 2.8375711739063263



  1%|▊                                                                          | 113/10000 [16:49<22:29:08,  8.19s/it]

Epoch: 112 | Training loss 2.9478964507579803 | Validation loss 2.8334002792835236



  1%|▊                                                                          | 114/10000 [16:58<22:33:57,  8.22s/it]

Epoch: 113 | Training loss 2.9425450041890144 | Validation loss 2.8208873867988586



  1%|▊                                                                          | 115/10000 [17:06<22:32:51,  8.21s/it]

Epoch: 114 | Training loss 2.9361669048666954 | Validation loss 2.828123092651367



  1%|▊                                                                          | 116/10000 [17:14<22:32:44,  8.21s/it]

Epoch: 115 | Training loss 2.9489388316869736 | Validation loss 2.8407828211784363



  1%|▉                                                                          | 117/10000 [17:22<22:31:22,  8.20s/it]

Epoch: 116 | Training loss 2.9398719668388367 | Validation loss 2.8323143422603607



  1%|▉                                                                          | 118/10000 [17:31<22:33:26,  8.22s/it]

Epoch: 117 | Training loss 2.9393715783953667 | Validation loss 2.829890489578247



  1%|▉                                                                          | 119/10000 [17:39<22:38:22,  8.25s/it]

Epoch: 118 | Training loss 2.9372118711471558 | Validation loss 2.823585867881775



  1%|▉                                                                          | 120/10000 [17:47<22:34:52,  8.23s/it]

Epoch: 119 | Training loss 2.9400225207209587 | Validation loss 2.824445217847824



  1%|▉                                                                          | 121/10000 [17:55<22:35:18,  8.23s/it]

The best model was saved!
Epoch: 120 | Training loss 2.939157098531723 | Validation loss 2.819985181093216



  1%|▉                                                                          | 122/10000 [18:04<22:31:52,  8.21s/it]

Epoch: 121 | Training loss 2.9363458827137947 | Validation loss 2.8267050981521606



  1%|▉                                                                          | 123/10000 [18:12<22:30:41,  8.21s/it]

The best model was saved!
Epoch: 122 | Training loss 2.935164175927639 | Validation loss 2.8197083175182343



  1%|▉                                                                          | 124/10000 [18:20<22:24:12,  8.17s/it]

Epoch: 123 | Training loss 2.9469520896673203 | Validation loss 2.8458366990089417



  1%|▉                                                                          | 125/10000 [18:28<22:22:36,  8.16s/it]

Epoch: 124 | Training loss 2.9430382922291756 | Validation loss 2.837933748960495



  1%|▉                                                                          | 126/10000 [18:36<22:25:06,  8.17s/it]

Epoch: 125 | Training loss 2.9314651042222977 | Validation loss 2.820550322532654



  1%|▉                                                                          | 127/10000 [18:44<22:22:38,  8.16s/it]

Epoch: 126 | Training loss 2.93122598528862 | Validation loss 2.8291338980197906



  1%|▉                                                                          | 128/10000 [18:53<22:26:36,  8.18s/it]

Epoch: 127 | Training loss 2.9346268102526665 | Validation loss 2.8412844240665436



  1%|▉                                                                          | 129/10000 [19:01<22:27:42,  8.19s/it]

Epoch: 128 | Training loss 2.9325386211276054 | Validation loss 2.8256579637527466



  1%|▉                                                                          | 130/10000 [19:09<22:27:51,  8.19s/it]

Epoch: 129 | Training loss 2.93005833029747 | Validation loss 2.8362369537353516



  1%|▉                                                                          | 131/10000 [19:17<22:30:51,  8.21s/it]

Epoch: 130 | Training loss 2.9336354956030846 | Validation loss 2.8370052576065063



  1%|▉                                                                          | 132/10000 [19:25<22:32:18,  8.22s/it]

Epoch: 131 | Training loss 2.9381413757801056 | Validation loss 2.842497766017914



  1%|▉                                                                          | 133/10000 [19:33<22:24:38,  8.18s/it]

Epoch: 132 | Training loss 2.9262819215655327 | Validation loss 2.8286532759666443



  1%|█                                                                          | 134/10000 [19:42<22:24:02,  8.17s/it]

Epoch: 133 | Training loss 2.927531883120537 | Validation loss 2.8458884358406067



  1%|█                                                                          | 135/10000 [19:50<22:29:05,  8.21s/it]

Epoch: 134 | Training loss 2.9378192499279976 | Validation loss 2.8213050067424774



  1%|█                                                                          | 136/10000 [19:58<22:28:25,  8.20s/it]

Epoch: 135 | Training loss 2.9282337054610252 | Validation loss 2.833183139562607



  1%|█                                                                          | 137/10000 [20:06<22:26:54,  8.19s/it]

Epoch: 136 | Training loss 2.9272854924201965 | Validation loss 2.8255675137043



  1%|█                                                                          | 138/10000 [20:14<22:24:32,  8.18s/it]

Epoch: 137 | Training loss 2.929031826555729 | Validation loss 2.836599051952362



  1%|█                                                                          | 139/10000 [20:23<22:26:50,  8.20s/it]

Epoch: 138 | Training loss 2.9187824204564095 | Validation loss 2.837875783443451



  1%|█                                                                          | 140/10000 [20:31<22:28:24,  8.21s/it]

Epoch: 139 | Training loss 2.9357545748353004 | Validation loss 2.82888326048851



  1%|█                                                                          | 141/10000 [20:39<22:21:12,  8.16s/it]

Epoch: 140 | Training loss 2.930316910147667 | Validation loss 2.825633555650711



  1%|█                                                                          | 142/10000 [20:47<22:22:03,  8.17s/it]

Epoch: 141 | Training loss 2.9203648418188095 | Validation loss 2.8448596596717834



  1%|█                                                                          | 143/10000 [20:55<22:20:56,  8.16s/it]

Epoch: 142 | Training loss 2.9352857246994972 | Validation loss 2.8251544535160065



  1%|█                                                                          | 144/10000 [21:03<22:18:07,  8.15s/it]

Epoch: 143 | Training loss 2.9215587377548218 | Validation loss 2.8383792340755463



  1%|█                                                                          | 145/10000 [21:12<22:15:44,  8.13s/it]

Epoch: 144 | Training loss 2.9198744744062424 | Validation loss 2.857653498649597



  1%|█                                                                          | 146/10000 [21:20<22:17:25,  8.14s/it]

The best model was saved!
Epoch: 145 | Training loss 2.9227710887789726 | Validation loss 2.815565675497055



  1%|█                                                                          | 147/10000 [21:28<22:19:09,  8.15s/it]

The best model was saved!
Epoch: 146 | Training loss 2.923702284693718 | Validation loss 2.812768965959549



  1%|█                                                                          | 148/10000 [21:36<22:14:50,  8.13s/it]

Epoch: 147 | Training loss 2.9180139303207397 | Validation loss 2.839718461036682



  1%|█                                                                          | 149/10000 [21:44<22:16:51,  8.14s/it]

Epoch: 148 | Training loss 2.918702132999897 | Validation loss 2.8819564282894135



  2%|█▏                                                                         | 150/10000 [21:52<22:13:12,  8.12s/it]

Epoch: 149 | Training loss 2.921840764582157 | Validation loss 2.825160652399063



  2%|█▏                                                                         | 151/10000 [22:00<22:12:03,  8.11s/it]

Epoch: 150 | Training loss 2.92040978372097 | Validation loss 2.8422906398773193



  2%|█▏                                                                         | 152/10000 [22:09<22:18:21,  8.15s/it]

Epoch: 151 | Training loss 2.921512931585312 | Validation loss 2.8713013231754303



  2%|█▏                                                                         | 153/10000 [22:17<22:17:13,  8.15s/it]

Epoch: 152 | Training loss 2.918315850198269 | Validation loss 2.8347909450531006



  2%|█▏                                                                         | 154/10000 [22:25<22:23:20,  8.19s/it]

Epoch: 153 | Training loss 2.923257037997246 | Validation loss 2.834474414587021



  2%|█▏                                                                         | 155/10000 [22:33<22:23:37,  8.19s/it]

Epoch: 154 | Training loss 2.9137377589941025 | Validation loss 2.834835946559906



  2%|█▏                                                                         | 156/10000 [22:41<22:27:43,  8.21s/it]

Epoch: 155 | Training loss 2.9154735282063484 | Validation loss 2.833262711763382



  2%|█▏                                                                         | 157/10000 [22:50<22:26:56,  8.21s/it]

Epoch: 156 | Training loss 2.9172904938459396 | Validation loss 2.82183900475502



  2%|█▏                                                                         | 158/10000 [22:58<22:28:27,  8.22s/it]

Epoch: 157 | Training loss 2.913261756300926 | Validation loss 2.8507241010665894



  2%|█▏                                                                         | 159/10000 [23:06<22:26:18,  8.21s/it]

Epoch: 158 | Training loss 2.919694997370243 | Validation loss 2.828743577003479



  2%|█▏                                                                         | 160/10000 [23:14<22:24:58,  8.20s/it]

Epoch: 159 | Training loss 2.905385382473469 | Validation loss 2.8284705579280853



  2%|█▏                                                                         | 161/10000 [23:22<22:26:47,  8.21s/it]

Epoch: 160 | Training loss 2.912192389369011 | Validation loss 2.8426593840122223



  2%|█▏                                                                         | 162/10000 [23:31<22:24:36,  8.20s/it]

Epoch: 161 | Training loss 2.912578471004963 | Validation loss 2.8389913141727448



  2%|█▏                                                                         | 163/10000 [23:39<22:18:13,  8.16s/it]

Epoch: 162 | Training loss 2.9141195863485336 | Validation loss 2.8248884975910187



  2%|█▏                                                                         | 164/10000 [23:47<22:16:40,  8.15s/it]

Epoch: 163 | Training loss 2.907996289432049 | Validation loss 2.8270004987716675



  2%|█▏                                                                         | 165/10000 [23:55<22:16:15,  8.15s/it]

Epoch: 164 | Training loss 2.912848263978958 | Validation loss 2.8288393020629883



  2%|█▏                                                                         | 166/10000 [24:03<22:19:07,  8.17s/it]

Epoch: 165 | Training loss 2.9133006781339645 | Validation loss 2.8367576003074646



  2%|█▎                                                                         | 167/10000 [24:11<22:19:27,  8.17s/it]

Epoch: 166 | Training loss 2.9109858945012093 | Validation loss 2.8442817628383636



  2%|█▎                                                                         | 168/10000 [24:20<22:19:34,  8.17s/it]

Epoch: 167 | Training loss 2.9061676040291786 | Validation loss 2.8322217762470245



  2%|█▎                                                                         | 169/10000 [24:28<22:17:27,  8.16s/it]

Epoch: 168 | Training loss 2.91611061245203 | Validation loss 2.8573477268218994



  2%|█▎                                                                         | 170/10000 [24:36<22:19:53,  8.18s/it]

Epoch: 169 | Training loss 2.906616911292076 | Validation loss 2.8430868089199066



  2%|█▎                                                                         | 171/10000 [24:44<22:13:47,  8.14s/it]

Epoch: 170 | Training loss 2.914214752614498 | Validation loss 2.8476986289024353



  2%|█▎                                                                         | 172/10000 [24:52<22:09:18,  8.12s/it]

Epoch: 171 | Training loss 2.9087320417165756 | Validation loss 2.838574469089508



  2%|█▎                                                                         | 173/10000 [25:00<22:06:09,  8.10s/it]

Epoch: 172 | Training loss 2.9089435413479805 | Validation loss 2.831004172563553



  2%|█▎                                                                         | 174/10000 [25:08<22:10:17,  8.12s/it]

Epoch: 173 | Training loss 2.9088492915034294 | Validation loss 2.832317531108856



  2%|█▎                                                                         | 175/10000 [25:16<22:16:02,  8.16s/it]

Epoch: 174 | Training loss 2.9043639600276947 | Validation loss 2.8301849365234375



  2%|█▎                                                                         | 176/10000 [25:25<22:18:34,  8.18s/it]

Epoch: 175 | Training loss 2.9082758873701096 | Validation loss 2.8262847661972046



  2%|█▎                                                                         | 177/10000 [25:33<22:17:58,  8.17s/it]

Epoch: 176 | Training loss 2.908922016620636 | Validation loss 2.8182192742824554



  2%|█▎                                                                         | 178/10000 [25:41<22:16:50,  8.17s/it]

Epoch: 177 | Training loss 2.9064130038022995 | Validation loss 2.8194997012615204



  2%|█▎                                                                         | 179/10000 [25:49<22:17:36,  8.17s/it]

Epoch: 178 | Training loss 2.9074475318193436 | Validation loss 2.8374610543251038



  2%|█▎                                                                         | 180/10000 [25:57<22:17:50,  8.17s/it]

Epoch: 179 | Training loss 2.907182477414608 | Validation loss 2.850668877363205



  2%|█▎                                                                         | 181/10000 [26:05<22:14:55,  8.16s/it]

Epoch: 180 | Training loss 2.9013076573610306 | Validation loss 2.8500458002090454



  2%|█▎                                                                         | 182/10000 [26:14<22:11:57,  8.14s/it]

Epoch: 181 | Training loss 2.901925206184387 | Validation loss 2.8307795226573944



  2%|█▎                                                                         | 183/10000 [26:22<22:11:27,  8.14s/it]

Epoch: 182 | Training loss 2.901190906763077 | Validation loss 2.8215985000133514



  2%|█▍                                                                         | 184/10000 [26:30<22:14:33,  8.16s/it]

Epoch: 183 | Training loss 2.905335806310177 | Validation loss 2.832350492477417



  2%|█▍                                                                         | 185/10000 [26:38<22:14:01,  8.15s/it]

Epoch: 184 | Training loss 2.901947408914566 | Validation loss 2.830196738243103



  2%|█▍                                                                         | 186/10000 [26:46<22:16:06,  8.17s/it]

Epoch: 185 | Training loss 2.902075916528702 | Validation loss 2.824961394071579



  2%|█▍                                                                         | 187/10000 [26:55<22:19:26,  8.19s/it]

Epoch: 186 | Training loss 2.8900265842676163 | Validation loss 2.8494249880313873



  2%|█▍                                                                         | 188/10000 [27:03<22:19:40,  8.19s/it]

Epoch: 187 | Training loss 2.905670166015625 | Validation loss 2.826505035161972



  2%|█▍                                                                         | 189/10000 [27:11<22:15:45,  8.17s/it]

Epoch: 188 | Training loss 2.9033337980508804 | Validation loss 2.8276731073856354



  2%|█▍                                                                         | 190/10000 [27:19<22:17:36,  8.18s/it]

Epoch: 189 | Training loss 2.892797164618969 | Validation loss 2.81876078248024



  2%|█▍                                                                         | 191/10000 [27:27<22:15:53,  8.17s/it]

Epoch: 190 | Training loss 2.9038377180695534 | Validation loss 2.8379535377025604



  2%|█▍                                                                         | 192/10000 [27:35<22:16:58,  8.18s/it]

Epoch: 191 | Training loss 2.9068543612957 | Validation loss 2.820272773504257



  2%|█▍                                                                         | 193/10000 [27:44<22:15:43,  8.17s/it]

Epoch: 192 | Training loss 2.8950502797961235 | Validation loss 2.828349381685257



  2%|█▍                                                                         | 194/10000 [27:52<22:18:47,  8.19s/it]

Epoch: 193 | Training loss 2.896157681941986 | Validation loss 2.81422621011734



  2%|█▍                                                                         | 195/10000 [28:00<22:18:00,  8.19s/it]

Epoch: 194 | Training loss 2.8957572504878044 | Validation loss 2.825023651123047



  2%|█▍                                                                         | 196/10000 [28:08<22:10:29,  8.14s/it]

Epoch: 195 | Training loss 2.8952481523156166 | Validation loss 2.8299969136714935



  2%|█▍                                                                         | 197/10000 [28:16<22:09:20,  8.14s/it]

Epoch: 196 | Training loss 2.8971701562404633 | Validation loss 2.8381219506263733



  2%|█▍                                                                         | 198/10000 [28:24<22:08:15,  8.13s/it]

Epoch: 197 | Training loss 2.8997464701533318 | Validation loss 2.81817489862442



  2%|█▍                                                                         | 199/10000 [28:32<22:05:06,  8.11s/it]

Epoch: 198 | Training loss 2.8953321874141693 | Validation loss 2.8379975259304047



  2%|█▌                                                                         | 200/10000 [28:41<22:10:36,  8.15s/it]

The best model was saved!
Epoch: 199 | Training loss 2.8949304446578026 | Validation loss 2.812360465526581



  2%|█▌                                                                         | 201/10000 [28:49<22:07:26,  8.13s/it]

Epoch: 200 | Training loss 2.8937804996967316 | Validation loss 2.840059220790863



  2%|█▌                                                                         | 202/10000 [28:57<22:09:18,  8.14s/it]

Epoch: 201 | Training loss 2.8954285755753517 | Validation loss 2.844532936811447



  2%|█▌                                                                         | 203/10000 [29:05<22:15:40,  8.18s/it]

Epoch: 202 | Training loss 2.895597331225872 | Validation loss 2.829009473323822



  2%|█▌                                                                         | 204/10000 [29:13<22:20:13,  8.21s/it]

Epoch: 203 | Training loss 2.896477125585079 | Validation loss 2.829245448112488



  2%|█▌                                                                         | 205/10000 [29:21<22:17:00,  8.19s/it]

Epoch: 204 | Training loss 2.892250806093216 | Validation loss 2.8217140436172485



  2%|█▌                                                                         | 206/10000 [29:30<22:14:45,  8.18s/it]

Epoch: 205 | Training loss 2.890646144747734 | Validation loss 2.8161593079566956



  2%|█▌                                                                         | 207/10000 [29:38<22:14:18,  8.18s/it]

Epoch: 206 | Training loss 2.8901819959282875 | Validation loss 2.8160063922405243



  2%|█▌                                                                         | 208/10000 [29:46<22:18:12,  8.20s/it]

Epoch: 207 | Training loss 2.896564558148384 | Validation loss 2.822462797164917



  2%|█▌                                                                         | 209/10000 [29:54<22:17:11,  8.19s/it]

Epoch: 208 | Training loss 2.8920484632253647 | Validation loss 2.8384039402008057



  2%|█▌                                                                         | 210/10000 [30:02<22:14:49,  8.18s/it]

Epoch: 209 | Training loss 2.8886221423745155 | Validation loss 2.8352378010749817



  2%|█▌                                                                         | 211/10000 [30:11<22:15:19,  8.18s/it]

The best model was saved!
Epoch: 210 | Training loss 2.896968834102154 | Validation loss 2.81060791015625



  2%|█▌                                                                         | 212/10000 [30:19<22:17:13,  8.20s/it]

The best model was saved!
Epoch: 211 | Training loss 2.893523909151554 | Validation loss 2.8087865710258484



  2%|█▌                                                                         | 213/10000 [30:27<22:16:17,  8.19s/it]

Epoch: 212 | Training loss 2.888570338487625 | Validation loss 2.8184866905212402



  2%|█▌                                                                         | 214/10000 [30:35<22:17:04,  8.20s/it]

Epoch: 213 | Training loss 2.891228973865509 | Validation loss 2.8158823549747467



  2%|█▌                                                                         | 215/10000 [30:43<22:22:05,  8.23s/it]

Epoch: 214 | Training loss 2.8832420706748962 | Validation loss 2.8124343752861023



  2%|█▌                                                                         | 216/10000 [30:52<22:18:49,  8.21s/it]

Epoch: 215 | Training loss 2.892543986439705 | Validation loss 2.8367107808589935



  2%|█▋                                                                         | 217/10000 [31:00<22:10:58,  8.16s/it]

Epoch: 216 | Training loss 2.889965757727623 | Validation loss 2.8249303698539734



  2%|█▋                                                                         | 218/10000 [31:08<22:11:37,  8.17s/it]

Epoch: 217 | Training loss 2.8931869342923164 | Validation loss 2.820354700088501



  2%|█▋                                                                         | 219/10000 [31:16<22:12:21,  8.17s/it]

Epoch: 218 | Training loss 2.888102889060974 | Validation loss 2.8266727328300476



  2%|█▋                                                                         | 220/10000 [31:24<22:07:14,  8.14s/it]

Epoch: 219 | Training loss 2.884337641298771 | Validation loss 2.8162408769130707



  2%|█▋                                                                         | 221/10000 [31:32<22:06:40,  8.14s/it]

Epoch: 220 | Training loss 2.888609901070595 | Validation loss 2.8291091322898865



  2%|█▋                                                                         | 222/10000 [31:40<22:00:46,  8.10s/it]

Epoch: 221 | Training loss 2.8954028263688087 | Validation loss 2.816715896129608



  2%|█▋                                                                         | 223/10000 [31:49<22:06:29,  8.14s/it]

Epoch: 222 | Training loss 2.8873084038496017 | Validation loss 2.812347710132599



  2%|█▋                                                                         | 224/10000 [31:57<22:08:59,  8.16s/it]

Epoch: 223 | Training loss 2.8873172104358673 | Validation loss 2.8113445937633514



  2%|█▋                                                                         | 225/10000 [32:05<22:10:26,  8.17s/it]

Epoch: 224 | Training loss 2.885678730905056 | Validation loss 2.8174698054790497



  2%|█▋                                                                         | 226/10000 [32:13<22:11:38,  8.17s/it]

Epoch: 225 | Training loss 2.891973316669464 | Validation loss 2.8364681005477905



  2%|█▋                                                                         | 227/10000 [32:21<22:10:15,  8.17s/it]

Epoch: 226 | Training loss 2.8869140073657036 | Validation loss 2.825464427471161



  2%|█▋                                                                         | 228/10000 [32:29<22:09:35,  8.16s/it]

Epoch: 227 | Training loss 2.8864162862300873 | Validation loss 2.827544391155243



  2%|█▋                                                                         | 229/10000 [32:38<22:17:04,  8.21s/it]

Epoch: 228 | Training loss 2.8939779698848724 | Validation loss 2.8166646361351013



  2%|█▋                                                                         | 230/10000 [32:46<22:20:01,  8.23s/it]

Epoch: 229 | Training loss 2.8833707347512245 | Validation loss 2.8418902456760406



  2%|█▋                                                                         | 231/10000 [32:54<22:11:28,  8.18s/it]

Epoch: 230 | Training loss 2.8865356370806694 | Validation loss 2.8122750222682953



  2%|█▋                                                                         | 232/10000 [33:02<22:11:28,  8.18s/it]

Epoch: 231 | Training loss 2.8878841549158096 | Validation loss 2.8246322870254517



  2%|█▋                                                                         | 233/10000 [33:11<22:17:44,  8.22s/it]

The best model was saved!
Epoch: 232 | Training loss 2.8832851350307465 | Validation loss 2.803531676530838



  2%|█▊                                                                         | 234/10000 [33:19<22:18:03,  8.22s/it]

Epoch: 233 | Training loss 2.8845594823360443 | Validation loss 2.814085930585861



  2%|█▊                                                                         | 235/10000 [33:27<22:18:55,  8.23s/it]

Epoch: 234 | Training loss 2.884550951421261 | Validation loss 2.8397643864154816



  2%|█▊                                                                         | 236/10000 [33:35<22:14:58,  8.20s/it]

Epoch: 235 | Training loss 2.888135150074959 | Validation loss 2.820057064294815



  2%|█▊                                                                         | 237/10000 [33:43<22:10:43,  8.18s/it]

Epoch: 236 | Training loss 2.8878428041934967 | Validation loss 2.810660630464554



  2%|█▊                                                                         | 238/10000 [33:51<22:00:49,  8.12s/it]

Epoch: 237 | Training loss 2.883970096707344 | Validation loss 2.825204074382782



  2%|█▊                                                                         | 239/10000 [33:59<21:57:34,  8.10s/it]

Epoch: 238 | Training loss 2.884163498878479 | Validation loss 2.804349184036255



  2%|█▊                                                                         | 240/10000 [34:07<21:59:13,  8.11s/it]

Epoch: 239 | Training loss 2.881037153303623 | Validation loss 2.8086136281490326



  2%|█▊                                                                         | 241/10000 [34:16<22:00:59,  8.12s/it]

Epoch: 240 | Training loss 2.87711600959301 | Validation loss 2.8109123706817627



  2%|█▊                                                                         | 242/10000 [34:24<21:55:13,  8.09s/it]

Epoch: 241 | Training loss 2.8834142237901688 | Validation loss 2.8257488906383514



  2%|█▊                                                                         | 243/10000 [34:32<22:00:27,  8.12s/it]

Epoch: 242 | Training loss 2.8845210149884224 | Validation loss 2.8086836636066437



  2%|█▊                                                                         | 244/10000 [34:40<22:02:33,  8.13s/it]

Epoch: 243 | Training loss 2.8817958384752274 | Validation loss 2.809186667203903



  2%|█▊                                                                         | 245/10000 [34:48<22:06:12,  8.16s/it]

Epoch: 244 | Training loss 2.8871535435318947 | Validation loss 2.833947390317917



  2%|█▊                                                                         | 246/10000 [34:56<22:04:53,  8.15s/it]

Epoch: 245 | Training loss 2.8830083906650543 | Validation loss 2.8095057606697083



  2%|█▊                                                                         | 247/10000 [35:04<22:04:48,  8.15s/it]

Epoch: 246 | Training loss 2.88129311054945 | Validation loss 2.8105791211128235



  2%|█▊                                                                         | 248/10000 [35:13<21:59:57,  8.12s/it]

Epoch: 247 | Training loss 2.885071247816086 | Validation loss 2.8300353586673737



  2%|█▊                                                                         | 249/10000 [35:21<22:06:30,  8.16s/it]

Epoch: 248 | Training loss 2.883931040763855 | Validation loss 2.8324528634548187



  2%|█▉                                                                         | 250/10000 [35:29<22:02:46,  8.14s/it]

The best model was saved!
Epoch: 249 | Training loss 2.8816629201173782 | Validation loss 2.7994788587093353



  3%|█▉                                                                         | 251/10000 [35:37<21:59:59,  8.12s/it]

Epoch: 250 | Training loss 2.8827415481209755 | Validation loss 2.811208486557007



  3%|█▉                                                                         | 252/10000 [35:45<22:05:38,  8.16s/it]

The best model was saved!
Epoch: 251 | Training loss 2.8803402334451675 | Validation loss 2.7947816252708435



  3%|█▉                                                                         | 253/10000 [35:53<22:03:32,  8.15s/it]

Epoch: 252 | Training loss 2.8889282196760178 | Validation loss 2.8077381253242493



  3%|█▉                                                                         | 254/10000 [36:01<22:02:42,  8.14s/it]

Epoch: 253 | Training loss 2.882318302989006 | Validation loss 2.8052240908145905



  3%|█▉                                                                         | 255/10000 [36:10<22:04:25,  8.15s/it]

Epoch: 254 | Training loss 2.879630960524082 | Validation loss 2.80164897441864



  3%|█▉                                                                         | 256/10000 [36:18<22:03:12,  8.15s/it]

Epoch: 255 | Training loss 2.8876407891511917 | Validation loss 2.799450308084488



  3%|█▉                                                                         | 257/10000 [36:26<22:04:44,  8.16s/it]

Epoch: 256 | Training loss 2.883458264172077 | Validation loss 2.809245377779007



  3%|█▉                                                                         | 258/10000 [36:34<22:01:18,  8.14s/it]

Epoch: 257 | Training loss 2.8819019570946693 | Validation loss 2.8089226484298706



  3%|█▉                                                                         | 259/10000 [36:42<21:54:43,  8.10s/it]

Epoch: 258 | Training loss 2.8784415125846863 | Validation loss 2.7993321120738983



  3%|█▉                                                                         | 260/10000 [36:50<21:51:53,  8.08s/it]

Epoch: 259 | Training loss 2.8869284614920616 | Validation loss 2.8076057732105255



  3%|█▉                                                                         | 261/10000 [36:58<21:54:18,  8.10s/it]

Epoch: 260 | Training loss 2.881510339677334 | Validation loss 2.8094716370105743



  3%|█▉                                                                         | 262/10000 [37:06<21:49:44,  8.07s/it]

Epoch: 261 | Training loss 2.8843924701213837 | Validation loss 2.805660128593445



  3%|█▉                                                                         | 263/10000 [37:14<21:53:30,  8.09s/it]

Epoch: 262 | Training loss 2.879949413239956 | Validation loss 2.811226576566696



  3%|█▉                                                                         | 264/10000 [37:22<21:54:20,  8.10s/it]

Epoch: 263 | Training loss 2.8810257017612457 | Validation loss 2.815747171640396



  3%|█▉                                                                         | 265/10000 [37:31<21:54:43,  8.10s/it]

Epoch: 264 | Training loss 2.88136026263237 | Validation loss 2.8068425953388214



  3%|█▉                                                                         | 266/10000 [37:39<22:02:11,  8.15s/it]

Epoch: 265 | Training loss 2.8858564868569374 | Validation loss 2.806410074234009



  3%|██                                                                         | 267/10000 [37:47<21:58:09,  8.13s/it]

Epoch: 266 | Training loss 2.883491761982441 | Validation loss 2.823417693376541



  3%|██                                                                         | 268/10000 [37:55<21:56:05,  8.11s/it]

Epoch: 267 | Training loss 2.880035661160946 | Validation loss 2.804485946893692



  3%|██                                                                         | 269/10000 [38:03<21:50:01,  8.08s/it]

Epoch: 268 | Training loss 2.884393736720085 | Validation loss 2.812217563390732



  3%|██                                                                         | 270/10000 [38:11<21:51:54,  8.09s/it]

Epoch: 269 | Training loss 2.8823688328266144 | Validation loss 2.8134641647338867



  3%|██                                                                         | 271/10000 [38:19<21:51:41,  8.09s/it]

Epoch: 270 | Training loss 2.8751617074012756 | Validation loss 2.8273790180683136



  3%|██                                                                         | 272/10000 [38:27<21:48:19,  8.07s/it]

Epoch: 271 | Training loss 2.8795828446745872 | Validation loss 2.7982876300811768



  3%|██                                                                         | 273/10000 [38:35<21:51:15,  8.09s/it]

Epoch: 272 | Training loss 2.880386918783188 | Validation loss 2.8141920268535614



  3%|██                                                                         | 274/10000 [38:44<21:56:33,  8.12s/it]

Epoch: 273 | Training loss 2.884956605732441 | Validation loss 2.7986608743667603



  3%|██                                                                         | 275/10000 [38:52<21:57:43,  8.13s/it]

Epoch: 274 | Training loss 2.8851134181022644 | Validation loss 2.834045112133026



  3%|██                                                                         | 276/10000 [39:00<21:57:48,  8.13s/it]

Epoch: 275 | Training loss 2.8784559071063995 | Validation loss 2.8058681786060333



  3%|██                                                                         | 277/10000 [39:08<21:55:35,  8.12s/it]

Epoch: 276 | Training loss 2.8808901831507683 | Validation loss 2.8041596114635468



  3%|██                                                                         | 278/10000 [39:16<22:09:04,  8.20s/it]

Epoch: 277 | Training loss 2.879695236682892 | Validation loss 2.8057044446468353



  3%|██                                                                         | 279/10000 [39:25<22:14:51,  8.24s/it]

Epoch: 278 | Training loss 2.8774944320321083 | Validation loss 2.8011536300182343



  3%|██                                                                         | 280/10000 [39:33<22:12:41,  8.23s/it]

Epoch: 279 | Training loss 2.883869044482708 | Validation loss 2.8049533665180206



  3%|██                                                                         | 281/10000 [39:41<22:08:01,  8.20s/it]

Epoch: 280 | Training loss 2.885078564286232 | Validation loss 2.8097814321517944



  3%|██                                                                         | 282/10000 [39:49<22:03:58,  8.17s/it]

Epoch: 281 | Training loss 2.879194587469101 | Validation loss 2.808283716440201



  3%|██                                                                         | 283/10000 [39:57<22:00:28,  8.15s/it]

Epoch: 282 | Training loss 2.8895048573613167 | Validation loss 2.805786609649658



  3%|██▏                                                                        | 284/10000 [40:05<22:00:04,  8.15s/it]

Epoch: 283 | Training loss 2.880750745534897 | Validation loss 2.8129125237464905



  3%|██▏                                                                        | 285/10000 [40:13<21:57:26,  8.14s/it]

Epoch: 284 | Training loss 2.8730150759220123 | Validation loss 2.8027341961860657



  3%|██▏                                                                        | 286/10000 [40:22<21:59:32,  8.15s/it]

Epoch: 285 | Training loss 2.876193530857563 | Validation loss 2.806150943040848



  3%|██▏                                                                        | 287/10000 [40:30<22:01:43,  8.16s/it]

Epoch: 286 | Training loss 2.8811718225479126 | Validation loss 2.823228508234024



  3%|██▏                                                                        | 288/10000 [40:38<22:01:33,  8.16s/it]

Epoch: 287 | Training loss 2.8803432285785675 | Validation loss 2.8026064932346344



  3%|██▏                                                                        | 289/10000 [40:46<21:56:00,  8.13s/it]

Epoch: 288 | Training loss 2.90156427025795 | Validation loss 2.8186290860176086



  3%|██▏                                                                        | 290/10000 [40:54<21:53:40,  8.12s/it]

Epoch: 289 | Training loss 2.8948431611061096 | Validation loss 2.806619703769684



  3%|██▏                                                                        | 291/10000 [41:02<21:56:33,  8.14s/it]

Epoch: 290 | Training loss 2.888730376958847 | Validation loss 2.807580679655075



  3%|██▏                                                                        | 292/10000 [41:10<21:56:59,  8.14s/it]

Epoch: 291 | Training loss 2.8829996809363365 | Validation loss 2.8252320885658264



  3%|██▏                                                                        | 293/10000 [41:19<21:58:39,  8.15s/it]

Epoch: 292 | Training loss 2.887413911521435 | Validation loss 2.796960264444351



  3%|██▏                                                                        | 294/10000 [41:27<21:59:10,  8.15s/it]

Epoch: 293 | Training loss 2.8864163905382156 | Validation loss 2.841395616531372



  3%|██▏                                                                        | 295/10000 [41:35<21:56:25,  8.14s/it]

Epoch: 294 | Training loss 2.887209601700306 | Validation loss 2.8058135211467743



  3%|██▏                                                                        | 296/10000 [41:43<21:54:43,  8.13s/it]

Epoch: 295 | Training loss 2.8845406994223595 | Validation loss 2.8066835701465607



  3%|██▏                                                                        | 297/10000 [41:51<21:58:38,  8.15s/it]

Epoch: 296 | Training loss 2.884382650256157 | Validation loss 2.8127700984477997



  3%|██▏                                                                        | 298/10000 [41:59<22:00:22,  8.17s/it]

Epoch: 297 | Training loss 2.8889683857560158 | Validation loss 2.8121787309646606



  3%|██▏                                                                        | 299/10000 [42:08<22:04:49,  8.19s/it]

Epoch: 298 | Training loss 2.8861522376537323 | Validation loss 2.8188222646713257



  3%|██▎                                                                        | 300/10000 [42:16<22:04:09,  8.19s/it]

Epoch: 299 | Training loss 2.8817890286445618 | Validation loss 2.8087370693683624



  3%|██▎                                                                        | 301/10000 [42:24<21:55:14,  8.14s/it]

Epoch: 300 | Training loss 2.887520454823971 | Validation loss 2.815861761569977



  3%|██▎                                                                        | 302/10000 [42:32<21:53:20,  8.13s/it]

Epoch: 301 | Training loss 2.88134004175663 | Validation loss 2.805788367986679



  3%|██▎                                                                        | 303/10000 [42:40<22:01:29,  8.18s/it]

Epoch: 302 | Training loss 2.8813536539673805 | Validation loss 2.8133524656295776



  3%|██▎                                                                        | 304/10000 [42:48<22:01:09,  8.18s/it]

Epoch: 303 | Training loss 2.881923921406269 | Validation loss 2.8079830706119537



  3%|██▎                                                                        | 305/10000 [42:57<21:56:30,  8.15s/it]

Epoch: 304 | Training loss 2.8809031695127487 | Validation loss 2.8114874362945557



  3%|██▎                                                                        | 306/10000 [43:05<21:57:09,  8.15s/it]

Epoch: 305 | Training loss 2.877331517636776 | Validation loss 2.8051668405532837



  3%|██▎                                                                        | 307/10000 [43:13<21:59:55,  8.17s/it]

Epoch: 306 | Training loss 2.8792831748723984 | Validation loss 2.8115313947200775



  3%|██▎                                                                        | 308/10000 [43:21<21:59:35,  8.17s/it]

Epoch: 307 | Training loss 2.8813097551465034 | Validation loss 2.812547892332077



  3%|██▎                                                                        | 309/10000 [43:29<21:58:33,  8.16s/it]

Epoch: 308 | Training loss 2.877824805676937 | Validation loss 2.801786184310913



  3%|██▎                                                                        | 310/10000 [43:37<21:54:41,  8.14s/it]

Epoch: 309 | Training loss 2.8818534910678864 | Validation loss 2.805381953716278



  3%|██▎                                                                        | 311/10000 [43:45<21:55:05,  8.14s/it]

Epoch: 310 | Training loss 2.8798633217811584 | Validation loss 2.8028878271579742



  3%|██▎                                                                        | 312/10000 [43:54<21:56:41,  8.15s/it]

Epoch: 311 | Training loss 2.881644532084465 | Validation loss 2.8099328577518463



  3%|██▎                                                                        | 313/10000 [44:02<21:56:13,  8.15s/it]

Epoch: 312 | Training loss 2.87871403247118 | Validation loss 2.7993550300598145



  3%|██▎                                                                        | 314/10000 [44:10<21:56:08,  8.15s/it]

Epoch: 313 | Training loss 2.8799570202827454 | Validation loss 2.8055062294006348



  3%|██▎                                                                        | 315/10000 [44:18<21:58:52,  8.17s/it]

Epoch: 314 | Training loss 2.880453310906887 | Validation loss 2.80463370680809



  3%|██▎                                                                        | 316/10000 [44:26<22:02:18,  8.19s/it]

Epoch: 315 | Training loss 2.8765730187296867 | Validation loss 2.8111677169799805



  3%|██▍                                                                        | 317/10000 [44:35<22:03:54,  8.20s/it]

Epoch: 316 | Training loss 2.8802925422787666 | Validation loss 2.809334874153137



  3%|██▍                                                                        | 318/10000 [44:43<21:57:18,  8.16s/it]

Epoch: 317 | Training loss 2.876428611576557 | Validation loss 2.8019883036613464



  3%|██▍                                                                        | 319/10000 [44:51<22:05:08,  8.21s/it]

Epoch: 318 | Training loss 2.876103140413761 | Validation loss 2.8028871417045593



  3%|██▍                                                                        | 320/10000 [44:59<21:59:37,  8.18s/it]

Epoch: 319 | Training loss 2.882212348282337 | Validation loss 2.798908770084381



  3%|██▍                                                                        | 321/10000 [45:07<21:57:57,  8.17s/it]

Epoch: 320 | Training loss 2.880885034799576 | Validation loss 2.800994783639908



  3%|██▍                                                                        | 322/10000 [45:15<21:59:17,  8.18s/it]

Epoch: 321 | Training loss 2.882189780473709 | Validation loss 2.8020692467689514



  3%|██▍                                                                        | 323/10000 [45:24<22:01:30,  8.19s/it]

Epoch: 322 | Training loss 2.8842076510190964 | Validation loss 2.8076183795928955



  3%|██▍                                                                        | 324/10000 [45:32<22:00:03,  8.19s/it]

Epoch: 323 | Training loss 2.8810568004846573 | Validation loss 2.8010973930358887



  3%|██▍                                                                        | 325/10000 [45:40<22:00:20,  8.19s/it]

Epoch: 324 | Training loss 2.8797008469700813 | Validation loss 2.818633794784546



  3%|██▍                                                                        | 326/10000 [45:48<21:52:14,  8.14s/it]

Epoch: 325 | Training loss 2.8783291205763817 | Validation loss 2.7999152541160583



  3%|██▍                                                                        | 327/10000 [45:56<21:50:09,  8.13s/it]

Epoch: 326 | Training loss 2.877459369599819 | Validation loss 2.8000360429286957



  3%|██▍                                                                        | 328/10000 [46:04<21:51:50,  8.14s/it]

Epoch: 327 | Training loss 2.8765081390738487 | Validation loss 2.8022985458374023



  3%|██▍                                                                        | 329/10000 [46:13<21:52:11,  8.14s/it]

Epoch: 328 | Training loss 2.877350963652134 | Validation loss 2.797757238149643



  3%|██▍                                                                        | 330/10000 [46:21<21:56:14,  8.17s/it]

Epoch: 329 | Training loss 2.880951590836048 | Validation loss 2.806992530822754



  3%|██▍                                                                        | 331/10000 [46:29<21:58:26,  8.18s/it]

Epoch: 330 | Training loss 2.8814110830426216 | Validation loss 2.7990750670433044



  3%|██▍                                                                        | 332/10000 [46:37<21:53:43,  8.15s/it]

Epoch: 331 | Training loss 2.8776023983955383 | Validation loss 2.79659840464592



  3%|██▍                                                                        | 333/10000 [46:45<21:54:45,  8.16s/it]

Epoch: 332 | Training loss 2.8811104893684387 | Validation loss 2.818515568971634



  3%|██▌                                                                        | 334/10000 [46:53<21:57:03,  8.18s/it]

Epoch: 333 | Training loss 2.8781548365950584 | Validation loss 2.7975966930389404



  3%|██▌                                                                        | 335/10000 [47:02<21:56:15,  8.17s/it]

Epoch: 334 | Training loss 2.8773290812969208 | Validation loss 2.8009380102157593



  3%|██▌                                                                        | 336/10000 [47:10<21:51:10,  8.14s/it]

Epoch: 335 | Training loss 2.885528303682804 | Validation loss 2.801472157239914



  3%|██▌                                                                        | 337/10000 [47:18<21:48:29,  8.12s/it]

Epoch: 336 | Training loss 2.8794926777482033 | Validation loss 2.802297055721283



  3%|██▌                                                                        | 338/10000 [47:26<21:46:28,  8.11s/it]

Epoch: 337 | Training loss 2.8763715848326683 | Validation loss 2.807697594165802



  3%|██▌                                                                        | 339/10000 [47:34<21:47:18,  8.12s/it]

Epoch: 338 | Training loss 2.8789147287607193 | Validation loss 2.8158113956451416



  3%|██▌                                                                        | 340/10000 [47:42<21:44:43,  8.10s/it]

Epoch: 339 | Training loss 2.883908435702324 | Validation loss 2.799097239971161



  3%|██▌                                                                        | 341/10000 [47:50<21:45:28,  8.11s/it]

Epoch: 340 | Training loss 2.88103237003088 | Validation loss 2.8122282326221466



  3%|██▌                                                                        | 342/10000 [47:58<21:41:06,  8.08s/it]

Epoch: 341 | Training loss 2.873560145497322 | Validation loss 2.802671104669571



  3%|██▌                                                                        | 343/10000 [48:06<21:40:16,  8.08s/it]

Epoch: 342 | Training loss 2.8827125057578087 | Validation loss 2.8022956550121307



  3%|██▌                                                                        | 344/10000 [48:14<21:40:52,  8.08s/it]

Epoch: 343 | Training loss 2.8803838193416595 | Validation loss 2.803882449865341



  3%|██▌                                                                        | 345/10000 [48:22<21:41:37,  8.09s/it]

Epoch: 344 | Training loss 2.885855384171009 | Validation loss 2.805088698863983



  3%|██▌                                                                        | 346/10000 [48:30<21:39:03,  8.07s/it]

Epoch: 345 | Training loss 2.8807309716939926 | Validation loss 2.8022240698337555



  3%|██▌                                                                        | 347/10000 [48:39<21:41:46,  8.09s/it]

Epoch: 346 | Training loss 2.8800073340535164 | Validation loss 2.819590985774994



  3%|██▌                                                                        | 348/10000 [48:47<21:43:38,  8.10s/it]

Epoch: 347 | Training loss 2.8791159987449646 | Validation loss 2.812655359506607



  3%|██▌                                                                        | 349/10000 [48:55<21:42:34,  8.10s/it]

Epoch: 348 | Training loss 2.8779508098959923 | Validation loss 2.8092797696590424



  4%|██▋                                                                        | 350/10000 [49:03<21:44:02,  8.11s/it]

Epoch: 349 | Training loss 2.8783818259835243 | Validation loss 2.800765037536621



  4%|██▋                                                                        | 351/10000 [49:11<21:41:22,  8.09s/it]

Epoch: 350 | Training loss 2.87916948646307 | Validation loss 2.8015879094600677



  4%|██▋                                                                        | 352/10000 [49:19<21:45:23,  8.12s/it]

Epoch: 351 | Training loss 2.880598947405815 | Validation loss 2.808795243501663



  4%|██▋                                                                        | 353/10000 [49:27<21:46:05,  8.12s/it]

Epoch: 352 | Training loss 2.8799504935741425 | Validation loss 2.79846453666687



  4%|██▋                                                                        | 354/10000 [49:35<21:43:23,  8.11s/it]

Epoch: 353 | Training loss 2.8760970309376717 | Validation loss 2.8021899461746216



  4%|██▋                                                                        | 355/10000 [49:44<21:44:43,  8.12s/it]

Epoch: 354 | Training loss 2.8859182968735695 | Validation loss 2.7992036044597626



  4%|██▋                                                                        | 356/10000 [49:52<21:40:53,  8.09s/it]

Epoch: 355 | Training loss 2.8839484229683876 | Validation loss 2.8105909526348114



  4%|██▋                                                                        | 357/10000 [50:00<21:36:39,  8.07s/it]

Epoch: 356 | Training loss 2.879261612892151 | Validation loss 2.8020670115947723



  4%|██▋                                                                        | 358/10000 [50:08<21:32:07,  8.04s/it]

Epoch: 357 | Training loss 2.8824766352772713 | Validation loss 2.811167985200882



  4%|██▋                                                                        | 359/10000 [50:16<21:27:58,  8.02s/it]

Epoch: 358 | Training loss 2.8817864060401917 | Validation loss 2.8059226870536804



  4%|██▋                                                                        | 360/10000 [50:24<21:30:25,  8.03s/it]

Epoch: 359 | Training loss 2.872012160718441 | Validation loss 2.8143323957920074



  4%|██▋                                                                        | 361/10000 [50:32<21:35:17,  8.06s/it]

Epoch: 360 | Training loss 2.8783544301986694 | Validation loss 2.806490868330002



  4%|██▋                                                                        | 362/10000 [50:40<21:37:05,  8.07s/it]

Epoch: 361 | Training loss 2.874263696372509 | Validation loss 2.7984144389629364



  4%|██▋                                                                        | 363/10000 [50:48<21:40:30,  8.10s/it]

Epoch: 362 | Training loss 2.878857582807541 | Validation loss 2.8231508433818817



  4%|██▋                                                                        | 364/10000 [50:56<21:34:41,  8.06s/it]

Epoch: 363 | Training loss 2.8795838207006454 | Validation loss 2.797460436820984



  4%|██▋                                                                        | 365/10000 [51:04<21:38:49,  8.09s/it]

Epoch: 364 | Training loss 2.878122642636299 | Validation loss 2.8070763647556305



  4%|██▋                                                                        | 366/10000 [51:12<21:37:49,  8.08s/it]

Epoch: 365 | Training loss 2.8762440755963326 | Validation loss 2.8018643260002136



  4%|██▊                                                                        | 367/10000 [51:20<21:40:05,  8.10s/it]

Epoch: 366 | Training loss 2.878058023750782 | Validation loss 2.7980694472789764



  4%|██▊                                                                        | 368/10000 [51:28<21:37:58,  8.09s/it]

Epoch: 367 | Training loss 2.8786312118172646 | Validation loss 2.8105626702308655



  4%|██▊                                                                        | 369/10000 [51:36<21:39:26,  8.10s/it]

Epoch: 368 | Training loss 2.8850817009806633 | Validation loss 2.813641995191574



  4%|██▊                                                                        | 370/10000 [51:45<21:41:02,  8.11s/it]

Epoch: 369 | Training loss 2.876365564763546 | Validation loss 2.800088584423065



  4%|██▊                                                                        | 371/10000 [51:53<21:43:37,  8.12s/it]

Epoch: 370 | Training loss 2.875378094613552 | Validation loss 2.8011162877082825



  4%|██▊                                                                        | 372/10000 [52:01<21:42:33,  8.12s/it]

Epoch: 371 | Training loss 2.88203664124012 | Validation loss 2.807819426059723



  4%|██▊                                                                        | 373/10000 [52:09<21:41:42,  8.11s/it]

Epoch: 372 | Training loss 2.8824310526251793 | Validation loss 2.8120854794979095



  4%|██▊                                                                        | 374/10000 [52:17<21:40:21,  8.11s/it]

Epoch: 373 | Training loss 2.880429193377495 | Validation loss 2.8039097487926483



  4%|██▊                                                                        | 375/10000 [52:25<21:40:51,  8.11s/it]

Epoch: 374 | Training loss 2.8851857781410217 | Validation loss 2.806763768196106



  4%|██▊                                                                        | 376/10000 [52:33<21:40:20,  8.11s/it]

Epoch: 375 | Training loss 2.8829638063907623 | Validation loss 2.8078619241714478



  4%|██▊                                                                        | 377/10000 [52:41<21:41:31,  8.12s/it]

Epoch: 376 | Training loss 2.877923049032688 | Validation loss 2.8102817833423615



  4%|██▊                                                                        | 378/10000 [52:49<21:32:31,  8.06s/it]

Epoch: 377 | Training loss 2.878806658089161 | Validation loss 2.807448297739029



  4%|██▊                                                                        | 379/10000 [52:57<21:31:25,  8.05s/it]

Epoch: 378 | Training loss 2.8792335465550423 | Validation loss 2.7976202964782715



  4%|██▊                                                                        | 380/10000 [53:06<21:37:22,  8.09s/it]

Epoch: 379 | Training loss 2.8789645060896873 | Validation loss 2.8108794689178467



  4%|██▊                                                                        | 381/10000 [53:14<21:37:48,  8.10s/it]

Epoch: 380 | Training loss 2.8815136924386024 | Validation loss 2.8149580657482147



  4%|██▊                                                                        | 382/10000 [53:22<21:34:57,  8.08s/it]

Epoch: 381 | Training loss 2.8776291236281395 | Validation loss 2.798475980758667



  4%|██▊                                                                        | 383/10000 [53:30<21:36:00,  8.09s/it]

Epoch: 382 | Training loss 2.8774502277374268 | Validation loss 2.798080801963806



  4%|██▉                                                                        | 384/10000 [53:38<21:41:34,  8.12s/it]

Epoch: 383 | Training loss 2.8802808299660683 | Validation loss 2.80877748131752



  4%|██▉                                                                        | 385/10000 [53:46<21:38:13,  8.10s/it]

Epoch: 384 | Training loss 2.8802654966712 | Validation loss 2.8028195798397064



  4%|██▉                                                                        | 386/10000 [53:54<21:38:49,  8.11s/it]

Epoch: 385 | Training loss 2.8778380528092384 | Validation loss 2.798231929540634



  4%|██▉                                                                        | 387/10000 [54:02<21:38:38,  8.11s/it]

Epoch: 386 | Training loss 2.8772155195474625 | Validation loss 2.8021669387817383



  4%|██▉                                                                        | 388/10000 [54:10<21:41:23,  8.12s/it]

Epoch: 387 | Training loss 2.877973333001137 | Validation loss 2.8016859889030457



  4%|██▉                                                                        | 389/10000 [54:19<21:41:40,  8.13s/it]

Epoch: 388 | Training loss 2.884292908012867 | Validation loss 2.8039604127407074



  4%|██▉                                                                        | 390/10000 [54:27<21:43:18,  8.14s/it]

Epoch: 389 | Training loss 2.8818352594971657 | Validation loss 2.8057404458522797



  4%|██▉                                                                        | 391/10000 [54:35<21:42:20,  8.13s/it]

Epoch: 390 | Training loss 2.882107235491276 | Validation loss 2.813020199537277



  4%|██▉                                                                        | 392/10000 [54:43<21:42:19,  8.13s/it]

Epoch: 391 | Training loss 2.8740528523921967 | Validation loss 2.804487705230713



  4%|██▉                                                                        | 393/10000 [54:51<21:43:37,  8.14s/it]

Epoch: 392 | Training loss 2.881121687591076 | Validation loss 2.8149832785129547



  4%|██▉                                                                        | 394/10000 [54:59<21:43:08,  8.14s/it]

Epoch: 393 | Training loss 2.8757067024707794 | Validation loss 2.809193581342697



  4%|██▉                                                                        | 395/10000 [55:07<21:44:20,  8.15s/it]

Epoch: 394 | Training loss 2.8819364681839943 | Validation loss 2.8026362359523773



  4%|██▉                                                                        | 396/10000 [55:16<21:43:26,  8.14s/it]

Epoch: 395 | Training loss 2.87767893075943 | Validation loss 2.804858297109604



  4%|██▉                                                                        | 397/10000 [55:24<21:39:16,  8.12s/it]

Epoch: 396 | Training loss 2.8722211197018623 | Validation loss 2.801328867673874



  4%|██▉                                                                        | 398/10000 [55:32<21:35:18,  8.09s/it]

Epoch: 397 | Training loss 2.8789372965693474 | Validation loss 2.8152620792388916



  4%|██▉                                                                        | 399/10000 [55:40<21:37:51,  8.11s/it]

Epoch: 398 | Training loss 2.8843891099095345 | Validation loss 2.8096114695072174



  4%|███                                                                        | 400/10000 [55:48<21:40:14,  8.13s/it]

Epoch: 399 | Training loss 2.8754796385765076 | Validation loss 2.799042195081711



  4%|███                                                                        | 401/10000 [55:56<21:39:04,  8.12s/it]

Epoch: 400 | Training loss 2.87886643409729 | Validation loss 2.8006836473941803



  4%|███                                                                        | 402/10000 [56:04<21:39:41,  8.12s/it]

Epoch: 401 | Training loss 2.8798932284116745 | Validation loss 2.807142496109009



  4%|███                                                                        | 403/10000 [56:12<21:43:20,  8.15s/it]

Epoch: 402 | Training loss 2.8781735226511955 | Validation loss 2.81254905462265



  4%|███                                                                        | 404/10000 [56:21<21:46:13,  8.17s/it]

Epoch: 403 | Training loss 2.8788593634963036 | Validation loss 2.8066427409648895



  4%|███                                                                        | 405/10000 [56:29<21:50:04,  8.19s/it]

Epoch: 404 | Training loss 2.879836343228817 | Validation loss 2.803549438714981



  4%|███                                                                        | 406/10000 [56:37<21:54:37,  8.22s/it]

Epoch: 405 | Training loss 2.879633992910385 | Validation loss 2.8197500109672546



  4%|███                                                                        | 407/10000 [56:45<21:51:03,  8.20s/it]

Epoch: 406 | Training loss 2.876758798956871 | Validation loss 2.796487808227539



  4%|███                                                                        | 408/10000 [56:53<21:45:30,  8.17s/it]

Epoch: 407 | Training loss 2.880433276295662 | Validation loss 2.811428874731064



  4%|███                                                                        | 409/10000 [57:01<21:37:41,  8.12s/it]

Epoch: 408 | Training loss 2.877524457871914 | Validation loss 2.803757429122925



  4%|███                                                                        | 410/10000 [57:10<21:42:25,  8.15s/it]

Epoch: 409 | Training loss 2.877238653600216 | Validation loss 2.8073309659957886



  4%|███                                                                        | 411/10000 [57:18<21:44:39,  8.16s/it]

Epoch: 410 | Training loss 2.8768669441342354 | Validation loss 2.803259998559952



  4%|███                                                                        | 412/10000 [57:26<21:44:14,  8.16s/it]

Epoch: 411 | Training loss 2.880230836570263 | Validation loss 2.799110233783722



  4%|███                                                                        | 413/10000 [57:34<21:40:33,  8.14s/it]

Epoch: 412 | Training loss 2.8809729591012 | Validation loss 2.8026534020900726



  4%|███                                                                        | 414/10000 [57:42<21:38:33,  8.13s/it]

Epoch: 413 | Training loss 2.874659337103367 | Validation loss 2.8141623437404633



  4%|███                                                                        | 415/10000 [57:50<21:38:40,  8.13s/it]

Epoch: 414 | Training loss 2.8779268339276314 | Validation loss 2.8025380671024323



  4%|███                                                                        | 416/10000 [57:59<21:41:54,  8.15s/it]

Epoch: 415 | Training loss 2.874851420521736 | Validation loss 2.805885374546051



  4%|███▏                                                                       | 417/10000 [58:07<21:53:42,  8.23s/it]

Epoch: 416 | Training loss 2.8822412118315697 | Validation loss 2.8067571222782135



  4%|███▏                                                                       | 418/10000 [58:15<21:43:50,  8.16s/it]

Epoch: 417 | Training loss 2.882806956768036 | Validation loss 2.804465115070343



  4%|███▏                                                                       | 419/10000 [58:23<21:40:49,  8.15s/it]

Epoch: 418 | Training loss 2.8829939290881157 | Validation loss 2.80639910697937



  4%|███▏                                                                       | 420/10000 [58:31<21:38:33,  8.13s/it]

Epoch: 419 | Training loss 2.8741481006145477 | Validation loss 2.819655865430832



  4%|███▏                                                                       | 421/10000 [58:39<21:37:40,  8.13s/it]

Epoch: 420 | Training loss 2.8756061494350433 | Validation loss 2.795132100582123



  4%|███▏                                                                       | 422/10000 [58:47<21:37:29,  8.13s/it]

Epoch: 421 | Training loss 2.8748027980327606 | Validation loss 2.8137928545475006



  4%|███▏                                                                       | 423/10000 [58:55<21:33:57,  8.11s/it]

Epoch: 422 | Training loss 2.8758509159088135 | Validation loss 2.800154685974121



  4%|███▏                                                                       | 424/10000 [59:04<21:35:51,  8.12s/it]

Epoch: 423 | Training loss 2.883193276822567 | Validation loss 2.801494747400284



  4%|███▏                                                                       | 425/10000 [59:12<21:35:16,  8.12s/it]

Epoch: 424 | Training loss 2.8806254491209984 | Validation loss 2.801139771938324



  4%|███▏                                                                       | 426/10000 [59:20<21:35:27,  8.12s/it]

Epoch: 425 | Training loss 2.8827929869294167 | Validation loss 2.8166456520557404



  4%|███▏                                                                       | 427/10000 [59:28<21:35:58,  8.12s/it]

Epoch: 426 | Training loss 2.8791310265660286 | Validation loss 2.8050648868083954



  4%|███▏                                                                       | 428/10000 [59:36<21:37:47,  8.13s/it]

Epoch: 427 | Training loss 2.8801324665546417 | Validation loss 2.8105025589466095



  4%|███▏                                                                       | 429/10000 [59:44<21:41:23,  8.16s/it]

Epoch: 428 | Training loss 2.877499036490917 | Validation loss 2.797609329223633



  4%|███▏                                                                       | 430/10000 [59:53<21:40:00,  8.15s/it]

Epoch: 429 | Training loss 2.8787396922707558 | Validation loss 2.8048457503318787



  4%|███▏                                                                     | 431/10000 [1:00:01<21:42:03,  8.16s/it]

Epoch: 430 | Training loss 2.8803145363926888 | Validation loss 2.808080643415451



  4%|███▏                                                                     | 432/10000 [1:00:09<21:45:37,  8.19s/it]

Epoch: 431 | Training loss 2.8793612718582153 | Validation loss 2.8075865507125854



  4%|███▏                                                                     | 433/10000 [1:00:17<21:44:29,  8.18s/it]

Epoch: 432 | Training loss 2.878153569996357 | Validation loss 2.808573752641678



  4%|███▏                                                                     | 434/10000 [1:00:25<21:45:28,  8.19s/it]

Epoch: 433 | Training loss 2.8749737590551376 | Validation loss 2.801048904657364



  4%|███▏                                                                     | 435/10000 [1:00:34<21:48:00,  8.20s/it]

Epoch: 434 | Training loss 2.8757447823882103 | Validation loss 2.8129727840423584



  4%|███▏                                                                     | 436/10000 [1:00:42<21:49:39,  8.22s/it]

Epoch: 435 | Training loss 2.8746388778090477 | Validation loss 2.8035184741020203



  4%|███▏                                                                     | 437/10000 [1:00:50<21:48:02,  8.21s/it]

Epoch: 436 | Training loss 2.876670703291893 | Validation loss 2.8029078245162964



  4%|███▏                                                                     | 438/10000 [1:00:58<21:50:48,  8.23s/it]

Epoch: 437 | Training loss 2.880591742694378 | Validation loss 2.8236614763736725



  4%|███▏                                                                     | 439/10000 [1:01:06<21:48:29,  8.21s/it]

Epoch: 438 | Training loss 2.874137870967388 | Validation loss 2.816397041082382



  4%|███▏                                                                     | 440/10000 [1:01:15<21:44:34,  8.19s/it]

Epoch: 439 | Training loss 2.876477837562561 | Validation loss 2.804591029882431



  4%|███▏                                                                     | 441/10000 [1:01:23<21:39:37,  8.16s/it]

Epoch: 440 | Training loss 2.874868795275688 | Validation loss 2.8010514080524445



  4%|███▏                                                                     | 442/10000 [1:01:31<21:38:05,  8.15s/it]

Epoch: 441 | Training loss 2.8782810494303703 | Validation loss 2.7957263588905334



  4%|███▏                                                                     | 443/10000 [1:01:39<21:43:49,  8.19s/it]

Epoch: 442 | Training loss 2.883573241531849 | Validation loss 2.8024687469005585



  4%|███▏                                                                     | 444/10000 [1:01:47<21:41:50,  8.17s/it]

Epoch: 443 | Training loss 2.882779762148857 | Validation loss 2.804496943950653



  4%|███▏                                                                     | 445/10000 [1:01:55<21:40:40,  8.17s/it]

Epoch: 444 | Training loss 2.883853755891323 | Validation loss 2.8076659440994263



  4%|███▎                                                                     | 446/10000 [1:02:04<21:41:49,  8.18s/it]

Epoch: 445 | Training loss 2.879350371658802 | Validation loss 2.808027684688568



  4%|███▎                                                                     | 447/10000 [1:02:12<21:52:26,  8.24s/it]

Epoch: 446 | Training loss 2.8776794001460075 | Validation loss 2.799984395503998



  4%|███▎                                                                     | 448/10000 [1:02:20<21:50:10,  8.23s/it]

Epoch: 447 | Training loss 2.8796323388814926 | Validation loss 2.8057002425193787



  4%|███▎                                                                     | 449/10000 [1:02:28<21:40:31,  8.17s/it]

Epoch: 448 | Training loss 2.874132387340069 | Validation loss 2.814587652683258



  4%|███▎                                                                     | 450/10000 [1:02:36<21:40:01,  8.17s/it]

Epoch: 449 | Training loss 2.878288298845291 | Validation loss 2.8059933483600616



  5%|███▎                                                                     | 451/10000 [1:02:45<21:41:10,  8.18s/it]

Epoch: 450 | Training loss 2.877050518989563 | Validation loss 2.8226912319660187



  5%|███▎                                                                     | 452/10000 [1:02:53<21:40:06,  8.17s/it]

Epoch: 451 | Training loss 2.877387210726738 | Validation loss 2.799840569496155



  5%|███▎                                                                     | 453/10000 [1:03:01<21:38:24,  8.16s/it]

Epoch: 452 | Training loss 2.8816431760787964 | Validation loss 2.8050448894500732



  5%|███▎                                                                     | 454/10000 [1:03:09<21:40:49,  8.18s/it]

Epoch: 453 | Training loss 2.876336969435215 | Validation loss 2.804933339357376



  5%|███▎                                                                     | 455/10000 [1:03:17<21:37:21,  8.16s/it]

Epoch: 454 | Training loss 2.8733282685279846 | Validation loss 2.79997056722641



  5%|███▎                                                                     | 456/10000 [1:03:25<21:37:41,  8.16s/it]

Epoch: 455 | Training loss 2.878783293068409 | Validation loss 2.801557570695877



  5%|███▎                                                                     | 457/10000 [1:03:33<21:35:46,  8.15s/it]

Epoch: 456 | Training loss 2.882932849228382 | Validation loss 2.8206894993782043



  5%|███▎                                                                     | 458/10000 [1:03:42<21:32:11,  8.13s/it]

Epoch: 457 | Training loss 2.8769159466028214 | Validation loss 2.816593289375305



  5%|███▎                                                                     | 459/10000 [1:03:50<21:36:12,  8.15s/it]

Epoch: 458 | Training loss 2.8814572617411613 | Validation loss 2.8041951060295105



  5%|███▎                                                                     | 460/10000 [1:03:58<21:35:23,  8.15s/it]

Epoch: 459 | Training loss 2.8711046800017357 | Validation loss 2.80816712975502



  5%|███▎                                                                     | 461/10000 [1:04:06<21:34:39,  8.14s/it]

Epoch: 460 | Training loss 2.87556529045105 | Validation loss 2.799983561038971



  5%|███▎                                                                     | 462/10000 [1:04:14<21:37:44,  8.16s/it]

Epoch: 461 | Training loss 2.8759813457727432 | Validation loss 2.795691579580307



  5%|███▍                                                                     | 463/10000 [1:04:22<21:43:03,  8.20s/it]

Epoch: 462 | Training loss 2.874042049050331 | Validation loss 2.808328002691269



  5%|███▍                                                                     | 464/10000 [1:04:31<21:41:24,  8.19s/it]

Epoch: 463 | Training loss 2.8729459196329117 | Validation loss 2.8008580803871155



  5%|███▍                                                                     | 465/10000 [1:04:39<21:37:51,  8.17s/it]

Epoch: 464 | Training loss 2.8727829828858376 | Validation loss 2.8011545538902283



  5%|███▍                                                                     | 466/10000 [1:04:47<21:33:53,  8.14s/it]

Epoch: 465 | Training loss 2.879326283931732 | Validation loss 2.8002071380615234



  5%|███▍                                                                     | 467/10000 [1:04:55<21:33:16,  8.14s/it]

Epoch: 466 | Training loss 2.8767420649528503 | Validation loss 2.8029271364212036



  5%|███▍                                                                     | 468/10000 [1:05:03<21:33:34,  8.14s/it]

Epoch: 467 | Training loss 2.8779226914048195 | Validation loss 2.808911085128784



  5%|███▍                                                                     | 469/10000 [1:05:11<21:30:47,  8.13s/it]

Epoch: 468 | Training loss 2.874771326780319 | Validation loss 2.810552477836609



  5%|███▍                                                                     | 470/10000 [1:05:19<21:31:43,  8.13s/it]

Epoch: 469 | Training loss 2.875734567642212 | Validation loss 2.8159587681293488



  5%|███▍                                                                     | 471/10000 [1:05:27<21:30:55,  8.13s/it]

Epoch: 470 | Training loss 2.877198725938797 | Validation loss 2.8066510558128357



  5%|███▍                                                                     | 472/10000 [1:05:35<21:24:25,  8.09s/it]

Epoch: 471 | Training loss 2.8722241520881653 | Validation loss 2.8019685447216034



  5%|███▍                                                                     | 473/10000 [1:05:44<21:25:50,  8.10s/it]

Epoch: 472 | Training loss 2.8798399046063423 | Validation loss 2.803952068090439



  5%|███▍                                                                     | 474/10000 [1:05:52<21:24:33,  8.09s/it]

Epoch: 473 | Training loss 2.8746333867311478 | Validation loss 2.8125592470169067



  5%|███▍                                                                     | 475/10000 [1:06:00<21:24:06,  8.09s/it]

Epoch: 474 | Training loss 2.884258419275284 | Validation loss 2.8053249418735504



  5%|███▍                                                                     | 476/10000 [1:06:08<21:24:41,  8.09s/it]

Epoch: 475 | Training loss 2.877286396920681 | Validation loss 2.799782872200012



  5%|███▍                                                                     | 477/10000 [1:06:16<21:26:26,  8.11s/it]

Epoch: 476 | Training loss 2.8795407190918922 | Validation loss 2.801682561635971



  5%|███▍                                                                     | 478/10000 [1:06:24<21:22:24,  8.08s/it]

Epoch: 477 | Training loss 2.875663585960865 | Validation loss 2.797769159078598



  5%|███▍                                                                     | 479/10000 [1:06:32<21:23:15,  8.09s/it]

Epoch: 478 | Training loss 2.8746610954403877 | Validation loss 2.8064694106578827



  5%|███▌                                                                     | 480/10000 [1:06:40<21:25:19,  8.10s/it]

Epoch: 479 | Training loss 2.8779202103614807 | Validation loss 2.8089821338653564



  5%|███▌                                                                     | 481/10000 [1:06:48<21:28:58,  8.12s/it]

Epoch: 480 | Training loss 2.8719188645482063 | Validation loss 2.807273417711258



  5%|███▌                                                                     | 482/10000 [1:06:56<21:23:16,  8.09s/it]

Epoch: 481 | Training loss 2.876613013446331 | Validation loss 2.8085987269878387



  5%|███▌                                                                     | 483/10000 [1:07:05<21:25:56,  8.11s/it]

Epoch: 482 | Training loss 2.882605142891407 | Validation loss 2.806077629327774



  5%|███▌                                                                     | 484/10000 [1:07:13<21:27:51,  8.12s/it]

Epoch: 483 | Training loss 2.8827634379267693 | Validation loss 2.8133960962295532



  5%|███▌                                                                     | 485/10000 [1:07:21<21:27:29,  8.12s/it]

Epoch: 484 | Training loss 2.871698260307312 | Validation loss 2.803792029619217



  5%|███▌                                                                     | 486/10000 [1:07:29<21:29:41,  8.13s/it]

Epoch: 485 | Training loss 2.878294810652733 | Validation loss 2.803451269865036



  5%|███▌                                                                     | 487/10000 [1:07:37<21:26:44,  8.12s/it]

Epoch: 486 | Training loss 2.880313776433468 | Validation loss 2.801459163427353



  5%|███▌                                                                     | 488/10000 [1:07:45<21:27:24,  8.12s/it]

Epoch: 487 | Training loss 2.8779498264193535 | Validation loss 2.8123127222061157



  5%|███▌                                                                     | 489/10000 [1:07:53<21:25:41,  8.11s/it]

Epoch: 488 | Training loss 2.874907188117504 | Validation loss 2.816597044467926



  5%|███▌                                                                     | 490/10000 [1:08:01<21:22:47,  8.09s/it]

Epoch: 489 | Training loss 2.8725648000836372 | Validation loss 2.81498447060585



  5%|███▌                                                                     | 491/10000 [1:08:09<21:18:44,  8.07s/it]

Epoch: 490 | Training loss 2.881333574652672 | Validation loss 2.8017444014549255



  5%|███▌                                                                     | 492/10000 [1:08:18<21:22:19,  8.09s/it]

Epoch: 491 | Training loss 2.8757324516773224 | Validation loss 2.8076781630516052



  5%|███▌                                                                     | 493/10000 [1:08:26<21:21:57,  8.09s/it]

Epoch: 492 | Training loss 2.8768366798758507 | Validation loss 2.809410333633423



  5%|███▌                                                                     | 494/10000 [1:08:34<21:23:05,  8.10s/it]

Epoch: 493 | Training loss 2.87674081325531 | Validation loss 2.803864985704422



  5%|███▌                                                                     | 495/10000 [1:08:42<21:24:29,  8.11s/it]

Epoch: 494 | Training loss 2.874382495880127 | Validation loss 2.8064084351062775



  5%|███▌                                                                     | 496/10000 [1:08:50<21:27:47,  8.13s/it]

Epoch: 495 | Training loss 2.8739721700549126 | Validation loss 2.7976573407649994



  5%|███▋                                                                     | 497/10000 [1:08:58<21:21:11,  8.09s/it]

Epoch: 496 | Training loss 2.87980242818594 | Validation loss 2.8146398961544037



  5%|███▋                                                                     | 498/10000 [1:09:06<21:20:21,  8.08s/it]

Epoch: 497 | Training loss 2.871804393827915 | Validation loss 2.8050980269908905



  5%|███▋                                                                     | 499/10000 [1:09:14<21:21:00,  8.09s/it]

Epoch: 498 | Training loss 2.8770399913191795 | Validation loss 2.8053078055381775



  5%|███▋                                                                     | 500/10000 [1:09:22<21:23:03,  8.10s/it]

Epoch: 499 | Training loss 2.8844355791807175 | Validation loss 2.7981630861759186



  5%|███▋                                                                     | 501/10000 [1:09:30<21:22:46,  8.10s/it]

Epoch: 500 | Training loss 2.8745406046509743 | Validation loss 2.7986286282539368



  5%|███▋                                                                     | 502/10000 [1:09:39<21:28:26,  8.14s/it]

The best model was saved!
Epoch: 501 | Training loss 2.8740441128611565 | Validation loss 2.794308692216873



  5%|███▋                                                                     | 503/10000 [1:09:47<21:30:12,  8.15s/it]

Epoch: 502 | Training loss 2.876932956278324 | Validation loss 2.8148085474967957



  5%|███▋                                                                     | 504/10000 [1:09:55<21:29:19,  8.15s/it]

Epoch: 503 | Training loss 2.8821722269058228 | Validation loss 2.806839495897293



  5%|███▋                                                                     | 505/10000 [1:10:03<21:32:55,  8.17s/it]

Epoch: 504 | Training loss 2.8778949454426765 | Validation loss 2.8016558587551117



  5%|███▋                                                                     | 506/10000 [1:10:11<21:25:45,  8.13s/it]

Epoch: 505 | Training loss 2.8715256676077843 | Validation loss 2.7978053390979767



  5%|███▋                                                                     | 507/10000 [1:10:19<21:23:00,  8.11s/it]

Epoch: 506 | Training loss 2.8764689192175865 | Validation loss 2.797475278377533



  5%|███▋                                                                     | 508/10000 [1:10:28<21:27:47,  8.14s/it]

Epoch: 507 | Training loss 2.8783919885754585 | Validation loss 2.8056384921073914



  5%|███▋                                                                     | 509/10000 [1:10:36<21:24:16,  8.12s/it]

Epoch: 508 | Training loss 2.884109251201153 | Validation loss 2.8187520802021027



  5%|███▋                                                                     | 510/10000 [1:10:44<21:22:41,  8.11s/it]

Epoch: 509 | Training loss 2.875503718852997 | Validation loss 2.800489157438278



  5%|███▋                                                                     | 511/10000 [1:10:52<21:28:14,  8.15s/it]

Epoch: 510 | Training loss 2.880109429359436 | Validation loss 2.81449431180954



  5%|███▋                                                                     | 512/10000 [1:11:00<21:26:00,  8.13s/it]

Epoch: 511 | Training loss 2.87506390362978 | Validation loss 2.7978683710098267



  5%|███▋                                                                     | 513/10000 [1:11:08<21:22:08,  8.11s/it]

Epoch: 512 | Training loss 2.8725218772888184 | Validation loss 2.809027522802353



  5%|███▊                                                                     | 514/10000 [1:11:16<21:17:51,  8.08s/it]

Epoch: 513 | Training loss 2.878172144293785 | Validation loss 2.80381241440773



  5%|███▊                                                                     | 515/10000 [1:11:24<21:21:01,  8.10s/it]

Epoch: 514 | Training loss 2.873913459479809 | Validation loss 2.798401355743408



  5%|███▊                                                                     | 516/10000 [1:11:32<21:24:25,  8.13s/it]

Epoch: 515 | Training loss 2.8823939710855484 | Validation loss 2.8038387298583984



  5%|███▊                                                                     | 517/10000 [1:11:41<21:29:07,  8.16s/it]

Epoch: 516 | Training loss 2.873929776251316 | Validation loss 2.815029799938202



  5%|███▊                                                                     | 518/10000 [1:11:49<21:27:20,  8.15s/it]

Epoch: 517 | Training loss 2.8763637244701385 | Validation loss 2.8034729659557343



  5%|███▊                                                                     | 519/10000 [1:11:57<21:28:12,  8.15s/it]

Epoch: 518 | Training loss 2.8783697858452797 | Validation loss 2.8052559792995453



  5%|███▊                                                                     | 520/10000 [1:12:05<21:27:44,  8.15s/it]

Epoch: 519 | Training loss 2.878537394106388 | Validation loss 2.812657505273819



  5%|███▊                                                                     | 521/10000 [1:12:13<21:29:01,  8.16s/it]

Epoch: 520 | Training loss 2.8787576258182526 | Validation loss 2.810077130794525



  5%|███▊                                                                     | 522/10000 [1:12:21<21:30:29,  8.17s/it]

Epoch: 521 | Training loss 2.8763606548309326 | Validation loss 2.804132968187332



  5%|███▊                                                                     | 523/10000 [1:12:30<21:28:46,  8.16s/it]

Epoch: 522 | Training loss 2.8753994181752205 | Validation loss 2.8117844462394714



  5%|███▊                                                                     | 524/10000 [1:12:38<21:23:43,  8.13s/it]

Epoch: 523 | Training loss 2.8740233406424522 | Validation loss 2.810616672039032



  5%|███▊                                                                     | 525/10000 [1:12:46<21:23:10,  8.13s/it]

Epoch: 524 | Training loss 2.8728592917323112 | Validation loss 2.8024990260601044



  5%|███▊                                                                     | 526/10000 [1:12:54<21:20:31,  8.11s/it]

Epoch: 525 | Training loss 2.8764735758304596 | Validation loss 2.7966553568840027



  5%|███▊                                                                     | 527/10000 [1:13:02<21:20:02,  8.11s/it]

Epoch: 526 | Training loss 2.8718250170350075 | Validation loss 2.8025615215301514



  5%|███▊                                                                     | 528/10000 [1:13:10<21:22:41,  8.13s/it]

Epoch: 527 | Training loss 2.874529004096985 | Validation loss 2.8018544018268585



  5%|███▊                                                                     | 529/10000 [1:13:18<21:26:41,  8.15s/it]

Epoch: 528 | Training loss 2.876224309206009 | Validation loss 2.805676430463791



  5%|███▊                                                                     | 530/10000 [1:13:27<21:28:43,  8.17s/it]

Epoch: 529 | Training loss 2.874922126531601 | Validation loss 2.8018019795417786



  5%|███▉                                                                     | 531/10000 [1:13:35<21:24:04,  8.14s/it]

Epoch: 530 | Training loss 2.879607640206814 | Validation loss 2.8077909350395203



  5%|███▉                                                                     | 532/10000 [1:13:43<21:19:11,  8.11s/it]

Epoch: 531 | Training loss 2.882495753467083 | Validation loss 2.809190332889557



  5%|███▉                                                                     | 533/10000 [1:13:51<21:18:10,  8.10s/it]

Epoch: 532 | Training loss 2.8734892457723618 | Validation loss 2.8005057275295258



  5%|███▉                                                                     | 534/10000 [1:13:59<21:15:05,  8.08s/it]

Epoch: 533 | Training loss 2.8759513944387436 | Validation loss 2.80232834815979



  5%|███▉                                                                     | 535/10000 [1:14:07<21:19:33,  8.11s/it]

Epoch: 534 | Training loss 2.879431039094925 | Validation loss 2.805571049451828



  5%|███▉                                                                     | 536/10000 [1:14:15<21:14:25,  8.08s/it]

Epoch: 535 | Training loss 2.8754326477646828 | Validation loss 2.803173750638962



  5%|███▉                                                                     | 537/10000 [1:14:23<21:13:54,  8.08s/it]

Epoch: 536 | Training loss 2.8739578053355217 | Validation loss 2.802807480096817



  5%|███▉                                                                     | 538/10000 [1:14:31<21:14:58,  8.08s/it]

Epoch: 537 | Training loss 2.8743200674653053 | Validation loss 2.8090222775936127



  5%|███▉                                                                     | 539/10000 [1:14:39<21:22:07,  8.13s/it]

The best model was saved!
Epoch: 538 | Training loss 2.8703794330358505 | Validation loss 2.794149726629257



  5%|███▉                                                                     | 540/10000 [1:14:48<21:25:05,  8.15s/it]

Epoch: 539 | Training loss 2.8827154338359833 | Validation loss 2.8016471564769745



  5%|███▉                                                                     | 541/10000 [1:14:56<21:26:24,  8.16s/it]

Epoch: 540 | Training loss 2.880936734378338 | Validation loss 2.8090746700763702



  5%|███▉                                                                     | 542/10000 [1:15:04<21:28:17,  8.17s/it]

Epoch: 541 | Training loss 2.877834565937519 | Validation loss 2.80549293756485



  5%|███▉                                                                     | 543/10000 [1:15:12<21:29:57,  8.18s/it]

Epoch: 542 | Training loss 2.8779025822877884 | Validation loss 2.8108982145786285



  5%|███▉                                                                     | 544/10000 [1:15:20<21:33:28,  8.21s/it]

Epoch: 543 | Training loss 2.8786452412605286 | Validation loss 2.797572076320648



  5%|███▉                                                                     | 545/10000 [1:15:28<21:25:34,  8.16s/it]

Epoch: 544 | Training loss 2.8761073276400566 | Validation loss 2.8016416132450104



  5%|███▉                                                                     | 546/10000 [1:15:37<21:24:36,  8.15s/it]

Epoch: 545 | Training loss 2.8817152455449104 | Validation loss 2.7980037927627563



  5%|███▉                                                                     | 547/10000 [1:15:45<21:28:02,  8.18s/it]

Epoch: 546 | Training loss 2.880371481180191 | Validation loss 2.809326320886612



  5%|████                                                                     | 548/10000 [1:15:53<21:25:50,  8.16s/it]

Epoch: 547 | Training loss 2.8837924376130104 | Validation loss 2.8105648159980774



  5%|████                                                                     | 549/10000 [1:16:01<21:31:00,  8.20s/it]

Epoch: 548 | Training loss 2.8814909532666206 | Validation loss 2.8061682879924774



  6%|████                                                                     | 550/10000 [1:16:09<21:27:17,  8.17s/it]

Epoch: 549 | Training loss 2.8807164132595062 | Validation loss 2.8079289495944977



  6%|████                                                                     | 551/10000 [1:16:18<21:31:17,  8.20s/it]

Epoch: 550 | Training loss 2.877367153763771 | Validation loss 2.810807913541794



  6%|████                                                                     | 552/10000 [1:16:26<21:30:30,  8.20s/it]

Epoch: 551 | Training loss 2.876903399825096 | Validation loss 2.806829422712326



  6%|████                                                                     | 553/10000 [1:16:34<21:25:14,  8.16s/it]

Epoch: 552 | Training loss 2.8816875144839287 | Validation loss 2.8190564811229706



  6%|████                                                                     | 554/10000 [1:16:42<21:25:03,  8.16s/it]

Epoch: 553 | Training loss 2.877623200416565 | Validation loss 2.8064030706882477



  6%|████                                                                     | 555/10000 [1:16:50<21:22:43,  8.15s/it]

Epoch: 554 | Training loss 2.8742921948432922 | Validation loss 2.807154595851898



  6%|████                                                                     | 556/10000 [1:16:58<21:17:24,  8.12s/it]

Epoch: 555 | Training loss 2.8803063929080963 | Validation loss 2.8044897615909576



  6%|████                                                                     | 557/10000 [1:17:06<21:21:38,  8.14s/it]

Epoch: 556 | Training loss 2.8805915117263794 | Validation loss 2.8095080852508545



  6%|████                                                                     | 558/10000 [1:17:15<21:28:42,  8.19s/it]

Epoch: 557 | Training loss 2.8733774796128273 | Validation loss 2.801608681678772



  6%|████                                                                     | 559/10000 [1:17:23<21:26:45,  8.18s/it]

Epoch: 558 | Training loss 2.8775505870580673 | Validation loss 2.799132853746414



  6%|████                                                                     | 560/10000 [1:17:31<21:23:40,  8.16s/it]

Epoch: 559 | Training loss 2.8765754103660583 | Validation loss 2.804723173379898



  6%|████                                                                     | 561/10000 [1:17:39<21:20:11,  8.14s/it]

Epoch: 560 | Training loss 2.8833343982696533 | Validation loss 2.811217963695526



  6%|████                                                                     | 562/10000 [1:17:47<21:21:16,  8.15s/it]

Epoch: 561 | Training loss 2.8776023536920547 | Validation loss 2.796909838914871



  6%|████                                                                     | 563/10000 [1:17:55<21:18:27,  8.13s/it]

Epoch: 562 | Training loss 2.889394037425518 | Validation loss 2.8047000765800476



  6%|████                                                                     | 564/10000 [1:18:03<21:19:19,  8.13s/it]

Epoch: 563 | Training loss 2.875442259013653 | Validation loss 2.8101598024368286



  6%|████                                                                     | 565/10000 [1:18:12<21:19:45,  8.14s/it]

Epoch: 564 | Training loss 2.874747581779957 | Validation loss 2.811022400856018



  6%|████▏                                                                    | 566/10000 [1:18:20<21:20:09,  8.14s/it]

Epoch: 565 | Training loss 2.8735892847180367 | Validation loss 2.800055146217346



  6%|████▏                                                                    | 567/10000 [1:18:28<21:21:14,  8.15s/it]

Epoch: 566 | Training loss 2.8762489333748817 | Validation loss 2.7985439896583557



  6%|████▏                                                                    | 568/10000 [1:18:36<21:19:51,  8.14s/it]

Epoch: 567 | Training loss 2.8816429153084755 | Validation loss 2.8003839254379272



  6%|████▏                                                                    | 569/10000 [1:18:44<21:18:34,  8.13s/it]

Epoch: 568 | Training loss 2.8793089762330055 | Validation loss 2.8170548379421234



  6%|████▏                                                                    | 570/10000 [1:18:52<21:20:38,  8.15s/it]

Epoch: 569 | Training loss 2.876584880053997 | Validation loss 2.809955507516861



  6%|████▏                                                                    | 571/10000 [1:19:01<21:22:03,  8.16s/it]

Epoch: 570 | Training loss 2.8764518201351166 | Validation loss 2.800952285528183



  6%|████▏                                                                    | 572/10000 [1:19:09<21:20:03,  8.15s/it]

Epoch: 571 | Training loss 2.8760851696133614 | Validation loss 2.804291158914566



  6%|████▏                                                                    | 573/10000 [1:19:17<21:17:09,  8.13s/it]

Epoch: 572 | Training loss 2.874756157398224 | Validation loss 2.8012690246105194



  6%|████▏                                                                    | 574/10000 [1:19:25<21:19:30,  8.14s/it]

Epoch: 573 | Training loss 2.8803377971053123 | Validation loss 2.812298357486725



  6%|████▏                                                                    | 575/10000 [1:19:33<21:18:03,  8.14s/it]

Epoch: 574 | Training loss 2.874603785574436 | Validation loss 2.8007260859012604



  6%|████▏                                                                    | 576/10000 [1:19:41<21:19:24,  8.15s/it]

Epoch: 575 | Training loss 2.872314266860485 | Validation loss 2.8176649808883667



  6%|████▏                                                                    | 577/10000 [1:19:49<21:12:47,  8.10s/it]

Epoch: 576 | Training loss 2.8743912130594254 | Validation loss 2.801009088754654



  6%|████▏                                                                    | 578/10000 [1:19:57<21:13:23,  8.11s/it]

Epoch: 577 | Training loss 2.884509727358818 | Validation loss 2.8053587079048157



  6%|████▏                                                                    | 579/10000 [1:20:05<21:12:12,  8.10s/it]

Epoch: 578 | Training loss 2.8788172975182533 | Validation loss 2.808481216430664



  6%|████▏                                                                    | 580/10000 [1:20:13<21:05:20,  8.06s/it]

Epoch: 579 | Training loss 2.8771169632673264 | Validation loss 2.79810294508934



  6%|████▏                                                                    | 581/10000 [1:20:21<21:07:49,  8.08s/it]

Epoch: 580 | Training loss 2.8796351701021194 | Validation loss 2.8009168207645416



  6%|████▏                                                                    | 582/10000 [1:20:30<21:07:31,  8.08s/it]

Epoch: 581 | Training loss 2.878058075904846 | Validation loss 2.7997328341007233



  6%|████▎                                                                    | 583/10000 [1:20:38<21:06:26,  8.07s/it]

Epoch: 582 | Training loss 2.880203001201153 | Validation loss 2.8044238686561584



  6%|████▎                                                                    | 584/10000 [1:20:46<21:08:33,  8.08s/it]

Epoch: 583 | Training loss 2.8763721883296967 | Validation loss 2.803412139415741



  6%|████▎                                                                    | 585/10000 [1:20:54<21:09:29,  8.09s/it]

Epoch: 584 | Training loss 2.88304653018713 | Validation loss 2.813014954328537



  6%|████▎                                                                    | 586/10000 [1:21:02<21:11:18,  8.10s/it]

Epoch: 585 | Training loss 2.879286766052246 | Validation loss 2.8043493926525116



  6%|████▎                                                                    | 587/10000 [1:21:10<21:12:31,  8.11s/it]

Epoch: 586 | Training loss 2.8737624809145927 | Validation loss 2.8087680637836456



  6%|████▎                                                                    | 588/10000 [1:21:18<21:11:11,  8.10s/it]

Epoch: 587 | Training loss 2.8796083331108093 | Validation loss 2.8050975799560547



  6%|████▎                                                                    | 589/10000 [1:21:26<21:11:01,  8.10s/it]

Epoch: 588 | Training loss 2.876502439379692 | Validation loss 2.80537086725235



  6%|████▎                                                                    | 590/10000 [1:21:34<21:10:05,  8.10s/it]

Epoch: 589 | Training loss 2.880489304661751 | Validation loss 2.804650366306305



  6%|████▎                                                                    | 591/10000 [1:21:42<21:09:23,  8.09s/it]

Epoch: 590 | Training loss 2.8769183605909348 | Validation loss 2.799019068479538



  6%|████▎                                                                    | 592/10000 [1:21:51<21:11:15,  8.11s/it]

Epoch: 591 | Training loss 2.878200203180313 | Validation loss 2.802960067987442



  6%|████▎                                                                    | 593/10000 [1:21:59<21:08:36,  8.09s/it]

Epoch: 592 | Training loss 2.8782291561365128 | Validation loss 2.7995945513248444



  6%|████▎                                                                    | 594/10000 [1:22:07<21:13:20,  8.12s/it]

Epoch: 593 | Training loss 2.8738955184817314 | Validation loss 2.8008144199848175



  6%|████▎                                                                    | 595/10000 [1:22:15<21:14:21,  8.13s/it]

Epoch: 594 | Training loss 2.87505029886961 | Validation loss 2.8031548857688904



  6%|████▎                                                                    | 596/10000 [1:22:23<21:14:18,  8.13s/it]

Epoch: 595 | Training loss 2.877770058810711 | Validation loss 2.7996861338615417



  6%|████▎                                                                    | 597/10000 [1:22:31<21:16:27,  8.14s/it]

Epoch: 596 | Training loss 2.8776921182870865 | Validation loss 2.8103495240211487



  6%|████▎                                                                    | 598/10000 [1:22:39<21:11:22,  8.11s/it]

Epoch: 597 | Training loss 2.8735703974962234 | Validation loss 2.8019180595874786



  6%|████▎                                                                    | 599/10000 [1:22:48<21:15:08,  8.14s/it]

Epoch: 598 | Training loss 2.8743594735860825 | Validation loss 2.8027290403842926



  6%|████▍                                                                    | 600/10000 [1:22:56<21:10:46,  8.11s/it]

Epoch: 599 | Training loss 2.8715547174215317 | Validation loss 2.8007755875587463



  6%|████▍                                                                    | 601/10000 [1:23:04<21:14:36,  8.14s/it]

Epoch: 600 | Training loss 2.8753861784934998 | Validation loss 2.811845302581787



  6%|████▍                                                                    | 602/10000 [1:23:12<21:15:49,  8.15s/it]

Epoch: 601 | Training loss 2.8840990513563156 | Validation loss 2.810758501291275



  6%|████▍                                                                    | 603/10000 [1:23:20<21:17:19,  8.16s/it]

Epoch: 602 | Training loss 2.8746489360928535 | Validation loss 2.8033665120601654



  6%|████▍                                                                    | 604/10000 [1:23:28<21:09:21,  8.11s/it]

Epoch: 603 | Training loss 2.8738778233528137 | Validation loss 2.7969034910202026



  6%|████▍                                                                    | 605/10000 [1:23:36<21:07:23,  8.09s/it]

Epoch: 604 | Training loss 2.878659188747406 | Validation loss 2.8001761436462402



  6%|████▍                                                                    | 606/10000 [1:23:44<21:06:04,  8.09s/it]

Epoch: 605 | Training loss 2.8779441490769386 | Validation loss 2.8000397086143494



  6%|████▍                                                                    | 607/10000 [1:23:52<21:11:07,  8.12s/it]

Epoch: 606 | Training loss 2.8780109882354736 | Validation loss 2.798608660697937



  6%|████▍                                                                    | 608/10000 [1:24:01<21:08:44,  8.11s/it]

Epoch: 607 | Training loss 2.876840829849243 | Validation loss 2.807600438594818



  6%|████▍                                                                    | 609/10000 [1:24:09<21:08:30,  8.10s/it]

Epoch: 608 | Training loss 2.8722185492515564 | Validation loss 2.798739433288574



  6%|████▍                                                                    | 610/10000 [1:24:17<21:09:40,  8.11s/it]

Epoch: 609 | Training loss 2.8787425607442856 | Validation loss 2.803756058216095



  6%|████▍                                                                    | 611/10000 [1:24:25<21:11:09,  8.12s/it]

Epoch: 610 | Training loss 2.874554991722107 | Validation loss 2.8104404509067535



  6%|████▍                                                                    | 612/10000 [1:24:33<21:12:22,  8.13s/it]

Epoch: 611 | Training loss 2.8770436197519302 | Validation loss 2.7965304851531982



  6%|████▍                                                                    | 613/10000 [1:24:41<21:14:23,  8.15s/it]

Epoch: 612 | Training loss 2.8741598278284073 | Validation loss 2.79899263381958



  6%|████▍                                                                    | 614/10000 [1:24:50<21:21:45,  8.19s/it]

Epoch: 613 | Training loss 2.8807550594210625 | Validation loss 2.8025782704353333



  6%|████▍                                                                    | 615/10000 [1:24:58<21:20:56,  8.19s/it]

Epoch: 614 | Training loss 2.8775250762701035 | Validation loss 2.802737385034561



  6%|████▍                                                                    | 616/10000 [1:25:06<21:15:36,  8.16s/it]

Epoch: 615 | Training loss 2.873172849416733 | Validation loss 2.802939385175705



  6%|████▌                                                                    | 617/10000 [1:25:14<21:16:33,  8.16s/it]

Epoch: 616 | Training loss 2.8752882704138756 | Validation loss 2.8142634332180023



  6%|████▌                                                                    | 618/10000 [1:25:22<21:15:48,  8.16s/it]

Epoch: 617 | Training loss 2.8804973140358925 | Validation loss 2.8083942532539368



  6%|████▌                                                                    | 619/10000 [1:25:30<21:16:05,  8.16s/it]

Epoch: 618 | Training loss 2.8765589967370033 | Validation loss 2.80350923538208



  6%|████▌                                                                    | 620/10000 [1:25:39<21:19:01,  8.18s/it]

Epoch: 619 | Training loss 2.8777607455849648 | Validation loss 2.8024128675460815



  6%|████▌                                                                    | 621/10000 [1:25:47<21:19:37,  8.19s/it]

Epoch: 620 | Training loss 2.8778844699263573 | Validation loss 2.8084321916103363



  6%|████▌                                                                    | 622/10000 [1:25:55<21:18:24,  8.18s/it]

Epoch: 621 | Training loss 2.8728253170847893 | Validation loss 2.8008198738098145



  6%|████▌                                                                    | 623/10000 [1:26:03<21:21:39,  8.20s/it]

Epoch: 622 | Training loss 2.8837979808449745 | Validation loss 2.80557182431221



  6%|████▌                                                                    | 624/10000 [1:26:11<21:20:37,  8.20s/it]

Epoch: 623 | Training loss 2.878368303179741 | Validation loss 2.808160275220871



  6%|████▌                                                                    | 625/10000 [1:26:20<21:25:05,  8.22s/it]

Epoch: 624 | Training loss 2.8768790289759636 | Validation loss 2.8092693090438843



  6%|████▌                                                                    | 626/10000 [1:26:28<21:24:34,  8.22s/it]

Epoch: 625 | Training loss 2.8762607276439667 | Validation loss 2.798350363969803



  6%|████▌                                                                    | 627/10000 [1:26:36<21:14:40,  8.16s/it]

Epoch: 626 | Training loss 2.8819545581936836 | Validation loss 2.8114474415779114



  6%|████▌                                                                    | 628/10000 [1:26:44<21:14:22,  8.16s/it]

Epoch: 627 | Training loss 2.8815069794654846 | Validation loss 2.8154922425746918



  6%|████▌                                                                    | 629/10000 [1:26:52<21:12:43,  8.15s/it]

Epoch: 628 | Training loss 2.8762610629200935 | Validation loss 2.8012940883636475



  6%|████▌                                                                    | 630/10000 [1:27:00<21:12:29,  8.15s/it]

Epoch: 629 | Training loss 2.8854509741067886 | Validation loss 2.8073690235614777



  6%|████▌                                                                    | 631/10000 [1:27:08<21:11:09,  8.14s/it]

Epoch: 630 | Training loss 2.872070051729679 | Validation loss 2.808484733104706



  6%|████▌                                                                    | 632/10000 [1:27:16<21:07:45,  8.12s/it]

Epoch: 631 | Training loss 2.872922755777836 | Validation loss 2.7994372844696045



  6%|████▌                                                                    | 633/10000 [1:27:25<21:04:32,  8.10s/it]

Epoch: 632 | Training loss 2.875852547585964 | Validation loss 2.8084098398685455



  6%|████▋                                                                    | 634/10000 [1:27:33<21:04:25,  8.10s/it]

Epoch: 633 | Training loss 2.8785406798124313 | Validation loss 2.803413063287735



  6%|████▋                                                                    | 635/10000 [1:27:41<21:08:07,  8.12s/it]

Epoch: 634 | Training loss 2.872861884534359 | Validation loss 2.8024416863918304



  6%|████▋                                                                    | 636/10000 [1:27:49<21:09:02,  8.13s/it]

Epoch: 635 | Training loss 2.88040691614151 | Validation loss 2.8100070357322693



  6%|████▋                                                                    | 637/10000 [1:27:57<21:16:14,  8.18s/it]

Epoch: 636 | Training loss 2.881143420934677 | Validation loss 2.80393448472023



  6%|████▋                                                                    | 638/10000 [1:28:05<21:19:05,  8.20s/it]

Epoch: 637 | Training loss 2.8813986256718636 | Validation loss 2.800707459449768



  6%|████▋                                                                    | 639/10000 [1:28:14<21:19:37,  8.20s/it]

Epoch: 638 | Training loss 2.875728189945221 | Validation loss 2.8010732233524323



  6%|████▋                                                                    | 640/10000 [1:28:22<21:17:55,  8.19s/it]

Epoch: 639 | Training loss 2.87475199252367 | Validation loss 2.8083863258361816



  6%|████▋                                                                    | 641/10000 [1:28:30<21:13:37,  8.17s/it]

Epoch: 640 | Training loss 2.8827031925320625 | Validation loss 2.804137349128723



  6%|████▋                                                                    | 642/10000 [1:28:38<21:10:33,  8.15s/it]

Epoch: 641 | Training loss 2.8755775690078735 | Validation loss 2.8058374524116516



  6%|████▋                                                                    | 643/10000 [1:28:46<21:07:34,  8.13s/it]

Epoch: 642 | Training loss 2.880696013569832 | Validation loss 2.800611197948456



  6%|████▋                                                                    | 644/10000 [1:28:54<21:06:07,  8.12s/it]

Epoch: 643 | Training loss 2.8778554126620293 | Validation loss 2.8101675510406494



  6%|████▋                                                                    | 645/10000 [1:29:02<21:01:33,  8.09s/it]

Epoch: 644 | Training loss 2.8761623203754425 | Validation loss 2.8083685636520386



  6%|████▋                                                                    | 646/10000 [1:29:10<21:00:25,  8.08s/it]

Epoch: 645 | Training loss 2.878027729690075 | Validation loss 2.803810566663742



  6%|████▋                                                                    | 647/10000 [1:29:18<21:03:11,  8.10s/it]

Epoch: 646 | Training loss 2.8788361698389053 | Validation loss 2.8111273050308228



  6%|████▋                                                                    | 648/10000 [1:29:27<21:04:52,  8.12s/it]

Epoch: 647 | Training loss 2.875421106815338 | Validation loss 2.79978409409523



  6%|████▋                                                                    | 649/10000 [1:29:35<21:08:31,  8.14s/it]

Epoch: 648 | Training loss 2.872467190027237 | Validation loss 2.8007932603359222



  6%|████▋                                                                    | 650/10000 [1:29:43<21:10:58,  8.16s/it]

Epoch: 649 | Training loss 2.874069906771183 | Validation loss 2.8109437823295593



  7%|████▊                                                                    | 651/10000 [1:29:51<21:07:41,  8.14s/it]

Epoch: 650 | Training loss 2.874501682817936 | Validation loss 2.811136394739151



  7%|████▊                                                                    | 652/10000 [1:29:59<21:06:41,  8.13s/it]

Epoch: 651 | Training loss 2.8749508187174797 | Validation loss 2.8154430091381073



  7%|████▊                                                                    | 653/10000 [1:30:07<21:10:18,  8.15s/it]

Epoch: 652 | Training loss 2.8771710842847824 | Validation loss 2.8028707802295685



  7%|████▊                                                                    | 654/10000 [1:30:16<21:07:42,  8.14s/it]

Epoch: 653 | Training loss 2.879816748201847 | Validation loss 2.800884246826172



  7%|████▊                                                                    | 655/10000 [1:30:24<21:05:07,  8.12s/it]

Epoch: 654 | Training loss 2.8839123472571373 | Validation loss 2.805654287338257



  7%|████▊                                                                    | 656/10000 [1:30:32<21:02:08,  8.10s/it]

Epoch: 655 | Training loss 2.8788081407546997 | Validation loss 2.7990781664848328



  7%|████▊                                                                    | 657/10000 [1:30:40<21:00:21,  8.09s/it]

Epoch: 656 | Training loss 2.8817769289016724 | Validation loss 2.8005082607269287



  7%|████▊                                                                    | 658/10000 [1:30:48<21:04:21,  8.12s/it]

Epoch: 657 | Training loss 2.878051407635212 | Validation loss 2.8042809665203094



  7%|████▊                                                                    | 659/10000 [1:30:56<21:05:28,  8.13s/it]

Epoch: 658 | Training loss 2.8785651177167892 | Validation loss 2.8093436658382416



  7%|████▊                                                                    | 660/10000 [1:31:04<21:06:20,  8.13s/it]

Epoch: 659 | Training loss 2.879624418914318 | Validation loss 2.8075472116470337



  7%|████▊                                                                    | 661/10000 [1:31:12<21:06:18,  8.14s/it]

Epoch: 660 | Training loss 2.870112717151642 | Validation loss 2.8066757321357727



  7%|████▊                                                                    | 662/10000 [1:31:21<21:06:04,  8.13s/it]

Epoch: 661 | Training loss 2.8746132776141167 | Validation loss 2.799460291862488



  7%|████▊                                                                    | 663/10000 [1:31:29<21:09:26,  8.16s/it]

Epoch: 662 | Training loss 2.880996473133564 | Validation loss 2.8121267557144165



  7%|████▊                                                                    | 664/10000 [1:31:37<21:05:15,  8.13s/it]

Epoch: 663 | Training loss 2.8738339319825172 | Validation loss 2.8170162439346313



  7%|████▊                                                                    | 665/10000 [1:31:45<21:05:51,  8.14s/it]

Epoch: 664 | Training loss 2.8787234723567963 | Validation loss 2.801888346672058



  7%|████▊                                                                    | 666/10000 [1:31:53<21:05:33,  8.14s/it]

Epoch: 665 | Training loss 2.880250468850136 | Validation loss 2.802983731031418



  7%|████▊                                                                    | 667/10000 [1:32:01<21:09:39,  8.16s/it]

Epoch: 666 | Training loss 2.874976634979248 | Validation loss 2.804413139820099



  7%|████▉                                                                    | 668/10000 [1:32:09<21:05:54,  8.14s/it]

Epoch: 667 | Training loss 2.8789767175912857 | Validation loss 2.802650421857834



  7%|████▉                                                                    | 669/10000 [1:32:18<21:08:38,  8.16s/it]

Epoch: 668 | Training loss 2.878734141588211 | Validation loss 2.8068235218524933



  7%|████▉                                                                    | 670/10000 [1:32:26<21:11:49,  8.18s/it]

Epoch: 669 | Training loss 2.8764792755246162 | Validation loss 2.8148883283138275



  7%|████▉                                                                    | 671/10000 [1:32:34<21:13:09,  8.19s/it]

Epoch: 670 | Training loss 2.8792951181530952 | Validation loss 2.799980640411377



  7%|████▉                                                                    | 672/10000 [1:32:42<21:12:39,  8.19s/it]

Epoch: 671 | Training loss 2.8769439831376076 | Validation loss 2.8025574684143066



  7%|████▉                                                                    | 673/10000 [1:32:50<21:12:37,  8.19s/it]

Epoch: 672 | Training loss 2.877782605588436 | Validation loss 2.826203227043152



  7%|████▉                                                                    | 674/10000 [1:32:59<21:10:20,  8.17s/it]

Epoch: 673 | Training loss 2.88299074023962 | Validation loss 2.8120770156383514



  7%|████▉                                                                    | 675/10000 [1:33:07<21:12:51,  8.19s/it]

Epoch: 674 | Training loss 2.881400965154171 | Validation loss 2.8153924345970154



  7%|████▉                                                                    | 676/10000 [1:33:15<21:07:05,  8.15s/it]

Epoch: 675 | Training loss 2.8733565360307693 | Validation loss 2.807079464197159



  7%|████▉                                                                    | 677/10000 [1:33:23<21:03:43,  8.13s/it]

Epoch: 676 | Training loss 2.87453443557024 | Validation loss 2.8131270110607147



  7%|████▉                                                                    | 678/10000 [1:33:31<21:07:59,  8.16s/it]

Epoch: 677 | Training loss 2.8782463371753693 | Validation loss 2.8035446405410767



  7%|████▉                                                                    | 679/10000 [1:33:39<21:07:13,  8.16s/it]

Epoch: 678 | Training loss 2.8726933747529984 | Validation loss 2.8078795969486237



  7%|████▉                                                                    | 680/10000 [1:33:47<21:07:28,  8.16s/it]

Epoch: 679 | Training loss 2.876770608127117 | Validation loss 2.8043252825737



  7%|████▉                                                                    | 681/10000 [1:33:56<21:10:25,  8.18s/it]

Epoch: 680 | Training loss 2.876023232936859 | Validation loss 2.810955047607422



  7%|████▉                                                                    | 682/10000 [1:34:04<21:10:42,  8.18s/it]

Epoch: 681 | Training loss 2.8781536370515823 | Validation loss 2.8065414130687714



  7%|████▉                                                                    | 683/10000 [1:34:12<21:11:17,  8.19s/it]

Epoch: 682 | Training loss 2.872032530605793 | Validation loss 2.81473046541214



  7%|████▉                                                                    | 684/10000 [1:34:20<21:11:35,  8.19s/it]

Epoch: 683 | Training loss 2.8786089569330215 | Validation loss 2.811642825603485



  7%|█████                                                                    | 685/10000 [1:34:28<21:11:43,  8.19s/it]

Epoch: 684 | Training loss 2.8744683861732483 | Validation loss 2.8116108775138855



  7%|█████                                                                    | 686/10000 [1:34:37<21:05:17,  8.15s/it]

Epoch: 685 | Training loss 2.8723688572645187 | Validation loss 2.800815463066101



  7%|█████                                                                    | 687/10000 [1:34:45<20:59:21,  8.11s/it]

Epoch: 686 | Training loss 2.8785996586084366 | Validation loss 2.804798424243927



  7%|█████                                                                    | 688/10000 [1:34:53<20:55:10,  8.09s/it]

Epoch: 687 | Training loss 2.877297528088093 | Validation loss 2.804536283016205



  7%|█████                                                                    | 689/10000 [1:35:01<20:52:04,  8.07s/it]

Epoch: 688 | Training loss 2.87616066634655 | Validation loss 2.8096013963222504



  7%|█████                                                                    | 690/10000 [1:35:09<20:46:50,  8.04s/it]

Epoch: 689 | Training loss 2.880674235522747 | Validation loss 2.804218977689743



  7%|█████                                                                    | 691/10000 [1:35:17<20:47:36,  8.04s/it]

Epoch: 690 | Training loss 2.879101626574993 | Validation loss 2.821712076663971



  7%|█████                                                                    | 692/10000 [1:35:25<20:51:26,  8.07s/it]

Epoch: 691 | Training loss 2.8776003047823906 | Validation loss 2.8100838661193848



  7%|█████                                                                    | 693/10000 [1:35:33<20:54:23,  8.09s/it]

Epoch: 692 | Training loss 2.8850088715553284 | Validation loss 2.8003305196762085



  7%|█████                                                                    | 694/10000 [1:35:41<20:55:05,  8.09s/it]

Epoch: 693 | Training loss 2.8785614743828773 | Validation loss 2.8190726339817047



  7%|█████                                                                    | 695/10000 [1:35:49<20:48:54,  8.05s/it]

Epoch: 694 | Training loss 2.8749314695596695 | Validation loss 2.8014434576034546



  7%|█████                                                                    | 696/10000 [1:35:57<20:55:27,  8.10s/it]

Epoch: 695 | Training loss 2.8752450048923492 | Validation loss 2.8053207099437714



  7%|█████                                                                    | 697/10000 [1:36:05<20:56:20,  8.10s/it]

Epoch: 696 | Training loss 2.8814574629068375 | Validation loss 2.811510920524597



  7%|█████                                                                    | 698/10000 [1:36:13<20:59:32,  8.12s/it]

Epoch: 697 | Training loss 2.877540335059166 | Validation loss 2.809396654367447



  7%|█████                                                                    | 699/10000 [1:36:22<20:59:10,  8.12s/it]

Epoch: 698 | Training loss 2.8757366612553596 | Validation loss 2.807953506708145



  7%|█████                                                                    | 700/10000 [1:36:30<20:59:26,  8.13s/it]

Epoch: 699 | Training loss 2.872824214398861 | Validation loss 2.801781088113785



  7%|█████                                                                    | 701/10000 [1:36:38<20:59:45,  8.13s/it]

Epoch: 700 | Training loss 2.8729202076792717 | Validation loss 2.798324018716812



  7%|█████                                                                    | 702/10000 [1:36:46<21:00:31,  8.13s/it]

Epoch: 701 | Training loss 2.8758548349142075 | Validation loss 2.7976323664188385



  7%|█████▏                                                                   | 703/10000 [1:36:54<20:57:26,  8.12s/it]

Epoch: 702 | Training loss 2.875827081501484 | Validation loss 2.7982956171035767



  7%|█████▏                                                                   | 704/10000 [1:37:02<20:56:44,  8.11s/it]

Epoch: 703 | Training loss 2.876371517777443 | Validation loss 2.8018758594989777



  7%|█████▏                                                                   | 705/10000 [1:37:10<20:56:58,  8.11s/it]

Epoch: 704 | Training loss 2.8742543682456017 | Validation loss 2.80520698428154



  7%|█████▏                                                                   | 706/10000 [1:37:18<21:01:01,  8.14s/it]

Epoch: 705 | Training loss 2.874073699116707 | Validation loss 2.7989823818206787



  7%|█████▏                                                                   | 707/10000 [1:37:27<21:05:27,  8.17s/it]

Epoch: 706 | Training loss 2.8734748736023903 | Validation loss 2.7975286841392517



  7%|█████▏                                                                   | 708/10000 [1:37:35<20:57:15,  8.12s/it]

Epoch: 707 | Training loss 2.876470163464546 | Validation loss 2.8031564950942993



  7%|█████▏                                                                   | 709/10000 [1:37:43<21:01:36,  8.15s/it]

Epoch: 708 | Training loss 2.87603909522295 | Validation loss 2.8026887476444244



  7%|█████▏                                                                   | 710/10000 [1:37:51<21:02:12,  8.15s/it]

Epoch: 709 | Training loss 2.8774505108594894 | Validation loss 2.8184843361377716



  7%|█████▏                                                                   | 711/10000 [1:37:59<20:55:21,  8.11s/it]

Epoch: 710 | Training loss 2.875859871506691 | Validation loss 2.800809293985367



  7%|█████▏                                                                   | 712/10000 [1:38:07<21:01:00,  8.15s/it]

Epoch: 711 | Training loss 2.8720372915267944 | Validation loss 2.8040718138217926



  7%|█████▏                                                                   | 713/10000 [1:38:15<20:57:23,  8.12s/it]

Epoch: 712 | Training loss 2.8721801415085793 | Validation loss 2.8097708225250244



  7%|█████▏                                                                   | 714/10000 [1:38:24<21:02:49,  8.16s/it]

Epoch: 713 | Training loss 2.873501144349575 | Validation loss 2.8061026334762573



  7%|█████▏                                                                   | 715/10000 [1:38:32<20:58:32,  8.13s/it]

Epoch: 714 | Training loss 2.877988189458847 | Validation loss 2.8130842745304108



  7%|█████▏                                                                   | 716/10000 [1:38:40<21:04:24,  8.17s/it]

Epoch: 715 | Training loss 2.875812128186226 | Validation loss 2.814323902130127



  7%|█████▏                                                                   | 717/10000 [1:38:48<21:06:07,  8.18s/it]

Epoch: 716 | Training loss 2.8795629292726517 | Validation loss 2.803690552711487



  7%|█████▏                                                                   | 718/10000 [1:38:56<21:04:21,  8.17s/it]

Epoch: 717 | Training loss 2.8798573687672615 | Validation loss 2.817564904689789



  7%|█████▏                                                                   | 719/10000 [1:39:05<21:07:18,  8.19s/it]

Epoch: 718 | Training loss 2.877152644097805 | Validation loss 2.7997673749923706



  7%|█████▎                                                                   | 720/10000 [1:39:13<21:08:40,  8.20s/it]

Epoch: 719 | Training loss 2.8781543225049973 | Validation loss 2.808800548315048



  7%|█████▎                                                                   | 721/10000 [1:39:21<21:11:00,  8.22s/it]

Epoch: 720 | Training loss 2.8790797889232635 | Validation loss 2.795440673828125



  7%|█████▎                                                                   | 722/10000 [1:39:29<21:07:36,  8.20s/it]

Epoch: 721 | Training loss 2.874516561627388 | Validation loss 2.812925845384598



  7%|█████▎                                                                   | 723/10000 [1:39:37<21:04:34,  8.18s/it]

Epoch: 722 | Training loss 2.8765572980046272 | Validation loss 2.8004690408706665



  7%|█████▎                                                                   | 724/10000 [1:39:46<21:05:19,  8.18s/it]

Epoch: 723 | Training loss 2.881165735423565 | Validation loss 2.813653528690338



  7%|█████▎                                                                   | 725/10000 [1:39:54<21:12:10,  8.23s/it]

Epoch: 724 | Training loss 2.879126764833927 | Validation loss 2.8056057393550873



  7%|█████▎                                                                   | 726/10000 [1:40:02<21:10:12,  8.22s/it]

Epoch: 725 | Training loss 2.8790592178702354 | Validation loss 2.806782364845276



  7%|█████▎                                                                   | 727/10000 [1:40:10<21:05:05,  8.19s/it]

Epoch: 726 | Training loss 2.875971905887127 | Validation loss 2.7964982390403748



  7%|█████▎                                                                   | 728/10000 [1:40:18<21:03:59,  8.18s/it]

Epoch: 727 | Training loss 2.8778054416179657 | Validation loss 2.799759477376938



  7%|█████▎                                                                   | 729/10000 [1:40:26<21:00:13,  8.16s/it]

Epoch: 728 | Training loss 2.8741110414266586 | Validation loss 2.800782173871994



  7%|█████▎                                                                   | 730/10000 [1:40:35<21:06:53,  8.20s/it]

Epoch: 729 | Training loss 2.875135727226734 | Validation loss 2.8067507147789



  7%|█████▎                                                                   | 731/10000 [1:40:43<20:58:40,  8.15s/it]

Epoch: 730 | Training loss 2.881640613079071 | Validation loss 2.8087795674800873



  7%|█████▎                                                                   | 732/10000 [1:40:51<21:00:13,  8.16s/it]

Epoch: 731 | Training loss 2.8752697706222534 | Validation loss 2.798108071088791



  7%|█████▎                                                                   | 733/10000 [1:40:59<21:01:54,  8.17s/it]

Epoch: 732 | Training loss 2.874903403222561 | Validation loss 2.799133449792862



  7%|█████▎                                                                   | 734/10000 [1:41:07<20:58:53,  8.15s/it]

Epoch: 733 | Training loss 2.872923232614994 | Validation loss 2.7995398938655853



  7%|█████▎                                                                   | 735/10000 [1:41:15<20:54:19,  8.12s/it]

Epoch: 734 | Training loss 2.874131552875042 | Validation loss 2.8031594455242157



  7%|█████▎                                                                   | 736/10000 [1:41:23<20:54:41,  8.13s/it]

Epoch: 735 | Training loss 2.879867561161518 | Validation loss 2.8148833513259888



  7%|█████▍                                                                   | 737/10000 [1:41:32<20:54:13,  8.12s/it]

Epoch: 736 | Training loss 2.879209779202938 | Validation loss 2.804125279188156



  7%|█████▍                                                                   | 738/10000 [1:41:40<20:58:52,  8.16s/it]

Epoch: 737 | Training loss 2.8804973661899567 | Validation loss 2.8115671277046204



  7%|█████▍                                                                   | 739/10000 [1:41:48<21:02:10,  8.18s/it]

Epoch: 738 | Training loss 2.871839389204979 | Validation loss 2.813898354768753



  7%|█████▍                                                                   | 740/10000 [1:41:56<21:03:39,  8.19s/it]

Epoch: 739 | Training loss 2.8769311904907227 | Validation loss 2.7993334233760834



  7%|█████▍                                                                   | 741/10000 [1:42:04<20:58:52,  8.16s/it]

Epoch: 740 | Training loss 2.875964306294918 | Validation loss 2.8148966133594513



  7%|█████▍                                                                   | 742/10000 [1:42:13<21:02:13,  8.18s/it]

Epoch: 741 | Training loss 2.8694542422890663 | Validation loss 2.805883288383484



  7%|█████▍                                                                   | 743/10000 [1:42:21<20:58:30,  8.16s/it]

Epoch: 742 | Training loss 2.87446191906929 | Validation loss 2.814865827560425



  7%|█████▍                                                                   | 744/10000 [1:42:29<21:00:49,  8.17s/it]

Epoch: 743 | Training loss 2.879722833633423 | Validation loss 2.815919131040573



  7%|█████▍                                                                   | 745/10000 [1:42:37<20:59:35,  8.17s/it]

Epoch: 744 | Training loss 2.8777549117803574 | Validation loss 2.805694103240967



  7%|█████▍                                                                   | 746/10000 [1:42:45<20:58:08,  8.16s/it]

Epoch: 745 | Training loss 2.884085863828659 | Validation loss 2.800648808479309



  7%|█████▍                                                                   | 747/10000 [1:42:53<20:58:48,  8.16s/it]

Epoch: 746 | Training loss 2.880539335310459 | Validation loss 2.813505619764328



  7%|█████▍                                                                   | 748/10000 [1:43:02<21:01:36,  8.18s/it]

Epoch: 747 | Training loss 2.8771005049347878 | Validation loss 2.806325525045395



  7%|█████▍                                                                   | 749/10000 [1:43:10<21:05:17,  8.21s/it]

Epoch: 748 | Training loss 2.8752371594309807 | Validation loss 2.799960643053055



  8%|█████▍                                                                   | 750/10000 [1:43:18<21:03:12,  8.19s/it]

Epoch: 749 | Training loss 2.870048314332962 | Validation loss 2.8024835288524628



  8%|█████▍                                                                   | 751/10000 [1:43:26<20:55:59,  8.15s/it]

Epoch: 750 | Training loss 2.874674528837204 | Validation loss 2.8038089871406555



  8%|█████▍                                                                   | 752/10000 [1:43:34<20:57:16,  8.16s/it]

Epoch: 751 | Training loss 2.876880496740341 | Validation loss 2.801580995321274



  8%|█████▍                                                                   | 753/10000 [1:43:42<20:56:05,  8.15s/it]

Epoch: 752 | Training loss 2.880766898393631 | Validation loss 2.807317078113556



  8%|█████▌                                                                   | 754/10000 [1:43:50<20:51:36,  8.12s/it]

Epoch: 753 | Training loss 2.8724229633808136 | Validation loss 2.802816331386566



  8%|█████▌                                                                   | 755/10000 [1:43:58<20:49:13,  8.11s/it]

Epoch: 754 | Training loss 2.8754678443074226 | Validation loss 2.800385296344757



  8%|█████▌                                                                   | 756/10000 [1:44:07<20:50:59,  8.12s/it]

Epoch: 755 | Training loss 2.876040630042553 | Validation loss 2.8066890835762024



  8%|█████▌                                                                   | 757/10000 [1:44:15<20:52:13,  8.13s/it]

Epoch: 756 | Training loss 2.875266082584858 | Validation loss 2.8091123402118683



  8%|█████▌                                                                   | 758/10000 [1:44:23<20:55:20,  8.15s/it]

Epoch: 757 | Training loss 2.8762606233358383 | Validation loss 2.804592400789261



  8%|█████▌                                                                   | 759/10000 [1:44:31<20:52:59,  8.14s/it]

Epoch: 758 | Training loss 2.877570353448391 | Validation loss 2.8140827417373657



  8%|█████▌                                                                   | 760/10000 [1:44:39<20:50:54,  8.12s/it]

Epoch: 759 | Training loss 2.880252905189991 | Validation loss 2.804216116666794



  8%|█████▌                                                                   | 761/10000 [1:44:47<20:47:03,  8.10s/it]

Epoch: 760 | Training loss 2.8788128197193146 | Validation loss 2.8093872666358948



  8%|█████▌                                                                   | 762/10000 [1:44:55<20:47:52,  8.10s/it]

Epoch: 761 | Training loss 2.8724258691072464 | Validation loss 2.8001632690429688



  8%|█████▌                                                                   | 763/10000 [1:45:03<20:48:33,  8.11s/it]

Epoch: 762 | Training loss 2.877621866762638 | Validation loss 2.8070081770420074



  8%|█████▌                                                                   | 764/10000 [1:45:12<20:50:11,  8.12s/it]

Epoch: 763 | Training loss 2.875121220946312 | Validation loss 2.8109154999256134



  8%|█████▌                                                                   | 765/10000 [1:45:20<20:54:53,  8.15s/it]

Epoch: 764 | Training loss 2.8752115666866302 | Validation loss 2.805027514696121



  8%|█████▌                                                                   | 766/10000 [1:45:28<20:58:02,  8.17s/it]

Epoch: 765 | Training loss 2.877613715827465 | Validation loss 2.8062082827091217



  8%|█████▌                                                                   | 767/10000 [1:45:36<20:58:58,  8.18s/it]

Epoch: 766 | Training loss 2.8724119886755943 | Validation loss 2.8151233792304993



  8%|█████▌                                                                   | 768/10000 [1:45:45<21:15:18,  8.29s/it]

Epoch: 767 | Training loss 2.8732027783989906 | Validation loss 2.8018115162849426



  8%|█████▌                                                                   | 769/10000 [1:45:59<26:12:22, 10.22s/it]

Epoch: 768 | Training loss 2.880354344844818 | Validation loss 2.8101266026496887



  8%|█████▌                                                                   | 770/10000 [1:46:18<32:44:16, 12.77s/it]

Epoch: 769 | Training loss 2.8769123926758766 | Validation loss 2.802157998085022



  8%|█████▋                                                                   | 771/10000 [1:46:29<31:11:46, 12.17s/it]

Epoch: 770 | Training loss 2.877151742577553 | Validation loss 2.8013696670532227



  8%|█████▋                                                                   | 772/10000 [1:46:39<29:39:26, 11.57s/it]

Epoch: 771 | Training loss 2.875436596572399 | Validation loss 2.7973425686359406



  8%|█████▋                                                                   | 773/10000 [1:46:50<28:49:15, 11.24s/it]

Epoch: 772 | Training loss 2.871018707752228 | Validation loss 2.8035875856876373



  8%|█████▋                                                                   | 774/10000 [1:47:00<28:11:07, 11.00s/it]

Epoch: 773 | Training loss 2.879401311278343 | Validation loss 2.8030045330524445



  8%|█████▋                                                                   | 775/10000 [1:47:10<27:44:26, 10.83s/it]

Epoch: 774 | Training loss 2.8743614703416824 | Validation loss 2.7971730530261993



  8%|█████▋                                                                   | 776/10000 [1:47:21<27:12:43, 10.62s/it]

Epoch: 775 | Training loss 2.8801242113113403 | Validation loss 2.8195050954818726



  8%|█████▋                                                                   | 777/10000 [1:47:30<25:55:27, 10.12s/it]

Epoch: 776 | Training loss 2.8778325840830803 | Validation loss 2.8009411990642548



  8%|█████▋                                                                   | 778/10000 [1:47:40<26:14:21, 10.24s/it]

Epoch: 777 | Training loss 2.878616653382778 | Validation loss 2.8061404824256897



  8%|█████▋                                                                   | 779/10000 [1:47:51<26:30:29, 10.35s/it]

Epoch: 778 | Training loss 2.877180978655815 | Validation loss 2.8077004849910736



  8%|█████▋                                                                   | 780/10000 [1:48:01<26:16:30, 10.26s/it]

Epoch: 779 | Training loss 2.8816454857587814 | Validation loss 2.8070698976516724



  8%|█████▋                                                                   | 781/10000 [1:48:10<25:24:36,  9.92s/it]

Epoch: 780 | Training loss 2.877968281507492 | Validation loss 2.8007532358169556



  8%|█████▋                                                                   | 782/10000 [1:48:19<24:47:33,  9.68s/it]

Epoch: 781 | Training loss 2.8736961856484413 | Validation loss 2.8035057187080383



  8%|█████▋                                                                   | 783/10000 [1:48:28<24:26:48,  9.55s/it]

Epoch: 782 | Training loss 2.8750843554735184 | Validation loss 2.8062209486961365



  8%|█████▋                                                                   | 784/10000 [1:48:37<23:48:56,  9.30s/it]

Epoch: 783 | Training loss 2.8734259381890297 | Validation loss 2.80287367105484



  8%|█████▋                                                                   | 785/10000 [1:48:46<23:20:07,  9.12s/it]

Epoch: 784 | Training loss 2.8793317526578903 | Validation loss 2.8001734912395477



  8%|█████▋                                                                   | 786/10000 [1:48:54<22:59:25,  8.98s/it]

Epoch: 785 | Training loss 2.8777820467948914 | Validation loss 2.795729011297226



  8%|█████▋                                                                   | 787/10000 [1:49:03<22:43:22,  8.88s/it]

Epoch: 786 | Training loss 2.8761336281895638 | Validation loss 2.8047837615013123



  8%|█████▊                                                                   | 788/10000 [1:49:12<22:41:55,  8.87s/it]

Epoch: 787 | Training loss 2.8783590346574783 | Validation loss 2.806048184633255



  8%|█████▊                                                                   | 789/10000 [1:49:21<23:05:14,  9.02s/it]

Epoch: 788 | Training loss 2.878369480371475 | Validation loss 2.8148439526557922



  8%|█████▊                                                                   | 790/10000 [1:49:31<23:18:58,  9.11s/it]

Epoch: 789 | Training loss 2.8804777413606644 | Validation loss 2.8036166727542877



  8%|█████▊                                                                   | 791/10000 [1:49:39<23:11:13,  9.06s/it]

Epoch: 790 | Training loss 2.8727169036865234 | Validation loss 2.8034651577472687



  8%|█████▊                                                                   | 792/10000 [1:49:48<23:07:02,  9.04s/it]

The best model was saved!
Epoch: 791 | Training loss 2.8797307908535004 | Validation loss 2.7936735451221466



  8%|█████▊                                                                   | 793/10000 [1:49:57<23:04:13,  9.02s/it]

Epoch: 792 | Training loss 2.876755468547344 | Validation loss 2.7981731295585632



  8%|█████▊                                                                   | 794/10000 [1:50:06<23:04:18,  9.02s/it]

Epoch: 793 | Training loss 2.8754092305898666 | Validation loss 2.8034183979034424



  8%|█████▊                                                                   | 795/10000 [1:50:15<23:04:20,  9.02s/it]

Epoch: 794 | Training loss 2.8787983059883118 | Validation loss 2.809262216091156



  8%|█████▊                                                                   | 796/10000 [1:50:24<22:52:44,  8.95s/it]

Epoch: 795 | Training loss 2.8733483999967575 | Validation loss 2.798533469438553



  8%|█████▊                                                                   | 797/10000 [1:50:33<22:53:23,  8.95s/it]

Epoch: 796 | Training loss 2.8760406896471977 | Validation loss 2.799324780702591



  8%|█████▊                                                                   | 798/10000 [1:50:42<22:54:29,  8.96s/it]

Epoch: 797 | Training loss 2.8738773316144943 | Validation loss 2.8023197650909424



  8%|█████▊                                                                   | 799/10000 [1:50:51<22:42:58,  8.89s/it]

Epoch: 798 | Training loss 2.8734268993139267 | Validation loss 2.8029473423957825



  8%|█████▊                                                                   | 800/10000 [1:51:00<22:40:01,  8.87s/it]

Epoch: 799 | Training loss 2.8818841725587845 | Validation loss 2.809898942708969



  8%|█████▊                                                                   | 801/10000 [1:51:09<22:40:36,  8.87s/it]

Epoch: 800 | Training loss 2.8759066238999367 | Validation loss 2.808721899986267



  8%|█████▊                                                                   | 802/10000 [1:51:17<22:31:33,  8.82s/it]

Epoch: 801 | Training loss 2.8772937282919884 | Validation loss 2.805624306201935



  8%|█████▊                                                                   | 803/10000 [1:51:26<22:29:44,  8.81s/it]

Epoch: 802 | Training loss 2.8768552616238594 | Validation loss 2.8035984933376312



  8%|█████▊                                                                   | 804/10000 [1:51:35<22:29:55,  8.81s/it]

Epoch: 803 | Training loss 2.880248509347439 | Validation loss 2.813765436410904



  8%|█████▉                                                                   | 805/10000 [1:51:44<22:30:33,  8.81s/it]

Epoch: 804 | Training loss 2.875530317425728 | Validation loss 2.8009479641914368



  8%|█████▉                                                                   | 806/10000 [1:51:52<22:25:47,  8.78s/it]

Epoch: 805 | Training loss 2.8745605126023293 | Validation loss 2.8069153130054474



  8%|█████▉                                                                   | 807/10000 [1:52:01<22:26:49,  8.79s/it]

Epoch: 806 | Training loss 2.87606143951416 | Validation loss 2.800475686788559



  8%|█████▉                                                                   | 808/10000 [1:52:10<22:29:02,  8.81s/it]

Epoch: 807 | Training loss 2.8749622628092766 | Validation loss 2.8107025623321533



  8%|█████▉                                                                   | 809/10000 [1:52:19<22:30:40,  8.82s/it]

Epoch: 808 | Training loss 2.8719492852687836 | Validation loss 2.8011909127235413



  8%|█████▉                                                                   | 810/10000 [1:52:28<22:29:15,  8.81s/it]

Epoch: 809 | Training loss 2.87848811596632 | Validation loss 2.7975296080112457



  8%|█████▉                                                                   | 811/10000 [1:52:37<22:31:22,  8.82s/it]

Epoch: 810 | Training loss 2.8782354667782784 | Validation loss 2.8028122782707214



  8%|█████▉                                                                   | 812/10000 [1:52:45<22:25:29,  8.79s/it]

Epoch: 811 | Training loss 2.874117709696293 | Validation loss 2.8045364022254944



  8%|█████▉                                                                   | 813/10000 [1:52:54<22:27:44,  8.80s/it]

Epoch: 812 | Training loss 2.8747465014457703 | Validation loss 2.8046668767929077



  8%|█████▉                                                                   | 814/10000 [1:53:03<22:26:24,  8.79s/it]

Epoch: 813 | Training loss 2.877509370446205 | Validation loss 2.799180805683136



  8%|█████▉                                                                   | 815/10000 [1:53:12<22:31:20,  8.83s/it]

Epoch: 814 | Training loss 2.8801639080047607 | Validation loss 2.8079281747341156



  8%|█████▉                                                                   | 816/10000 [1:53:20<22:03:51,  8.65s/it]

Epoch: 815 | Training loss 2.8806948214769363 | Validation loss 2.808387964963913



  8%|█████▉                                                                   | 817/10000 [1:53:28<21:34:24,  8.46s/it]

Epoch: 816 | Training loss 2.8721660673618317 | Validation loss 2.8047976195812225



  8%|█████▉                                                                   | 818/10000 [1:53:36<21:13:40,  8.32s/it]

Epoch: 817 | Training loss 2.880505971610546 | Validation loss 2.802438259124756



  8%|█████▉                                                                   | 819/10000 [1:53:44<21:04:48,  8.27s/it]

Epoch: 818 | Training loss 2.874733619391918 | Validation loss 2.8054085671901703



  8%|█████▉                                                                   | 820/10000 [1:53:52<20:59:21,  8.23s/it]

Epoch: 819 | Training loss 2.8770160377025604 | Validation loss 2.808782398700714



  8%|█████▉                                                                   | 821/10000 [1:54:00<20:54:10,  8.20s/it]

Epoch: 820 | Training loss 2.875074841082096 | Validation loss 2.799853354692459



  8%|██████                                                                   | 822/10000 [1:54:09<20:54:37,  8.20s/it]

Epoch: 821 | Training loss 2.8761299774050713 | Validation loss 2.804379791021347



  8%|██████                                                                   | 823/10000 [1:54:17<20:53:53,  8.20s/it]

Epoch: 822 | Training loss 2.871622122824192 | Validation loss 2.808662623167038



  8%|██████                                                                   | 824/10000 [1:54:25<20:54:27,  8.20s/it]

Epoch: 823 | Training loss 2.8747443780303 | Validation loss 2.8087476193904877



  8%|██████                                                                   | 825/10000 [1:54:33<20:55:52,  8.21s/it]

Epoch: 824 | Training loss 2.8752289190888405 | Validation loss 2.8067829310894012



  8%|██████                                                                   | 826/10000 [1:54:42<20:56:52,  8.22s/it]

Epoch: 825 | Training loss 2.8776919096708298 | Validation loss 2.802602082490921



  8%|██████                                                                   | 827/10000 [1:54:50<20:54:06,  8.20s/it]

Epoch: 826 | Training loss 2.879257023334503 | Validation loss 2.801335573196411



  8%|██████                                                                   | 828/10000 [1:54:58<20:54:18,  8.21s/it]

Epoch: 827 | Training loss 2.875341534614563 | Validation loss 2.8061738908290863



  8%|██████                                                                   | 829/10000 [1:55:06<20:50:46,  8.18s/it]

Epoch: 828 | Training loss 2.877257823944092 | Validation loss 2.799998939037323



  8%|██████                                                                   | 830/10000 [1:55:14<20:47:42,  8.16s/it]

Epoch: 829 | Training loss 2.8764399141073227 | Validation loss 2.7973122000694275



  8%|██████                                                                   | 831/10000 [1:55:22<20:39:46,  8.11s/it]

Epoch: 830 | Training loss 2.874433644115925 | Validation loss 2.8011918663978577



  8%|██████                                                                   | 832/10000 [1:55:30<20:36:12,  8.09s/it]

Epoch: 831 | Training loss 2.875695914030075 | Validation loss 2.812220126390457



  8%|██████                                                                   | 833/10000 [1:55:38<20:42:36,  8.13s/it]

Epoch: 832 | Training loss 2.875828355550766 | Validation loss 2.8045572638511658



  8%|██████                                                                   | 834/10000 [1:55:47<20:47:26,  8.17s/it]

Epoch: 833 | Training loss 2.8726454824209213 | Validation loss 2.804780751466751



  8%|██████                                                                   | 835/10000 [1:55:55<20:44:21,  8.15s/it]

Epoch: 834 | Training loss 2.8733429983258247 | Validation loss 2.8019171357154846



  8%|██████                                                                   | 836/10000 [1:56:03<20:44:25,  8.15s/it]

Epoch: 835 | Training loss 2.874342769384384 | Validation loss 2.7978846728801727



  8%|██████                                                                   | 837/10000 [1:56:11<20:43:35,  8.14s/it]

Epoch: 836 | Training loss 2.8776940032839775 | Validation loss 2.805871933698654



  8%|██████                                                                   | 838/10000 [1:56:19<20:43:39,  8.14s/it]

Epoch: 837 | Training loss 2.8746627643704414 | Validation loss 2.808458626270294



  8%|██████                                                                   | 839/10000 [1:56:27<20:44:21,  8.15s/it]

Epoch: 838 | Training loss 2.8783165588974953 | Validation loss 2.8071297705173492



  8%|██████▏                                                                  | 840/10000 [1:56:35<20:44:07,  8.15s/it]

Epoch: 839 | Training loss 2.8747675493359566 | Validation loss 2.8098998069763184



  8%|██████▏                                                                  | 841/10000 [1:56:44<20:39:41,  8.12s/it]

Epoch: 840 | Training loss 2.876252070069313 | Validation loss 2.806998550891876



  8%|██████▏                                                                  | 842/10000 [1:56:52<20:38:44,  8.12s/it]

Epoch: 841 | Training loss 2.874557062983513 | Validation loss 2.811714679002762



  8%|██████▏                                                                  | 843/10000 [1:57:00<20:31:25,  8.07s/it]

Epoch: 842 | Training loss 2.8733461797237396 | Validation loss 2.8036054372787476



  8%|██████▏                                                                  | 844/10000 [1:57:08<20:34:21,  8.09s/it]

Epoch: 843 | Training loss 2.8772423043847084 | Validation loss 2.816561758518219



  8%|██████▏                                                                  | 845/10000 [1:57:16<20:35:30,  8.10s/it]

Epoch: 844 | Training loss 2.877389393746853 | Validation loss 2.813436597585678



  8%|██████▏                                                                  | 846/10000 [1:57:24<20:37:40,  8.11s/it]

Epoch: 845 | Training loss 2.8817403614521027 | Validation loss 2.81189426779747



  8%|██████▏                                                                  | 847/10000 [1:57:32<20:39:57,  8.13s/it]

Epoch: 846 | Training loss 2.87565441429615 | Validation loss 2.8070284128189087



  8%|██████▏                                                                  | 848/10000 [1:57:40<20:37:51,  8.12s/it]

Epoch: 847 | Training loss 2.877281680703163 | Validation loss 2.799391597509384



  8%|██████▏                                                                  | 849/10000 [1:57:48<20:37:44,  8.12s/it]

Epoch: 848 | Training loss 2.878718540072441 | Validation loss 2.794864058494568



  8%|██████▏                                                                  | 850/10000 [1:57:57<20:43:44,  8.16s/it]

Epoch: 849 | Training loss 2.869904711842537 | Validation loss 2.7974463403224945



  9%|██████▏                                                                  | 851/10000 [1:58:05<20:40:56,  8.14s/it]

Epoch: 850 | Training loss 2.8739991933107376 | Validation loss 2.795769840478897



  9%|██████▏                                                                  | 852/10000 [1:58:13<20:36:58,  8.11s/it]

Epoch: 851 | Training loss 2.8779539316892624 | Validation loss 2.808853656053543



  9%|██████▏                                                                  | 853/10000 [1:58:21<20:34:56,  8.10s/it]

Epoch: 852 | Training loss 2.877228304743767 | Validation loss 2.802570730447769



  9%|██████▏                                                                  | 854/10000 [1:58:29<20:33:30,  8.09s/it]

Epoch: 853 | Training loss 2.879817619919777 | Validation loss 2.799696832895279



  9%|██████▏                                                                  | 855/10000 [1:58:37<20:32:20,  8.09s/it]

Epoch: 854 | Training loss 2.875881887972355 | Validation loss 2.7976513504981995



  9%|██████▏                                                                  | 856/10000 [1:58:45<20:32:07,  8.08s/it]

Epoch: 855 | Training loss 2.874251291155815 | Validation loss 2.8088274002075195



  9%|██████▎                                                                  | 857/10000 [1:58:53<20:32:04,  8.09s/it]

Epoch: 856 | Training loss 2.8768142610788345 | Validation loss 2.8018580079078674



  9%|██████▎                                                                  | 858/10000 [1:59:01<20:36:11,  8.11s/it]

Epoch: 857 | Training loss 2.873243123292923 | Validation loss 2.809190809726715



  9%|██████▎                                                                  | 859/10000 [1:59:09<20:37:43,  8.12s/it]

Epoch: 858 | Training loss 2.8754665926098824 | Validation loss 2.8053619265556335



  9%|██████▎                                                                  | 860/10000 [1:59:18<20:32:53,  8.09s/it]

Epoch: 859 | Training loss 2.876257263123989 | Validation loss 2.8039809465408325



  9%|██████▎                                                                  | 861/10000 [1:59:26<20:34:35,  8.11s/it]

Epoch: 860 | Training loss 2.873751752078533 | Validation loss 2.803166478872299



  9%|██████▎                                                                  | 862/10000 [1:59:34<20:37:10,  8.12s/it]

Epoch: 861 | Training loss 2.8782705664634705 | Validation loss 2.7983117401599884



  9%|██████▎                                                                  | 863/10000 [1:59:42<20:33:50,  8.10s/it]

Epoch: 862 | Training loss 2.8822401612997055 | Validation loss 2.809479206800461



  9%|██████▎                                                                  | 864/10000 [1:59:50<20:27:09,  8.06s/it]

Epoch: 863 | Training loss 2.8751979991793633 | Validation loss 2.795256793498993



  9%|██████▎                                                                  | 865/10000 [1:59:58<20:30:22,  8.08s/it]

Epoch: 864 | Training loss 2.8807203620672226 | Validation loss 2.8041398227214813



  9%|██████▎                                                                  | 866/10000 [2:00:06<20:36:14,  8.12s/it]

Epoch: 865 | Training loss 2.870965287089348 | Validation loss 2.798385798931122



  9%|██████▎                                                                  | 867/10000 [2:00:14<20:34:39,  8.11s/it]

Epoch: 866 | Training loss 2.8757390156388283 | Validation loss 2.8102724254131317



  9%|██████▎                                                                  | 868/10000 [2:00:22<20:38:25,  8.14s/it]

Epoch: 867 | Training loss 2.8723628371953964 | Validation loss 2.7957410514354706



  9%|██████▎                                                                  | 869/10000 [2:00:31<20:40:55,  8.15s/it]

Epoch: 868 | Training loss 2.878569021821022 | Validation loss 2.8124188482761383



  9%|██████▎                                                                  | 870/10000 [2:00:39<20:32:44,  8.10s/it]

Epoch: 869 | Training loss 2.8719196915626526 | Validation loss 2.798999637365341



  9%|██████▎                                                                  | 871/10000 [2:00:47<20:26:52,  8.06s/it]

Epoch: 870 | Training loss 2.875560946762562 | Validation loss 2.797231823205948



  9%|██████▎                                                                  | 872/10000 [2:00:55<20:29:04,  8.08s/it]

Epoch: 871 | Training loss 2.8771228790283203 | Validation loss 2.801419347524643



  9%|██████▎                                                                  | 873/10000 [2:01:03<20:33:24,  8.11s/it]

Epoch: 872 | Training loss 2.8780551701784134 | Validation loss 2.799755245447159



  9%|██████▍                                                                  | 874/10000 [2:01:11<20:36:34,  8.13s/it]

Epoch: 873 | Training loss 2.877709038555622 | Validation loss 2.8024422228336334



  9%|██████▍                                                                  | 875/10000 [2:01:19<20:35:46,  8.13s/it]

Epoch: 874 | Training loss 2.873742625117302 | Validation loss 2.813965678215027



  9%|██████▍                                                                  | 876/10000 [2:01:27<20:32:21,  8.10s/it]

Epoch: 875 | Training loss 2.876269720494747 | Validation loss 2.801136404275894



  9%|██████▍                                                                  | 877/10000 [2:01:35<20:35:03,  8.12s/it]

Epoch: 876 | Training loss 2.8706290498375893 | Validation loss 2.797409236431122



  9%|██████▍                                                                  | 878/10000 [2:01:44<20:34:42,  8.12s/it]

Epoch: 877 | Training loss 2.87956640124321 | Validation loss 2.8012699484825134



  9%|██████▍                                                                  | 879/10000 [2:01:52<20:39:25,  8.15s/it]

Epoch: 878 | Training loss 2.876356542110443 | Validation loss 2.8121542930603027



  9%|██████▍                                                                  | 880/10000 [2:02:00<20:41:13,  8.17s/it]

Epoch: 879 | Training loss 2.875109516084194 | Validation loss 2.7988803684711456



  9%|██████▍                                                                  | 881/10000 [2:02:08<20:38:42,  8.15s/it]

Epoch: 880 | Training loss 2.8760311976075172 | Validation loss 2.8108393251895905



  9%|██████▍                                                                  | 882/10000 [2:02:16<20:35:31,  8.13s/it]

Epoch: 881 | Training loss 2.875710867345333 | Validation loss 2.8074967861175537



  9%|██████▍                                                                  | 883/10000 [2:02:24<20:41:34,  8.17s/it]

Epoch: 882 | Training loss 2.8758939802646637 | Validation loss 2.814056307077408



  9%|██████▍                                                                  | 884/10000 [2:02:33<20:42:12,  8.18s/it]

Epoch: 883 | Training loss 2.8804048225283623 | Validation loss 2.8116730451583862



  9%|██████▍                                                                  | 885/10000 [2:02:41<20:34:23,  8.13s/it]

Epoch: 884 | Training loss 2.882777251303196 | Validation loss 2.80343359708786



  9%|██████▍                                                                  | 886/10000 [2:02:49<20:35:17,  8.13s/it]

Epoch: 885 | Training loss 2.880745992064476 | Validation loss 2.804511070251465



  9%|██████▍                                                                  | 887/10000 [2:02:57<20:37:12,  8.15s/it]

Epoch: 886 | Training loss 2.8745800107717514 | Validation loss 2.8051273822784424



  9%|██████▍                                                                  | 888/10000 [2:03:05<20:32:52,  8.12s/it]

Epoch: 887 | Training loss 2.878284730017185 | Validation loss 2.8056680858135223



  9%|██████▍                                                                  | 889/10000 [2:03:13<20:31:07,  8.11s/it]

Epoch: 888 | Training loss 2.8760191574692726 | Validation loss 2.8031098544597626



  9%|██████▍                                                                  | 890/10000 [2:03:21<20:34:15,  8.13s/it]

Epoch: 889 | Training loss 2.8741932213306427 | Validation loss 2.7990992069244385



  9%|██████▌                                                                  | 891/10000 [2:03:29<20:30:40,  8.11s/it]

Epoch: 890 | Training loss 2.874570406973362 | Validation loss 2.7983551025390625



  9%|██████▌                                                                  | 892/10000 [2:03:37<20:34:01,  8.13s/it]

Epoch: 891 | Training loss 2.881540961563587 | Validation loss 2.8128639459609985



  9%|██████▌                                                                  | 893/10000 [2:03:46<20:34:02,  8.13s/it]

Epoch: 892 | Training loss 2.878209963440895 | Validation loss 2.8009697794914246



  9%|██████▌                                                                  | 894/10000 [2:03:54<20:31:09,  8.11s/it]

Epoch: 893 | Training loss 2.8698632791638374 | Validation loss 2.8040237426757812



  9%|██████▌                                                                  | 895/10000 [2:04:02<20:32:04,  8.12s/it]

Epoch: 894 | Training loss 2.875487118959427 | Validation loss 2.8010766208171844



  9%|██████▌                                                                  | 896/10000 [2:04:10<20:30:33,  8.11s/it]

Epoch: 895 | Training loss 2.8741263672709465 | Validation loss 2.8021099269390106



  9%|██████▌                                                                  | 897/10000 [2:04:18<20:31:28,  8.12s/it]

Epoch: 896 | Training loss 2.873116746544838 | Validation loss 2.804998606443405



  9%|██████▌                                                                  | 898/10000 [2:04:26<20:35:40,  8.15s/it]

Epoch: 897 | Training loss 2.8805995732545853 | Validation loss 2.805020958185196



  9%|██████▌                                                                  | 899/10000 [2:04:34<20:34:59,  8.14s/it]

Epoch: 898 | Training loss 2.880486346781254 | Validation loss 2.8047523498535156



  9%|██████▌                                                                  | 900/10000 [2:04:43<20:35:02,  8.14s/it]

Epoch: 899 | Training loss 2.8771867230534554 | Validation loss 2.8021305203437805



  9%|██████▌                                                                  | 901/10000 [2:04:51<20:34:17,  8.14s/it]

Epoch: 900 | Training loss 2.877046473324299 | Validation loss 2.8076920807361603



  9%|██████▌                                                                  | 902/10000 [2:04:59<20:46:10,  8.22s/it]

Epoch: 901 | Training loss 2.877039909362793 | Validation loss 2.8080299496650696



  9%|██████▌                                                                  | 903/10000 [2:05:07<20:45:45,  8.22s/it]

Epoch: 902 | Training loss 2.8744016215205193 | Validation loss 2.800492227077484



  9%|██████▌                                                                  | 904/10000 [2:05:15<20:38:59,  8.17s/it]

Epoch: 903 | Training loss 2.8720993995666504 | Validation loss 2.805187225341797



  9%|██████▌                                                                  | 905/10000 [2:05:23<20:37:05,  8.16s/it]

Epoch: 904 | Training loss 2.8813709914684296 | Validation loss 2.8071069717407227



  9%|██████▌                                                                  | 906/10000 [2:05:32<20:33:36,  8.14s/it]

Epoch: 905 | Training loss 2.8805068880319595 | Validation loss 2.8099346458911896



  9%|██████▌                                                                  | 907/10000 [2:05:40<20:35:11,  8.15s/it]

Epoch: 906 | Training loss 2.8755744472146034 | Validation loss 2.8111701607704163



  9%|██████▋                                                                  | 908/10000 [2:05:48<20:32:00,  8.13s/it]

Epoch: 907 | Training loss 2.8711506873369217 | Validation loss 2.8150359392166138



  9%|██████▋                                                                  | 909/10000 [2:05:56<20:35:30,  8.15s/it]

Epoch: 908 | Training loss 2.8780022338032722 | Validation loss 2.8020850121974945



  9%|██████▋                                                                  | 910/10000 [2:06:04<20:32:21,  8.13s/it]

Epoch: 909 | Training loss 2.874921001493931 | Validation loss 2.803677886724472



  9%|██████▋                                                                  | 911/10000 [2:06:12<20:28:33,  8.11s/it]

Epoch: 910 | Training loss 2.8748360723257065 | Validation loss 2.800706386566162



  9%|██████▋                                                                  | 912/10000 [2:06:20<20:23:07,  8.08s/it]

Epoch: 911 | Training loss 2.876730337738991 | Validation loss 2.803654044866562



  9%|██████▋                                                                  | 913/10000 [2:06:28<20:23:34,  8.08s/it]

Epoch: 912 | Training loss 2.8784355744719505 | Validation loss 2.800803780555725



  9%|██████▋                                                                  | 914/10000 [2:06:36<20:23:49,  8.08s/it]

Epoch: 913 | Training loss 2.87308606505394 | Validation loss 2.809047818183899



  9%|██████▋                                                                  | 915/10000 [2:06:44<20:19:36,  8.05s/it]

Epoch: 914 | Training loss 2.8766443729400635 | Validation loss 2.801997095346451



  9%|██████▋                                                                  | 916/10000 [2:06:52<20:18:43,  8.05s/it]

Epoch: 915 | Training loss 2.8747489526867867 | Validation loss 2.802688330411911



  9%|██████▋                                                                  | 917/10000 [2:07:01<20:23:08,  8.08s/it]

Epoch: 916 | Training loss 2.8698652759194374 | Validation loss 2.8023994863033295



  9%|██████▋                                                                  | 918/10000 [2:07:09<20:21:56,  8.07s/it]

Epoch: 917 | Training loss 2.879658818244934 | Validation loss 2.799926608800888



  9%|██████▋                                                                  | 919/10000 [2:07:17<20:25:13,  8.10s/it]

Epoch: 918 | Training loss 2.8714078664779663 | Validation loss 2.799080044031143



  9%|██████▋                                                                  | 920/10000 [2:07:25<20:23:57,  8.09s/it]

Epoch: 919 | Training loss 2.870559848845005 | Validation loss 2.8059908747673035



  9%|██████▋                                                                  | 921/10000 [2:07:33<20:19:25,  8.06s/it]

Epoch: 920 | Training loss 2.878773771226406 | Validation loss 2.810843825340271



  9%|██████▋                                                                  | 922/10000 [2:07:41<20:19:11,  8.06s/it]

Epoch: 921 | Training loss 2.8797883465886116 | Validation loss 2.808156341314316



  9%|██████▋                                                                  | 923/10000 [2:07:49<20:21:45,  8.08s/it]

Epoch: 922 | Training loss 2.8795581683516502 | Validation loss 2.8092936873435974



  9%|██████▋                                                                  | 924/10000 [2:07:57<20:23:39,  8.09s/it]

Epoch: 923 | Training loss 2.8743301779031754 | Validation loss 2.8016297221183777



  9%|██████▊                                                                  | 925/10000 [2:08:05<20:24:12,  8.09s/it]

Epoch: 924 | Training loss 2.8769555538892746 | Validation loss 2.8103259801864624



  9%|██████▊                                                                  | 926/10000 [2:08:13<20:22:55,  8.09s/it]

Epoch: 925 | Training loss 2.8759471401572227 | Validation loss 2.798978328704834



  9%|██████▊                                                                  | 927/10000 [2:08:21<20:22:51,  8.09s/it]

Epoch: 926 | Training loss 2.8750762790441513 | Validation loss 2.8015617430210114



  9%|██████▊                                                                  | 928/10000 [2:08:29<20:21:15,  8.08s/it]

Epoch: 927 | Training loss 2.8776886612176895 | Validation loss 2.8016805946826935



  9%|██████▊                                                                  | 929/10000 [2:08:38<20:24:26,  8.10s/it]

Epoch: 928 | Training loss 2.87363388389349 | Validation loss 2.804282158613205



  9%|██████▊                                                                  | 930/10000 [2:08:46<20:25:07,  8.10s/it]

Epoch: 929 | Training loss 2.8773983791470528 | Validation loss 2.800895184278488



  9%|██████▊                                                                  | 931/10000 [2:08:54<20:28:42,  8.13s/it]

Epoch: 930 | Training loss 2.8749619349837303 | Validation loss 2.8078548312187195



  9%|██████▊                                                                  | 932/10000 [2:09:02<20:28:00,  8.13s/it]

Epoch: 931 | Training loss 2.8713203594088554 | Validation loss 2.8080011010169983



  9%|██████▊                                                                  | 933/10000 [2:09:10<20:26:04,  8.11s/it]

Epoch: 932 | Training loss 2.8794732466340065 | Validation loss 2.8065572679042816



  9%|██████▊                                                                  | 934/10000 [2:09:18<20:21:53,  8.09s/it]

Epoch: 933 | Training loss 2.8711588010191917 | Validation loss 2.8023303151130676



  9%|██████▊                                                                  | 935/10000 [2:09:26<20:23:57,  8.10s/it]

Epoch: 934 | Training loss 2.8792541548609734 | Validation loss 2.801773726940155



  9%|██████▊                                                                  | 936/10000 [2:09:34<20:26:39,  8.12s/it]

Epoch: 935 | Training loss 2.8753520995378494 | Validation loss 2.803823411464691



  9%|██████▊                                                                  | 937/10000 [2:09:43<20:28:34,  8.13s/it]

Epoch: 936 | Training loss 2.8751350045204163 | Validation loss 2.8025079667568207



  9%|██████▊                                                                  | 938/10000 [2:09:51<20:27:49,  8.13s/it]

Epoch: 937 | Training loss 2.8696187883615494 | Validation loss 2.803068995475769



  9%|██████▊                                                                  | 939/10000 [2:09:59<20:29:21,  8.14s/it]

Epoch: 938 | Training loss 2.8755579367280006 | Validation loss 2.807945281267166



  9%|██████▊                                                                  | 940/10000 [2:10:07<20:30:30,  8.15s/it]

Epoch: 939 | Training loss 2.8763220384716988 | Validation loss 2.8107224106788635



  9%|██████▊                                                                  | 941/10000 [2:10:15<20:28:12,  8.13s/it]

Epoch: 940 | Training loss 2.8767083808779716 | Validation loss 2.8044903576374054



  9%|██████▉                                                                  | 942/10000 [2:10:23<20:28:41,  8.14s/it]

Epoch: 941 | Training loss 2.8774920105934143 | Validation loss 2.80746066570282



  9%|██████▉                                                                  | 943/10000 [2:10:31<20:27:37,  8.13s/it]

Epoch: 942 | Training loss 2.8733472377061844 | Validation loss 2.8097080290317535



  9%|██████▉                                                                  | 944/10000 [2:10:39<20:21:46,  8.09s/it]

Epoch: 943 | Training loss 2.875778891146183 | Validation loss 2.8018577098846436



  9%|██████▉                                                                  | 945/10000 [2:10:48<20:24:06,  8.11s/it]

Epoch: 944 | Training loss 2.873195491731167 | Validation loss 2.8004675209522247



  9%|██████▉                                                                  | 946/10000 [2:10:56<20:22:09,  8.10s/it]

Epoch: 945 | Training loss 2.8741357401013374 | Validation loss 2.8049037158489227



  9%|██████▉                                                                  | 947/10000 [2:11:04<20:21:57,  8.10s/it]

Epoch: 946 | Training loss 2.870012179017067 | Validation loss 2.8000703752040863



  9%|██████▉                                                                  | 948/10000 [2:11:12<20:23:09,  8.11s/it]

Epoch: 947 | Training loss 2.87581517547369 | Validation loss 2.8036368787288666



  9%|██████▉                                                                  | 949/10000 [2:11:20<20:20:39,  8.09s/it]

Epoch: 948 | Training loss 2.879232481122017 | Validation loss 2.8005602061748505



 10%|██████▉                                                                  | 950/10000 [2:11:28<20:21:39,  8.10s/it]

Epoch: 949 | Training loss 2.8757773861289024 | Validation loss 2.8041237890720367



 10%|██████▉                                                                  | 951/10000 [2:11:36<20:20:56,  8.10s/it]

Epoch: 950 | Training loss 2.8733875826001167 | Validation loss 2.810791105031967



 10%|██████▉                                                                  | 952/10000 [2:11:44<20:21:53,  8.10s/it]

Epoch: 951 | Training loss 2.876049429178238 | Validation loss 2.800906479358673



 10%|██████▉                                                                  | 953/10000 [2:11:52<20:26:46,  8.14s/it]

Epoch: 952 | Training loss 2.8815994039177895 | Validation loss 2.8115503787994385



 10%|██████▉                                                                  | 954/10000 [2:12:01<20:25:06,  8.13s/it]

Epoch: 953 | Training loss 2.877991460263729 | Validation loss 2.806742876768112



 10%|██████▉                                                                  | 955/10000 [2:12:09<20:23:11,  8.11s/it]

Epoch: 954 | Training loss 2.8734167218208313 | Validation loss 2.810351401567459



 10%|██████▉                                                                  | 956/10000 [2:12:17<20:23:15,  8.12s/it]

Epoch: 955 | Training loss 2.875308409333229 | Validation loss 2.8050213158130646



 10%|██████▉                                                                  | 957/10000 [2:12:25<20:20:27,  8.10s/it]

Epoch: 956 | Training loss 2.8747498691082 | Validation loss 2.80276957154274



 10%|██████▉                                                                  | 958/10000 [2:12:33<20:18:30,  8.09s/it]

Epoch: 957 | Training loss 2.877651669085026 | Validation loss 2.807823657989502



 10%|███████                                                                  | 959/10000 [2:12:41<20:16:59,  8.08s/it]

Epoch: 958 | Training loss 2.87410107254982 | Validation loss 2.8124532401561737



 10%|███████                                                                  | 960/10000 [2:12:49<20:19:40,  8.10s/it]

Epoch: 959 | Training loss 2.870179630815983 | Validation loss 2.7984587848186493



 10%|███████                                                                  | 961/10000 [2:12:57<20:15:35,  8.07s/it]

Epoch: 960 | Training loss 2.8803476691246033 | Validation loss 2.813699871301651



 10%|███████                                                                  | 962/10000 [2:13:05<20:16:12,  8.07s/it]

Epoch: 961 | Training loss 2.8754149302840233 | Validation loss 2.8070998787879944



 10%|███████                                                                  | 963/10000 [2:13:13<20:18:04,  8.09s/it]

Epoch: 962 | Training loss 2.8784740194678307 | Validation loss 2.814183294773102



 10%|███████                                                                  | 964/10000 [2:13:21<20:20:06,  8.10s/it]

Epoch: 963 | Training loss 2.8715288639068604 | Validation loss 2.801863044500351



 10%|███████                                                                  | 965/10000 [2:13:29<20:19:19,  8.10s/it]

Epoch: 964 | Training loss 2.8773228526115417 | Validation loss 2.811259835958481



 10%|███████                                                                  | 966/10000 [2:13:38<20:16:33,  8.08s/it]

Epoch: 965 | Training loss 2.8762329295277596 | Validation loss 2.8050437569618225



 10%|███████                                                                  | 967/10000 [2:13:46<20:17:31,  8.09s/it]

Epoch: 966 | Training loss 2.881136581301689 | Validation loss 2.8072105050086975



 10%|███████                                                                  | 968/10000 [2:13:54<20:22:24,  8.12s/it]

Epoch: 967 | Training loss 2.874409541487694 | Validation loss 2.8057213127613068



 10%|███████                                                                  | 969/10000 [2:14:02<20:16:30,  8.08s/it]

Epoch: 968 | Training loss 2.8711052164435387 | Validation loss 2.8003933429718018



 10%|███████                                                                  | 970/10000 [2:14:10<20:13:41,  8.06s/it]

Epoch: 969 | Training loss 2.8754308000206947 | Validation loss 2.8032323718070984



 10%|███████                                                                  | 971/10000 [2:14:18<20:11:43,  8.05s/it]

Epoch: 970 | Training loss 2.874116249382496 | Validation loss 2.816231369972229



 10%|███████                                                                  | 972/10000 [2:14:26<20:12:30,  8.06s/it]

Epoch: 971 | Training loss 2.8803621530532837 | Validation loss 2.803953319787979



 10%|███████                                                                  | 973/10000 [2:14:34<20:13:38,  8.07s/it]

Epoch: 972 | Training loss 2.8775881230831146 | Validation loss 2.8073821365833282



 10%|███████                                                                  | 974/10000 [2:14:42<20:14:18,  8.07s/it]

Epoch: 973 | Training loss 2.874587096273899 | Validation loss 2.8049516677856445



 10%|███████                                                                  | 975/10000 [2:14:50<20:11:59,  8.06s/it]

Epoch: 974 | Training loss 2.87904704362154 | Validation loss 2.8014843463897705



 10%|███████                                                                  | 976/10000 [2:14:58<20:15:20,  8.08s/it]

Epoch: 975 | Training loss 2.878869093954563 | Validation loss 2.8134206235408783



 10%|███████▏                                                                 | 977/10000 [2:15:06<20:14:51,  8.08s/it]

Epoch: 976 | Training loss 2.8780615627765656 | Validation loss 2.8093591034412384



 10%|███████▏                                                                 | 978/10000 [2:15:14<20:14:19,  8.08s/it]

Epoch: 977 | Training loss 2.877346657216549 | Validation loss 2.802916556596756



 10%|███████▏                                                                 | 979/10000 [2:15:22<20:13:59,  8.07s/it]

Epoch: 978 | Training loss 2.8738709688186646 | Validation loss 2.8007109463214874



 10%|███████▏                                                                 | 980/10000 [2:15:31<20:13:41,  8.07s/it]

Epoch: 979 | Training loss 2.875700369477272 | Validation loss 2.804563581943512



 10%|███████▏                                                                 | 981/10000 [2:15:39<20:11:21,  8.06s/it]

Epoch: 980 | Training loss 2.8747109323740005 | Validation loss 2.805231660604477



 10%|███████▏                                                                 | 982/10000 [2:15:47<20:09:40,  8.05s/it]

Epoch: 981 | Training loss 2.8753370195627213 | Validation loss 2.8119274973869324



 10%|███████▏                                                                 | 983/10000 [2:15:55<20:14:45,  8.08s/it]

Epoch: 982 | Training loss 2.874742440879345 | Validation loss 2.810547709465027



 10%|███████▏                                                                 | 984/10000 [2:16:03<20:19:00,  8.11s/it]

Epoch: 983 | Training loss 2.8747533559799194 | Validation loss 2.80852073431015



 10%|███████▏                                                                 | 985/10000 [2:16:11<20:23:12,  8.14s/it]

Epoch: 984 | Training loss 2.877792276442051 | Validation loss 2.805686831474304



 10%|███████▏                                                                 | 986/10000 [2:16:19<20:22:37,  8.14s/it]

Epoch: 985 | Training loss 2.873642459511757 | Validation loss 2.802460253238678



 10%|███████▏                                                                 | 987/10000 [2:16:27<20:23:56,  8.15s/it]

Epoch: 986 | Training loss 2.8796561658382416 | Validation loss 2.803887218236923



 10%|███████▏                                                                 | 988/10000 [2:16:35<20:18:09,  8.11s/it]

Epoch: 987 | Training loss 2.880973309278488 | Validation loss 2.7993294298648834



 10%|███████▏                                                                 | 989/10000 [2:16:44<20:16:11,  8.10s/it]

Epoch: 988 | Training loss 2.8715479895472527 | Validation loss 2.798824191093445



 10%|███████▏                                                                 | 990/10000 [2:16:52<20:13:20,  8.08s/it]

Epoch: 989 | Training loss 2.878418557345867 | Validation loss 2.8050999641418457



 10%|███████▏                                                                 | 991/10000 [2:17:00<20:14:11,  8.09s/it]

Epoch: 990 | Training loss 2.8756129294633865 | Validation loss 2.800994485616684



 10%|███████▏                                                                 | 992/10000 [2:17:08<20:16:52,  8.11s/it]

Epoch: 991 | Training loss 2.869663290679455 | Validation loss 2.8054481148719788



 10%|███████▏                                                                 | 993/10000 [2:17:16<20:19:23,  8.12s/it]

Epoch: 992 | Training loss 2.8740475103259087 | Validation loss 2.799967974424362



 10%|███████▎                                                                 | 994/10000 [2:17:24<20:22:32,  8.14s/it]

Epoch: 993 | Training loss 2.8754802644252777 | Validation loss 2.805248975753784



 10%|███████▎                                                                 | 995/10000 [2:17:32<20:21:52,  8.14s/it]

Epoch: 994 | Training loss 2.879432789981365 | Validation loss 2.8184737265110016



 10%|███████▎                                                                 | 996/10000 [2:17:41<20:25:37,  8.17s/it]

Epoch: 995 | Training loss 2.8732514828443527 | Validation loss 2.8173816800117493



 10%|███████▎                                                                 | 997/10000 [2:17:49<20:26:19,  8.17s/it]

Epoch: 996 | Training loss 2.871045157313347 | Validation loss 2.8000249564647675



 10%|███████▎                                                                 | 998/10000 [2:17:57<20:24:18,  8.16s/it]

Epoch: 997 | Training loss 2.879929296672344 | Validation loss 2.804511159658432



 10%|███████▎                                                                 | 999/10000 [2:18:05<20:25:27,  8.17s/it]

Epoch: 998 | Training loss 2.871735602617264 | Validation loss 2.805939555168152



 10%|███████▏                                                                | 1000/10000 [2:18:13<20:24:40,  8.16s/it]

Epoch: 999 | Training loss 2.877612739801407 | Validation loss 2.808425396680832



 10%|███████▏                                                                | 1001/10000 [2:18:21<20:23:45,  8.16s/it]

Epoch: 1000 | Training loss 2.8725162222981453 | Validation loss 2.7968183159828186



 10%|███████▏                                                                | 1002/10000 [2:18:29<20:20:15,  8.14s/it]

Epoch: 1001 | Training loss 2.8714595958590508 | Validation loss 2.8081078231334686



 10%|███████▏                                                                | 1003/10000 [2:18:38<20:19:34,  8.13s/it]

Epoch: 1002 | Training loss 2.8764869272708893 | Validation loss 2.8089271187782288



 10%|███████▏                                                                | 1004/10000 [2:18:46<20:21:40,  8.15s/it]

Epoch: 1003 | Training loss 2.875405766069889 | Validation loss 2.805813789367676



 10%|███████▏                                                                | 1005/10000 [2:18:54<20:24:23,  8.17s/it]

Epoch: 1004 | Training loss 2.875222757458687 | Validation loss 2.804894834756851



 10%|███████▏                                                                | 1006/10000 [2:19:02<20:21:59,  8.15s/it]

Epoch: 1005 | Training loss 2.8753299489617348 | Validation loss 2.800216645002365



 10%|███████▎                                                                | 1007/10000 [2:19:10<20:19:33,  8.14s/it]

Epoch: 1006 | Training loss 2.8756499886512756 | Validation loss 2.8009367883205414



 10%|███████▎                                                                | 1008/10000 [2:19:18<20:18:27,  8.13s/it]

Epoch: 1007 | Training loss 2.874918632209301 | Validation loss 2.802545726299286



 10%|███████▎                                                                | 1009/10000 [2:19:26<20:19:08,  8.14s/it]

Epoch: 1008 | Training loss 2.8732114359736443 | Validation loss 2.8022596538066864



 10%|███████▎                                                                | 1010/10000 [2:19:35<20:16:03,  8.12s/it]

Epoch: 1009 | Training loss 2.8783846646547318 | Validation loss 2.812914878129959



 10%|███████▎                                                                | 1011/10000 [2:19:43<20:16:36,  8.12s/it]

Epoch: 1010 | Training loss 2.878481015563011 | Validation loss 2.80635067820549



 10%|███████▎                                                                | 1012/10000 [2:19:51<20:17:02,  8.12s/it]

Epoch: 1011 | Training loss 2.8732252568006516 | Validation loss 2.80908927321434



 10%|███████▎                                                                | 1013/10000 [2:19:59<20:20:48,  8.15s/it]

Epoch: 1012 | Training loss 2.877678260207176 | Validation loss 2.810133010149002



 10%|███████▎                                                                | 1014/10000 [2:20:07<20:19:46,  8.14s/it]

Epoch: 1013 | Training loss 2.8733044266700745 | Validation loss 2.7992970049381256



 10%|███████▎                                                                | 1015/10000 [2:20:15<20:14:52,  8.11s/it]

Epoch: 1014 | Training loss 2.8674017786979675 | Validation loss 2.8077590465545654



 10%|███████▎                                                                | 1016/10000 [2:20:23<20:16:16,  8.12s/it]

Epoch: 1015 | Training loss 2.8737632036209106 | Validation loss 2.804497480392456



 10%|███████▎                                                                | 1017/10000 [2:20:31<20:18:00,  8.14s/it]

Epoch: 1016 | Training loss 2.8755460157990456 | Validation loss 2.802251309156418



 10%|███████▎                                                                | 1018/10000 [2:20:39<20:11:20,  8.09s/it]

Epoch: 1017 | Training loss 2.877786099910736 | Validation loss 2.804163783788681



 10%|███████▎                                                                | 1019/10000 [2:20:48<20:10:16,  8.09s/it]

Epoch: 1018 | Training loss 2.877643257379532 | Validation loss 2.8034727573394775



 10%|███████▎                                                                | 1020/10000 [2:20:56<20:09:25,  8.08s/it]

Epoch: 1019 | Training loss 2.873857356607914 | Validation loss 2.811491221189499



 10%|███████▎                                                                | 1021/10000 [2:21:04<20:10:54,  8.09s/it]

Epoch: 1020 | Training loss 2.877029038965702 | Validation loss 2.8035812377929688



 10%|███████▎                                                                | 1022/10000 [2:21:12<20:13:23,  8.11s/it]

Epoch: 1021 | Training loss 2.87589268386364 | Validation loss 2.800022214651108



 10%|███████▎                                                                | 1023/10000 [2:21:20<20:14:24,  8.12s/it]

Epoch: 1022 | Training loss 2.8748590648174286 | Validation loss 2.800609976053238



 10%|███████▎                                                                | 1024/10000 [2:21:28<20:13:46,  8.11s/it]

Epoch: 1023 | Training loss 2.874637246131897 | Validation loss 2.8076677322387695



 10%|███████▍                                                                | 1025/10000 [2:21:36<20:11:35,  8.10s/it]

Epoch: 1024 | Training loss 2.87441898137331 | Validation loss 2.803276240825653



 10%|███████▍                                                                | 1026/10000 [2:21:44<20:17:39,  8.14s/it]

Epoch: 1025 | Training loss 2.877050541341305 | Validation loss 2.807110995054245



 10%|███████▍                                                                | 1027/10000 [2:21:52<20:13:46,  8.12s/it]

Epoch: 1026 | Training loss 2.8827046677470207 | Validation loss 2.8015305399894714



 10%|███████▍                                                                | 1028/10000 [2:22:01<20:11:45,  8.10s/it]

Epoch: 1027 | Training loss 2.8756378665566444 | Validation loss 2.810038536787033



 10%|███████▍                                                                | 1029/10000 [2:22:09<20:13:39,  8.12s/it]

Epoch: 1028 | Training loss 2.8746018782258034 | Validation loss 2.8027471005916595



 10%|███████▍                                                                | 1030/10000 [2:22:17<20:14:06,  8.12s/it]

Epoch: 1029 | Training loss 2.8774035051465034 | Validation loss 2.8019295632839203



 10%|███████▍                                                                | 1031/10000 [2:22:25<20:20:06,  8.16s/it]

Epoch: 1030 | Training loss 2.8762046322226524 | Validation loss 2.7973876297473907



 10%|███████▍                                                                | 1032/10000 [2:22:33<20:23:58,  8.19s/it]

Epoch: 1031 | Training loss 2.878634497523308 | Validation loss 2.802971214056015



 10%|███████▍                                                                | 1033/10000 [2:22:41<20:21:21,  8.17s/it]

Epoch: 1032 | Training loss 2.8755977377295494 | Validation loss 2.796079695224762



 10%|███████▍                                                                | 1034/10000 [2:22:50<20:22:03,  8.18s/it]

Epoch: 1033 | Training loss 2.8773208782076836 | Validation loss 2.8073261380195618



 10%|███████▍                                                                | 1035/10000 [2:22:58<20:15:40,  8.14s/it]

Epoch: 1034 | Training loss 2.8800632059574127 | Validation loss 2.801406592130661



 10%|███████▍                                                                | 1036/10000 [2:23:06<20:18:15,  8.15s/it]

Epoch: 1035 | Training loss 2.876078188419342 | Validation loss 2.806455612182617



 10%|███████▍                                                                | 1037/10000 [2:23:14<20:23:32,  8.19s/it]

Epoch: 1036 | Training loss 2.8772719725966454 | Validation loss 2.8025720417499542



 10%|███████▍                                                                | 1038/10000 [2:23:22<20:18:50,  8.16s/it]

Epoch: 1037 | Training loss 2.8778266608715057 | Validation loss 2.8039696514606476



 10%|███████▍                                                                | 1039/10000 [2:23:31<20:24:39,  8.20s/it]

Epoch: 1038 | Training loss 2.8813278675079346 | Validation loss 2.8050379157066345



 10%|███████▍                                                                | 1040/10000 [2:23:39<20:22:18,  8.19s/it]

Epoch: 1039 | Training loss 2.8752521499991417 | Validation loss 2.802117168903351



 10%|███████▍                                                                | 1041/10000 [2:23:47<20:14:17,  8.13s/it]

Epoch: 1040 | Training loss 2.8725899383425713 | Validation loss 2.806249648332596



 10%|███████▌                                                                | 1042/10000 [2:23:55<20:14:10,  8.13s/it]

Epoch: 1041 | Training loss 2.873932011425495 | Validation loss 2.805663526058197



 10%|███████▌                                                                | 1043/10000 [2:24:03<20:13:22,  8.13s/it]

Epoch: 1042 | Training loss 2.8800633251667023 | Validation loss 2.8125277757644653



 10%|███████▌                                                                | 1044/10000 [2:24:11<20:12:39,  8.12s/it]

Epoch: 1043 | Training loss 2.8748442083597183 | Validation loss 2.809734672307968



 10%|███████▌                                                                | 1045/10000 [2:24:19<20:12:11,  8.12s/it]

Epoch: 1044 | Training loss 2.8751066476106644 | Validation loss 2.8083136677742004



 10%|███████▌                                                                | 1046/10000 [2:24:27<20:14:40,  8.14s/it]

Epoch: 1045 | Training loss 2.8762368485331535 | Validation loss 2.798179119825363



 10%|███████▌                                                                | 1047/10000 [2:24:35<20:08:35,  8.10s/it]

Epoch: 1046 | Training loss 2.875069886445999 | Validation loss 2.7996188402175903



 10%|███████▌                                                                | 1048/10000 [2:24:44<20:13:47,  8.14s/it]

Epoch: 1047 | Training loss 2.876672312617302 | Validation loss 2.801714986562729



 10%|███████▌                                                                | 1049/10000 [2:24:52<20:15:29,  8.15s/it]

Epoch: 1048 | Training loss 2.8760936930775642 | Validation loss 2.8158620297908783



 10%|███████▌                                                                | 1050/10000 [2:25:00<20:14:44,  8.14s/it]

Epoch: 1049 | Training loss 2.87915475666523 | Validation loss 2.799995243549347



 11%|███████▌                                                                | 1051/10000 [2:25:08<20:07:13,  8.09s/it]

Epoch: 1050 | Training loss 2.875144235789776 | Validation loss 2.8131488263607025



 11%|███████▌                                                                | 1052/10000 [2:25:16<20:08:50,  8.11s/it]

Epoch: 1051 | Training loss 2.877004772424698 | Validation loss 2.8001136779785156



 11%|███████▌                                                                | 1053/10000 [2:25:24<20:07:50,  8.10s/it]

Epoch: 1052 | Training loss 2.8708938658237457 | Validation loss 2.797419011592865



 11%|███████▌                                                                | 1054/10000 [2:25:32<20:15:28,  8.15s/it]

Epoch: 1053 | Training loss 2.874665178358555 | Validation loss 2.799436032772064



 11%|███████▌                                                                | 1055/10000 [2:25:40<20:09:38,  8.11s/it]

Epoch: 1054 | Training loss 2.874853439629078 | Validation loss 2.802611917257309



 11%|███████▌                                                                | 1056/10000 [2:25:49<20:10:17,  8.12s/it]

Epoch: 1055 | Training loss 2.8775132074952126 | Validation loss 2.79720675945282



 11%|███████▌                                                                | 1057/10000 [2:25:57<20:10:46,  8.12s/it]

Epoch: 1056 | Training loss 2.883777365088463 | Validation loss 2.8013594448566437



 11%|███████▌                                                                | 1058/10000 [2:26:05<20:07:36,  8.10s/it]

Epoch: 1057 | Training loss 2.878142736852169 | Validation loss 2.8214386999607086



 11%|███████▌                                                                | 1059/10000 [2:26:13<20:09:33,  8.12s/it]

Epoch: 1058 | Training loss 2.8778846487402916 | Validation loss 2.802238553762436



 11%|███████▋                                                                | 1060/10000 [2:26:21<20:06:00,  8.09s/it]

Epoch: 1059 | Training loss 2.8747621551156044 | Validation loss 2.807527005672455



 11%|███████▋                                                                | 1061/10000 [2:26:29<20:09:15,  8.12s/it]

Epoch: 1060 | Training loss 2.879752404987812 | Validation loss 2.8016114830970764



 11%|███████▋                                                                | 1062/10000 [2:26:37<20:10:33,  8.13s/it]

Epoch: 1061 | Training loss 2.877230204641819 | Validation loss 2.803044319152832



 11%|███████▋                                                                | 1063/10000 [2:26:45<20:12:11,  8.14s/it]

Epoch: 1062 | Training loss 2.8785953298211098 | Validation loss 2.8007432222366333



 11%|███████▋                                                                | 1064/10000 [2:26:54<20:14:33,  8.16s/it]

Epoch: 1063 | Training loss 2.874101646244526 | Validation loss 2.8128849864006042



 11%|███████▋                                                                | 1065/10000 [2:27:02<20:09:02,  8.12s/it]

Epoch: 1064 | Training loss 2.8737800121307373 | Validation loss 2.804094910621643



 11%|███████▋                                                                | 1066/10000 [2:27:10<20:08:49,  8.12s/it]

Epoch: 1065 | Training loss 2.873802661895752 | Validation loss 2.8004875481128693



 11%|███████▋                                                                | 1067/10000 [2:27:18<20:07:09,  8.11s/it]

Epoch: 1066 | Training loss 2.8751399740576744 | Validation loss 2.800092786550522



 11%|███████▋                                                                | 1068/10000 [2:27:26<20:09:20,  8.12s/it]

Epoch: 1067 | Training loss 2.875975862145424 | Validation loss 2.809146285057068



 11%|███████▋                                                                | 1069/10000 [2:27:34<20:14:24,  8.16s/it]

Epoch: 1068 | Training loss 2.8800670132040977 | Validation loss 2.80430606007576



 11%|███████▋                                                                | 1070/10000 [2:27:42<20:17:03,  8.18s/it]

Epoch: 1069 | Training loss 2.8745610490441322 | Validation loss 2.8039908707141876



 11%|███████▋                                                                | 1071/10000 [2:27:51<20:19:54,  8.20s/it]

Epoch: 1070 | Training loss 2.8715743720531464 | Validation loss 2.803562730550766



 11%|███████▋                                                                | 1072/10000 [2:27:59<20:21:03,  8.21s/it]

Epoch: 1071 | Training loss 2.8803688064217567 | Validation loss 2.8091856837272644



 11%|███████▋                                                                | 1073/10000 [2:28:07<20:18:44,  8.19s/it]

Epoch: 1072 | Training loss 2.877064570784569 | Validation loss 2.8073424100875854



 11%|███████▋                                                                | 1074/10000 [2:28:15<20:13:08,  8.15s/it]

Epoch: 1073 | Training loss 2.8765399008989334 | Validation loss 2.802848607301712



 11%|███████▋                                                                | 1075/10000 [2:28:23<20:13:28,  8.16s/it]

Epoch: 1074 | Training loss 2.8740105107426643 | Validation loss 2.8031005859375



 11%|███████▋                                                                | 1076/10000 [2:28:31<20:12:24,  8.15s/it]

Epoch: 1075 | Training loss 2.8757340013980865 | Validation loss 2.80272313952446



 11%|███████▊                                                                | 1077/10000 [2:28:39<20:07:15,  8.12s/it]

Epoch: 1076 | Training loss 2.872689038515091 | Validation loss 2.8045665621757507



 11%|███████▊                                                                | 1078/10000 [2:28:48<20:07:48,  8.12s/it]

Epoch: 1077 | Training loss 2.876484416425228 | Validation loss 2.8063253462314606



 11%|███████▊                                                                | 1079/10000 [2:28:56<20:05:19,  8.11s/it]

Epoch: 1078 | Training loss 2.877075545489788 | Validation loss 2.8075628876686096



 11%|███████▊                                                                | 1080/10000 [2:29:04<19:58:51,  8.06s/it]

Epoch: 1079 | Training loss 2.8753954097628593 | Validation loss 2.7970443069934845



 11%|███████▊                                                                | 1081/10000 [2:29:12<20:02:27,  8.09s/it]

Epoch: 1080 | Training loss 2.8732537031173706 | Validation loss 2.802506685256958



 11%|███████▊                                                                | 1082/10000 [2:29:20<20:05:37,  8.11s/it]

Epoch: 1081 | Training loss 2.87427119910717 | Validation loss 2.799077183008194



 11%|███████▊                                                                | 1083/10000 [2:29:28<20:04:23,  8.10s/it]

Epoch: 1082 | Training loss 2.870141752064228 | Validation loss 2.8038054406642914



 11%|███████▊                                                                | 1084/10000 [2:29:36<20:02:05,  8.09s/it]

Epoch: 1083 | Training loss 2.8756387010216713 | Validation loss 2.8010092973709106



 11%|███████▊                                                                | 1085/10000 [2:29:44<19:57:37,  8.06s/it]

Epoch: 1084 | Training loss 2.8742102533578873 | Validation loss 2.7984114289283752



 11%|███████▊                                                                | 1086/10000 [2:29:52<20:03:32,  8.10s/it]

Epoch: 1085 | Training loss 2.876087225973606 | Validation loss 2.810447931289673



 11%|███████▊                                                                | 1087/10000 [2:30:00<20:02:42,  8.10s/it]

Epoch: 1086 | Training loss 2.8766137212514877 | Validation loss 2.803845524787903



 11%|███████▊                                                                | 1088/10000 [2:30:08<20:01:38,  8.09s/it]

Epoch: 1087 | Training loss 2.8777660951018333 | Validation loss 2.8074715435504913



 11%|███████▊                                                                | 1089/10000 [2:30:17<19:59:57,  8.08s/it]

Epoch: 1088 | Training loss 2.877966456115246 | Validation loss 2.8034935891628265



 11%|███████▊                                                                | 1090/10000 [2:30:25<19:58:43,  8.07s/it]

Epoch: 1089 | Training loss 2.873120926320553 | Validation loss 2.805916488170624



 11%|███████▊                                                                | 1091/10000 [2:30:33<20:04:08,  8.11s/it]

Epoch: 1090 | Training loss 2.8731177896261215 | Validation loss 2.8106736540794373



 11%|███████▊                                                                | 1092/10000 [2:30:41<20:03:41,  8.11s/it]

Epoch: 1091 | Training loss 2.8741913065314293 | Validation loss 2.8122308552265167



 11%|███████▊                                                                | 1093/10000 [2:30:49<20:06:04,  8.12s/it]

Epoch: 1092 | Training loss 2.876719392836094 | Validation loss 2.8040428459644318



 11%|███████▉                                                                | 1094/10000 [2:30:57<20:02:09,  8.10s/it]

Epoch: 1093 | Training loss 2.8752431347966194 | Validation loss 2.8069077730178833



 11%|███████▉                                                                | 1095/10000 [2:31:05<20:01:31,  8.10s/it]

Epoch: 1094 | Training loss 2.875077150762081 | Validation loss 2.799049347639084



 11%|███████▉                                                                | 1096/10000 [2:31:13<20:02:59,  8.11s/it]

Epoch: 1095 | Training loss 2.8753988668322563 | Validation loss 2.8093498051166534



 11%|███████▉                                                                | 1097/10000 [2:31:21<20:03:25,  8.11s/it]

Epoch: 1096 | Training loss 2.877021811902523 | Validation loss 2.8009881377220154



 11%|███████▉                                                                | 1098/10000 [2:31:30<20:03:39,  8.11s/it]

Epoch: 1097 | Training loss 2.874879963696003 | Validation loss 2.809214949607849



 11%|███████▉                                                                | 1099/10000 [2:31:38<20:01:40,  8.10s/it]

Epoch: 1098 | Training loss 2.872073695063591 | Validation loss 2.8012064695358276



 11%|███████▉                                                                | 1100/10000 [2:31:46<20:00:16,  8.09s/it]

Epoch: 1099 | Training loss 2.873072437942028 | Validation loss 2.8044154047966003



 11%|███████▉                                                                | 1101/10000 [2:31:54<20:06:50,  8.14s/it]

Epoch: 1100 | Training loss 2.8704272732138634 | Validation loss 2.7998946011066437



 11%|███████▉                                                                | 1102/10000 [2:32:02<20:11:32,  8.17s/it]

Epoch: 1101 | Training loss 2.866913139820099 | Validation loss 2.8123691976070404



 11%|███████▉                                                                | 1103/10000 [2:32:10<20:11:59,  8.17s/it]

Epoch: 1102 | Training loss 2.87922515720129 | Validation loss 2.8169698119163513



 11%|███████▉                                                                | 1104/10000 [2:32:18<20:10:38,  8.17s/it]

Epoch: 1103 | Training loss 2.8773825988173485 | Validation loss 2.8031485974788666



 11%|███████▉                                                                | 1105/10000 [2:32:27<20:09:02,  8.16s/it]

Epoch: 1104 | Training loss 2.878133863210678 | Validation loss 2.813477724790573



 11%|███████▉                                                                | 1106/10000 [2:32:35<20:11:23,  8.17s/it]

Epoch: 1105 | Training loss 2.872491016983986 | Validation loss 2.7982896268367767



 11%|███████▉                                                                | 1107/10000 [2:32:43<20:07:12,  8.14s/it]

Epoch: 1106 | Training loss 2.8743214905261993 | Validation loss 2.8130476772785187



 11%|███████▉                                                                | 1108/10000 [2:32:51<20:07:17,  8.15s/it]

Epoch: 1107 | Training loss 2.875529758632183 | Validation loss 2.804640769958496



 11%|███████▉                                                                | 1109/10000 [2:32:59<20:07:55,  8.15s/it]

Epoch: 1108 | Training loss 2.8768719881772995 | Validation loss 2.799529045820236



 11%|███████▉                                                                | 1110/10000 [2:33:07<20:02:00,  8.11s/it]

Epoch: 1109 | Training loss 2.8766842409968376 | Validation loss 2.8053291738033295



 11%|███████▉                                                                | 1111/10000 [2:33:15<19:59:30,  8.10s/it]

Epoch: 1110 | Training loss 2.8769607692956924 | Validation loss 2.814814269542694



 11%|████████                                                                | 1112/10000 [2:33:23<19:59:35,  8.10s/it]

Epoch: 1111 | Training loss 2.8753615245223045 | Validation loss 2.8123944997787476



 11%|████████                                                                | 1113/10000 [2:33:32<20:01:00,  8.11s/it]

Epoch: 1112 | Training loss 2.879521533846855 | Validation loss 2.7968565821647644



 11%|████████                                                                | 1114/10000 [2:33:40<19:57:10,  8.08s/it]

Epoch: 1113 | Training loss 2.874584883451462 | Validation loss 2.802185744047165



 11%|████████                                                                | 1115/10000 [2:33:48<19:56:29,  8.08s/it]

Epoch: 1114 | Training loss 2.878571853041649 | Validation loss 2.8146983981132507



 11%|████████                                                                | 1116/10000 [2:33:56<19:59:30,  8.10s/it]

Epoch: 1115 | Training loss 2.8757633566856384 | Validation loss 2.8040182292461395



 11%|████████                                                                | 1117/10000 [2:34:04<20:02:48,  8.12s/it]

Epoch: 1116 | Training loss 2.8756118044257164 | Validation loss 2.8008869290351868



 11%|████████                                                                | 1118/10000 [2:34:12<19:59:46,  8.10s/it]

Epoch: 1117 | Training loss 2.8721707314252853 | Validation loss 2.7991996109485626



 11%|████████                                                                | 1119/10000 [2:34:20<19:56:02,  8.08s/it]

Epoch: 1118 | Training loss 2.87497428804636 | Validation loss 2.8007482290267944



 11%|████████                                                                | 1120/10000 [2:34:28<19:57:14,  8.09s/it]

Epoch: 1119 | Training loss 2.876616694033146 | Validation loss 2.7944225668907166



 11%|████████                                                                | 1121/10000 [2:34:36<19:58:17,  8.10s/it]

Epoch: 1120 | Training loss 2.875408060848713 | Validation loss 2.802245706319809



 11%|████████                                                                | 1122/10000 [2:34:45<20:04:39,  8.14s/it]

Epoch: 1121 | Training loss 2.877625398337841 | Validation loss 2.807732582092285



 11%|████████                                                                | 1123/10000 [2:34:53<20:02:56,  8.13s/it]

Epoch: 1122 | Training loss 2.8748756423592567 | Validation loss 2.800467222929001



 11%|████████                                                                | 1124/10000 [2:35:01<20:00:47,  8.12s/it]

Epoch: 1123 | Training loss 2.877691462635994 | Validation loss 2.804555267095566



 11%|████████                                                                | 1125/10000 [2:35:09<20:02:02,  8.13s/it]

Epoch: 1124 | Training loss 2.873041585087776 | Validation loss 2.805229902267456



 11%|████████                                                                | 1126/10000 [2:35:17<20:00:49,  8.12s/it]

Epoch: 1125 | Training loss 2.872008576989174 | Validation loss 2.803641200065613



 11%|████████                                                                | 1127/10000 [2:35:25<20:01:11,  8.12s/it]

Epoch: 1126 | Training loss 2.876982942223549 | Validation loss 2.7999909818172455



 11%|████████                                                                | 1128/10000 [2:35:33<19:57:18,  8.10s/it]

Epoch: 1127 | Training loss 2.877037763595581 | Validation loss 2.8039514124393463



 11%|████████▏                                                               | 1129/10000 [2:35:41<19:55:17,  8.08s/it]

Epoch: 1128 | Training loss 2.873114734888077 | Validation loss 2.80575630068779



 11%|████████▏                                                               | 1130/10000 [2:35:49<19:56:34,  8.09s/it]

Epoch: 1129 | Training loss 2.873530074954033 | Validation loss 2.807909518480301



 11%|████████▏                                                               | 1131/10000 [2:35:57<19:59:31,  8.11s/it]

Epoch: 1130 | Training loss 2.8689376786351204 | Validation loss 2.8016470670700073



 11%|████████▏                                                               | 1132/10000 [2:36:06<19:58:50,  8.11s/it]

Epoch: 1131 | Training loss 2.877344496548176 | Validation loss 2.8021068274974823



 11%|████████▏                                                               | 1133/10000 [2:36:14<20:02:27,  8.14s/it]

Epoch: 1132 | Training loss 2.8749559223651886 | Validation loss 2.8020387291908264



 11%|████████▏                                                               | 1134/10000 [2:36:22<20:03:41,  8.15s/it]

Epoch: 1133 | Training loss 2.8716925233602524 | Validation loss 2.799372434616089



 11%|████████▏                                                               | 1135/10000 [2:36:30<20:04:56,  8.16s/it]

Epoch: 1134 | Training loss 2.8801557049155235 | Validation loss 2.8078054785728455



 11%|████████▏                                                               | 1136/10000 [2:36:38<20:01:54,  8.14s/it]

Epoch: 1135 | Training loss 2.873145341873169 | Validation loss 2.8032466769218445



 11%|████████▏                                                               | 1137/10000 [2:36:46<19:56:13,  8.10s/it]

Epoch: 1136 | Training loss 2.8705532178282738 | Validation loss 2.805289626121521



 11%|████████▏                                                               | 1138/10000 [2:36:54<19:56:51,  8.10s/it]

Epoch: 1137 | Training loss 2.8761335760354996 | Validation loss 2.8054964542388916



 11%|████████▏                                                               | 1139/10000 [2:37:02<19:57:26,  8.11s/it]

Epoch: 1138 | Training loss 2.873400092124939 | Validation loss 2.797871708869934



 11%|████████▏                                                               | 1140/10000 [2:37:11<19:57:40,  8.11s/it]

Epoch: 1139 | Training loss 2.8771509155631065 | Validation loss 2.801269829273224



 11%|████████▏                                                               | 1141/10000 [2:37:19<19:52:55,  8.08s/it]

Epoch: 1140 | Training loss 2.874625340104103 | Validation loss 2.8091512322425842



 11%|████████▏                                                               | 1142/10000 [2:37:27<19:58:34,  8.12s/it]

Epoch: 1141 | Training loss 2.8752237111330032 | Validation loss 2.8022505939006805



 11%|████████▏                                                               | 1143/10000 [2:37:35<19:54:46,  8.09s/it]

Epoch: 1142 | Training loss 2.878007359802723 | Validation loss 2.8103046119213104



 11%|████████▏                                                               | 1144/10000 [2:37:43<19:57:04,  8.11s/it]

Epoch: 1143 | Training loss 2.872590996325016 | Validation loss 2.8024744987487793



 11%|████████▏                                                               | 1145/10000 [2:37:51<19:58:39,  8.12s/it]

Epoch: 1144 | Training loss 2.8768956661224365 | Validation loss 2.802495986223221



 11%|████████▎                                                               | 1146/10000 [2:37:59<19:53:34,  8.09s/it]

Epoch: 1145 | Training loss 2.872892238199711 | Validation loss 2.7992312014102936



 11%|████████▎                                                               | 1147/10000 [2:38:07<19:56:06,  8.11s/it]

Epoch: 1146 | Training loss 2.875648520886898 | Validation loss 2.8111107647418976



 11%|████████▎                                                               | 1148/10000 [2:38:15<19:53:57,  8.09s/it]

Epoch: 1147 | Training loss 2.8756096437573433 | Validation loss 2.7999038100242615



 11%|████████▎                                                               | 1149/10000 [2:38:23<19:53:32,  8.09s/it]

Epoch: 1148 | Training loss 2.8825565800070763 | Validation loss 2.81245157122612



 12%|████████▎                                                               | 1150/10000 [2:38:31<19:52:26,  8.08s/it]

Epoch: 1149 | Training loss 2.8756198436021805 | Validation loss 2.802571803331375



 12%|████████▎                                                               | 1151/10000 [2:38:40<19:53:00,  8.09s/it]

Epoch: 1150 | Training loss 2.8750285506248474 | Validation loss 2.8182701468467712



 12%|████████▎                                                               | 1152/10000 [2:38:48<19:59:41,  8.14s/it]

Epoch: 1151 | Training loss 2.87151025980711 | Validation loss 2.8128415942192078



 12%|████████▎                                                               | 1153/10000 [2:38:56<19:56:09,  8.11s/it]

Epoch: 1152 | Training loss 2.8798383101820946 | Validation loss 2.802907794713974



 12%|████████▎                                                               | 1154/10000 [2:39:04<19:57:41,  8.12s/it]

Epoch: 1153 | Training loss 2.88107118755579 | Validation loss 2.809490442276001



 12%|████████▎                                                               | 1155/10000 [2:39:12<19:54:45,  8.10s/it]

Epoch: 1154 | Training loss 2.872510999441147 | Validation loss 2.8026234209537506



 12%|████████▎                                                               | 1156/10000 [2:39:20<19:49:42,  8.07s/it]

Epoch: 1155 | Training loss 2.875045321881771 | Validation loss 2.796790897846222



 12%|████████▎                                                               | 1157/10000 [2:39:28<19:51:32,  8.08s/it]

Epoch: 1156 | Training loss 2.8769562914967537 | Validation loss 2.8058115243911743



 12%|████████▎                                                               | 1158/10000 [2:39:36<19:55:16,  8.11s/it]

Epoch: 1157 | Training loss 2.873041532933712 | Validation loss 2.8087413609027863



 12%|████████▎                                                               | 1159/10000 [2:39:44<19:54:48,  8.11s/it]

Epoch: 1158 | Training loss 2.8783368170261383 | Validation loss 2.803895026445389



 12%|████████▎                                                               | 1160/10000 [2:39:53<19:59:19,  8.14s/it]

Epoch: 1159 | Training loss 2.875461108982563 | Validation loss 2.800531357526779



 12%|████████▎                                                               | 1161/10000 [2:40:01<19:56:08,  8.12s/it]

Epoch: 1160 | Training loss 2.8778389766812325 | Validation loss 2.8068011105060577



 12%|████████▎                                                               | 1162/10000 [2:40:09<19:53:49,  8.10s/it]

Epoch: 1161 | Training loss 2.877268649637699 | Validation loss 2.8122784197330475



 12%|████████▎                                                               | 1163/10000 [2:40:17<19:56:16,  8.12s/it]

Epoch: 1162 | Training loss 2.8769308403134346 | Validation loss 2.804177314043045



 12%|████████▍                                                               | 1164/10000 [2:40:25<19:56:17,  8.12s/it]

Epoch: 1163 | Training loss 2.8737318962812424 | Validation loss 2.805118054151535



 12%|████████▍                                                               | 1165/10000 [2:40:33<19:51:05,  8.09s/it]

Epoch: 1164 | Training loss 2.873762160539627 | Validation loss 2.8031140565872192



 12%|████████▍                                                               | 1166/10000 [2:40:41<19:56:17,  8.13s/it]

Epoch: 1165 | Training loss 2.880945436656475 | Validation loss 2.802564263343811



 12%|████████▍                                                               | 1167/10000 [2:40:49<19:57:09,  8.13s/it]

Epoch: 1166 | Training loss 2.8753606379032135 | Validation loss 2.803911507129669



 12%|████████▍                                                               | 1168/10000 [2:40:58<19:57:00,  8.13s/it]

Epoch: 1167 | Training loss 2.8696744441986084 | Validation loss 2.8048740327358246



 12%|████████▍                                                               | 1169/10000 [2:41:06<19:54:15,  8.11s/it]

Epoch: 1168 | Training loss 2.87668763846159 | Validation loss 2.804742693901062



 12%|████████▍                                                               | 1170/10000 [2:41:14<19:55:19,  8.12s/it]

Epoch: 1169 | Training loss 2.875652238726616 | Validation loss 2.806016981601715



 12%|████████▍                                                               | 1171/10000 [2:41:22<19:57:53,  8.14s/it]

Epoch: 1170 | Training loss 2.8720123544335365 | Validation loss 2.7979550659656525



 12%|████████▍                                                               | 1172/10000 [2:41:30<19:55:22,  8.12s/it]

Epoch: 1171 | Training loss 2.8738530427217484 | Validation loss 2.8024045526981354



 12%|████████▍                                                               | 1173/10000 [2:41:38<20:00:29,  8.16s/it]

Epoch: 1172 | Training loss 2.8794454857707024 | Validation loss 2.809729903936386



 12%|████████▍                                                               | 1174/10000 [2:41:46<19:53:36,  8.11s/it]

Epoch: 1173 | Training loss 2.8774679377675056 | Validation loss 2.806393653154373



 12%|████████▍                                                               | 1175/10000 [2:41:54<19:52:51,  8.11s/it]

Epoch: 1174 | Training loss 2.8741693049669266 | Validation loss 2.8089590668678284



 12%|████████▍                                                               | 1176/10000 [2:42:03<19:57:46,  8.14s/it]

Epoch: 1175 | Training loss 2.865968905389309 | Validation loss 2.809191793203354



 12%|████████▍                                                               | 1177/10000 [2:42:11<19:57:42,  8.14s/it]

Epoch: 1176 | Training loss 2.8761828169226646 | Validation loss 2.8092267513275146



 12%|████████▍                                                               | 1178/10000 [2:42:19<19:56:21,  8.14s/it]

Epoch: 1177 | Training loss 2.8732573240995407 | Validation loss 2.8065564930438995



 12%|████████▍                                                               | 1179/10000 [2:42:27<19:58:54,  8.15s/it]

Epoch: 1178 | Training loss 2.8758550509810448 | Validation loss 2.80790513753891



 12%|████████▍                                                               | 1180/10000 [2:42:35<19:55:06,  8.13s/it]

Epoch: 1179 | Training loss 2.877829998731613 | Validation loss 2.8105070888996124



 12%|████████▌                                                               | 1181/10000 [2:42:43<19:51:36,  8.11s/it]

Epoch: 1180 | Training loss 2.8794653117656708 | Validation loss 2.80775448679924



 12%|████████▌                                                               | 1182/10000 [2:42:52<19:59:30,  8.16s/it]

Epoch: 1181 | Training loss 2.8752215206623077 | Validation loss 2.8082570135593414



 12%|████████▌                                                               | 1183/10000 [2:43:00<19:54:40,  8.13s/it]

Epoch: 1182 | Training loss 2.8754774257540703 | Validation loss 2.801171690225601



 12%|████████▌                                                               | 1184/10000 [2:43:08<19:50:27,  8.10s/it]

Epoch: 1183 | Training loss 2.8751639053225517 | Validation loss 2.8013000190258026



 12%|████████▌                                                               | 1185/10000 [2:43:16<19:50:20,  8.10s/it]

Epoch: 1184 | Training loss 2.8722117990255356 | Validation loss 2.8059761822223663



 12%|████████▌                                                               | 1186/10000 [2:43:24<19:46:01,  8.07s/it]

Epoch: 1185 | Training loss 2.871975250542164 | Validation loss 2.832239717245102



 12%|████████▌                                                               | 1187/10000 [2:43:32<19:46:24,  8.08s/it]

Epoch: 1186 | Training loss 2.877660922706127 | Validation loss 2.8077889382839203



 12%|████████▌                                                               | 1188/10000 [2:43:40<19:49:04,  8.10s/it]

Epoch: 1187 | Training loss 2.874907560646534 | Validation loss 2.8227946162223816



 12%|████████▌                                                               | 1189/10000 [2:43:48<19:50:32,  8.11s/it]

Epoch: 1188 | Training loss 2.873921126127243 | Validation loss 2.8024259209632874



 12%|████████▌                                                               | 1190/10000 [2:43:56<19:49:26,  8.10s/it]

Epoch: 1189 | Training loss 2.8723418191075325 | Validation loss 2.8043904304504395



 12%|████████▌                                                               | 1191/10000 [2:44:04<19:54:16,  8.13s/it]

Epoch: 1190 | Training loss 2.873283088207245 | Validation loss 2.813582122325897



 12%|████████▌                                                               | 1192/10000 [2:44:13<19:52:40,  8.12s/it]

Epoch: 1191 | Training loss 2.8741449639201164 | Validation loss 2.8070864975452423



 12%|████████▌                                                               | 1193/10000 [2:44:21<19:52:55,  8.13s/it]

Epoch: 1192 | Training loss 2.875858925282955 | Validation loss 2.8092513382434845



 12%|████████▌                                                               | 1194/10000 [2:44:29<19:42:03,  8.05s/it]

Epoch: 1193 | Training loss 2.8759123980998993 | Validation loss 2.814058840274811



 12%|████████▌                                                               | 1195/10000 [2:44:37<19:43:18,  8.06s/it]

Epoch: 1194 | Training loss 2.877204179763794 | Validation loss 2.8023190796375275



 12%|████████▌                                                               | 1196/10000 [2:44:45<19:47:33,  8.09s/it]

Epoch: 1195 | Training loss 2.8762284964323044 | Validation loss 2.8054690062999725



 12%|████████▌                                                               | 1197/10000 [2:44:53<19:49:14,  8.11s/it]

Epoch: 1196 | Training loss 2.8737490251660347 | Validation loss 2.801407039165497



 12%|████████▋                                                               | 1198/10000 [2:45:01<19:56:33,  8.16s/it]

Epoch: 1197 | Training loss 2.8799332678318024 | Validation loss 2.800004720687866



 12%|████████▋                                                               | 1199/10000 [2:45:09<19:52:42,  8.13s/it]

Epoch: 1198 | Training loss 2.878939710557461 | Validation loss 2.802430033683777



 12%|████████▋                                                               | 1200/10000 [2:45:17<19:49:13,  8.11s/it]

Epoch: 1199 | Training loss 2.87756534665823 | Validation loss 2.802938610315323



 12%|████████▋                                                               | 1201/10000 [2:45:26<19:55:45,  8.15s/it]

Epoch: 1200 | Training loss 2.8739887550473213 | Validation loss 2.805487424135208



 12%|████████▋                                                               | 1202/10000 [2:45:34<19:54:06,  8.14s/it]

Epoch: 1201 | Training loss 2.8714199885725975 | Validation loss 2.8110128939151764



 12%|████████▋                                                               | 1203/10000 [2:45:42<19:53:35,  8.14s/it]

Epoch: 1202 | Training loss 2.874387316405773 | Validation loss 2.8105087280273438



 12%|████████▋                                                               | 1204/10000 [2:45:50<19:53:01,  8.14s/it]

Epoch: 1203 | Training loss 2.8780191615223885 | Validation loss 2.8089851438999176



 12%|████████▋                                                               | 1205/10000 [2:45:58<19:49:09,  8.11s/it]

Epoch: 1204 | Training loss 2.8733618184924126 | Validation loss 2.8042413890361786



 12%|████████▋                                                               | 1206/10000 [2:46:06<19:50:41,  8.12s/it]

Epoch: 1205 | Training loss 2.876569412648678 | Validation loss 2.8033048510551453



 12%|████████▋                                                               | 1207/10000 [2:46:14<19:52:56,  8.14s/it]

Epoch: 1206 | Training loss 2.873864769935608 | Validation loss 2.7977783381938934



 12%|████████▋                                                               | 1208/10000 [2:46:23<19:55:58,  8.16s/it]

Epoch: 1207 | Training loss 2.8730989396572113 | Validation loss 2.8021603524684906



 12%|████████▋                                                               | 1209/10000 [2:46:31<19:51:05,  8.13s/it]

Epoch: 1208 | Training loss 2.875939019024372 | Validation loss 2.80013170838356



 12%|████████▋                                                               | 1210/10000 [2:46:39<19:48:23,  8.11s/it]

Epoch: 1209 | Training loss 2.8754658550024033 | Validation loss 2.801994204521179



 12%|████████▋                                                               | 1211/10000 [2:46:47<19:52:40,  8.14s/it]

Epoch: 1210 | Training loss 2.8739212304353714 | Validation loss 2.800401896238327



 12%|████████▋                                                               | 1212/10000 [2:46:55<19:53:32,  8.15s/it]

Epoch: 1211 | Training loss 2.8716526553034782 | Validation loss 2.8038694262504578



 12%|████████▋                                                               | 1213/10000 [2:47:03<19:50:32,  8.13s/it]

Epoch: 1212 | Training loss 2.8740923777222633 | Validation loss 2.7978448271751404



 12%|████████▋                                                               | 1214/10000 [2:47:11<19:53:18,  8.15s/it]

Epoch: 1213 | Training loss 2.8761804923415184 | Validation loss 2.808865964412689



 12%|████████▋                                                               | 1215/10000 [2:47:19<19:52:27,  8.14s/it]

Epoch: 1214 | Training loss 2.877298042178154 | Validation loss 2.809481054544449



 12%|████████▊                                                               | 1216/10000 [2:47:28<19:53:14,  8.15s/it]

Epoch: 1215 | Training loss 2.877106733620167 | Validation loss 2.804657608270645



 12%|████████▊                                                               | 1217/10000 [2:47:36<19:50:53,  8.14s/it]

Epoch: 1216 | Training loss 2.8787851482629776 | Validation loss 2.8055693209171295



 12%|████████▊                                                               | 1218/10000 [2:47:44<19:50:34,  8.13s/it]

Epoch: 1217 | Training loss 2.876937784254551 | Validation loss 2.8078457415103912



 12%|████████▊                                                               | 1219/10000 [2:47:52<19:46:58,  8.11s/it]

Epoch: 1218 | Training loss 2.8761647939682007 | Validation loss 2.8004877269268036



 12%|████████▊                                                               | 1220/10000 [2:48:00<19:43:42,  8.09s/it]

Epoch: 1219 | Training loss 2.8719924613833427 | Validation loss 2.8040750324726105



 12%|████████▊                                                               | 1221/10000 [2:48:08<19:41:22,  8.07s/it]

Epoch: 1220 | Training loss 2.875341519713402 | Validation loss 2.802963674068451



 12%|████████▊                                                               | 1222/10000 [2:48:16<19:43:15,  8.09s/it]

Epoch: 1221 | Training loss 2.8699814900755882 | Validation loss 2.8020529747009277



 12%|████████▊                                                               | 1223/10000 [2:48:24<19:47:54,  8.12s/it]

Epoch: 1222 | Training loss 2.8774943873286247 | Validation loss 2.8030960261821747



 12%|████████▊                                                               | 1224/10000 [2:48:33<19:51:41,  8.15s/it]

Epoch: 1223 | Training loss 2.877486675977707 | Validation loss 2.8026640117168427



 12%|████████▊                                                               | 1225/10000 [2:48:41<19:49:32,  8.13s/it]

Epoch: 1224 | Training loss 2.8708580657839775 | Validation loss 2.8043324649333954



 12%|████████▊                                                               | 1226/10000 [2:48:49<19:47:17,  8.12s/it]

Epoch: 1225 | Training loss 2.8741086944937706 | Validation loss 2.8155751824378967



 12%|████████▊                                                               | 1227/10000 [2:48:57<19:50:27,  8.14s/it]

Epoch: 1226 | Training loss 2.875551722943783 | Validation loss 2.8126852810382843



 12%|████████▊                                                               | 1228/10000 [2:49:05<19:48:31,  8.13s/it]

Epoch: 1227 | Training loss 2.8778722137212753 | Validation loss 2.804420530796051



 12%|████████▊                                                               | 1229/10000 [2:49:13<19:47:50,  8.13s/it]

Epoch: 1228 | Training loss 2.8732558488845825 | Validation loss 2.7990776896476746



 12%|████████▊                                                               | 1230/10000 [2:49:21<19:46:48,  8.12s/it]

Epoch: 1229 | Training loss 2.8798412904143333 | Validation loss 2.8016782104969025



 12%|████████▊                                                               | 1231/10000 [2:49:29<19:41:34,  8.08s/it]

Epoch: 1230 | Training loss 2.8761957436800003 | Validation loss 2.8031250536441803



 12%|████████▊                                                               | 1232/10000 [2:49:37<19:45:06,  8.11s/it]

Epoch: 1231 | Training loss 2.8742226883769035 | Validation loss 2.803144335746765



 12%|████████▉                                                               | 1233/10000 [2:49:46<19:48:40,  8.14s/it]

Epoch: 1232 | Training loss 2.8702645674347878 | Validation loss 2.80965456366539



 12%|████████▉                                                               | 1234/10000 [2:49:54<19:49:46,  8.14s/it]

Epoch: 1233 | Training loss 2.8787538558244705 | Validation loss 2.802669644355774



 12%|████████▉                                                               | 1235/10000 [2:50:02<19:45:40,  8.12s/it]

Epoch: 1234 | Training loss 2.8770275712013245 | Validation loss 2.804077684879303



 12%|████████▉                                                               | 1236/10000 [2:50:10<19:47:05,  8.13s/it]

Epoch: 1235 | Training loss 2.8760984614491463 | Validation loss 2.806312620639801



 12%|████████▉                                                               | 1237/10000 [2:50:18<19:45:50,  8.12s/it]

Epoch: 1236 | Training loss 2.8750539869070053 | Validation loss 2.8041647374629974



 12%|████████▉                                                               | 1238/10000 [2:50:26<19:47:46,  8.13s/it]

Epoch: 1237 | Training loss 2.877068392932415 | Validation loss 2.8002021312713623



 12%|████████▉                                                               | 1239/10000 [2:50:34<19:50:20,  8.15s/it]

Epoch: 1238 | Training loss 2.874031975865364 | Validation loss 2.805312931537628



 12%|████████▉                                                               | 1240/10000 [2:50:43<19:50:30,  8.15s/it]

Epoch: 1239 | Training loss 2.875674732029438 | Validation loss 2.8042148649692535



 12%|████████▉                                                               | 1241/10000 [2:50:51<19:53:33,  8.18s/it]

Epoch: 1240 | Training loss 2.8758436515927315 | Validation loss 2.8055547177791595



 12%|████████▉                                                               | 1242/10000 [2:50:59<19:48:00,  8.14s/it]

Epoch: 1241 | Training loss 2.8778371289372444 | Validation loss 2.8073222637176514



 12%|████████▉                                                               | 1243/10000 [2:51:07<19:43:33,  8.11s/it]

Epoch: 1242 | Training loss 2.8799821585416794 | Validation loss 2.801287978887558



 12%|████████▉                                                               | 1244/10000 [2:51:15<19:41:45,  8.10s/it]

Epoch: 1243 | Training loss 2.8759918957948685 | Validation loss 2.804946482181549



 12%|████████▉                                                               | 1245/10000 [2:51:23<19:41:46,  8.10s/it]

Epoch: 1244 | Training loss 2.873639225959778 | Validation loss 2.80180561542511



 12%|████████▉                                                               | 1246/10000 [2:51:31<19:45:59,  8.13s/it]

Epoch: 1245 | Training loss 2.873643673956394 | Validation loss 2.8012389540672302



 12%|████████▉                                                               | 1247/10000 [2:51:39<19:49:24,  8.15s/it]

Epoch: 1246 | Training loss 2.877108432352543 | Validation loss 2.8089189529418945



 12%|████████▉                                                               | 1248/10000 [2:51:48<19:46:22,  8.13s/it]

Epoch: 1247 | Training loss 2.874039500951767 | Validation loss 2.8021110594272614



 12%|████████▉                                                               | 1249/10000 [2:51:56<19:43:33,  8.11s/it]

Epoch: 1248 | Training loss 2.8761072531342506 | Validation loss 2.799750119447708



 12%|█████████                                                               | 1250/10000 [2:52:04<19:42:52,  8.11s/it]

Epoch: 1249 | Training loss 2.874808557331562 | Validation loss 2.804058253765106



 13%|█████████                                                               | 1251/10000 [2:52:12<19:45:11,  8.13s/it]

Epoch: 1250 | Training loss 2.8756605088710785 | Validation loss 2.7993730008602142



 13%|█████████                                                               | 1252/10000 [2:52:20<19:39:51,  8.09s/it]

Epoch: 1251 | Training loss 2.8760914653539658 | Validation loss 2.8085959255695343



 13%|█████████                                                               | 1253/10000 [2:52:28<19:40:50,  8.10s/it]

Epoch: 1252 | Training loss 2.875631481409073 | Validation loss 2.809644043445587



 13%|█████████                                                               | 1254/10000 [2:52:36<19:40:49,  8.10s/it]

Epoch: 1253 | Training loss 2.8710935339331627 | Validation loss 2.7998918890953064



 13%|█████████                                                               | 1255/10000 [2:52:44<19:40:04,  8.10s/it]

Epoch: 1254 | Training loss 2.875144124031067 | Validation loss 2.809036999940872



 13%|█████████                                                               | 1256/10000 [2:52:52<19:39:29,  8.09s/it]

Epoch: 1255 | Training loss 2.8787299022078514 | Validation loss 2.8077232837677



 13%|█████████                                                               | 1257/10000 [2:53:00<19:32:57,  8.05s/it]

Epoch: 1256 | Training loss 2.874405585229397 | Validation loss 2.8006327748298645



 13%|█████████                                                               | 1258/10000 [2:53:08<19:34:18,  8.06s/it]

Epoch: 1257 | Training loss 2.873294770717621 | Validation loss 2.8044861257076263



 13%|█████████                                                               | 1259/10000 [2:53:16<19:35:23,  8.07s/it]

Epoch: 1258 | Training loss 2.872792065143585 | Validation loss 2.8040677905082703



 13%|█████████                                                               | 1260/10000 [2:53:25<19:34:35,  8.06s/it]

Epoch: 1259 | Training loss 2.876561589539051 | Validation loss 2.8052534759044647



 13%|█████████                                                               | 1261/10000 [2:53:33<19:39:40,  8.10s/it]

Epoch: 1260 | Training loss 2.8764479607343674 | Validation loss 2.8022061586380005



 13%|█████████                                                               | 1262/10000 [2:53:41<19:38:55,  8.10s/it]

Epoch: 1261 | Training loss 2.876624122262001 | Validation loss 2.8001497089862823



 13%|█████████                                                               | 1263/10000 [2:53:49<19:37:06,  8.08s/it]

Epoch: 1262 | Training loss 2.8733856081962585 | Validation loss 2.8146613240242004



 13%|█████████                                                               | 1264/10000 [2:53:57<19:38:28,  8.09s/it]

Epoch: 1263 | Training loss 2.8785970136523247 | Validation loss 2.810622900724411



 13%|█████████                                                               | 1265/10000 [2:54:05<19:37:57,  8.09s/it]

Epoch: 1264 | Training loss 2.8705496340990067 | Validation loss 2.7994819283485413



 13%|█████████                                                               | 1266/10000 [2:54:13<19:36:13,  8.08s/it]

Epoch: 1265 | Training loss 2.8724567890167236 | Validation loss 2.800741046667099



 13%|█████████                                                               | 1267/10000 [2:54:21<19:36:26,  8.08s/it]

Epoch: 1266 | Training loss 2.8763511702418327 | Validation loss 2.8129549622535706



 13%|█████████▏                                                              | 1268/10000 [2:54:29<19:35:47,  8.08s/it]

Epoch: 1267 | Training loss 2.877295009791851 | Validation loss 2.803371340036392



 13%|█████████▏                                                              | 1269/10000 [2:54:37<19:32:10,  8.06s/it]

Epoch: 1268 | Training loss 2.8756931871175766 | Validation loss 2.8047198355197906



 13%|█████████▏                                                              | 1270/10000 [2:54:45<19:36:05,  8.08s/it]

Epoch: 1269 | Training loss 2.871975101530552 | Validation loss 2.8030481040477753



 13%|█████████▏                                                              | 1271/10000 [2:54:53<19:35:22,  8.08s/it]

Epoch: 1270 | Training loss 2.8708393275737762 | Validation loss 2.7998890578746796



 13%|█████████▏                                                              | 1272/10000 [2:55:02<19:39:37,  8.11s/it]

Epoch: 1271 | Training loss 2.8775429129600525 | Validation loss 2.803415596485138



 13%|█████████▏                                                              | 1273/10000 [2:55:10<19:39:52,  8.11s/it]

Epoch: 1272 | Training loss 2.87233816832304 | Validation loss 2.803755909204483



 13%|█████████▏                                                              | 1274/10000 [2:55:18<19:36:41,  8.09s/it]

Epoch: 1273 | Training loss 2.878960818052292 | Validation loss 2.8120432794094086



 13%|█████████▏                                                              | 1275/10000 [2:55:26<19:30:46,  8.05s/it]

Epoch: 1274 | Training loss 2.874248079955578 | Validation loss 2.797416567802429



 13%|█████████▏                                                              | 1276/10000 [2:55:34<19:30:42,  8.05s/it]

Epoch: 1275 | Training loss 2.876162573695183 | Validation loss 2.815100759267807



 13%|█████████▏                                                              | 1277/10000 [2:55:42<19:32:06,  8.06s/it]

Epoch: 1276 | Training loss 2.877202846109867 | Validation loss 2.7970493137836456



 13%|█████████▏                                                              | 1278/10000 [2:55:50<19:35:01,  8.08s/it]

Epoch: 1277 | Training loss 2.8802723437547684 | Validation loss 2.7998529970645905



 13%|█████████▏                                                              | 1279/10000 [2:55:58<19:34:55,  8.08s/it]

Epoch: 1278 | Training loss 2.8724107667803764 | Validation loss 2.8063642978668213



 13%|█████████▏                                                              | 1280/10000 [2:56:06<19:34:53,  8.08s/it]

Epoch: 1279 | Training loss 2.8726827278733253 | Validation loss 2.798913300037384



 13%|█████████▏                                                              | 1281/10000 [2:56:14<19:34:56,  8.09s/it]

Epoch: 1280 | Training loss 2.871079169213772 | Validation loss 2.7996606528759003



 13%|█████████▏                                                              | 1282/10000 [2:56:22<19:37:36,  8.10s/it]

Epoch: 1281 | Training loss 2.8795456662774086 | Validation loss 2.8066749572753906



 13%|█████████▏                                                              | 1283/10000 [2:56:31<19:40:14,  8.12s/it]

Epoch: 1282 | Training loss 2.8823549821972847 | Validation loss 2.8138783276081085



 13%|█████████▏                                                              | 1284/10000 [2:56:39<19:43:07,  8.14s/it]

Epoch: 1283 | Training loss 2.8781375512480736 | Validation loss 2.811977446079254



 13%|█████████▎                                                              | 1285/10000 [2:56:47<19:44:58,  8.16s/it]

Epoch: 1284 | Training loss 2.8806486427783966 | Validation loss 2.801016539335251



 13%|█████████▎                                                              | 1286/10000 [2:56:55<19:43:03,  8.15s/it]

Epoch: 1285 | Training loss 2.8756277933716774 | Validation loss 2.8053365647792816



 13%|█████████▎                                                              | 1287/10000 [2:57:03<19:38:07,  8.11s/it]

Epoch: 1286 | Training loss 2.874968633055687 | Validation loss 2.8012458980083466



 13%|█████████▎                                                              | 1288/10000 [2:57:11<19:40:55,  8.13s/it]

Epoch: 1287 | Training loss 2.8709747791290283 | Validation loss 2.807141363620758



 13%|█████████▎                                                              | 1289/10000 [2:57:19<19:37:54,  8.11s/it]

Epoch: 1288 | Training loss 2.885587103664875 | Validation loss 2.802932024002075



 13%|█████████▎                                                              | 1290/10000 [2:57:28<19:38:42,  8.12s/it]

Epoch: 1289 | Training loss 2.8745916560292244 | Validation loss 2.8110610246658325



 13%|█████████▎                                                              | 1291/10000 [2:57:36<19:35:46,  8.10s/it]

Epoch: 1290 | Training loss 2.8774307519197464 | Validation loss 2.8145025968551636



 13%|█████████▎                                                              | 1292/10000 [2:57:44<19:36:15,  8.10s/it]

Epoch: 1291 | Training loss 2.882999822497368 | Validation loss 2.7979199588298798



 13%|█████████▎                                                              | 1293/10000 [2:57:52<19:42:50,  8.15s/it]

Epoch: 1292 | Training loss 2.8792272061109543 | Validation loss 2.8002836406230927



 13%|█████████▎                                                              | 1294/10000 [2:58:00<19:50:27,  8.20s/it]

Epoch: 1293 | Training loss 2.87308432161808 | Validation loss 2.797773540019989



 13%|█████████▎                                                              | 1295/10000 [2:58:08<19:41:39,  8.14s/it]

Epoch: 1294 | Training loss 2.8747705668210983 | Validation loss 2.8096413910388947



 13%|█████████▎                                                              | 1296/10000 [2:58:16<19:36:51,  8.11s/it]

Epoch: 1295 | Training loss 2.8688171058893204 | Validation loss 2.8058850467205048



 13%|█████████▎                                                              | 1297/10000 [2:58:25<19:40:12,  8.14s/it]

Epoch: 1296 | Training loss 2.8787957802414894 | Validation loss 2.8023487329483032



 13%|█████████▎                                                              | 1298/10000 [2:58:33<19:36:30,  8.11s/it]

Epoch: 1297 | Training loss 2.874432787299156 | Validation loss 2.7990926802158356



 13%|█████████▎                                                              | 1299/10000 [2:58:41<19:38:32,  8.13s/it]

Epoch: 1298 | Training loss 2.875034384429455 | Validation loss 2.8029803037643433



 13%|█████████▎                                                              | 1300/10000 [2:58:49<19:34:38,  8.10s/it]

Epoch: 1299 | Training loss 2.870689280331135 | Validation loss 2.812828540802002



 13%|█████████▎                                                              | 1301/10000 [2:58:57<19:33:05,  8.09s/it]

Epoch: 1300 | Training loss 2.876055061817169 | Validation loss 2.800840198993683



 13%|█████████▎                                                              | 1302/10000 [2:59:05<19:31:20,  8.08s/it]

Epoch: 1301 | Training loss 2.877193108201027 | Validation loss 2.811288446187973



 13%|█████████▍                                                              | 1303/10000 [2:59:13<19:29:30,  8.07s/it]

Epoch: 1302 | Training loss 2.8761959448456764 | Validation loss 2.8108243346214294



 13%|█████████▍                                                              | 1304/10000 [2:59:21<19:26:47,  8.05s/it]

Epoch: 1303 | Training loss 2.8708477318286896 | Validation loss 2.8069995641708374



 13%|█████████▍                                                              | 1305/10000 [2:59:29<19:24:50,  8.04s/it]

Epoch: 1304 | Training loss 2.879365347325802 | Validation loss 2.804154247045517



 13%|█████████▍                                                              | 1306/10000 [2:59:37<19:22:45,  8.02s/it]

Epoch: 1305 | Training loss 2.8707379773259163 | Validation loss 2.802986800670624



 13%|█████████▍                                                              | 1307/10000 [2:59:45<19:24:05,  8.03s/it]

Epoch: 1306 | Training loss 2.874894790351391 | Validation loss 2.800225257873535



 13%|█████████▍                                                              | 1308/10000 [2:59:53<19:28:21,  8.07s/it]

Epoch: 1307 | Training loss 2.874846898019314 | Validation loss 2.803435742855072



 13%|█████████▍                                                              | 1309/10000 [3:00:01<19:29:47,  8.08s/it]

Epoch: 1308 | Training loss 2.8763718381524086 | Validation loss 2.8082115948200226



 13%|█████████▍                                                              | 1310/10000 [3:00:09<19:33:03,  8.10s/it]

Epoch: 1309 | Training loss 2.8749852180480957 | Validation loss 2.8002882599830627



 13%|█████████▍                                                              | 1311/10000 [3:00:17<19:32:54,  8.10s/it]

Epoch: 1310 | Training loss 2.8758285716176033 | Validation loss 2.8170717358589172



 13%|█████████▍                                                              | 1312/10000 [3:00:26<19:34:56,  8.11s/it]

Epoch: 1311 | Training loss 2.8788302913308144 | Validation loss 2.803020715713501



 13%|█████████▍                                                              | 1313/10000 [3:00:34<19:35:27,  8.12s/it]

Epoch: 1312 | Training loss 2.8758020251989365 | Validation loss 2.807958334684372



 13%|█████████▍                                                              | 1314/10000 [3:00:42<19:35:58,  8.12s/it]

Epoch: 1313 | Training loss 2.876410499215126 | Validation loss 2.8059253096580505



 13%|█████████▍                                                              | 1315/10000 [3:00:50<19:35:30,  8.12s/it]

Epoch: 1314 | Training loss 2.874836653470993 | Validation loss 2.8098689317703247



 13%|█████████▍                                                              | 1316/10000 [3:00:58<19:33:08,  8.11s/it]

Epoch: 1315 | Training loss 2.8801952823996544 | Validation loss 2.8030038475990295



 13%|█████████▍                                                              | 1317/10000 [3:01:06<19:30:52,  8.09s/it]

Epoch: 1316 | Training loss 2.878137320280075 | Validation loss 2.8101291358470917



 13%|█████████▍                                                              | 1318/10000 [3:01:14<19:27:46,  8.07s/it]

Epoch: 1317 | Training loss 2.871866397559643 | Validation loss 2.8034174144268036



 13%|█████████▍                                                              | 1319/10000 [3:01:22<19:30:34,  8.09s/it]

Epoch: 1318 | Training loss 2.876431778073311 | Validation loss 2.8092769384384155



 13%|█████████▌                                                              | 1320/10000 [3:01:30<19:27:20,  8.07s/it]

Epoch: 1319 | Training loss 2.8787564858794212 | Validation loss 2.8055969774723053



 13%|█████████▌                                                              | 1321/10000 [3:01:38<19:30:01,  8.09s/it]

Epoch: 1320 | Training loss 2.8785023614764214 | Validation loss 2.8034064769744873



 13%|█████████▌                                                              | 1322/10000 [3:01:46<19:25:34,  8.06s/it]

Epoch: 1321 | Training loss 2.8784681856632233 | Validation loss 2.806003123521805



 13%|█████████▌                                                              | 1323/10000 [3:01:54<19:24:39,  8.05s/it]

Epoch: 1322 | Training loss 2.8753215298056602 | Validation loss 2.8047725558280945



 13%|█████████▌                                                              | 1324/10000 [3:02:02<19:21:57,  8.04s/it]

Epoch: 1323 | Training loss 2.8754622861742973 | Validation loss 2.8039920330047607



 13%|█████████▌                                                              | 1325/10000 [3:02:11<19:22:32,  8.04s/it]

Epoch: 1324 | Training loss 2.874629035592079 | Validation loss 2.8023420572280884



 13%|█████████▌                                                              | 1326/10000 [3:02:19<19:22:31,  8.04s/it]

Epoch: 1325 | Training loss 2.8738630264997482 | Validation loss 2.802099734544754



 13%|█████████▌                                                              | 1327/10000 [3:02:27<19:23:59,  8.05s/it]

Epoch: 1326 | Training loss 2.8790472373366356 | Validation loss 2.811505377292633



 13%|█████████▌                                                              | 1328/10000 [3:02:35<19:28:26,  8.08s/it]

Epoch: 1327 | Training loss 2.875601328909397 | Validation loss 2.803412824869156



 13%|█████████▌                                                              | 1329/10000 [3:02:43<19:30:39,  8.10s/it]

Epoch: 1328 | Training loss 2.8739242032170296 | Validation loss 2.802696019411087



 13%|█████████▌                                                              | 1330/10000 [3:02:51<19:29:10,  8.09s/it]

Epoch: 1329 | Training loss 2.874224402010441 | Validation loss 2.8014855682849884



 13%|█████████▌                                                              | 1331/10000 [3:02:59<19:34:18,  8.13s/it]

Epoch: 1330 | Training loss 2.8780642822384834 | Validation loss 2.8048063218593597



 13%|█████████▌                                                              | 1332/10000 [3:03:07<19:32:24,  8.12s/it]

Epoch: 1331 | Training loss 2.8816352859139442 | Validation loss 2.8091967701911926



 13%|█████████▌                                                              | 1333/10000 [3:03:15<19:25:35,  8.07s/it]

Epoch: 1332 | Training loss 2.8739340975880623 | Validation loss 2.80379182100296



 13%|█████████▌                                                              | 1334/10000 [3:03:23<19:26:52,  8.08s/it]

Epoch: 1333 | Training loss 2.8783213645219803 | Validation loss 2.8067113161087036



 13%|█████████▌                                                              | 1335/10000 [3:03:31<19:24:57,  8.07s/it]

Epoch: 1334 | Training loss 2.8783143684267998 | Validation loss 2.814846009016037



 13%|█████████▌                                                              | 1336/10000 [3:03:40<19:26:23,  8.08s/it]

Epoch: 1335 | Training loss 2.8836202174425125 | Validation loss 2.8134880363941193



 13%|█████████▋                                                              | 1337/10000 [3:03:48<19:28:40,  8.09s/it]

Epoch: 1336 | Training loss 2.8769380897283554 | Validation loss 2.8088389337062836



 13%|█████████▋                                                              | 1338/10000 [3:03:56<19:25:26,  8.07s/it]

Epoch: 1337 | Training loss 2.87654335051775 | Validation loss 2.812847524881363



 13%|█████████▋                                                              | 1339/10000 [3:04:04<19:23:09,  8.06s/it]

Epoch: 1338 | Training loss 2.8757361695170403 | Validation loss 2.8064735531806946



 13%|█████████▋                                                              | 1340/10000 [3:04:12<19:24:12,  8.07s/it]

Epoch: 1339 | Training loss 2.874402031302452 | Validation loss 2.799645185470581



 13%|█████████▋                                                              | 1341/10000 [3:04:20<19:29:02,  8.10s/it]

Epoch: 1340 | Training loss 2.8777469620108604 | Validation loss 2.8113265931606293



 13%|█████████▋                                                              | 1342/10000 [3:04:28<19:34:36,  8.14s/it]

Epoch: 1341 | Training loss 2.8759893849492073 | Validation loss 2.7979173064231873



 13%|█████████▋                                                              | 1343/10000 [3:04:36<19:36:08,  8.15s/it]

Epoch: 1342 | Training loss 2.8723586350679398 | Validation loss 2.801676332950592



 13%|█████████▋                                                              | 1344/10000 [3:04:44<19:31:45,  8.12s/it]

Epoch: 1343 | Training loss 2.8781279772520065 | Validation loss 2.80757737159729



 13%|█████████▋                                                              | 1345/10000 [3:04:53<19:36:09,  8.15s/it]

Epoch: 1344 | Training loss 2.878700777888298 | Validation loss 2.8088595867156982



 13%|█████████▋                                                              | 1346/10000 [3:05:01<19:35:47,  8.15s/it]

Epoch: 1345 | Training loss 2.8748162239789963 | Validation loss 2.8094866275787354



 13%|█████████▋                                                              | 1347/10000 [3:05:09<19:34:05,  8.14s/it]

Epoch: 1346 | Training loss 2.8786011040210724 | Validation loss 2.809324473142624



 13%|█████████▋                                                              | 1348/10000 [3:05:17<19:31:32,  8.12s/it]

Epoch: 1347 | Training loss 2.883798435330391 | Validation loss 2.807439863681793



 13%|█████████▋                                                              | 1349/10000 [3:05:25<19:35:14,  8.15s/it]

Epoch: 1348 | Training loss 2.8737262338399887 | Validation loss 2.7976947724819183



 14%|█████████▋                                                              | 1350/10000 [3:05:33<19:36:25,  8.16s/it]

Epoch: 1349 | Training loss 2.876401223242283 | Validation loss 2.8084056973457336



 14%|█████████▋                                                              | 1351/10000 [3:05:41<19:27:02,  8.10s/it]

Epoch: 1350 | Training loss 2.8777198269963264 | Validation loss 2.806409925222397



 14%|█████████▋                                                              | 1352/10000 [3:05:49<19:25:04,  8.08s/it]

Epoch: 1351 | Training loss 2.876996874809265 | Validation loss 2.8075680136680603



 14%|█████████▋                                                              | 1353/10000 [3:05:58<19:30:23,  8.12s/it]

Epoch: 1352 | Training loss 2.881646305322647 | Validation loss 2.8078032433986664



 14%|█████████▋                                                              | 1354/10000 [3:06:06<19:31:21,  8.13s/it]

Epoch: 1353 | Training loss 2.8758311942219734 | Validation loss 2.801590919494629



 14%|█████████▊                                                              | 1355/10000 [3:06:14<19:27:55,  8.11s/it]

Epoch: 1354 | Training loss 2.874025322496891 | Validation loss 2.8030105531215668



 14%|█████████▊                                                              | 1356/10000 [3:06:22<19:31:03,  8.13s/it]

Epoch: 1355 | Training loss 2.8723578825592995 | Validation loss 2.801824688911438



 14%|█████████▊                                                              | 1357/10000 [3:06:30<19:26:55,  8.10s/it]

Epoch: 1356 | Training loss 2.8765553534030914 | Validation loss 2.805894672870636



 14%|█████████▊                                                              | 1358/10000 [3:06:38<19:31:34,  8.13s/it]

Epoch: 1357 | Training loss 2.869625635445118 | Validation loss 2.8091255724430084



 14%|█████████▊                                                              | 1359/10000 [3:06:46<19:31:19,  8.13s/it]

Epoch: 1358 | Training loss 2.87442659586668 | Validation loss 2.8078510761260986



 14%|█████████▊                                                              | 1360/10000 [3:06:55<19:31:55,  8.14s/it]

Epoch: 1359 | Training loss 2.8744342997670174 | Validation loss 2.8099308907985687



 14%|█████████▊                                                              | 1361/10000 [3:07:03<19:28:14,  8.11s/it]

Epoch: 1360 | Training loss 2.8722492828965187 | Validation loss 2.804086059331894



 14%|█████████▊                                                              | 1362/10000 [3:07:11<19:22:43,  8.08s/it]

Epoch: 1361 | Training loss 2.874062269926071 | Validation loss 2.799022853374481



 14%|█████████▊                                                              | 1363/10000 [3:07:19<19:21:04,  8.07s/it]

Epoch: 1362 | Training loss 2.875912256538868 | Validation loss 2.8024254143238068



 14%|█████████▊                                                              | 1364/10000 [3:07:27<19:23:49,  8.09s/it]

Epoch: 1363 | Training loss 2.8747507855296135 | Validation loss 2.810352146625519



 14%|█████████▊                                                              | 1365/10000 [3:07:35<19:23:03,  8.08s/it]

Epoch: 1364 | Training loss 2.8744713217020035 | Validation loss 2.8009048104286194



 14%|█████████▊                                                              | 1366/10000 [3:07:43<19:26:31,  8.11s/it]

Epoch: 1365 | Training loss 2.873238295316696 | Validation loss 2.804376184940338



 14%|█████████▊                                                              | 1367/10000 [3:07:51<19:28:58,  8.12s/it]

Epoch: 1366 | Training loss 2.8753868490457535 | Validation loss 2.804707169532776



 14%|█████████▊                                                              | 1368/10000 [3:07:59<19:28:39,  8.12s/it]

Epoch: 1367 | Training loss 2.8749200105667114 | Validation loss 2.8020065426826477



 14%|█████████▊                                                              | 1369/10000 [3:08:07<19:26:56,  8.11s/it]

Epoch: 1368 | Training loss 2.873342089354992 | Validation loss 2.8105766773223877



 14%|█████████▊                                                              | 1370/10000 [3:08:15<19:25:05,  8.10s/it]

Epoch: 1369 | Training loss 2.8733920753002167 | Validation loss 2.8024726808071136



 14%|█████████▊                                                              | 1371/10000 [3:08:24<19:26:23,  8.11s/it]

Epoch: 1370 | Training loss 2.8767669275403023 | Validation loss 2.8071783781051636



 14%|█████████▉                                                              | 1372/10000 [3:08:32<19:30:45,  8.14s/it]

Epoch: 1371 | Training loss 2.8760352805256844 | Validation loss 2.806113690137863



 14%|█████████▉                                                              | 1373/10000 [3:08:40<19:31:45,  8.15s/it]

Epoch: 1372 | Training loss 2.8739387318491936 | Validation loss 2.802000552415848



 14%|█████████▉                                                              | 1374/10000 [3:08:48<19:27:04,  8.12s/it]

Epoch: 1373 | Training loss 2.87977322191 | Validation loss 2.8068330585956573



 14%|█████████▉                                                              | 1375/10000 [3:08:56<19:22:18,  8.09s/it]

Epoch: 1374 | Training loss 2.8792089745402336 | Validation loss 2.8060035407543182



 14%|█████████▉                                                              | 1376/10000 [3:09:04<19:20:51,  8.08s/it]

Epoch: 1375 | Training loss 2.8791262358427048 | Validation loss 2.814767599105835



 14%|█████████▉                                                              | 1377/10000 [3:09:12<19:19:46,  8.07s/it]

Epoch: 1376 | Training loss 2.876245990395546 | Validation loss 2.810098350048065



 14%|█████████▉                                                              | 1378/10000 [3:09:20<19:18:59,  8.07s/it]

Epoch: 1377 | Training loss 2.8757977038621902 | Validation loss 2.80073618888855



 14%|█████████▉                                                              | 1379/10000 [3:09:28<19:24:30,  8.10s/it]

Epoch: 1378 | Training loss 2.8740294873714447 | Validation loss 2.811034709215164



 14%|█████████▉                                                              | 1380/10000 [3:09:37<19:29:35,  8.14s/it]

Epoch: 1379 | Training loss 2.8785632252693176 | Validation loss 2.8227618038654327



 14%|█████████▉                                                              | 1381/10000 [3:09:45<19:31:46,  8.16s/it]

Epoch: 1380 | Training loss 2.874164327979088 | Validation loss 2.8079342246055603



 14%|█████████▉                                                              | 1382/10000 [3:09:53<19:30:40,  8.15s/it]

Epoch: 1381 | Training loss 2.8803460001945496 | Validation loss 2.803916037082672



 14%|█████████▉                                                              | 1383/10000 [3:10:01<19:29:11,  8.14s/it]

Epoch: 1382 | Training loss 2.8744335621595383 | Validation loss 2.8111374378204346



 14%|█████████▉                                                              | 1384/10000 [3:10:09<19:24:36,  8.11s/it]

Epoch: 1383 | Training loss 2.878139980137348 | Validation loss 2.8108422458171844



 14%|█████████▉                                                              | 1385/10000 [3:10:17<19:26:49,  8.13s/it]

Epoch: 1384 | Training loss 2.8724896535277367 | Validation loss 2.815170645713806



 14%|█████████▉                                                              | 1386/10000 [3:10:25<19:26:16,  8.12s/it]

Epoch: 1385 | Training loss 2.8750446140766144 | Validation loss 2.8133457005023956



 14%|█████████▉                                                              | 1387/10000 [3:10:33<19:24:35,  8.11s/it]

Epoch: 1386 | Training loss 2.8777965381741524 | Validation loss 2.802615225315094



 14%|█████████▉                                                              | 1388/10000 [3:10:42<19:25:19,  8.12s/it]

Epoch: 1387 | Training loss 2.8718179911375046 | Validation loss 2.8014613687992096



 14%|██████████                                                              | 1389/10000 [3:10:50<19:27:05,  8.13s/it]

Epoch: 1388 | Training loss 2.8763196915388107 | Validation loss 2.80450776219368



 14%|██████████                                                              | 1390/10000 [3:10:58<19:26:20,  8.13s/it]

Epoch: 1389 | Training loss 2.878423757851124 | Validation loss 2.8122141659259796



 14%|██████████                                                              | 1391/10000 [3:11:06<19:27:24,  8.14s/it]

Epoch: 1390 | Training loss 2.875357523560524 | Validation loss 2.8101146817207336



 14%|██████████                                                              | 1392/10000 [3:11:14<19:24:27,  8.12s/it]

Epoch: 1391 | Training loss 2.873606450855732 | Validation loss 2.806919425725937



 14%|██████████                                                              | 1393/10000 [3:11:22<19:23:02,  8.11s/it]

Epoch: 1392 | Training loss 2.8746389225125313 | Validation loss 2.807439476251602



 14%|██████████                                                              | 1394/10000 [3:11:30<19:23:20,  8.11s/it]

Epoch: 1393 | Training loss 2.8756191954016685 | Validation loss 2.8101213574409485



 14%|██████████                                                              | 1395/10000 [3:11:39<19:28:16,  8.15s/it]

Epoch: 1394 | Training loss 2.875782400369644 | Validation loss 2.80759733915329



 14%|██████████                                                              | 1396/10000 [3:11:47<19:25:31,  8.13s/it]

Epoch: 1395 | Training loss 2.876690998673439 | Validation loss 2.8078023195266724



 14%|██████████                                                              | 1397/10000 [3:11:55<19:25:01,  8.13s/it]

Epoch: 1396 | Training loss 2.8783035799860954 | Validation loss 2.8083349466323853



 14%|██████████                                                              | 1398/10000 [3:12:03<19:19:57,  8.09s/it]

Epoch: 1397 | Training loss 2.8755282014608383 | Validation loss 2.8110289871692657



 14%|██████████                                                              | 1399/10000 [3:12:11<19:22:16,  8.11s/it]

Epoch: 1398 | Training loss 2.8745615631341934 | Validation loss 2.813681811094284



 14%|██████████                                                              | 1400/10000 [3:12:19<19:21:55,  8.11s/it]

Epoch: 1399 | Training loss 2.871822625398636 | Validation loss 2.816569685935974



 14%|██████████                                                              | 1401/10000 [3:12:27<19:22:13,  8.11s/it]

Epoch: 1400 | Training loss 2.86692763119936 | Validation loss 2.798958897590637



 14%|██████████                                                              | 1402/10000 [3:12:35<19:16:24,  8.07s/it]

Epoch: 1401 | Training loss 2.8704373985528946 | Validation loss 2.795455664396286



 14%|██████████                                                              | 1403/10000 [3:12:43<19:18:20,  8.08s/it]

Epoch: 1402 | Training loss 2.871105596423149 | Validation loss 2.7993815541267395



 14%|██████████                                                              | 1404/10000 [3:12:51<19:22:20,  8.11s/it]

Epoch: 1403 | Training loss 2.876040495932102 | Validation loss 2.8014086186885834



 14%|██████████                                                              | 1405/10000 [3:13:00<19:25:56,  8.14s/it]

The best model was saved!
Epoch: 1404 | Training loss 2.8701320737600327 | Validation loss 2.790327489376068



 14%|██████████                                                              | 1406/10000 [3:13:08<19:28:16,  8.16s/it]

Epoch: 1405 | Training loss 2.87241118401289 | Validation loss 2.794131428003311



 14%|██████████▏                                                             | 1407/10000 [3:13:16<19:31:54,  8.18s/it]

Epoch: 1406 | Training loss 2.8689016103744507 | Validation loss 2.7926421761512756



 14%|██████████▏                                                             | 1408/10000 [3:13:24<19:32:17,  8.19s/it]

Epoch: 1407 | Training loss 2.8747865110635757 | Validation loss 2.802738308906555



 14%|██████████▏                                                             | 1409/10000 [3:13:32<19:29:12,  8.17s/it]

Epoch: 1408 | Training loss 2.8715173602104187 | Validation loss 2.796932965517044



 14%|██████████▏                                                             | 1410/10000 [3:13:40<19:25:02,  8.14s/it]

The best model was saved!
Epoch: 1409 | Training loss 2.86439561098814 | Validation loss 2.789837211370468



 14%|██████████▏                                                             | 1411/10000 [3:13:49<19:25:16,  8.14s/it]

Epoch: 1410 | Training loss 2.865789584815502 | Validation loss 2.7986640334129333



 14%|██████████▏                                                             | 1412/10000 [3:13:57<19:24:50,  8.14s/it]

Epoch: 1411 | Training loss 2.870385095477104 | Validation loss 2.792025089263916



 14%|██████████▏                                                             | 1413/10000 [3:14:05<19:30:40,  8.18s/it]

The best model was saved!
Epoch: 1412 | Training loss 2.8715505972504616 | Validation loss 2.7890709936618805



 14%|██████████▏                                                             | 1414/10000 [3:14:13<19:26:55,  8.15s/it]

Epoch: 1413 | Training loss 2.8751284033060074 | Validation loss 2.8036561608314514



 14%|██████████▏                                                             | 1415/10000 [3:14:21<19:28:33,  8.17s/it]

Epoch: 1414 | Training loss 2.871931530535221 | Validation loss 2.8032695055007935



 14%|██████████▏                                                             | 1416/10000 [3:14:29<19:27:41,  8.16s/it]

The best model was saved!
Epoch: 1415 | Training loss 2.8695094883441925 | Validation loss 2.7882399559020996



 14%|██████████▏                                                             | 1417/10000 [3:14:37<19:22:59,  8.13s/it]

Epoch: 1416 | Training loss 2.8646276593208313 | Validation loss 2.792176604270935



 14%|██████████▏                                                             | 1418/10000 [3:14:46<19:19:38,  8.11s/it]

Epoch: 1417 | Training loss 2.865485407412052 | Validation loss 2.789702594280243



 14%|██████████▏                                                             | 1419/10000 [3:14:54<19:18:30,  8.10s/it]

Epoch: 1418 | Training loss 2.8694604486227036 | Validation loss 2.7952034771442413



 14%|██████████▏                                                             | 1420/10000 [3:15:02<19:17:50,  8.10s/it]

Epoch: 1419 | Training loss 2.866846442222595 | Validation loss 2.7945688664913177



 14%|██████████▏                                                             | 1421/10000 [3:15:10<19:17:09,  8.09s/it]

Epoch: 1420 | Training loss 2.867645263671875 | Validation loss 2.799320787191391



 14%|██████████▏                                                             | 1422/10000 [3:15:18<19:20:01,  8.11s/it]

Epoch: 1421 | Training loss 2.868913322687149 | Validation loss 2.795843631029129



 14%|██████████▏                                                             | 1423/10000 [3:15:26<19:19:24,  8.11s/it]

Epoch: 1422 | Training loss 2.878128595650196 | Validation loss 2.792787045240402



 14%|██████████▎                                                             | 1424/10000 [3:15:34<19:20:12,  8.12s/it]

Epoch: 1423 | Training loss 2.8708333149552345 | Validation loss 2.8149138689041138



 14%|██████████▎                                                             | 1425/10000 [3:15:42<19:20:43,  8.12s/it]

Epoch: 1424 | Training loss 2.872156001627445 | Validation loss 2.792722165584564



 14%|██████████▎                                                             | 1426/10000 [3:15:50<19:23:46,  8.14s/it]

Epoch: 1425 | Training loss 2.862573228776455 | Validation loss 2.7929050624370575



 14%|██████████▎                                                             | 1427/10000 [3:15:59<19:23:48,  8.15s/it]

Epoch: 1426 | Training loss 2.870730198919773 | Validation loss 2.7909672558307648



 14%|██████████▎                                                             | 1428/10000 [3:16:07<19:18:27,  8.11s/it]

Epoch: 1427 | Training loss 2.871842712163925 | Validation loss 2.79693141579628



 14%|██████████▎                                                             | 1429/10000 [3:16:15<19:17:19,  8.10s/it]

Epoch: 1428 | Training loss 2.86835777759552 | Validation loss 2.7884991466999054



 14%|██████████▎                                                             | 1430/10000 [3:16:23<19:20:35,  8.13s/it]

Epoch: 1429 | Training loss 2.8685827255249023 | Validation loss 2.7897096276283264



 14%|██████████▎                                                             | 1431/10000 [3:16:31<19:19:23,  8.12s/it]

Epoch: 1430 | Training loss 2.8705467358231544 | Validation loss 2.8038342893123627



 14%|██████████▎                                                             | 1432/10000 [3:16:39<19:20:52,  8.13s/it]

Epoch: 1431 | Training loss 2.872368834912777 | Validation loss 2.7914856374263763



 14%|██████████▎                                                             | 1433/10000 [3:16:47<19:20:17,  8.13s/it]

Epoch: 1432 | Training loss 2.8682285994291306 | Validation loss 2.79073366522789



 14%|██████████▎                                                             | 1434/10000 [3:16:55<19:22:22,  8.14s/it]

The best model was saved!
Epoch: 1433 | Training loss 2.866256758570671 | Validation loss 2.7878390848636627



 14%|██████████▎                                                             | 1435/10000 [3:17:04<19:19:04,  8.12s/it]

Epoch: 1434 | Training loss 2.868399254977703 | Validation loss 2.797068029642105



 14%|██████████▎                                                             | 1436/10000 [3:17:12<19:24:55,  8.16s/it]

The best model was saved!
Epoch: 1435 | Training loss 2.8664525002241135 | Validation loss 2.7847862243652344



 14%|██████████▎                                                             | 1437/10000 [3:17:20<19:20:31,  8.13s/it]

Epoch: 1436 | Training loss 2.863346926867962 | Validation loss 2.7914601862430573



 14%|██████████▎                                                             | 1438/10000 [3:17:28<19:19:09,  8.12s/it]

Epoch: 1437 | Training loss 2.86900232732296 | Validation loss 2.790105789899826



 14%|██████████▎                                                             | 1439/10000 [3:17:36<19:16:04,  8.10s/it]

Epoch: 1438 | Training loss 2.86380298435688 | Validation loss 2.785226881504059



 14%|██████████▎                                                             | 1440/10000 [3:17:44<19:21:56,  8.14s/it]

Epoch: 1439 | Training loss 2.8665760159492493 | Validation loss 2.7885791659355164



 14%|██████████▍                                                             | 1441/10000 [3:17:52<19:18:01,  8.12s/it]

Epoch: 1440 | Training loss 2.8688736110925674 | Validation loss 2.791475623846054



 14%|██████████▍                                                             | 1442/10000 [3:18:00<19:15:09,  8.10s/it]

Epoch: 1441 | Training loss 2.866328924894333 | Validation loss 2.794734299182892



 14%|██████████▍                                                             | 1443/10000 [3:18:08<19:15:09,  8.10s/it]

Epoch: 1442 | Training loss 2.863618142902851 | Validation loss 2.792544275522232



 14%|██████████▍                                                             | 1444/10000 [3:18:17<19:13:06,  8.09s/it]

Epoch: 1443 | Training loss 2.8706549778580666 | Validation loss 2.7941285967826843



 14%|██████████▍                                                             | 1445/10000 [3:18:25<19:14:16,  8.10s/it]

Epoch: 1444 | Training loss 2.8668305948376656 | Validation loss 2.7877725958824158



 14%|██████████▍                                                             | 1446/10000 [3:18:33<19:15:18,  8.10s/it]

Epoch: 1445 | Training loss 2.869171343743801 | Validation loss 2.79868283867836



 14%|██████████▍                                                             | 1447/10000 [3:18:41<19:19:01,  8.13s/it]

Epoch: 1446 | Training loss 2.867179661989212 | Validation loss 2.792149633169174



 14%|██████████▍                                                             | 1448/10000 [3:18:49<19:18:54,  8.13s/it]

Epoch: 1447 | Training loss 2.869670234620571 | Validation loss 2.7885099053382874



 14%|██████████▍                                                             | 1449/10000 [3:18:57<19:21:30,  8.15s/it]

Epoch: 1448 | Training loss 2.868319369852543 | Validation loss 2.786835342645645



 14%|██████████▍                                                             | 1450/10000 [3:19:05<19:17:15,  8.12s/it]

Epoch: 1449 | Training loss 2.862887427210808 | Validation loss 2.7911664247512817



 15%|██████████▍                                                             | 1451/10000 [3:19:13<19:16:15,  8.12s/it]

Epoch: 1450 | Training loss 2.8675889149308205 | Validation loss 2.8024426698684692



 15%|██████████▍                                                             | 1452/10000 [3:19:22<19:14:58,  8.11s/it]

Epoch: 1451 | Training loss 2.869982622563839 | Validation loss 2.7914258539676666



 15%|██████████▍                                                             | 1453/10000 [3:19:30<19:12:39,  8.09s/it]

Epoch: 1452 | Training loss 2.8649130761623383 | Validation loss 2.793608784675598



 15%|██████████▍                                                             | 1454/10000 [3:19:38<19:09:33,  8.07s/it]

Epoch: 1453 | Training loss 2.8685693442821503 | Validation loss 2.788685530424118



 15%|██████████▍                                                             | 1455/10000 [3:19:46<19:09:32,  8.07s/it]

Epoch: 1454 | Training loss 2.8732340037822723 | Validation loss 2.794012129306793



 15%|██████████▍                                                             | 1456/10000 [3:19:54<19:14:10,  8.11s/it]

Epoch: 1455 | Training loss 2.8696677535772324 | Validation loss 2.8003976941108704



 15%|██████████▍                                                             | 1457/10000 [3:20:02<19:16:34,  8.12s/it]

Epoch: 1456 | Training loss 2.87261151522398 | Validation loss 2.793626993894577



 15%|██████████▍                                                             | 1458/10000 [3:20:10<19:18:50,  8.14s/it]

Epoch: 1457 | Training loss 2.8684061244130135 | Validation loss 2.7897532284259796



 15%|██████████▌                                                             | 1459/10000 [3:20:18<19:13:58,  8.11s/it]

Epoch: 1458 | Training loss 2.869180090725422 | Validation loss 2.791685163974762



 15%|██████████▌                                                             | 1460/10000 [3:20:26<19:14:54,  8.11s/it]

Epoch: 1459 | Training loss 2.8690997138619423 | Validation loss 2.7900163531303406



 15%|██████████▌                                                             | 1461/10000 [3:20:35<19:14:54,  8.12s/it]

Epoch: 1460 | Training loss 2.871650569140911 | Validation loss 2.7888250052928925



 15%|██████████▌                                                             | 1462/10000 [3:20:43<19:12:53,  8.10s/it]

Epoch: 1461 | Training loss 2.8634498342871666 | Validation loss 2.7991616427898407



 15%|██████████▌                                                             | 1463/10000 [3:20:51<19:15:20,  8.12s/it]

Epoch: 1462 | Training loss 2.8656259179115295 | Validation loss 2.792879730463028



 15%|██████████▌                                                             | 1464/10000 [3:20:59<19:12:22,  8.10s/it]

Epoch: 1463 | Training loss 2.86835790425539 | Validation loss 2.794094055891037



 15%|██████████▌                                                             | 1465/10000 [3:21:07<19:10:23,  8.09s/it]

Epoch: 1464 | Training loss 2.871103011071682 | Validation loss 2.7984812557697296



 15%|██████████▌                                                             | 1466/10000 [3:21:15<19:11:36,  8.10s/it]

Epoch: 1465 | Training loss 2.8683588579297066 | Validation loss 2.7903080880641937



 15%|██████████▌                                                             | 1467/10000 [3:21:23<19:12:13,  8.10s/it]

Epoch: 1466 | Training loss 2.8710339441895485 | Validation loss 2.7910454869270325



 15%|██████████▌                                                             | 1468/10000 [3:21:31<19:08:40,  8.08s/it]

Epoch: 1467 | Training loss 2.8665851429104805 | Validation loss 2.792920023202896



 15%|██████████▌                                                             | 1469/10000 [3:21:39<19:13:33,  8.11s/it]

The best model was saved!
Epoch: 1468 | Training loss 2.8701489716768265 | Validation loss 2.7820843160152435



 15%|██████████▌                                                             | 1470/10000 [3:21:47<19:13:33,  8.11s/it]

Epoch: 1469 | Training loss 2.865069255232811 | Validation loss 2.7919367253780365



 15%|██████████▌                                                             | 1471/10000 [3:21:56<19:14:04,  8.12s/it]

Epoch: 1470 | Training loss 2.8673873618245125 | Validation loss 2.7902728617191315



 15%|██████████▌                                                             | 1472/10000 [3:22:04<19:08:05,  8.08s/it]

Epoch: 1471 | Training loss 2.8622736409306526 | Validation loss 2.798891633749008



 15%|██████████▌                                                             | 1473/10000 [3:22:12<19:12:57,  8.11s/it]

Epoch: 1472 | Training loss 2.86300852894783 | Validation loss 2.789942651987076



 15%|██████████▌                                                             | 1474/10000 [3:22:20<19:15:34,  8.13s/it]

Epoch: 1473 | Training loss 2.8673388957977295 | Validation loss 2.795337826013565



 15%|██████████▌                                                             | 1475/10000 [3:22:28<19:13:29,  8.12s/it]

Epoch: 1474 | Training loss 2.864756263792515 | Validation loss 2.7920519411563873



 15%|██████████▋                                                             | 1476/10000 [3:22:36<19:17:04,  8.14s/it]

Epoch: 1475 | Training loss 2.8660931065678596 | Validation loss 2.7905800342559814



 15%|██████████▋                                                             | 1477/10000 [3:22:44<19:15:44,  8.14s/it]

Epoch: 1476 | Training loss 2.8716864809393883 | Validation loss 2.793911784887314



 15%|██████████▋                                                             | 1478/10000 [3:22:52<19:15:00,  8.13s/it]

Epoch: 1477 | Training loss 2.8705144748091698 | Validation loss 2.7970613837242126



 15%|██████████▋                                                             | 1479/10000 [3:23:01<19:18:20,  8.16s/it]

Epoch: 1478 | Training loss 2.868399128317833 | Validation loss 2.786723256111145



 15%|██████████▋                                                             | 1480/10000 [3:23:09<19:17:08,  8.15s/it]

Epoch: 1479 | Training loss 2.8699357584118843 | Validation loss 2.788195461034775



 15%|██████████▋                                                             | 1481/10000 [3:23:17<19:19:00,  8.16s/it]

Epoch: 1480 | Training loss 2.872224010527134 | Validation loss 2.790760099887848



 15%|██████████▋                                                             | 1482/10000 [3:23:25<19:21:43,  8.18s/it]

Epoch: 1481 | Training loss 2.8623215183615685 | Validation loss 2.7862784564495087



 15%|██████████▋                                                             | 1483/10000 [3:23:33<19:15:46,  8.14s/it]

Epoch: 1482 | Training loss 2.867449291050434 | Validation loss 2.787584900856018



 15%|██████████▋                                                             | 1484/10000 [3:23:41<19:15:14,  8.14s/it]

Epoch: 1483 | Training loss 2.8695089742541313 | Validation loss 2.7848117649555206



 15%|██████████▋                                                             | 1485/10000 [3:23:50<19:16:34,  8.15s/it]

Epoch: 1484 | Training loss 2.8657951578497887 | Validation loss 2.794530361890793



 15%|██████████▋                                                             | 1486/10000 [3:23:58<19:16:58,  8.15s/it]

Epoch: 1485 | Training loss 2.868879236280918 | Validation loss 2.790939837694168



 15%|██████████▋                                                             | 1487/10000 [3:24:06<19:17:17,  8.16s/it]

Epoch: 1486 | Training loss 2.8725892081856728 | Validation loss 2.7935915887355804



 15%|██████████▋                                                             | 1488/10000 [3:24:14<19:18:10,  8.16s/it]

Epoch: 1487 | Training loss 2.8675266429781914 | Validation loss 2.78603059053421



 15%|██████████▋                                                             | 1489/10000 [3:24:22<19:18:44,  8.17s/it]

Epoch: 1488 | Training loss 2.8714984208345413 | Validation loss 2.7970308661460876



 15%|██████████▋                                                             | 1490/10000 [3:24:30<19:18:46,  8.17s/it]

Epoch: 1489 | Training loss 2.8657305985689163 | Validation loss 2.801646441221237



 15%|██████████▋                                                             | 1491/10000 [3:24:38<19:13:07,  8.13s/it]

Epoch: 1490 | Training loss 2.868226535618305 | Validation loss 2.7922745048999786



 15%|██████████▋                                                             | 1492/10000 [3:24:47<19:13:44,  8.14s/it]

Epoch: 1491 | Training loss 2.866788148880005 | Validation loss 2.7937512695789337



 15%|██████████▋                                                             | 1493/10000 [3:24:55<19:15:35,  8.15s/it]

Epoch: 1492 | Training loss 2.874206282198429 | Validation loss 2.7965250611305237



 15%|██████████▊                                                             | 1494/10000 [3:25:03<19:18:40,  8.17s/it]

Epoch: 1493 | Training loss 2.863412484526634 | Validation loss 2.7896324694156647



 15%|██████████▊                                                             | 1495/10000 [3:25:11<19:14:50,  8.15s/it]

Epoch: 1494 | Training loss 2.8663921803236008 | Validation loss 2.7966566681861877



 15%|██████████▊                                                             | 1496/10000 [3:25:19<19:19:24,  8.18s/it]

Epoch: 1495 | Training loss 2.864950641989708 | Validation loss 2.7948188185691833



 15%|██████████▊                                                             | 1497/10000 [3:25:28<19:20:48,  8.19s/it]

Epoch: 1496 | Training loss 2.8668761923909187 | Validation loss 2.798297882080078



 15%|██████████▊                                                             | 1498/10000 [3:25:36<19:24:53,  8.22s/it]

Epoch: 1497 | Training loss 2.866471141576767 | Validation loss 2.8001433610916138



 15%|██████████▊                                                             | 1499/10000 [3:25:44<19:24:31,  8.22s/it]

Epoch: 1498 | Training loss 2.864881247282028 | Validation loss 2.7963558733463287



 15%|██████████▊                                                             | 1500/10000 [3:25:52<19:27:30,  8.24s/it]

Epoch: 1499 | Training loss 2.8687689378857613 | Validation loss 2.7934765815734863



 15%|██████████▊                                                             | 1501/10000 [3:26:01<19:25:36,  8.23s/it]

Epoch: 1500 | Training loss 2.8656590804457664 | Validation loss 2.790714681148529



 15%|██████████▊                                                             | 1502/10000 [3:26:09<19:25:28,  8.23s/it]

Epoch: 1501 | Training loss 2.86394727230072 | Validation loss 2.793147563934326



 15%|██████████▊                                                             | 1503/10000 [3:26:17<19:22:04,  8.21s/it]

Epoch: 1502 | Training loss 2.8667383939027786 | Validation loss 2.7914816439151764



 15%|██████████▊                                                             | 1504/10000 [3:26:25<19:24:36,  8.22s/it]

Epoch: 1503 | Training loss 2.8675136268138885 | Validation loss 2.7892856895923615



 15%|██████████▊                                                             | 1505/10000 [3:26:33<19:21:22,  8.20s/it]

Epoch: 1504 | Training loss 2.8696406707167625 | Validation loss 2.7893669605255127



 15%|██████████▊                                                             | 1506/10000 [3:26:41<19:15:50,  8.16s/it]

Epoch: 1505 | Training loss 2.8676894530653954 | Validation loss 2.7942631542682648



 15%|██████████▊                                                             | 1507/10000 [3:26:50<19:15:05,  8.16s/it]

Epoch: 1506 | Training loss 2.868285045027733 | Validation loss 2.7957866489887238



 15%|██████████▊                                                             | 1508/10000 [3:26:58<19:13:08,  8.15s/it]

Epoch: 1507 | Training loss 2.867718406021595 | Validation loss 2.798829048871994



 15%|██████████▊                                                             | 1509/10000 [3:27:06<19:10:30,  8.13s/it]

Epoch: 1508 | Training loss 2.8667083457112312 | Validation loss 2.7907243967056274



 15%|██████████▊                                                             | 1510/10000 [3:27:14<19:10:01,  8.13s/it]

Epoch: 1509 | Training loss 2.8693344816565514 | Validation loss 2.787148952484131



 15%|██████████▉                                                             | 1511/10000 [3:27:22<19:10:10,  8.13s/it]

Epoch: 1510 | Training loss 2.86885304749012 | Validation loss 2.793666958808899



 15%|██████████▉                                                             | 1512/10000 [3:27:30<19:13:52,  8.16s/it]

Epoch: 1511 | Training loss 2.872339390218258 | Validation loss 2.802062898874283



 15%|██████████▉                                                             | 1513/10000 [3:27:39<19:18:11,  8.19s/it]

Epoch: 1512 | Training loss 2.874153256416321 | Validation loss 2.7972368001937866



 15%|██████████▉                                                             | 1514/10000 [3:27:47<19:13:13,  8.15s/it]

Epoch: 1513 | Training loss 2.8729077577590942 | Validation loss 2.796765923500061



 15%|██████████▉                                                             | 1515/10000 [3:27:55<19:11:29,  8.14s/it]

Epoch: 1514 | Training loss 2.8664901331067085 | Validation loss 2.7951826453208923



 15%|██████████▉                                                             | 1516/10000 [3:28:03<19:09:05,  8.13s/it]

Epoch: 1515 | Training loss 2.8644388914108276 | Validation loss 2.800446391105652



 15%|██████████▉                                                             | 1517/10000 [3:28:11<19:11:19,  8.14s/it]

Epoch: 1516 | Training loss 2.8723839446902275 | Validation loss 2.797670006752014



 15%|██████████▉                                                             | 1518/10000 [3:28:19<19:08:54,  8.13s/it]

Epoch: 1517 | Training loss 2.867894507944584 | Validation loss 2.79001122713089



 15%|██████████▉                                                             | 1519/10000 [3:28:27<19:13:04,  8.16s/it]

Epoch: 1518 | Training loss 2.8696257025003433 | Validation loss 2.7990344166755676



 15%|██████████▉                                                             | 1520/10000 [3:28:35<19:10:14,  8.14s/it]

Epoch: 1519 | Training loss 2.8677767366170883 | Validation loss 2.7884535789489746



 15%|██████████▉                                                             | 1521/10000 [3:28:43<19:07:18,  8.12s/it]

Epoch: 1520 | Training loss 2.872987911105156 | Validation loss 2.793361186981201



 15%|██████████▉                                                             | 1522/10000 [3:28:52<19:11:02,  8.15s/it]

Epoch: 1521 | Training loss 2.8670975118875504 | Validation loss 2.7972129583358765



 15%|██████████▉                                                             | 1523/10000 [3:29:00<19:13:55,  8.17s/it]

Epoch: 1522 | Training loss 2.8670187443494797 | Validation loss 2.7911975383758545



 15%|██████████▉                                                             | 1524/10000 [3:29:08<19:10:06,  8.14s/it]

Epoch: 1523 | Training loss 2.8687823489308357 | Validation loss 2.7992756962776184



 15%|██████████▉                                                             | 1525/10000 [3:29:16<19:09:47,  8.14s/it]

Epoch: 1524 | Training loss 2.8635505214333534 | Validation loss 2.7951166331768036



 15%|██████████▉                                                             | 1526/10000 [3:29:24<19:09:29,  8.14s/it]

Epoch: 1525 | Training loss 2.8681759759783745 | Validation loss 2.7908205091953278



 15%|██████████▉                                                             | 1527/10000 [3:29:32<19:07:44,  8.13s/it]

Epoch: 1526 | Training loss 2.8650184646248817 | Validation loss 2.7906898856163025



 15%|███████████                                                             | 1528/10000 [3:29:40<19:05:52,  8.12s/it]

Epoch: 1527 | Training loss 2.867236763238907 | Validation loss 2.7907526195049286



 15%|███████████                                                             | 1529/10000 [3:29:49<19:03:16,  8.10s/it]

Epoch: 1528 | Training loss 2.8711825534701347 | Validation loss 2.789857357740402



 15%|███████████                                                             | 1530/10000 [3:29:57<19:04:38,  8.11s/it]

Epoch: 1529 | Training loss 2.870087578892708 | Validation loss 2.797578364610672



 15%|███████████                                                             | 1531/10000 [3:30:05<19:05:37,  8.12s/it]

Epoch: 1530 | Training loss 2.8698966056108475 | Validation loss 2.8095838725566864



 15%|███████████                                                             | 1532/10000 [3:30:13<19:00:20,  8.08s/it]

Epoch: 1531 | Training loss 2.866883762180805 | Validation loss 2.7931083142757416



 15%|███████████                                                             | 1533/10000 [3:30:21<19:08:35,  8.14s/it]

Epoch: 1532 | Training loss 2.868487313389778 | Validation loss 2.792079597711563



 15%|███████████                                                             | 1534/10000 [3:30:29<19:04:25,  8.11s/it]

Epoch: 1533 | Training loss 2.859743595123291 | Validation loss 2.7905317544937134



 15%|███████████                                                             | 1535/10000 [3:30:37<19:07:14,  8.13s/it]

Epoch: 1534 | Training loss 2.874294437468052 | Validation loss 2.7934786081314087



 15%|███████████                                                             | 1536/10000 [3:30:45<19:06:39,  8.13s/it]

Epoch: 1535 | Training loss 2.871900051832199 | Validation loss 2.789576441049576



 15%|███████████                                                             | 1537/10000 [3:30:54<19:07:25,  8.13s/it]

Epoch: 1536 | Training loss 2.8723816201090813 | Validation loss 2.7900828421115875



 15%|███████████                                                             | 1538/10000 [3:31:02<19:04:38,  8.12s/it]

Epoch: 1537 | Training loss 2.8661448657512665 | Validation loss 2.7890607118606567



 15%|███████████                                                             | 1539/10000 [3:31:10<19:02:36,  8.10s/it]

Epoch: 1538 | Training loss 2.8644165471196175 | Validation loss 2.789557456970215



 15%|███████████                                                             | 1540/10000 [3:31:18<19:06:24,  8.13s/it]

Epoch: 1539 | Training loss 2.864700958132744 | Validation loss 2.785064250230789



 15%|███████████                                                             | 1541/10000 [3:31:26<18:59:05,  8.08s/it]

Epoch: 1540 | Training loss 2.866602346301079 | Validation loss 2.7913798093795776



 15%|███████████                                                             | 1542/10000 [3:31:34<19:04:30,  8.12s/it]

Epoch: 1541 | Training loss 2.865728013217449 | Validation loss 2.78408420085907



 15%|███████████                                                             | 1543/10000 [3:31:42<19:03:00,  8.11s/it]

Epoch: 1542 | Training loss 2.8677223697304726 | Validation loss 2.7911509573459625



 15%|███████████                                                             | 1544/10000 [3:31:50<19:05:53,  8.13s/it]

Epoch: 1543 | Training loss 2.8697154000401497 | Validation loss 2.7921532094478607



 15%|███████████                                                             | 1545/10000 [3:31:58<19:01:56,  8.10s/it]

Epoch: 1544 | Training loss 2.8681374192237854 | Validation loss 2.7950302064418793



 15%|███████████▏                                                            | 1546/10000 [3:32:06<18:59:47,  8.09s/it]

Epoch: 1545 | Training loss 2.8636097386479378 | Validation loss 2.7907243072986603



 15%|███████████▏                                                            | 1547/10000 [3:32:15<19:01:17,  8.10s/it]

Epoch: 1546 | Training loss 2.8618511855602264 | Validation loss 2.7951621413230896



 15%|███████████▏                                                            | 1548/10000 [3:32:23<19:03:13,  8.12s/it]

Epoch: 1547 | Training loss 2.8690484315156937 | Validation loss 2.79275780916214



 15%|███████████▏                                                            | 1549/10000 [3:32:31<19:06:30,  8.14s/it]

Epoch: 1548 | Training loss 2.8700080290436745 | Validation loss 2.788299322128296



 16%|███████████▏                                                            | 1550/10000 [3:32:39<19:04:11,  8.12s/it]

Epoch: 1549 | Training loss 2.8714558109641075 | Validation loss 2.8042501509189606



 16%|███████████▏                                                            | 1551/10000 [3:32:47<19:03:13,  8.12s/it]

Epoch: 1550 | Training loss 2.8685551434755325 | Validation loss 2.7914952635765076



 16%|███████████▏                                                            | 1552/10000 [3:32:55<19:06:24,  8.14s/it]

Epoch: 1551 | Training loss 2.8632122427225113 | Validation loss 2.7969006299972534



 16%|███████████▏                                                            | 1553/10000 [3:33:03<19:05:21,  8.14s/it]

Epoch: 1552 | Training loss 2.8673346266150475 | Validation loss 2.7941349148750305



 16%|███████████▏                                                            | 1554/10000 [3:33:12<19:04:31,  8.13s/it]

Epoch: 1553 | Training loss 2.870220810174942 | Validation loss 2.788506805896759



 16%|███████████▏                                                            | 1555/10000 [3:33:20<19:04:30,  8.13s/it]

Epoch: 1554 | Training loss 2.8649281710386276 | Validation loss 2.786712408065796



 16%|███████████▏                                                            | 1556/10000 [3:33:28<19:05:08,  8.14s/it]

Epoch: 1555 | Training loss 2.871277630329132 | Validation loss 2.7935197055339813



 16%|███████████▏                                                            | 1557/10000 [3:33:36<19:02:06,  8.12s/it]

Epoch: 1556 | Training loss 2.8676215782761574 | Validation loss 2.790846675634384



 16%|███████████▏                                                            | 1558/10000 [3:33:44<19:05:19,  8.14s/it]

Epoch: 1557 | Training loss 2.8661834821105003 | Validation loss 2.791410654783249



 16%|███████████▏                                                            | 1559/10000 [3:33:52<19:03:45,  8.13s/it]

Epoch: 1558 | Training loss 2.8736435920000076 | Validation loss 2.7932585775852203



 16%|███████████▏                                                            | 1560/10000 [3:34:00<19:04:25,  8.14s/it]

Epoch: 1559 | Training loss 2.8662196919322014 | Validation loss 2.794967442750931



 16%|███████████▏                                                            | 1561/10000 [3:34:08<19:04:53,  8.14s/it]

Epoch: 1560 | Training loss 2.864641174674034 | Validation loss 2.805781662464142



 16%|███████████▏                                                            | 1562/10000 [3:34:17<19:04:07,  8.14s/it]

Epoch: 1561 | Training loss 2.868175931274891 | Validation loss 2.788031369447708



 16%|███████████▎                                                            | 1563/10000 [3:34:25<19:06:10,  8.15s/it]

Epoch: 1562 | Training loss 2.871897406876087 | Validation loss 2.783842831850052



 16%|███████████▎                                                            | 1564/10000 [3:34:33<19:09:51,  8.18s/it]

Epoch: 1563 | Training loss 2.8650444000959396 | Validation loss 2.8001189529895782



 16%|███████████▎                                                            | 1565/10000 [3:34:41<19:12:50,  8.20s/it]

Epoch: 1564 | Training loss 2.866507440805435 | Validation loss 2.789568066596985



 16%|███████████▎                                                            | 1566/10000 [3:34:49<19:09:11,  8.18s/it]

Epoch: 1565 | Training loss 2.8685451969504356 | Validation loss 2.794468194246292



 16%|███████████▎                                                            | 1567/10000 [3:34:58<19:06:03,  8.15s/it]

Epoch: 1566 | Training loss 2.8868196606636047 | Validation loss 2.785165101289749



 16%|███████████▎                                                            | 1568/10000 [3:35:06<19:02:31,  8.13s/it]

Epoch: 1567 | Training loss 2.881623327732086 | Validation loss 2.791768401861191



 16%|███████████▎                                                            | 1569/10000 [3:35:14<19:00:17,  8.11s/it]

Epoch: 1568 | Training loss 2.8732036724686623 | Validation loss 2.8021169304847717



 16%|███████████▎                                                            | 1570/10000 [3:35:22<18:54:26,  8.07s/it]

Epoch: 1569 | Training loss 2.869116060435772 | Validation loss 2.792178750038147



 16%|███████████▎                                                            | 1571/10000 [3:35:30<18:54:57,  8.08s/it]

Epoch: 1570 | Training loss 2.8665612563490868 | Validation loss 2.789029151201248



 16%|███████████▎                                                            | 1572/10000 [3:35:38<18:57:09,  8.10s/it]

Epoch: 1571 | Training loss 2.8644823729991913 | Validation loss 2.7855837643146515



 16%|███████████▎                                                            | 1573/10000 [3:35:46<18:58:50,  8.11s/it]

Epoch: 1572 | Training loss 2.8673515021800995 | Validation loss 2.7943290174007416



 16%|███████████▎                                                            | 1574/10000 [3:35:54<18:55:52,  8.09s/it]

Epoch: 1573 | Training loss 2.878799833357334 | Validation loss 2.792831242084503



 16%|███████████▎                                                            | 1575/10000 [3:36:02<18:59:45,  8.12s/it]

Epoch: 1574 | Training loss 2.871859885752201 | Validation loss 2.797104239463806



 16%|███████████▎                                                            | 1576/10000 [3:36:10<18:57:44,  8.10s/it]

Epoch: 1575 | Training loss 2.8683895021677017 | Validation loss 2.8031233847141266



 16%|███████████▎                                                            | 1577/10000 [3:36:18<18:54:57,  8.08s/it]

Epoch: 1576 | Training loss 2.8701494112610817 | Validation loss 2.7899991273880005



 16%|███████████▎                                                            | 1578/10000 [3:36:27<18:59:01,  8.11s/it]

Epoch: 1577 | Training loss 2.862514480948448 | Validation loss 2.7928524911403656



 16%|███████████▎                                                            | 1579/10000 [3:36:35<18:58:57,  8.12s/it]

Epoch: 1578 | Training loss 2.8687803372740746 | Validation loss 2.7991687655448914



 16%|███████████▍                                                            | 1580/10000 [3:36:43<18:58:17,  8.11s/it]

Epoch: 1579 | Training loss 2.8690819814801216 | Validation loss 2.794815421104431



 16%|███████████▍                                                            | 1581/10000 [3:36:51<18:56:23,  8.10s/it]

Epoch: 1580 | Training loss 2.8674997240304947 | Validation loss 2.7886859476566315



 16%|███████████▍                                                            | 1582/10000 [3:36:59<18:57:41,  8.11s/it]

Epoch: 1581 | Training loss 2.8623002097010612 | Validation loss 2.7893655002117157



 16%|███████████▍                                                            | 1583/10000 [3:37:07<18:58:00,  8.11s/it]

Epoch: 1582 | Training loss 2.875724568963051 | Validation loss 2.7906448543071747



 16%|███████████▍                                                            | 1584/10000 [3:37:15<18:52:46,  8.08s/it]

Epoch: 1583 | Training loss 2.8609075099229813 | Validation loss 2.7990618348121643



 16%|███████████▍                                                            | 1585/10000 [3:37:23<18:50:08,  8.06s/it]

Epoch: 1584 | Training loss 2.8703057542443275 | Validation loss 2.7945860624313354



 16%|███████████▍                                                            | 1586/10000 [3:37:31<18:51:11,  8.07s/it]

Epoch: 1585 | Training loss 2.8653054237365723 | Validation loss 2.788854658603668



 16%|███████████▍                                                            | 1587/10000 [3:37:39<18:53:19,  8.08s/it]

Epoch: 1586 | Training loss 2.8659412637352943 | Validation loss 2.7866694033145905



 16%|███████████▍                                                            | 1588/10000 [3:37:47<18:53:19,  8.08s/it]

Epoch: 1587 | Training loss 2.8603377789258957 | Validation loss 2.7897251546382904



 16%|███████████▍                                                            | 1589/10000 [3:37:56<18:56:39,  8.11s/it]

Epoch: 1588 | Training loss 2.8612434789538383 | Validation loss 2.78837126493454



 16%|███████████▍                                                            | 1590/10000 [3:38:04<18:55:38,  8.10s/it]

Epoch: 1589 | Training loss 2.8627737388014793 | Validation loss 2.7943661510944366



 16%|███████████▍                                                            | 1591/10000 [3:38:12<18:54:50,  8.10s/it]

Epoch: 1590 | Training loss 2.8684606105089188 | Validation loss 2.791610300540924



 16%|███████████▍                                                            | 1592/10000 [3:38:20<18:58:11,  8.12s/it]

Epoch: 1591 | Training loss 2.870087333023548 | Validation loss 2.797115981578827



 16%|███████████▍                                                            | 1593/10000 [3:38:28<18:57:51,  8.12s/it]

Epoch: 1592 | Training loss 2.863615430891514 | Validation loss 2.7953672111034393



 16%|███████████▍                                                            | 1594/10000 [3:38:36<18:54:19,  8.10s/it]

Epoch: 1593 | Training loss 2.8689747005701065 | Validation loss 2.790272057056427



 16%|███████████▍                                                            | 1595/10000 [3:38:44<18:49:58,  8.07s/it]

Epoch: 1594 | Training loss 2.868393763899803 | Validation loss 2.7997795939445496



 16%|███████████▍                                                            | 1596/10000 [3:38:52<18:50:05,  8.07s/it]

Epoch: 1595 | Training loss 2.8623346984386444 | Validation loss 2.795359343290329



 16%|███████████▍                                                            | 1597/10000 [3:39:00<18:52:38,  8.09s/it]

Epoch: 1596 | Training loss 2.8649022355675697 | Validation loss 2.789101541042328



 16%|███████████▌                                                            | 1598/10000 [3:39:08<18:55:40,  8.11s/it]

Epoch: 1597 | Training loss 2.86397535353899 | Validation loss 2.7897534668445587



 16%|███████████▌                                                            | 1599/10000 [3:39:17<18:56:31,  8.12s/it]

Epoch: 1598 | Training loss 2.8643962666392326 | Validation loss 2.7855642437934875



 16%|███████████▌                                                            | 1600/10000 [3:39:25<18:57:49,  8.13s/it]

Epoch: 1599 | Training loss 2.8666673451662064 | Validation loss 2.786269634962082



 16%|███████████▌                                                            | 1601/10000 [3:39:33<18:57:12,  8.12s/it]

Epoch: 1600 | Training loss 2.870528995990753 | Validation loss 2.7896578907966614



 16%|███████████▌                                                            | 1602/10000 [3:39:41<18:55:28,  8.11s/it]

Epoch: 1601 | Training loss 2.8699183836579323 | Validation loss 2.796206682920456



 16%|███████████▌                                                            | 1603/10000 [3:39:49<18:55:38,  8.11s/it]

Epoch: 1602 | Training loss 2.86777213960886 | Validation loss 2.79006764292717



 16%|███████████▌                                                            | 1604/10000 [3:39:57<18:55:02,  8.11s/it]

Epoch: 1603 | Training loss 2.8665317445993423 | Validation loss 2.7956607043743134



 16%|███████████▌                                                            | 1605/10000 [3:40:05<18:54:30,  8.11s/it]

Epoch: 1604 | Training loss 2.869910165667534 | Validation loss 2.7911579608917236



 16%|███████████▌                                                            | 1606/10000 [3:40:13<18:54:10,  8.11s/it]

Epoch: 1605 | Training loss 2.8643279746174812 | Validation loss 2.79647359251976



 16%|███████████▌                                                            | 1607/10000 [3:40:21<18:56:32,  8.12s/it]

Epoch: 1606 | Training loss 2.8679665103554726 | Validation loss 2.7986932396888733



 16%|███████████▌                                                            | 1608/10000 [3:40:30<18:55:25,  8.12s/it]

Epoch: 1607 | Training loss 2.8695551231503487 | Validation loss 2.7900208234786987



 16%|███████████▌                                                            | 1609/10000 [3:40:38<18:59:11,  8.15s/it]

Epoch: 1608 | Training loss 2.8671930953860283 | Validation loss 2.7946110367774963



 16%|███████████▌                                                            | 1610/10000 [3:40:46<18:57:15,  8.13s/it]

Epoch: 1609 | Training loss 2.869767501950264 | Validation loss 2.7960911095142365



 16%|███████████▌                                                            | 1611/10000 [3:40:54<18:55:50,  8.12s/it]

Epoch: 1610 | Training loss 2.8612583205103874 | Validation loss 2.7892121076583862



 16%|███████████▌                                                            | 1612/10000 [3:41:02<19:01:14,  8.16s/it]

Epoch: 1611 | Training loss 2.8682271167635918 | Validation loss 2.793642520904541



 16%|███████████▌                                                            | 1613/10000 [3:41:10<19:02:26,  8.17s/it]

Epoch: 1612 | Training loss 2.8699777871370316 | Validation loss 2.7897871136665344



 16%|███████████▌                                                            | 1614/10000 [3:41:19<19:02:02,  8.17s/it]

Epoch: 1613 | Training loss 2.8659206703305244 | Validation loss 2.792216420173645



 16%|███████████▋                                                            | 1615/10000 [3:41:27<19:03:30,  8.18s/it]

Epoch: 1614 | Training loss 2.8627883195877075 | Validation loss 2.785856455564499



 16%|███████████▋                                                            | 1616/10000 [3:41:35<19:00:09,  8.16s/it]

Epoch: 1615 | Training loss 2.871526189148426 | Validation loss 2.7929002940654755



 16%|███████████▋                                                            | 1617/10000 [3:41:43<19:02:15,  8.18s/it]

Epoch: 1616 | Training loss 2.8697248846292496 | Validation loss 2.7889753878116608



 16%|███████████▋                                                            | 1618/10000 [3:41:51<18:59:39,  8.16s/it]

Epoch: 1617 | Training loss 2.866042584180832 | Validation loss 2.788407117128372



 16%|███████████▋                                                            | 1619/10000 [3:41:59<19:01:11,  8.17s/it]

Epoch: 1618 | Training loss 2.86566561460495 | Validation loss 2.7871955931186676



 16%|███████████▋                                                            | 1620/10000 [3:42:08<19:03:31,  8.19s/it]

Epoch: 1619 | Training loss 2.8689401373267174 | Validation loss 2.790967881679535



 16%|███████████▋                                                            | 1621/10000 [3:42:16<18:57:10,  8.14s/it]

Epoch: 1620 | Training loss 2.867327429354191 | Validation loss 2.787506878376007



 16%|███████████▋                                                            | 1622/10000 [3:42:24<18:55:24,  8.13s/it]

Epoch: 1621 | Training loss 2.8714254572987556 | Validation loss 2.7927669882774353



 16%|███████████▋                                                            | 1623/10000 [3:42:32<18:54:04,  8.12s/it]

Epoch: 1622 | Training loss 2.8703615814447403 | Validation loss 2.7900764644145966



 16%|███████████▋                                                            | 1624/10000 [3:42:40<18:49:41,  8.09s/it]

Epoch: 1623 | Training loss 2.866767093539238 | Validation loss 2.785989671945572



 16%|███████████▋                                                            | 1625/10000 [3:42:48<18:48:27,  8.08s/it]

Epoch: 1624 | Training loss 2.863608665764332 | Validation loss 2.7875223457813263



 16%|███████████▋                                                            | 1626/10000 [3:42:56<18:50:25,  8.10s/it]

Epoch: 1625 | Training loss 2.866876095533371 | Validation loss 2.793785333633423



 16%|███████████▋                                                            | 1627/10000 [3:43:04<18:51:46,  8.11s/it]

Epoch: 1626 | Training loss 2.8662092685699463 | Validation loss 2.790766328573227



 16%|███████████▋                                                            | 1628/10000 [3:43:12<18:51:52,  8.11s/it]

Epoch: 1627 | Training loss 2.8666510358452797 | Validation loss 2.794226795434952



 16%|███████████▋                                                            | 1629/10000 [3:43:20<18:49:23,  8.10s/it]

Epoch: 1628 | Training loss 2.8677835389971733 | Validation loss 2.7895559668540955



 16%|███████████▋                                                            | 1630/10000 [3:43:29<18:54:06,  8.13s/it]

Epoch: 1629 | Training loss 2.864315629005432 | Validation loss 2.793186843395233



 16%|███████████▋                                                            | 1631/10000 [3:43:37<18:54:02,  8.13s/it]

Epoch: 1630 | Training loss 2.8688359558582306 | Validation loss 2.7963960468769073



 16%|███████████▊                                                            | 1632/10000 [3:43:45<18:53:11,  8.13s/it]

Epoch: 1631 | Training loss 2.8711050301790237 | Validation loss 2.793757528066635



 16%|███████████▊                                                            | 1633/10000 [3:43:53<18:52:42,  8.12s/it]

Epoch: 1632 | Training loss 2.865564651787281 | Validation loss 2.7846341729164124



 16%|███████████▊                                                            | 1634/10000 [3:44:01<18:54:19,  8.14s/it]

Epoch: 1633 | Training loss 2.8688618019223213 | Validation loss 2.788528561592102



 16%|███████████▊                                                            | 1635/10000 [3:44:09<18:52:11,  8.12s/it]

Epoch: 1634 | Training loss 2.8663008734583855 | Validation loss 2.7852167189121246



 16%|███████████▊                                                            | 1636/10000 [3:44:17<18:54:50,  8.14s/it]

Epoch: 1635 | Training loss 2.870259255170822 | Validation loss 2.7887701094150543



 16%|███████████▊                                                            | 1637/10000 [3:44:26<18:55:11,  8.14s/it]

Epoch: 1636 | Training loss 2.86525522172451 | Validation loss 2.7902420461177826



 16%|███████████▊                                                            | 1638/10000 [3:44:34<18:54:57,  8.14s/it]

Epoch: 1637 | Training loss 2.867115408182144 | Validation loss 2.7985496520996094



 16%|███████████▊                                                            | 1639/10000 [3:44:42<18:54:24,  8.14s/it]

Epoch: 1638 | Training loss 2.8645746633410454 | Validation loss 2.785323590040207



 16%|███████████▊                                                            | 1640/10000 [3:44:50<18:53:13,  8.13s/it]

Epoch: 1639 | Training loss 2.8671636134386063 | Validation loss 2.7876117825508118



 16%|███████████▊                                                            | 1641/10000 [3:44:58<18:54:19,  8.14s/it]

Epoch: 1640 | Training loss 2.865861624479294 | Validation loss 2.7890531718730927



 16%|███████████▊                                                            | 1642/10000 [3:45:06<19:00:59,  8.19s/it]

Epoch: 1641 | Training loss 2.8596403673291206 | Validation loss 2.79955393075943



 16%|███████████▊                                                            | 1643/10000 [3:45:15<18:59:48,  8.18s/it]

Epoch: 1642 | Training loss 2.868166208267212 | Validation loss 2.79660165309906



 16%|███████████▊                                                            | 1644/10000 [3:45:23<19:00:53,  8.19s/it]

Epoch: 1643 | Training loss 2.868935205042362 | Validation loss 2.793329894542694



 16%|███████████▊                                                            | 1645/10000 [3:45:31<18:59:36,  8.18s/it]

Epoch: 1644 | Training loss 2.872897669672966 | Validation loss 2.8040281236171722



 16%|███████████▊                                                            | 1646/10000 [3:45:39<18:56:34,  8.16s/it]

Epoch: 1645 | Training loss 2.8753464370965958 | Validation loss 2.794721871614456



 16%|███████████▊                                                            | 1647/10000 [3:45:47<18:54:00,  8.15s/it]

Epoch: 1646 | Training loss 2.8695300072431564 | Validation loss 2.7883616387844086



 16%|███████████▊                                                            | 1648/10000 [3:45:55<18:57:54,  8.17s/it]

Epoch: 1647 | Training loss 2.867266535758972 | Validation loss 2.7982131242752075



 16%|███████████▊                                                            | 1649/10000 [3:46:04<18:55:59,  8.16s/it]

Epoch: 1648 | Training loss 2.868439868092537 | Validation loss 2.8085385859012604



 16%|███████████▉                                                            | 1650/10000 [3:46:12<18:55:20,  8.16s/it]

Epoch: 1649 | Training loss 2.865727536380291 | Validation loss 2.7904860377311707



 17%|███████████▉                                                            | 1651/10000 [3:46:20<18:52:10,  8.14s/it]

Epoch: 1650 | Training loss 2.868616499006748 | Validation loss 2.789738804101944



 17%|███████████▉                                                            | 1652/10000 [3:46:28<18:54:01,  8.15s/it]

Epoch: 1651 | Training loss 2.866087906062603 | Validation loss 2.791394680738449



 17%|███████████▉                                                            | 1653/10000 [3:46:36<18:56:25,  8.17s/it]

Epoch: 1652 | Training loss 2.8638336658477783 | Validation loss 2.7921162545681



 17%|███████████▉                                                            | 1654/10000 [3:46:44<18:50:12,  8.13s/it]

Epoch: 1653 | Training loss 2.864533558487892 | Validation loss 2.7896581888198853



 17%|███████████▉                                                            | 1655/10000 [3:46:52<18:51:39,  8.14s/it]

Epoch: 1654 | Training loss 2.8637449741363525 | Validation loss 2.7865657806396484



 17%|███████████▉                                                            | 1656/10000 [3:47:01<18:50:02,  8.13s/it]

Epoch: 1655 | Training loss 2.864487409591675 | Validation loss 2.809522420167923



 17%|███████████▉                                                            | 1657/10000 [3:47:09<18:49:38,  8.12s/it]

Epoch: 1656 | Training loss 2.8675893917679787 | Validation loss 2.7917527556419373



 17%|███████████▉                                                            | 1658/10000 [3:47:17<18:48:34,  8.12s/it]

Epoch: 1657 | Training loss 2.8647553995251656 | Validation loss 2.795623779296875



 17%|███████████▉                                                            | 1659/10000 [3:47:25<18:49:50,  8.13s/it]

Epoch: 1658 | Training loss 2.8682310432195663 | Validation loss 2.797206223011017



 17%|███████████▉                                                            | 1660/10000 [3:47:33<18:49:19,  8.12s/it]

Epoch: 1659 | Training loss 2.8682494461536407 | Validation loss 2.792627304792404



 17%|███████████▉                                                            | 1661/10000 [3:47:41<18:52:12,  8.15s/it]

Epoch: 1660 | Training loss 2.8638889119029045 | Validation loss 2.7929352521896362



 17%|███████████▉                                                            | 1662/10000 [3:47:49<18:46:54,  8.11s/it]

Epoch: 1661 | Training loss 2.866575188934803 | Validation loss 2.7899116575717926



 17%|███████████▉                                                            | 1663/10000 [3:47:57<18:48:47,  8.12s/it]

Epoch: 1662 | Training loss 2.8647773787379265 | Validation loss 2.7908012568950653



 17%|███████████▉                                                            | 1664/10000 [3:48:05<18:42:57,  8.08s/it]

Epoch: 1663 | Training loss 2.868925414979458 | Validation loss 2.7934731543064117



 17%|███████████▉                                                            | 1665/10000 [3:48:14<18:44:20,  8.09s/it]

Epoch: 1664 | Training loss 2.869367375969887 | Validation loss 2.798028379678726



 17%|███████████▉                                                            | 1666/10000 [3:48:22<18:45:16,  8.10s/it]

Epoch: 1665 | Training loss 2.865071766078472 | Validation loss 2.792578637599945



 17%|████████████                                                            | 1667/10000 [3:48:30<18:47:51,  8.12s/it]

Epoch: 1666 | Training loss 2.868074096739292 | Validation loss 2.7923523783683777



 17%|████████████                                                            | 1668/10000 [3:48:38<18:50:54,  8.14s/it]

Epoch: 1667 | Training loss 2.86394315212965 | Validation loss 2.7941128611564636



 17%|████████████                                                            | 1669/10000 [3:48:46<18:49:08,  8.13s/it]

Epoch: 1668 | Training loss 2.867514543235302 | Validation loss 2.790823817253113



 17%|████████████                                                            | 1670/10000 [3:48:54<18:48:27,  8.13s/it]

Epoch: 1669 | Training loss 2.8664052337408066 | Validation loss 2.788919359445572



 17%|████████████                                                            | 1671/10000 [3:49:02<18:49:43,  8.14s/it]

Epoch: 1670 | Training loss 2.8630410954356194 | Validation loss 2.797229766845703



 17%|████████████                                                            | 1672/10000 [3:49:10<18:46:54,  8.12s/it]

Epoch: 1671 | Training loss 2.8648552522063255 | Validation loss 2.7875865399837494



 17%|████████████                                                            | 1673/10000 [3:49:19<18:47:12,  8.12s/it]

Epoch: 1672 | Training loss 2.867114618420601 | Validation loss 2.7927246689796448



 17%|████████████                                                            | 1674/10000 [3:49:27<18:50:23,  8.15s/it]

Epoch: 1673 | Training loss 2.8630911335349083 | Validation loss 2.788828045129776



 17%|████████████                                                            | 1675/10000 [3:49:35<18:51:34,  8.16s/it]

Epoch: 1674 | Training loss 2.8670258298516273 | Validation loss 2.7890183329582214



 17%|████████████                                                            | 1676/10000 [3:49:43<18:47:07,  8.12s/it]

Epoch: 1675 | Training loss 2.8680866360664368 | Validation loss 2.7910302579402924



 17%|████████████                                                            | 1677/10000 [3:49:51<18:49:14,  8.14s/it]

Epoch: 1676 | Training loss 2.8674859926104546 | Validation loss 2.7986791133880615



 17%|████████████                                                            | 1678/10000 [3:49:59<18:53:20,  8.17s/it]

Epoch: 1677 | Training loss 2.8701116889715195 | Validation loss 2.7925508320331573



 17%|████████████                                                            | 1679/10000 [3:50:08<18:52:08,  8.16s/it]

Epoch: 1678 | Training loss 2.8629933521151543 | Validation loss 2.799148380756378



 17%|████████████                                                            | 1680/10000 [3:50:16<18:54:35,  8.18s/it]

Epoch: 1679 | Training loss 2.862187996506691 | Validation loss 2.788528949022293



 17%|████████████                                                            | 1681/10000 [3:50:24<18:53:48,  8.18s/it]

Epoch: 1680 | Training loss 2.8633607551455498 | Validation loss 2.7961654365062714



 17%|████████████                                                            | 1682/10000 [3:50:32<18:53:03,  8.17s/it]

Epoch: 1681 | Training loss 2.8651421517133713 | Validation loss 2.7926398515701294



 17%|████████████                                                            | 1683/10000 [3:50:40<18:50:37,  8.16s/it]

Epoch: 1682 | Training loss 2.8613931462168694 | Validation loss 2.7885216176509857



 17%|████████████                                                            | 1684/10000 [3:50:48<18:48:46,  8.14s/it]

Epoch: 1683 | Training loss 2.865481197834015 | Validation loss 2.7863988876342773



 17%|████████████▏                                                           | 1685/10000 [3:50:56<18:43:03,  8.10s/it]

Epoch: 1684 | Training loss 2.8678117245435715 | Validation loss 2.799575001001358



 17%|████████████▏                                                           | 1686/10000 [3:51:04<18:41:36,  8.09s/it]

Epoch: 1685 | Training loss 2.870381072163582 | Validation loss 2.80306875705719



 17%|████████████▏                                                           | 1687/10000 [3:51:13<18:42:26,  8.10s/it]

Epoch: 1686 | Training loss 2.8671938106417656 | Validation loss 2.789473593235016



 17%|████████████▏                                                           | 1688/10000 [3:51:21<18:36:27,  8.06s/it]

Epoch: 1687 | Training loss 2.8664283379912376 | Validation loss 2.793221890926361



 17%|████████████▏                                                           | 1689/10000 [3:51:29<18:42:39,  8.10s/it]

Epoch: 1688 | Training loss 2.8668754771351814 | Validation loss 2.787402331829071



 17%|████████████▏                                                           | 1690/10000 [3:51:37<18:41:02,  8.09s/it]

Epoch: 1689 | Training loss 2.8682652041316032 | Validation loss 2.7917639017105103



 17%|████████████▏                                                           | 1691/10000 [3:51:45<18:43:05,  8.11s/it]

Epoch: 1690 | Training loss 2.866858161985874 | Validation loss 2.7839499413967133



 17%|████████████▏                                                           | 1692/10000 [3:51:53<18:44:31,  8.12s/it]

Epoch: 1691 | Training loss 2.8651265874505043 | Validation loss 2.7965604662895203



 17%|████████████▏                                                           | 1693/10000 [3:52:01<18:48:00,  8.15s/it]

Epoch: 1692 | Training loss 2.8657345548272133 | Validation loss 2.789050906896591



 17%|████████████▏                                                           | 1694/10000 [3:52:09<18:48:30,  8.15s/it]

Epoch: 1693 | Training loss 2.8615355491638184 | Validation loss 2.789677083492279



 17%|████████████▏                                                           | 1695/10000 [3:52:18<18:53:22,  8.19s/it]

Epoch: 1694 | Training loss 2.869551546871662 | Validation loss 2.7856194972991943



 17%|████████████▏                                                           | 1696/10000 [3:52:26<18:57:44,  8.22s/it]

Epoch: 1695 | Training loss 2.8671392425894737 | Validation loss 2.7927043437957764



 17%|████████████▏                                                           | 1697/10000 [3:52:34<18:52:40,  8.19s/it]

Epoch: 1696 | Training loss 2.8686129078269005 | Validation loss 2.7965696156024933



 17%|████████████▏                                                           | 1698/10000 [3:52:42<18:53:49,  8.19s/it]

Epoch: 1697 | Training loss 2.8686427772045135 | Validation loss 2.798847943544388



 17%|████████████▏                                                           | 1699/10000 [3:52:50<18:44:30,  8.13s/it]

Epoch: 1698 | Training loss 2.8718018978834152 | Validation loss 2.794846534729004



 17%|████████████▏                                                           | 1700/10000 [3:52:58<18:44:07,  8.13s/it]

Epoch: 1699 | Training loss 2.8697603717446327 | Validation loss 2.7977511286735535



 17%|████████████▏                                                           | 1701/10000 [3:53:07<18:41:40,  8.11s/it]

Epoch: 1700 | Training loss 2.8696843534708023 | Validation loss 2.799332618713379



 17%|████████████▎                                                           | 1702/10000 [3:53:15<18:43:07,  8.12s/it]

Epoch: 1701 | Training loss 2.8707265704870224 | Validation loss 2.8243550658226013



 17%|████████████▎                                                           | 1703/10000 [3:53:23<18:40:09,  8.10s/it]

Epoch: 1702 | Training loss 2.869438238441944 | Validation loss 2.7936264872550964



 17%|████████████▎                                                           | 1704/10000 [3:53:31<18:40:06,  8.10s/it]

Epoch: 1703 | Training loss 2.8643254712224007 | Validation loss 2.7891266644001007



 17%|████████████▎                                                           | 1705/10000 [3:53:39<18:39:18,  8.10s/it]

Epoch: 1704 | Training loss 2.866153135895729 | Validation loss 2.7960865795612335



 17%|████████████▎                                                           | 1706/10000 [3:53:47<18:38:02,  8.09s/it]

Epoch: 1705 | Training loss 2.8674487695097923 | Validation loss 2.7905751168727875



 17%|████████████▎                                                           | 1707/10000 [3:53:55<18:39:07,  8.10s/it]

Epoch: 1706 | Training loss 2.8659161552786827 | Validation loss 2.7921887934207916



 17%|████████████▎                                                           | 1708/10000 [3:54:03<18:34:38,  8.07s/it]

Epoch: 1707 | Training loss 2.8633774295449257 | Validation loss 2.792289137840271



 17%|████████████▎                                                           | 1709/10000 [3:54:11<18:34:43,  8.07s/it]

Epoch: 1708 | Training loss 2.865755334496498 | Validation loss 2.7883772552013397



 17%|████████████▎                                                           | 1710/10000 [3:54:19<18:36:36,  8.08s/it]

Epoch: 1709 | Training loss 2.866058476269245 | Validation loss 2.7860656678676605



 17%|████████████▎                                                           | 1711/10000 [3:54:27<18:37:13,  8.09s/it]

Epoch: 1710 | Training loss 2.867822542786598 | Validation loss 2.7888868749141693



 17%|████████████▎                                                           | 1712/10000 [3:54:35<18:35:03,  8.07s/it]

Epoch: 1711 | Training loss 2.8631312549114227 | Validation loss 2.7827179431915283



 17%|████████████▎                                                           | 1713/10000 [3:54:43<18:34:08,  8.07s/it]

Epoch: 1712 | Training loss 2.867313429713249 | Validation loss 2.7923356890678406



 17%|████████████▎                                                           | 1714/10000 [3:54:52<18:34:46,  8.07s/it]

Epoch: 1713 | Training loss 2.86760301142931 | Validation loss 2.7926871478557587



 17%|████████████▎                                                           | 1715/10000 [3:55:00<18:33:57,  8.07s/it]

Epoch: 1714 | Training loss 2.8643451631069183 | Validation loss 2.7847152054309845



 17%|████████████▎                                                           | 1716/10000 [3:55:08<18:39:46,  8.11s/it]

Epoch: 1715 | Training loss 2.8701163977384567 | Validation loss 2.7924459874629974



 17%|████████████▎                                                           | 1717/10000 [3:55:16<18:36:42,  8.09s/it]

Epoch: 1716 | Training loss 2.8704290688037872 | Validation loss 2.7993220686912537



 17%|████████████▎                                                           | 1718/10000 [3:55:24<18:39:06,  8.11s/it]

Epoch: 1717 | Training loss 2.8691598996520042 | Validation loss 2.8048133552074432



 17%|████████████▍                                                           | 1719/10000 [3:55:32<18:33:34,  8.07s/it]

Epoch: 1718 | Training loss 2.8660432919859886 | Validation loss 2.7897952795028687



 17%|████████████▍                                                           | 1720/10000 [3:55:40<18:44:26,  8.15s/it]

Epoch: 1719 | Training loss 2.8664414659142494 | Validation loss 2.78775617480278



 17%|████████████▍                                                           | 1721/10000 [3:55:49<18:47:31,  8.17s/it]

Epoch: 1720 | Training loss 2.863147310912609 | Validation loss 2.7959141731262207



 17%|████████████▍                                                           | 1722/10000 [3:55:57<18:47:40,  8.17s/it]

Epoch: 1721 | Training loss 2.867907054722309 | Validation loss 2.7988898754119873



 17%|████████████▍                                                           | 1723/10000 [3:56:05<18:47:13,  8.17s/it]

Epoch: 1722 | Training loss 2.8684596493840218 | Validation loss 2.7924104928970337



 17%|████████████▍                                                           | 1724/10000 [3:56:13<18:46:54,  8.17s/it]

Epoch: 1723 | Training loss 2.862955331802368 | Validation loss 2.791451245546341



 17%|████████████▍                                                           | 1725/10000 [3:56:21<18:47:46,  8.18s/it]

Epoch: 1724 | Training loss 2.8647963032126427 | Validation loss 2.791703939437866



 17%|████████████▍                                                           | 1726/10000 [3:56:29<18:44:29,  8.15s/it]

Epoch: 1725 | Training loss 2.8687478825449944 | Validation loss 2.7922365963459015



 17%|████████████▍                                                           | 1727/10000 [3:56:38<18:43:30,  8.15s/it]

Epoch: 1726 | Training loss 2.8678813576698303 | Validation loss 2.7929511070251465



 17%|████████████▍                                                           | 1728/10000 [3:56:46<18:42:42,  8.14s/it]

Epoch: 1727 | Training loss 2.865457110106945 | Validation loss 2.789527714252472



 17%|████████████▍                                                           | 1729/10000 [3:56:54<18:43:58,  8.15s/it]

Epoch: 1728 | Training loss 2.8636538833379745 | Validation loss 2.788547456264496



 17%|████████████▍                                                           | 1730/10000 [3:57:02<18:39:05,  8.12s/it]

Epoch: 1729 | Training loss 2.8612989112734795 | Validation loss 2.7899489402770996



 17%|████████████▍                                                           | 1731/10000 [3:57:10<18:40:06,  8.13s/it]

Epoch: 1730 | Training loss 2.862720660865307 | Validation loss 2.795996814966202



 17%|████████████▍                                                           | 1732/10000 [3:57:18<18:40:22,  8.13s/it]

Epoch: 1731 | Training loss 2.865760751068592 | Validation loss 2.791045844554901



 17%|████████████▍                                                           | 1733/10000 [3:57:26<18:39:42,  8.13s/it]

Epoch: 1732 | Training loss 2.8663733825087547 | Validation loss 2.788576066493988



 17%|████████████▍                                                           | 1734/10000 [3:57:34<18:43:37,  8.16s/it]

Epoch: 1733 | Training loss 2.8638189509510994 | Validation loss 2.7874855399131775



 17%|████████████▍                                                           | 1735/10000 [3:57:42<18:37:36,  8.11s/it]

Epoch: 1734 | Training loss 2.864864818751812 | Validation loss 2.7924632132053375



 17%|████████████▍                                                           | 1736/10000 [3:57:51<18:37:11,  8.11s/it]

Epoch: 1735 | Training loss 2.8648143112659454 | Validation loss 2.8035488426685333



 17%|████████████▌                                                           | 1737/10000 [3:57:59<18:43:23,  8.16s/it]

Epoch: 1736 | Training loss 2.8702371641993523 | Validation loss 2.7890535593032837



 17%|████████████▌                                                           | 1738/10000 [3:58:07<18:40:16,  8.14s/it]

Epoch: 1737 | Training loss 2.8682148084044456 | Validation loss 2.7995955049991608



 17%|████████████▌                                                           | 1739/10000 [3:58:15<18:40:05,  8.14s/it]

Epoch: 1738 | Training loss 2.8755999729037285 | Validation loss 2.7925034761428833



 17%|████████████▌                                                           | 1740/10000 [3:58:23<18:41:42,  8.15s/it]

Epoch: 1739 | Training loss 2.8674432560801506 | Validation loss 2.794039100408554



 17%|████████████▌                                                           | 1741/10000 [3:58:31<18:42:18,  8.15s/it]

Epoch: 1740 | Training loss 2.868130274116993 | Validation loss 2.7935657799243927



 17%|████████████▌                                                           | 1742/10000 [3:58:39<18:36:16,  8.11s/it]

Epoch: 1741 | Training loss 2.8643756359815598 | Validation loss 2.7949588000774384



 17%|████████████▌                                                           | 1743/10000 [3:58:47<18:33:07,  8.09s/it]

Epoch: 1742 | Training loss 2.8683722987771034 | Validation loss 2.7956430912017822



 17%|████████████▌                                                           | 1744/10000 [3:58:56<18:30:54,  8.07s/it]

Epoch: 1743 | Training loss 2.8644017428159714 | Validation loss 2.7896815836429596



 17%|████████████▌                                                           | 1745/10000 [3:59:04<18:39:50,  8.14s/it]

Epoch: 1744 | Training loss 2.8694077134132385 | Validation loss 2.7915970385074615



 17%|████████████▌                                                           | 1746/10000 [3:59:12<18:40:40,  8.15s/it]

Epoch: 1745 | Training loss 2.8617594316601753 | Validation loss 2.794920176267624



 17%|████████████▌                                                           | 1747/10000 [3:59:20<18:43:10,  8.17s/it]

Epoch: 1746 | Training loss 2.87202650308609 | Validation loss 2.797627866268158



 17%|████████████▌                                                           | 1748/10000 [3:59:28<18:41:26,  8.15s/it]

Epoch: 1747 | Training loss 2.8665150329470634 | Validation loss 2.7955411076545715



 17%|████████████▌                                                           | 1749/10000 [3:59:36<18:39:52,  8.14s/it]

Epoch: 1748 | Training loss 2.869031272828579 | Validation loss 2.792124927043915



 18%|████████████▌                                                           | 1750/10000 [3:59:45<18:38:55,  8.14s/it]

Epoch: 1749 | Training loss 2.8709208741784096 | Validation loss 2.8007428348064423



 18%|████████████▌                                                           | 1751/10000 [3:59:53<18:36:02,  8.12s/it]

Epoch: 1750 | Training loss 2.8704387694597244 | Validation loss 2.793003350496292



 18%|████████████▌                                                           | 1752/10000 [4:00:01<18:33:21,  8.10s/it]

Epoch: 1751 | Training loss 2.8681790456175804 | Validation loss 2.7870656549930573



 18%|████████████▌                                                           | 1753/10000 [4:00:09<18:34:43,  8.11s/it]

Epoch: 1752 | Training loss 2.8655472174286842 | Validation loss 2.7970739901065826



 18%|████████████▋                                                           | 1754/10000 [4:00:17<18:34:58,  8.11s/it]

Epoch: 1753 | Training loss 2.8728336840867996 | Validation loss 2.7991530895233154



 18%|████████████▋                                                           | 1755/10000 [4:00:25<18:38:49,  8.14s/it]

Epoch: 1754 | Training loss 2.8734883964061737 | Validation loss 2.793333739042282



 18%|████████████▋                                                           | 1756/10000 [4:00:33<18:36:20,  8.12s/it]

Epoch: 1755 | Training loss 2.872335582971573 | Validation loss 2.789127677679062



 18%|████████████▋                                                           | 1757/10000 [4:00:41<18:33:59,  8.11s/it]

Epoch: 1756 | Training loss 2.871567837893963 | Validation loss 2.7945159673690796



 18%|████████████▋                                                           | 1758/10000 [4:00:49<18:32:56,  8.10s/it]

Epoch: 1757 | Training loss 2.8640469759702682 | Validation loss 2.790473997592926



 18%|████████████▋                                                           | 1759/10000 [4:00:58<18:33:35,  8.11s/it]

Epoch: 1758 | Training loss 2.8712910264730453 | Validation loss 2.7922398149967194



 18%|████████████▋                                                           | 1760/10000 [4:01:06<18:31:13,  8.09s/it]

Epoch: 1759 | Training loss 2.8674685060977936 | Validation loss 2.791449099779129



 18%|████████████▋                                                           | 1761/10000 [4:01:14<18:35:25,  8.12s/it]

Epoch: 1760 | Training loss 2.8669896200299263 | Validation loss 2.7975494265556335



 18%|████████████▋                                                           | 1762/10000 [4:01:22<18:36:25,  8.13s/it]

Epoch: 1761 | Training loss 2.865208864212036 | Validation loss 2.7853608429431915



 18%|████████████▋                                                           | 1763/10000 [4:01:30<18:38:18,  8.15s/it]

Epoch: 1762 | Training loss 2.8703598380088806 | Validation loss 2.797310382127762



 18%|████████████▋                                                           | 1764/10000 [4:01:38<18:35:17,  8.12s/it]

Epoch: 1763 | Training loss 2.871408633887768 | Validation loss 2.7933365404605865



 18%|████████████▋                                                           | 1765/10000 [4:01:46<18:33:33,  8.11s/it]

Epoch: 1764 | Training loss 2.8688103184103966 | Validation loss 2.794469565153122



 18%|████████████▋                                                           | 1766/10000 [4:01:54<18:30:24,  8.09s/it]

Epoch: 1765 | Training loss 2.867789551615715 | Validation loss 2.7883989810943604



 18%|████████████▋                                                           | 1767/10000 [4:02:02<18:33:19,  8.11s/it]

Epoch: 1766 | Training loss 2.868844710290432 | Validation loss 2.7913436591625214



 18%|████████████▋                                                           | 1768/10000 [4:02:11<18:33:25,  8.12s/it]

Epoch: 1767 | Training loss 2.863381326198578 | Validation loss 2.787912040948868



 18%|████████████▋                                                           | 1769/10000 [4:02:19<18:28:52,  8.08s/it]

Epoch: 1768 | Training loss 2.8644319474697113 | Validation loss 2.795012891292572



 18%|████████████▋                                                           | 1770/10000 [4:02:27<18:34:43,  8.13s/it]

Epoch: 1769 | Training loss 2.8668161407113075 | Validation loss 2.790744364261627



 18%|████████████▊                                                           | 1771/10000 [4:02:35<18:33:41,  8.12s/it]

Epoch: 1770 | Training loss 2.8700340539216995 | Validation loss 2.7969655096530914



 18%|████████████▊                                                           | 1772/10000 [4:02:43<18:33:54,  8.12s/it]

Epoch: 1771 | Training loss 2.870255008339882 | Validation loss 2.795468032360077



 18%|████████████▊                                                           | 1773/10000 [4:02:51<18:30:44,  8.10s/it]

Epoch: 1772 | Training loss 2.863119252026081 | Validation loss 2.7893924713134766



 18%|████████████▊                                                           | 1774/10000 [4:02:59<18:28:58,  8.09s/it]

Epoch: 1773 | Training loss 2.8648116663098335 | Validation loss 2.7856582701206207



 18%|████████████▊                                                           | 1775/10000 [4:03:07<18:30:03,  8.10s/it]

Epoch: 1774 | Training loss 2.862173445522785 | Validation loss 2.7867431342601776



 18%|████████████▊                                                           | 1776/10000 [4:03:15<18:28:49,  8.09s/it]

Epoch: 1775 | Training loss 2.860359340906143 | Validation loss 2.786540925502777



 18%|████████████▊                                                           | 1777/10000 [4:03:24<18:32:03,  8.11s/it]

Epoch: 1776 | Training loss 2.8700204864144325 | Validation loss 2.7906198501586914



 18%|████████████▊                                                           | 1778/10000 [4:03:32<18:30:56,  8.11s/it]

Epoch: 1777 | Training loss 2.872027851641178 | Validation loss 2.7847529351711273



 18%|████████████▊                                                           | 1779/10000 [4:03:40<18:29:54,  8.10s/it]

Epoch: 1778 | Training loss 2.8697632551193237 | Validation loss 2.787333518266678



 18%|████████████▊                                                           | 1780/10000 [4:03:48<18:26:01,  8.07s/it]

Epoch: 1779 | Training loss 2.865907743573189 | Validation loss 2.7902122735977173



 18%|████████████▊                                                           | 1781/10000 [4:03:56<18:28:20,  8.09s/it]

Epoch: 1780 | Training loss 2.867046907544136 | Validation loss 2.794941008090973



 18%|████████████▊                                                           | 1782/10000 [4:04:04<18:28:40,  8.09s/it]

Epoch: 1781 | Training loss 2.8694543913006783 | Validation loss 2.797859787940979



 18%|████████████▊                                                           | 1783/10000 [4:04:12<18:32:44,  8.13s/it]

Epoch: 1782 | Training loss 2.865137629210949 | Validation loss 2.7854104936122894



 18%|████████████▊                                                           | 1784/10000 [4:04:20<18:25:09,  8.07s/it]

Epoch: 1783 | Training loss 2.867055118083954 | Validation loss 2.7922565042972565



 18%|████████████▊                                                           | 1785/10000 [4:04:28<18:29:54,  8.11s/it]

Epoch: 1784 | Training loss 2.866428777575493 | Validation loss 2.7941609025001526



 18%|████████████▊                                                           | 1786/10000 [4:04:36<18:33:21,  8.13s/it]

Epoch: 1785 | Training loss 2.868342563509941 | Validation loss 2.8016074001789093



 18%|████████████▊                                                           | 1787/10000 [4:04:45<18:31:10,  8.12s/it]

Epoch: 1786 | Training loss 2.869200810790062 | Validation loss 2.794473171234131



 18%|████████████▊                                                           | 1788/10000 [4:04:53<18:33:07,  8.13s/it]

Epoch: 1787 | Training loss 2.873007833957672 | Validation loss 2.7945699393749237



 18%|████████████▉                                                           | 1789/10000 [4:05:01<18:26:35,  8.09s/it]

Epoch: 1788 | Training loss 2.866572320461273 | Validation loss 2.787553519010544



 18%|████████████▉                                                           | 1790/10000 [4:05:09<18:27:44,  8.10s/it]

Epoch: 1789 | Training loss 2.8702324628829956 | Validation loss 2.7920547127723694



 18%|████████████▉                                                           | 1791/10000 [4:05:17<18:27:46,  8.10s/it]

Epoch: 1790 | Training loss 2.861419878900051 | Validation loss 2.787255108356476



 18%|████████████▉                                                           | 1792/10000 [4:05:25<18:25:21,  8.08s/it]

Epoch: 1791 | Training loss 2.8658804893493652 | Validation loss 2.7893888354301453



 18%|████████████▉                                                           | 1793/10000 [4:05:33<18:26:42,  8.09s/it]

Epoch: 1792 | Training loss 2.8680338710546494 | Validation loss 2.791358143091202



 18%|████████████▉                                                           | 1794/10000 [4:05:41<18:26:17,  8.09s/it]

Epoch: 1793 | Training loss 2.8654648810625076 | Validation loss 2.791617214679718



 18%|████████████▉                                                           | 1795/10000 [4:05:49<18:28:38,  8.11s/it]

Epoch: 1794 | Training loss 2.8617936819791794 | Validation loss 2.7888041734695435



 18%|████████████▉                                                           | 1796/10000 [4:05:57<18:29:36,  8.12s/it]

Epoch: 1795 | Training loss 2.8634914457798004 | Validation loss 2.793574720621109



 18%|████████████▉                                                           | 1797/10000 [4:06:06<18:28:14,  8.11s/it]

Epoch: 1796 | Training loss 2.8644708022475243 | Validation loss 2.791288524866104



 18%|████████████▉                                                           | 1798/10000 [4:06:14<18:27:55,  8.10s/it]

Epoch: 1797 | Training loss 2.8665521815419197 | Validation loss 2.7893801629543304



 18%|████████████▉                                                           | 1799/10000 [4:06:22<18:28:56,  8.11s/it]

Epoch: 1798 | Training loss 2.865509733557701 | Validation loss 2.790428191423416



 18%|████████████▉                                                           | 1800/10000 [4:06:30<18:25:50,  8.09s/it]

Epoch: 1799 | Training loss 2.8587045818567276 | Validation loss 2.786750555038452



 18%|████████████▉                                                           | 1801/10000 [4:06:38<18:25:27,  8.09s/it]

Epoch: 1800 | Training loss 2.8646474480628967 | Validation loss 2.792782247066498



 18%|████████████▉                                                           | 1802/10000 [4:06:46<18:25:14,  8.09s/it]

Epoch: 1801 | Training loss 2.872314490377903 | Validation loss 2.789474904537201



 18%|████████████▉                                                           | 1803/10000 [4:06:54<18:26:51,  8.10s/it]

Epoch: 1802 | Training loss 2.8652502074837685 | Validation loss 2.788948655128479



 18%|████████████▉                                                           | 1804/10000 [4:07:02<18:24:44,  8.09s/it]

Epoch: 1803 | Training loss 2.8702033683657646 | Validation loss 2.791423112154007



 18%|████████████▉                                                           | 1805/10000 [4:07:10<18:25:16,  8.09s/it]

Epoch: 1804 | Training loss 2.867975600063801 | Validation loss 2.798240303993225



 18%|█████████████                                                           | 1806/10000 [4:07:18<18:23:33,  8.08s/it]

Epoch: 1805 | Training loss 2.86989825963974 | Validation loss 2.789638012647629



 18%|█████████████                                                           | 1807/10000 [4:07:26<18:27:12,  8.11s/it]

Epoch: 1806 | Training loss 2.8630822747945786 | Validation loss 2.7916307151317596



 18%|█████████████                                                           | 1808/10000 [4:07:35<18:25:30,  8.10s/it]

Epoch: 1807 | Training loss 2.8671549782156944 | Validation loss 2.792546808719635



 18%|█████████████                                                           | 1809/10000 [4:07:43<18:24:56,  8.09s/it]

Epoch: 1808 | Training loss 2.867588020861149 | Validation loss 2.7906705737113953



 18%|█████████████                                                           | 1810/10000 [4:07:51<18:24:27,  8.09s/it]

Epoch: 1809 | Training loss 2.8693244084715843 | Validation loss 2.788525551557541



 18%|█████████████                                                           | 1811/10000 [4:07:59<18:25:27,  8.10s/it]

Epoch: 1810 | Training loss 2.868307150900364 | Validation loss 2.7928812205791473



 18%|█████████████                                                           | 1812/10000 [4:08:07<18:29:21,  8.13s/it]

Epoch: 1811 | Training loss 2.8667112216353416 | Validation loss 2.796118140220642



 18%|█████████████                                                           | 1813/10000 [4:08:15<18:29:24,  8.13s/it]

Epoch: 1812 | Training loss 2.863122873008251 | Validation loss 2.7899404168128967



 18%|█████████████                                                           | 1814/10000 [4:08:23<18:32:42,  8.16s/it]

Epoch: 1813 | Training loss 2.862616404891014 | Validation loss 2.782593786716461



 18%|█████████████                                                           | 1815/10000 [4:08:31<18:28:14,  8.12s/it]

Epoch: 1814 | Training loss 2.867445431649685 | Validation loss 2.791962504386902



 18%|█████████████                                                           | 1816/10000 [4:08:39<18:21:22,  8.07s/it]

Epoch: 1815 | Training loss 2.8669568598270416 | Validation loss 2.7865183353424072



 18%|█████████████                                                           | 1817/10000 [4:08:47<18:22:25,  8.08s/it]

Epoch: 1816 | Training loss 2.868736118078232 | Validation loss 2.7944878935813904



 18%|█████████████                                                           | 1818/10000 [4:08:56<18:24:23,  8.10s/it]

Epoch: 1817 | Training loss 2.867660030722618 | Validation loss 2.796394467353821



 18%|█████████████                                                           | 1819/10000 [4:09:04<18:26:11,  8.11s/it]

Epoch: 1818 | Training loss 2.8704008758068085 | Validation loss 2.7960213720798492



 18%|█████████████                                                           | 1820/10000 [4:09:12<18:22:27,  8.09s/it]

Epoch: 1819 | Training loss 2.873136557638645 | Validation loss 2.7853313386440277



 18%|█████████████                                                           | 1821/10000 [4:09:20<18:21:42,  8.08s/it]

Epoch: 1820 | Training loss 2.8631035834550858 | Validation loss 2.787978947162628



 18%|█████████████                                                           | 1822/10000 [4:09:28<18:24:53,  8.11s/it]

Epoch: 1821 | Training loss 2.8667353093624115 | Validation loss 2.7912515103816986



 18%|█████████████▏                                                          | 1823/10000 [4:09:36<18:19:22,  8.07s/it]

Epoch: 1822 | Training loss 2.869128040969372 | Validation loss 2.794257879257202



 18%|█████████████▏                                                          | 1824/10000 [4:09:44<18:19:20,  8.07s/it]

Epoch: 1823 | Training loss 2.8660757690668106 | Validation loss 2.794208139181137



 18%|█████████████▏                                                          | 1825/10000 [4:09:52<18:27:41,  8.13s/it]

Epoch: 1824 | Training loss 2.8695318326354027 | Validation loss 2.7892724573612213



 18%|█████████████▏                                                          | 1826/10000 [4:10:01<18:34:10,  8.18s/it]

Epoch: 1825 | Training loss 2.866136483848095 | Validation loss 2.7913196682929993



 18%|█████████████▏                                                          | 1827/10000 [4:10:09<18:41:17,  8.23s/it]

Epoch: 1826 | Training loss 2.8616220504045486 | Validation loss 2.785109370946884



 18%|█████████████▏                                                          | 1828/10000 [4:10:17<18:47:18,  8.28s/it]

Epoch: 1827 | Training loss 2.867966577410698 | Validation loss 2.785888761281967



 18%|█████████████▏                                                          | 1829/10000 [4:10:26<18:49:11,  8.29s/it]

Epoch: 1828 | Training loss 2.8691943883895874 | Validation loss 2.789668619632721



 18%|█████████████▏                                                          | 1830/10000 [4:10:34<18:57:20,  8.35s/it]

Epoch: 1829 | Training loss 2.8622246980667114 | Validation loss 2.7953011989593506



 18%|█████████████▏                                                          | 1831/10000 [4:10:43<19:00:32,  8.38s/it]

Epoch: 1830 | Training loss 2.869771659374237 | Validation loss 2.7896804213523865



 18%|█████████████▏                                                          | 1832/10000 [4:10:51<19:07:09,  8.43s/it]

Epoch: 1831 | Training loss 2.8688025176525116 | Validation loss 2.79561784863472



 18%|█████████████▏                                                          | 1833/10000 [4:11:00<19:09:43,  8.45s/it]

Epoch: 1832 | Training loss 2.8620821312069893 | Validation loss 2.792926102876663



 18%|█████████████▏                                                          | 1834/10000 [4:11:08<19:07:43,  8.43s/it]

Epoch: 1833 | Training loss 2.860027864575386 | Validation loss 2.7903294563293457



 18%|█████████████▏                                                          | 1835/10000 [4:11:16<19:02:32,  8.40s/it]

Epoch: 1834 | Training loss 2.86970167607069 | Validation loss 2.7929638028144836



 18%|█████████████▏                                                          | 1836/10000 [4:11:25<19:02:27,  8.40s/it]

Epoch: 1835 | Training loss 2.8673197254538536 | Validation loss 2.7923177778720856



 18%|█████████████▏                                                          | 1837/10000 [4:11:33<18:58:50,  8.37s/it]

Epoch: 1836 | Training loss 2.8622135370969772 | Validation loss 2.7835746109485626



 18%|█████████████▏                                                          | 1838/10000 [4:11:41<18:57:24,  8.36s/it]

Epoch: 1837 | Training loss 2.866841182112694 | Validation loss 2.791040599346161



 18%|█████████████▏                                                          | 1839/10000 [4:11:50<18:53:03,  8.33s/it]

Epoch: 1838 | Training loss 2.869594007730484 | Validation loss 2.7950029969215393



 18%|█████████████▏                                                          | 1840/10000 [4:11:58<18:48:42,  8.30s/it]

Epoch: 1839 | Training loss 2.8674837425351143 | Validation loss 2.799467533826828



 18%|█████████████▎                                                          | 1841/10000 [4:12:06<18:47:38,  8.29s/it]

Epoch: 1840 | Training loss 2.8679673597216606 | Validation loss 2.789196193218231



 18%|█████████████▎                                                          | 1842/10000 [4:12:15<18:54:27,  8.34s/it]

Epoch: 1841 | Training loss 2.8597303554415703 | Validation loss 2.786579519510269



 18%|█████████████▎                                                          | 1843/10000 [4:12:23<18:56:07,  8.36s/it]

Epoch: 1842 | Training loss 2.8671459779143333 | Validation loss 2.791512370109558



 18%|█████████████▎                                                          | 1844/10000 [4:12:31<18:55:57,  8.36s/it]

Epoch: 1843 | Training loss 2.8677104339003563 | Validation loss 2.7918968200683594



 18%|█████████████▎                                                          | 1845/10000 [4:12:40<18:58:17,  8.37s/it]

Epoch: 1844 | Training loss 2.8691078796982765 | Validation loss 2.7892338931560516



 18%|█████████████▎                                                          | 1846/10000 [4:12:48<18:56:05,  8.36s/it]

Epoch: 1845 | Training loss 2.8694114312529564 | Validation loss 2.793329030275345



 18%|█████████████▎                                                          | 1847/10000 [4:12:57<18:58:05,  8.38s/it]

Epoch: 1846 | Training loss 2.8676284477114677 | Validation loss 2.798368275165558



 18%|█████████████▎                                                          | 1848/10000 [4:13:05<18:59:28,  8.39s/it]

Epoch: 1847 | Training loss 2.865343317389488 | Validation loss 2.7891589403152466



 18%|█████████████▎                                                          | 1849/10000 [4:13:13<18:57:30,  8.37s/it]

Epoch: 1848 | Training loss 2.870624527335167 | Validation loss 2.7873523831367493



 18%|█████████████▎                                                          | 1850/10000 [4:13:22<19:00:27,  8.40s/it]

Epoch: 1849 | Training loss 2.8653595447540283 | Validation loss 2.78839111328125



 19%|█████████████▎                                                          | 1851/10000 [4:13:30<19:02:18,  8.41s/it]

Epoch: 1850 | Training loss 2.8660522550344467 | Validation loss 2.7971252501010895



 19%|█████████████▎                                                          | 1852/10000 [4:13:38<18:53:39,  8.35s/it]

Epoch: 1851 | Training loss 2.8662180975079536 | Validation loss 2.792280912399292



 19%|█████████████▎                                                          | 1853/10000 [4:13:47<18:46:15,  8.29s/it]

Epoch: 1852 | Training loss 2.8700838908553123 | Validation loss 2.7900254130363464



 19%|█████████████▎                                                          | 1854/10000 [4:13:55<18:37:20,  8.23s/it]

Epoch: 1853 | Training loss 2.8694395571947098 | Validation loss 2.7957575917243958



 19%|█████████████▎                                                          | 1855/10000 [4:14:03<18:35:10,  8.21s/it]

Epoch: 1854 | Training loss 2.8659447208046913 | Validation loss 2.7876527905464172



 19%|█████████████▎                                                          | 1856/10000 [4:14:11<18:31:00,  8.19s/it]

Epoch: 1855 | Training loss 2.8700642362236977 | Validation loss 2.7946861684322357



 19%|█████████████▎                                                          | 1857/10000 [4:14:19<18:27:21,  8.16s/it]

Epoch: 1856 | Training loss 2.8689577728509903 | Validation loss 2.79271137714386



 19%|█████████████▍                                                          | 1858/10000 [4:14:27<18:36:24,  8.23s/it]

Epoch: 1857 | Training loss 2.866362400352955 | Validation loss 2.788156360387802



 19%|█████████████▍                                                          | 1859/10000 [4:14:36<18:39:27,  8.25s/it]

Epoch: 1858 | Training loss 2.872084103524685 | Validation loss 2.790319114923477



 19%|█████████████▍                                                          | 1860/10000 [4:14:44<18:35:10,  8.22s/it]

Epoch: 1859 | Training loss 2.866655468940735 | Validation loss 2.7943725287914276



 19%|█████████████▍                                                          | 1861/10000 [4:14:52<18:23:16,  8.13s/it]

Epoch: 1860 | Training loss 2.8661005422472954 | Validation loss 2.7911779582500458



 19%|█████████████▍                                                          | 1862/10000 [4:15:00<18:27:30,  8.17s/it]

Epoch: 1861 | Training loss 2.866049975156784 | Validation loss 2.7870639264583588



 19%|█████████████▍                                                          | 1863/10000 [4:15:08<18:28:25,  8.17s/it]

Epoch: 1862 | Training loss 2.8686771169304848 | Validation loss 2.786643385887146



 19%|█████████████▍                                                          | 1864/10000 [4:15:16<18:28:48,  8.18s/it]

Epoch: 1863 | Training loss 2.8666526824235916 | Validation loss 2.79227015376091



 19%|█████████████▍                                                          | 1865/10000 [4:15:25<18:31:58,  8.20s/it]

Epoch: 1864 | Training loss 2.8721308782696724 | Validation loss 2.7991950511932373



 19%|█████████████▍                                                          | 1866/10000 [4:15:33<18:32:19,  8.20s/it]

Epoch: 1865 | Training loss 2.870030604302883 | Validation loss 2.7895445227622986



 19%|█████████████▍                                                          | 1867/10000 [4:15:41<18:32:21,  8.21s/it]

Epoch: 1866 | Training loss 2.8670249730348587 | Validation loss 2.785670429468155



 19%|█████████████▍                                                          | 1868/10000 [4:15:49<18:28:33,  8.18s/it]

Epoch: 1867 | Training loss 2.8655333071947098 | Validation loss 2.7910322546958923



 19%|█████████████▍                                                          | 1869/10000 [4:15:57<18:24:41,  8.15s/it]

Epoch: 1868 | Training loss 2.862537167966366 | Validation loss 2.790627032518387



 19%|█████████████▍                                                          | 1870/10000 [4:16:05<18:16:44,  8.09s/it]

Epoch: 1869 | Training loss 2.865687020123005 | Validation loss 2.7924469709396362



 19%|█████████████▍                                                          | 1871/10000 [4:16:13<18:14:23,  8.08s/it]

Epoch: 1870 | Training loss 2.8682941868901253 | Validation loss 2.7986648976802826



 19%|█████████████▍                                                          | 1872/10000 [4:16:21<18:17:43,  8.10s/it]

Epoch: 1871 | Training loss 2.8646209836006165 | Validation loss 2.7878260016441345



 19%|█████████████▍                                                          | 1873/10000 [4:16:30<18:21:16,  8.13s/it]

Epoch: 1872 | Training loss 2.8660598918795586 | Validation loss 2.7885272204875946



 19%|█████████████▍                                                          | 1874/10000 [4:16:38<18:23:12,  8.15s/it]

Epoch: 1873 | Training loss 2.8671868294477463 | Validation loss 2.7864292562007904



 19%|█████████████▌                                                          | 1875/10000 [4:16:46<18:21:57,  8.14s/it]

Epoch: 1874 | Training loss 2.8740461990237236 | Validation loss 2.791903406381607



 19%|█████████████▌                                                          | 1876/10000 [4:16:54<18:22:18,  8.14s/it]

Epoch: 1875 | Training loss 2.867274709045887 | Validation loss 2.798341304063797



 19%|█████████████▌                                                          | 1877/10000 [4:17:02<18:16:09,  8.10s/it]

Epoch: 1876 | Training loss 2.867079019546509 | Validation loss 2.791589140892029



 19%|█████████████▌                                                          | 1878/10000 [4:17:10<18:14:13,  8.08s/it]

Epoch: 1877 | Training loss 2.859774239361286 | Validation loss 2.7982963919639587



 19%|█████████████▌                                                          | 1879/10000 [4:17:18<18:12:59,  8.08s/it]

Epoch: 1878 | Training loss 2.866683579981327 | Validation loss 2.7909373342990875



 19%|█████████████▌                                                          | 1880/10000 [4:17:26<18:16:19,  8.10s/it]

Epoch: 1879 | Training loss 2.870591714978218 | Validation loss 2.793691724538803



 19%|█████████████▌                                                          | 1881/10000 [4:17:34<18:14:16,  8.09s/it]

Epoch: 1880 | Training loss 2.8620762079954147 | Validation loss 2.7875497937202454



 19%|█████████████▌                                                          | 1882/10000 [4:17:43<18:14:42,  8.09s/it]

Epoch: 1881 | Training loss 2.8695280700922012 | Validation loss 2.7888863384723663



 19%|█████████████▌                                                          | 1883/10000 [4:17:51<18:20:03,  8.13s/it]

Epoch: 1882 | Training loss 2.8664579018950462 | Validation loss 2.792774349451065



 19%|█████████████▌                                                          | 1884/10000 [4:17:59<18:18:05,  8.12s/it]

Epoch: 1883 | Training loss 2.869458168745041 | Validation loss 2.7994070053100586



 19%|█████████████▌                                                          | 1885/10000 [4:18:07<18:19:15,  8.13s/it]

Epoch: 1884 | Training loss 2.8691932633519173 | Validation loss 2.79354664683342



 19%|█████████████▌                                                          | 1886/10000 [4:18:15<18:16:56,  8.11s/it]

Epoch: 1885 | Training loss 2.8675617054104805 | Validation loss 2.7906713783740997



 19%|█████████████▌                                                          | 1887/10000 [4:18:23<18:18:55,  8.13s/it]

Epoch: 1886 | Training loss 2.868591621518135 | Validation loss 2.7866356670856476



 19%|█████████████▌                                                          | 1888/10000 [4:18:31<18:19:39,  8.13s/it]

Epoch: 1887 | Training loss 2.8642954006791115 | Validation loss 2.7860948145389557



 19%|█████████████▌                                                          | 1889/10000 [4:18:39<18:16:17,  8.11s/it]

Epoch: 1888 | Training loss 2.866959236562252 | Validation loss 2.7958741188049316



 19%|█████████████▌                                                          | 1890/10000 [4:18:47<18:11:26,  8.07s/it]

Epoch: 1889 | Training loss 2.8689472898840904 | Validation loss 2.7974587976932526



 19%|█████████████▌                                                          | 1891/10000 [4:18:55<18:09:57,  8.06s/it]

Epoch: 1890 | Training loss 2.868416078388691 | Validation loss 2.790811449289322



 19%|█████████████▌                                                          | 1892/10000 [4:19:04<18:10:01,  8.07s/it]

Epoch: 1891 | Training loss 2.862519808113575 | Validation loss 2.79388490319252



 19%|█████████████▋                                                          | 1893/10000 [4:19:12<18:12:36,  8.09s/it]

Epoch: 1892 | Training loss 2.8679513707756996 | Validation loss 2.795111268758774



 19%|█████████████▋                                                          | 1894/10000 [4:19:20<18:12:26,  8.09s/it]

Epoch: 1893 | Training loss 2.865342505276203 | Validation loss 2.787732779979706



 19%|█████████████▋                                                          | 1895/10000 [4:19:28<18:11:39,  8.08s/it]

Epoch: 1894 | Training loss 2.865782544016838 | Validation loss 2.7942018508911133



 19%|█████████████▋                                                          | 1896/10000 [4:19:36<18:16:44,  8.12s/it]

Epoch: 1895 | Training loss 2.86384454369545 | Validation loss 2.795863687992096



 19%|█████████████▋                                                          | 1897/10000 [4:19:44<18:18:29,  8.13s/it]

Epoch: 1896 | Training loss 2.86452928930521 | Validation loss 2.790864944458008



 19%|█████████████▋                                                          | 1898/10000 [4:19:52<18:17:36,  8.13s/it]

Epoch: 1897 | Training loss 2.865070976316929 | Validation loss 2.7962432205677032



 19%|█████████████▋                                                          | 1899/10000 [4:20:00<18:17:39,  8.13s/it]

Epoch: 1898 | Training loss 2.864757686853409 | Validation loss 2.7933748066425323



 19%|█████████████▋                                                          | 1900/10000 [4:20:09<18:18:18,  8.14s/it]

Epoch: 1899 | Training loss 2.8674093186855316 | Validation loss 2.7942719757556915



 19%|█████████████▋                                                          | 1901/10000 [4:20:17<18:20:38,  8.15s/it]

Epoch: 1900 | Training loss 2.86199614405632 | Validation loss 2.785091131925583



 19%|█████████████▋                                                          | 1902/10000 [4:20:25<18:22:14,  8.17s/it]

Epoch: 1901 | Training loss 2.8675048798322678 | Validation loss 2.7878822088241577



 19%|█████████████▋                                                          | 1903/10000 [4:20:33<18:22:36,  8.17s/it]

Epoch: 1902 | Training loss 2.868044711649418 | Validation loss 2.7867233753204346



 19%|█████████████▋                                                          | 1904/10000 [4:20:41<18:18:33,  8.14s/it]

Epoch: 1903 | Training loss 2.870986595749855 | Validation loss 2.78986993432045



 19%|█████████████▋                                                          | 1905/10000 [4:20:50<18:23:51,  8.18s/it]

Epoch: 1904 | Training loss 2.872143253684044 | Validation loss 2.7908779084682465



 19%|█████████████▋                                                          | 1906/10000 [4:20:58<18:18:45,  8.14s/it]

Epoch: 1905 | Training loss 2.871526814997196 | Validation loss 2.793951004743576



 19%|█████████████▋                                                          | 1907/10000 [4:21:06<18:19:20,  8.15s/it]

Epoch: 1906 | Training loss 2.8685149028897285 | Validation loss 2.7919819951057434



 19%|█████████████▋                                                          | 1908/10000 [4:21:14<18:19:15,  8.15s/it]

Epoch: 1907 | Training loss 2.8649551421403885 | Validation loss 2.788737267255783



 19%|█████████████▋                                                          | 1909/10000 [4:21:22<18:15:13,  8.12s/it]

Epoch: 1908 | Training loss 2.8712376728653908 | Validation loss 2.7899257838726044



 19%|█████████████▊                                                          | 1910/10000 [4:21:30<18:16:07,  8.13s/it]

Epoch: 1909 | Training loss 2.865022264420986 | Validation loss 2.7920534908771515



 19%|█████████████▊                                                          | 1911/10000 [4:21:38<18:12:56,  8.11s/it]

Epoch: 1910 | Training loss 2.8679871559143066 | Validation loss 2.790469229221344



 19%|█████████████▊                                                          | 1912/10000 [4:21:46<18:14:28,  8.12s/it]

Epoch: 1911 | Training loss 2.8754199147224426 | Validation loss 2.792292833328247



 19%|█████████████▊                                                          | 1913/10000 [4:21:54<18:12:59,  8.11s/it]

Epoch: 1912 | Training loss 2.859403684735298 | Validation loss 2.792267143726349



 19%|█████████████▊                                                          | 1914/10000 [4:22:03<18:12:31,  8.11s/it]

Epoch: 1913 | Training loss 2.8618291467428207 | Validation loss 2.7928174138069153



 19%|█████████████▊                                                          | 1915/10000 [4:22:11<18:14:44,  8.12s/it]

Epoch: 1914 | Training loss 2.8691075146198273 | Validation loss 2.792162299156189



 19%|█████████████▊                                                          | 1916/10000 [4:22:19<18:14:20,  8.12s/it]

Epoch: 1915 | Training loss 2.869214951992035 | Validation loss 2.790721356868744



 19%|█████████████▊                                                          | 1917/10000 [4:22:27<18:09:38,  8.09s/it]

Epoch: 1916 | Training loss 2.8694406151771545 | Validation loss 2.791859269142151



 19%|█████████████▊                                                          | 1918/10000 [4:22:35<18:09:31,  8.09s/it]

Epoch: 1917 | Training loss 2.864587478339672 | Validation loss 2.7941044867038727



 19%|█████████████▊                                                          | 1919/10000 [4:22:43<18:10:35,  8.10s/it]

Epoch: 1918 | Training loss 2.8658351376652718 | Validation loss 2.787709504365921



 19%|█████████████▊                                                          | 1920/10000 [4:22:51<18:07:24,  8.07s/it]

Epoch: 1919 | Training loss 2.8716562911868095 | Validation loss 2.7913977801799774



 19%|█████████████▊                                                          | 1921/10000 [4:22:59<18:04:31,  8.05s/it]

Epoch: 1920 | Training loss 2.861716754734516 | Validation loss 2.79208442568779



 19%|█████████████▊                                                          | 1922/10000 [4:23:07<18:08:48,  8.09s/it]

Epoch: 1921 | Training loss 2.865560546517372 | Validation loss 2.78976970911026



 19%|█████████████▊                                                          | 1923/10000 [4:23:15<18:11:06,  8.11s/it]

Epoch: 1922 | Training loss 2.862837366759777 | Validation loss 2.796375811100006



 19%|█████████████▊                                                          | 1924/10000 [4:23:23<18:11:32,  8.11s/it]

Epoch: 1923 | Training loss 2.863357588648796 | Validation loss 2.795557677745819



 19%|█████████████▊                                                          | 1925/10000 [4:23:32<18:10:27,  8.10s/it]

Epoch: 1924 | Training loss 2.87099152803421 | Validation loss 2.796092301607132



 19%|█████████████▊                                                          | 1926/10000 [4:23:40<18:10:52,  8.11s/it]

Epoch: 1925 | Training loss 2.8684383183717728 | Validation loss 2.794532746076584



 19%|█████████████▊                                                          | 1927/10000 [4:23:48<18:09:55,  8.10s/it]

Epoch: 1926 | Training loss 2.8656550347805023 | Validation loss 2.7892319560050964



 19%|█████████████▉                                                          | 1928/10000 [4:23:56<18:10:29,  8.11s/it]

Epoch: 1927 | Training loss 2.869131550192833 | Validation loss 2.797179162502289



 19%|█████████████▉                                                          | 1929/10000 [4:24:04<18:11:28,  8.11s/it]

Epoch: 1928 | Training loss 2.8693630397319794 | Validation loss 2.790548622608185



 19%|█████████████▉                                                          | 1930/10000 [4:24:12<18:13:00,  8.13s/it]

Epoch: 1929 | Training loss 2.865847870707512 | Validation loss 2.796650618314743



 19%|█████████████▉                                                          | 1931/10000 [4:24:20<18:09:59,  8.11s/it]

Epoch: 1930 | Training loss 2.8721107840538025 | Validation loss 2.804590404033661



 19%|█████████████▉                                                          | 1932/10000 [4:24:28<18:09:27,  8.10s/it]

Epoch: 1931 | Training loss 2.86970441788435 | Validation loss 2.7914497554302216



 19%|█████████████▉                                                          | 1933/10000 [4:24:36<18:11:20,  8.12s/it]

Epoch: 1932 | Training loss 2.869483694434166 | Validation loss 2.798107862472534



 19%|█████████████▉                                                          | 1934/10000 [4:24:44<18:06:18,  8.08s/it]

Epoch: 1933 | Training loss 2.8684346303343773 | Validation loss 2.792346239089966



 19%|█████████████▉                                                          | 1935/10000 [4:24:53<18:11:25,  8.12s/it]

Epoch: 1934 | Training loss 2.868762329220772 | Validation loss 2.8083346486091614



 19%|█████████████▉                                                          | 1936/10000 [4:25:01<18:10:40,  8.12s/it]

Epoch: 1935 | Training loss 2.8666054531931877 | Validation loss 2.7930272221565247



 19%|█████████████▉                                                          | 1937/10000 [4:25:09<18:11:13,  8.12s/it]

Epoch: 1936 | Training loss 2.8649397641420364 | Validation loss 2.796799808740616



 19%|█████████████▉                                                          | 1938/10000 [4:25:17<18:12:16,  8.13s/it]

Epoch: 1937 | Training loss 2.8728000670671463 | Validation loss 2.796593517065048



 19%|█████████████▉                                                          | 1939/10000 [4:25:25<18:09:10,  8.11s/it]

Epoch: 1938 | Training loss 2.865117847919464 | Validation loss 2.7935835421085358



 19%|█████████████▉                                                          | 1940/10000 [4:25:33<18:09:29,  8.11s/it]

Epoch: 1939 | Training loss 2.863399989902973 | Validation loss 2.7905508279800415



 19%|█████████████▉                                                          | 1941/10000 [4:25:41<18:09:09,  8.11s/it]

Epoch: 1940 | Training loss 2.865211822092533 | Validation loss 2.7910956144332886



 19%|█████████████▉                                                          | 1942/10000 [4:25:49<18:11:19,  8.13s/it]

Epoch: 1941 | Training loss 2.8661230504512787 | Validation loss 2.7923350632190704



 19%|█████████████▉                                                          | 1943/10000 [4:25:58<18:09:33,  8.11s/it]

Epoch: 1942 | Training loss 2.865747332572937 | Validation loss 2.788982182741165



 19%|█████████████▉                                                          | 1944/10000 [4:26:06<18:08:20,  8.11s/it]

Epoch: 1943 | Training loss 2.8712595999240875 | Validation loss 2.7937799096107483



 19%|██████████████                                                          | 1945/10000 [4:26:14<18:08:40,  8.11s/it]

Epoch: 1944 | Training loss 2.8691116124391556 | Validation loss 2.790379822254181



 19%|██████████████                                                          | 1946/10000 [4:26:22<18:09:36,  8.12s/it]

Epoch: 1945 | Training loss 2.867592141032219 | Validation loss 2.7897385358810425



 19%|██████████████                                                          | 1947/10000 [4:26:30<18:10:07,  8.12s/it]

Epoch: 1946 | Training loss 2.8693074956536293 | Validation loss 2.79404553771019



 19%|██████████████                                                          | 1948/10000 [4:26:38<18:11:06,  8.13s/it]

Epoch: 1947 | Training loss 2.8685856014490128 | Validation loss 2.789204925298691



 19%|██████████████                                                          | 1949/10000 [4:26:46<18:09:48,  8.12s/it]

Epoch: 1948 | Training loss 2.8632148802280426 | Validation loss 2.8022663295269012



 20%|██████████████                                                          | 1950/10000 [4:26:54<18:09:31,  8.12s/it]

Epoch: 1949 | Training loss 2.8629485368728638 | Validation loss 2.7883126735687256



 20%|██████████████                                                          | 1951/10000 [4:27:03<18:11:13,  8.13s/it]

Epoch: 1950 | Training loss 2.8668975085020065 | Validation loss 2.7944062054157257



 20%|██████████████                                                          | 1952/10000 [4:27:11<18:09:07,  8.12s/it]

Epoch: 1951 | Training loss 2.8642666935920715 | Validation loss 2.797208845615387



 20%|██████████████                                                          | 1953/10000 [4:27:19<18:05:07,  8.09s/it]

Epoch: 1952 | Training loss 2.867476835846901 | Validation loss 2.7890375554561615



 20%|██████████████                                                          | 1954/10000 [4:27:27<18:06:09,  8.10s/it]

Epoch: 1953 | Training loss 2.87183590978384 | Validation loss 2.8012154400348663



 20%|██████████████                                                          | 1955/10000 [4:27:35<18:06:05,  8.10s/it]

Epoch: 1954 | Training loss 2.866073749959469 | Validation loss 2.7977501451969147



 20%|██████████████                                                          | 1956/10000 [4:27:43<18:00:15,  8.06s/it]

Epoch: 1955 | Training loss 2.8665370494127274 | Validation loss 2.7927973866462708



 20%|██████████████                                                          | 1957/10000 [4:27:51<17:58:07,  8.04s/it]

Epoch: 1956 | Training loss 2.8672544062137604 | Validation loss 2.7870253920555115



 20%|██████████████                                                          | 1958/10000 [4:27:59<18:00:19,  8.06s/it]

Epoch: 1957 | Training loss 2.863064758479595 | Validation loss 2.7922726571559906



 20%|██████████████                                                          | 1959/10000 [4:28:07<18:07:23,  8.11s/it]

Epoch: 1958 | Training loss 2.8694524243474007 | Validation loss 2.7855238020420074



 20%|██████████████                                                          | 1960/10000 [4:28:15<18:06:43,  8.11s/it]

Epoch: 1959 | Training loss 2.873312659561634 | Validation loss 2.7974221408367157



 20%|██████████████                                                          | 1961/10000 [4:28:23<18:08:01,  8.12s/it]

Epoch: 1960 | Training loss 2.8684120401740074 | Validation loss 2.793617010116577



 20%|██████████████▏                                                         | 1962/10000 [4:28:32<18:10:16,  8.14s/it]

Epoch: 1961 | Training loss 2.873157724738121 | Validation loss 2.7957251965999603



 20%|██████████████▏                                                         | 1963/10000 [4:28:40<18:12:24,  8.16s/it]

Epoch: 1962 | Training loss 2.8614222705364227 | Validation loss 2.7918583750724792



 20%|██████████████▏                                                         | 1964/10000 [4:28:48<18:14:37,  8.17s/it]

Epoch: 1963 | Training loss 2.868287041783333 | Validation loss 2.7926362454891205



 20%|██████████████▏                                                         | 1965/10000 [4:28:56<18:15:20,  8.18s/it]

Epoch: 1964 | Training loss 2.8682195618748665 | Validation loss 2.7935834527015686



 20%|██████████████▏                                                         | 1966/10000 [4:29:05<18:18:54,  8.21s/it]

Epoch: 1965 | Training loss 2.8652374669909477 | Validation loss 2.787444233894348



 20%|██████████████▏                                                         | 1967/10000 [4:29:13<18:16:21,  8.19s/it]

Epoch: 1966 | Training loss 2.868909537792206 | Validation loss 2.7930094599723816



 20%|██████████████▏                                                         | 1968/10000 [4:29:21<18:10:14,  8.14s/it]

Epoch: 1967 | Training loss 2.865635983645916 | Validation loss 2.800961196422577



 20%|██████████████▏                                                         | 1969/10000 [4:29:29<18:10:17,  8.15s/it]

Epoch: 1968 | Training loss 2.8768415823578835 | Validation loss 2.7950206100940704



 20%|██████████████▏                                                         | 1970/10000 [4:29:37<18:08:22,  8.13s/it]

Epoch: 1969 | Training loss 2.8672699257731438 | Validation loss 2.7892720699310303



 20%|██████████████▏                                                         | 1971/10000 [4:29:45<18:08:55,  8.14s/it]

Epoch: 1970 | Training loss 2.8733835741877556 | Validation loss 2.7893900871276855



 20%|██████████████▏                                                         | 1972/10000 [4:29:53<18:09:01,  8.14s/it]

Epoch: 1971 | Training loss 2.865797184407711 | Validation loss 2.792712390422821



 20%|██████████████▏                                                         | 1973/10000 [4:30:01<18:11:36,  8.16s/it]

Epoch: 1972 | Training loss 2.8634017184376717 | Validation loss 2.7920278012752533



 20%|██████████████▏                                                         | 1974/10000 [4:30:10<18:11:09,  8.16s/it]

Epoch: 1973 | Training loss 2.872617930173874 | Validation loss 2.7912886440753937



 20%|██████████████▏                                                         | 1975/10000 [4:30:18<18:09:26,  8.15s/it]

Epoch: 1974 | Training loss 2.865754120051861 | Validation loss 2.797253906726837



 20%|██████████████▏                                                         | 1976/10000 [4:30:26<18:10:07,  8.15s/it]

Epoch: 1975 | Training loss 2.8684315606951714 | Validation loss 2.7849750220775604



 20%|██████████████▏                                                         | 1977/10000 [4:30:34<18:08:31,  8.14s/it]

Epoch: 1976 | Training loss 2.8652187809348106 | Validation loss 2.7924433648586273



 20%|██████████████▏                                                         | 1978/10000 [4:30:42<18:09:53,  8.15s/it]

Epoch: 1977 | Training loss 2.867280028760433 | Validation loss 2.8003541231155396



 20%|██████████████▏                                                         | 1979/10000 [4:30:50<18:08:56,  8.15s/it]

Epoch: 1978 | Training loss 2.868225112557411 | Validation loss 2.79369193315506



 20%|██████████████▎                                                         | 1980/10000 [4:30:58<18:07:52,  8.14s/it]

Epoch: 1979 | Training loss 2.867909722030163 | Validation loss 2.7946678698062897



 20%|██████████████▎                                                         | 1981/10000 [4:31:06<18:01:55,  8.10s/it]

Epoch: 1980 | Training loss 2.8701948672533035 | Validation loss 2.7936670184135437



 20%|██████████████▎                                                         | 1982/10000 [4:31:14<18:00:13,  8.08s/it]

Epoch: 1981 | Training loss 2.867619700729847 | Validation loss 2.7911912202835083



 20%|██████████████▎                                                         | 1983/10000 [4:31:23<18:00:49,  8.09s/it]

Epoch: 1982 | Training loss 2.860805720090866 | Validation loss 2.795707195997238



 20%|██████████████▎                                                         | 1984/10000 [4:31:31<18:04:22,  8.12s/it]

Epoch: 1983 | Training loss 2.869452551007271 | Validation loss 2.7932898700237274



 20%|██████████████▎                                                         | 1985/10000 [4:31:39<18:03:02,  8.11s/it]

Epoch: 1984 | Training loss 2.8653293401002884 | Validation loss 2.790599226951599



 20%|██████████████▎                                                         | 1986/10000 [4:31:47<18:00:47,  8.09s/it]

Epoch: 1985 | Training loss 2.8606460243463516 | Validation loss 2.7937757670879364



 20%|██████████████▎                                                         | 1987/10000 [4:31:55<18:01:05,  8.10s/it]

Epoch: 1986 | Training loss 2.866138219833374 | Validation loss 2.7993338108062744



 20%|██████████████▎                                                         | 1988/10000 [4:32:03<18:01:57,  8.10s/it]

Epoch: 1987 | Training loss 2.864744246006012 | Validation loss 2.7927368581295013



 20%|██████████████▎                                                         | 1989/10000 [4:32:11<17:57:57,  8.07s/it]

Epoch: 1988 | Training loss 2.8669545873999596 | Validation loss 2.7885932326316833



 20%|██████████████▎                                                         | 1990/10000 [4:32:19<18:03:55,  8.12s/it]

Epoch: 1989 | Training loss 2.8600999265909195 | Validation loss 2.7945606112480164



 20%|██████████████▎                                                         | 1991/10000 [4:32:28<18:04:00,  8.12s/it]

Epoch: 1990 | Training loss 2.8640990778803825 | Validation loss 2.7936750948429108



 20%|██████████████▎                                                         | 1992/10000 [4:32:36<18:04:23,  8.12s/it]

Epoch: 1991 | Training loss 2.8640193194150925 | Validation loss 2.789878696203232



 20%|██████████████▎                                                         | 1993/10000 [4:32:44<18:07:52,  8.15s/it]

Epoch: 1992 | Training loss 2.869502440094948 | Validation loss 2.806318610906601



 20%|██████████████▎                                                         | 1994/10000 [4:32:52<18:05:48,  8.14s/it]

Epoch: 1993 | Training loss 2.8678073287010193 | Validation loss 2.7931360006332397



 20%|██████████████▎                                                         | 1995/10000 [4:33:00<18:04:53,  8.13s/it]

Epoch: 1994 | Training loss 2.8639315739274025 | Validation loss 2.792228639125824



 20%|██████████████▎                                                         | 1996/10000 [4:33:08<18:08:32,  8.16s/it]

Epoch: 1995 | Training loss 2.8689252957701683 | Validation loss 2.790277749300003



 20%|██████████████▍                                                         | 1997/10000 [4:33:17<18:11:05,  8.18s/it]

Epoch: 1996 | Training loss 2.8656056076288223 | Validation loss 2.7950028777122498



 20%|██████████████▍                                                         | 1998/10000 [4:33:25<18:09:08,  8.17s/it]

Epoch: 1997 | Training loss 2.863077536225319 | Validation loss 2.7873623073101044



 20%|██████████████▍                                                         | 1999/10000 [4:33:33<18:10:12,  8.18s/it]

Epoch: 1998 | Training loss 2.87095545232296 | Validation loss 2.7901099622249603



 20%|██████████████▍                                                         | 2000/10000 [4:33:41<18:11:07,  8.18s/it]

Epoch: 1999 | Training loss 2.8689324110746384 | Validation loss 2.787755072116852



 20%|██████████████▍                                                         | 2001/10000 [4:33:49<18:13:29,  8.20s/it]

Epoch: 2000 | Training loss 2.870579570531845 | Validation loss 2.7930045425891876



 20%|██████████████▍                                                         | 2002/10000 [4:33:58<18:13:08,  8.20s/it]

Epoch: 2001 | Training loss 2.868894651532173 | Validation loss 2.7976517975330353



 20%|██████████████▍                                                         | 2003/10000 [4:34:06<18:11:20,  8.19s/it]

Epoch: 2002 | Training loss 2.8638908490538597 | Validation loss 2.7860815227031708



 20%|██████████████▍                                                         | 2004/10000 [4:34:14<18:07:47,  8.16s/it]

Epoch: 2003 | Training loss 2.864243805408478 | Validation loss 2.7890701591968536



 20%|██████████████▍                                                         | 2005/10000 [4:34:22<18:05:58,  8.15s/it]

Epoch: 2004 | Training loss 2.8672538325190544 | Validation loss 2.7835728228092194



 20%|██████████████▍                                                         | 2006/10000 [4:34:30<18:08:08,  8.17s/it]

Epoch: 2005 | Training loss 2.8690275847911835 | Validation loss 2.790049195289612



 20%|██████████████▍                                                         | 2007/10000 [4:34:38<18:06:03,  8.15s/it]

Epoch: 2006 | Training loss 2.8649365976452827 | Validation loss 2.7842903435230255



 20%|██████████████▍                                                         | 2008/10000 [4:34:46<18:08:16,  8.17s/it]

Epoch: 2007 | Training loss 2.865486800670624 | Validation loss 2.785504400730133



 20%|██████████████▍                                                         | 2009/10000 [4:34:55<18:10:29,  8.19s/it]

Epoch: 2008 | Training loss 2.863206446170807 | Validation loss 2.797883838415146



 20%|██████████████▍                                                         | 2010/10000 [4:35:03<18:11:27,  8.20s/it]

Epoch: 2009 | Training loss 2.865127444267273 | Validation loss 2.7878122329711914



 20%|██████████████▍                                                         | 2011/10000 [4:35:11<18:08:56,  8.18s/it]

Epoch: 2010 | Training loss 2.868472345173359 | Validation loss 2.789106011390686



 20%|██████████████▍                                                         | 2012/10000 [4:35:19<18:07:33,  8.17s/it]

Epoch: 2011 | Training loss 2.8730625063180923 | Validation loss 2.7903669476509094



 20%|██████████████▍                                                         | 2013/10000 [4:35:27<18:04:33,  8.15s/it]

Epoch: 2012 | Training loss 2.872254341840744 | Validation loss 2.797110140323639



 20%|██████████████▌                                                         | 2014/10000 [4:35:35<18:02:12,  8.13s/it]

Epoch: 2013 | Training loss 2.8666880652308464 | Validation loss 2.7855471074581146



 20%|██████████████▌                                                         | 2015/10000 [4:35:44<18:04:42,  8.15s/it]

Epoch: 2014 | Training loss 2.8654425516724586 | Validation loss 2.7981180250644684



 20%|██████████████▌                                                         | 2016/10000 [4:35:52<18:09:28,  8.19s/it]

Epoch: 2015 | Training loss 2.8632699102163315 | Validation loss 2.7996428310871124



 20%|██████████████▌                                                         | 2017/10000 [4:36:00<18:09:08,  8.19s/it]

Epoch: 2016 | Training loss 2.8667128160595894 | Validation loss 2.7959015369415283



 20%|██████████████▌                                                         | 2018/10000 [4:36:08<18:13:15,  8.22s/it]

Epoch: 2017 | Training loss 2.8669505789875984 | Validation loss 2.7890953719615936



 20%|██████████████▌                                                         | 2019/10000 [4:36:16<18:09:59,  8.19s/it]

Epoch: 2018 | Training loss 2.867639571428299 | Validation loss 2.7901771664619446



 20%|██████████████▌                                                         | 2020/10000 [4:36:25<18:07:42,  8.18s/it]

Epoch: 2019 | Training loss 2.8669394478201866 | Validation loss 2.786716938018799



 20%|██████████████▌                                                         | 2021/10000 [4:36:33<18:05:48,  8.16s/it]

Epoch: 2020 | Training loss 2.870035409927368 | Validation loss 2.7937618494033813



 20%|██████████████▌                                                         | 2022/10000 [4:36:41<18:06:38,  8.17s/it]

Epoch: 2021 | Training loss 2.868202030658722 | Validation loss 2.794301927089691



 20%|██████████████▌                                                         | 2023/10000 [4:36:49<18:08:01,  8.18s/it]

Epoch: 2022 | Training loss 2.8673050925135612 | Validation loss 2.793749511241913



 20%|██████████████▌                                                         | 2024/10000 [4:36:57<18:09:02,  8.19s/it]

Epoch: 2023 | Training loss 2.868518888950348 | Validation loss 2.7907563745975494



 20%|██████████████▌                                                         | 2025/10000 [4:37:05<18:05:13,  8.16s/it]

Epoch: 2024 | Training loss 2.8643329218029976 | Validation loss 2.7885658740997314



 20%|██████████████▌                                                         | 2026/10000 [4:37:14<18:02:37,  8.15s/it]

Epoch: 2025 | Training loss 2.883750393986702 | Validation loss 2.7938997745513916



 20%|██████████████▌                                                         | 2027/10000 [4:37:22<18:03:21,  8.15s/it]

Epoch: 2026 | Training loss 2.871011584997177 | Validation loss 2.7930346727371216



 20%|██████████████▌                                                         | 2028/10000 [4:37:30<18:09:22,  8.20s/it]

Epoch: 2027 | Training loss 2.8696258887648582 | Validation loss 2.7872918248176575



 20%|██████████████▌                                                         | 2029/10000 [4:37:38<18:06:49,  8.18s/it]

Epoch: 2028 | Training loss 2.8701744079589844 | Validation loss 2.791286826133728



 20%|██████████████▌                                                         | 2030/10000 [4:37:46<18:03:03,  8.15s/it]

Epoch: 2029 | Training loss 2.8694659620523453 | Validation loss 2.7933607399463654



 20%|██████████████▌                                                         | 2031/10000 [4:37:54<18:02:19,  8.15s/it]

Epoch: 2030 | Training loss 2.8671617433428764 | Validation loss 2.800664931535721



 20%|██████████████▋                                                         | 2032/10000 [4:38:03<18:02:11,  8.15s/it]

Epoch: 2031 | Training loss 2.86564002931118 | Validation loss 2.79125115275383



 20%|██████████████▋                                                         | 2033/10000 [4:38:11<18:01:06,  8.14s/it]

Epoch: 2032 | Training loss 2.8685204461216927 | Validation loss 2.7956387996673584



 20%|██████████████▋                                                         | 2034/10000 [4:38:19<17:59:58,  8.13s/it]

Epoch: 2033 | Training loss 2.8691890835762024 | Validation loss 2.787948042154312



 20%|██████████████▋                                                         | 2035/10000 [4:38:27<17:58:07,  8.12s/it]

Epoch: 2034 | Training loss 2.8654049709439278 | Validation loss 2.7850344479084015



 20%|██████████████▋                                                         | 2036/10000 [4:38:35<17:59:50,  8.14s/it]

Epoch: 2035 | Training loss 2.8701179176568985 | Validation loss 2.794338583946228



 20%|██████████████▋                                                         | 2037/10000 [4:38:43<17:59:04,  8.13s/it]

Epoch: 2036 | Training loss 2.8688824251294136 | Validation loss 2.7957319617271423



 20%|██████████████▋                                                         | 2038/10000 [4:38:51<18:01:33,  8.15s/it]

Epoch: 2037 | Training loss 2.872418701648712 | Validation loss 2.7918065786361694



 20%|██████████████▋                                                         | 2039/10000 [4:38:59<18:00:07,  8.14s/it]

Epoch: 2038 | Training loss 2.8731898069381714 | Validation loss 2.7972972989082336



 20%|██████████████▋                                                         | 2040/10000 [4:39:08<18:02:05,  8.16s/it]

Epoch: 2039 | Training loss 2.866425223648548 | Validation loss 2.7905323803424835



 20%|██████████████▋                                                         | 2041/10000 [4:39:16<18:03:30,  8.17s/it]

Epoch: 2040 | Training loss 2.8651087656617165 | Validation loss 2.7973594963550568



 20%|██████████████▋                                                         | 2042/10000 [4:39:24<18:04:26,  8.18s/it]

Epoch: 2041 | Training loss 2.8694880083203316 | Validation loss 2.788878083229065



 20%|██████████████▋                                                         | 2043/10000 [4:39:32<18:04:00,  8.17s/it]

Epoch: 2042 | Training loss 2.865414544939995 | Validation loss 2.7867233753204346



 20%|██████████████▋                                                         | 2044/10000 [4:39:40<18:00:21,  8.15s/it]

Epoch: 2043 | Training loss 2.865810915827751 | Validation loss 2.785271853208542



 20%|██████████████▋                                                         | 2045/10000 [4:39:48<18:00:18,  8.15s/it]

Epoch: 2044 | Training loss 2.8623735830187798 | Validation loss 2.7907201647758484



 20%|██████████████▋                                                         | 2046/10000 [4:39:57<17:58:20,  8.13s/it]

Epoch: 2045 | Training loss 2.871888853609562 | Validation loss 2.79460871219635



 20%|██████████████▋                                                         | 2047/10000 [4:40:05<17:54:32,  8.11s/it]

Epoch: 2046 | Training loss 2.862888105213642 | Validation loss 2.792346864938736



 20%|██████████████▋                                                         | 2048/10000 [4:40:13<17:51:20,  8.08s/it]

Epoch: 2047 | Training loss 2.8683221638202667 | Validation loss 2.7899496257305145



 20%|██████████████▊                                                         | 2049/10000 [4:40:21<17:50:03,  8.07s/it]

Epoch: 2048 | Training loss 2.865586631000042 | Validation loss 2.788100779056549



 20%|██████████████▊                                                         | 2050/10000 [4:40:29<17:54:08,  8.11s/it]

Epoch: 2049 | Training loss 2.8630590364336967 | Validation loss 2.7857120037078857



 21%|██████████████▊                                                         | 2051/10000 [4:40:37<17:53:07,  8.10s/it]

Epoch: 2050 | Training loss 2.869511231780052 | Validation loss 2.787465751171112



 21%|██████████████▊                                                         | 2052/10000 [4:40:45<18:01:07,  8.16s/it]

Epoch: 2051 | Training loss 2.8640967831015587 | Validation loss 2.7894231379032135



 21%|██████████████▊                                                         | 2053/10000 [4:40:53<18:01:53,  8.17s/it]

Epoch: 2052 | Training loss 2.866095133125782 | Validation loss 2.7925219237804413



 21%|██████████████▊                                                         | 2054/10000 [4:41:02<18:02:50,  8.18s/it]

Epoch: 2053 | Training loss 2.8746099323034286 | Validation loss 2.797768533229828



 21%|██████████████▊                                                         | 2055/10000 [4:41:10<18:04:05,  8.19s/it]

Epoch: 2054 | Training loss 2.8646923154592514 | Validation loss 2.7941958606243134



 21%|██████████████▊                                                         | 2056/10000 [4:41:18<17:58:44,  8.15s/it]

Epoch: 2055 | Training loss 2.865764558315277 | Validation loss 2.7950566709041595



 21%|██████████████▊                                                         | 2057/10000 [4:41:26<17:59:46,  8.16s/it]

Epoch: 2056 | Training loss 2.8714576363563538 | Validation loss 2.795822411775589



 21%|██████████████▊                                                         | 2058/10000 [4:41:34<17:59:54,  8.16s/it]

Epoch: 2057 | Training loss 2.865050733089447 | Validation loss 2.7880753576755524



 21%|██████████████▊                                                         | 2059/10000 [4:41:42<17:58:55,  8.15s/it]

Epoch: 2058 | Training loss 2.867125526070595 | Validation loss 2.791506767272949



 21%|██████████████▊                                                         | 2060/10000 [4:41:50<17:53:00,  8.11s/it]

Epoch: 2059 | Training loss 2.8662365078926086 | Validation loss 2.7850044071674347



 21%|██████████████▊                                                         | 2061/10000 [4:41:59<17:57:48,  8.15s/it]

Epoch: 2060 | Training loss 2.8688286170363426 | Validation loss 2.789189785718918



 21%|██████████████▊                                                         | 2062/10000 [4:42:07<17:52:08,  8.10s/it]

Epoch: 2061 | Training loss 2.868188016116619 | Validation loss 2.7863935828208923



 21%|██████████████▊                                                         | 2063/10000 [4:42:15<17:50:06,  8.09s/it]

Epoch: 2062 | Training loss 2.8700813949108124 | Validation loss 2.7903662025928497



 21%|██████████████▊                                                         | 2064/10000 [4:42:23<17:50:30,  8.09s/it]

Epoch: 2063 | Training loss 2.8687597066164017 | Validation loss 2.7925068736076355



 21%|██████████████▊                                                         | 2065/10000 [4:42:31<17:53:08,  8.11s/it]

Epoch: 2064 | Training loss 2.8591054156422615 | Validation loss 2.7941435277462006



 21%|██████████████▉                                                         | 2066/10000 [4:42:39<17:53:42,  8.12s/it]

Epoch: 2065 | Training loss 2.8686638697981834 | Validation loss 2.7987915873527527



 21%|██████████████▉                                                         | 2067/10000 [4:42:47<17:46:14,  8.06s/it]

Epoch: 2066 | Training loss 2.872294619679451 | Validation loss 2.7927620708942413



 21%|██████████████▉                                                         | 2068/10000 [4:42:55<17:43:21,  8.04s/it]

Epoch: 2067 | Training loss 2.8674270659685135 | Validation loss 2.7957969307899475



 21%|██████████████▉                                                         | 2069/10000 [4:43:03<17:56:07,  8.14s/it]

Epoch: 2068 | Training loss 2.8672026246786118 | Validation loss 2.798285335302353



 21%|██████████████▉                                                         | 2070/10000 [4:43:11<17:55:06,  8.13s/it]

Epoch: 2069 | Training loss 2.8672217801213264 | Validation loss 2.7894588112831116



 21%|██████████████▉                                                         | 2071/10000 [4:43:19<17:50:04,  8.10s/it]

Epoch: 2070 | Training loss 2.8641635105013847 | Validation loss 2.7944091260433197



 21%|██████████████▉                                                         | 2072/10000 [4:43:28<17:51:19,  8.11s/it]

Epoch: 2071 | Training loss 2.869409143924713 | Validation loss 2.7933776676654816



 21%|██████████████▉                                                         | 2073/10000 [4:43:36<17:49:03,  8.09s/it]

Epoch: 2072 | Training loss 2.870721071958542 | Validation loss 2.7914113998413086



 21%|██████████████▉                                                         | 2074/10000 [4:43:44<17:49:19,  8.09s/it]

Epoch: 2073 | Training loss 2.864984452724457 | Validation loss 2.7904040217399597



 21%|██████████████▉                                                         | 2075/10000 [4:43:52<17:52:14,  8.12s/it]

Epoch: 2074 | Training loss 2.87057626247406 | Validation loss 2.795125722885132



 21%|██████████████▉                                                         | 2076/10000 [4:44:00<17:48:20,  8.09s/it]

Epoch: 2075 | Training loss 2.8628208339214325 | Validation loss 2.7949772477149963



 21%|██████████████▉                                                         | 2077/10000 [4:44:08<17:48:46,  8.09s/it]

Epoch: 2076 | Training loss 2.8617790266871452 | Validation loss 2.7898146212100983



 21%|██████████████▉                                                         | 2078/10000 [4:44:16<17:50:50,  8.11s/it]

Epoch: 2077 | Training loss 2.864996589720249 | Validation loss 2.783648818731308



 21%|██████████████▉                                                         | 2079/10000 [4:44:24<17:54:51,  8.14s/it]

Epoch: 2078 | Training loss 2.871984340250492 | Validation loss 2.798161029815674



 21%|██████████████▉                                                         | 2080/10000 [4:44:33<17:54:28,  8.14s/it]

Epoch: 2079 | Training loss 2.8655964881181717 | Validation loss 2.7947638034820557



 21%|██████████████▉                                                         | 2081/10000 [4:44:41<17:52:08,  8.12s/it]

Epoch: 2080 | Training loss 2.8659138306975365 | Validation loss 2.795411080121994



 21%|██████████████▉                                                         | 2082/10000 [4:44:49<17:52:23,  8.13s/it]

Epoch: 2081 | Training loss 2.8632945343852043 | Validation loss 2.7907174229621887



 21%|██████████████▉                                                         | 2083/10000 [4:44:57<17:50:46,  8.12s/it]

Epoch: 2082 | Training loss 2.86748019605875 | Validation loss 2.7966229915618896



 21%|███████████████                                                         | 2084/10000 [4:45:05<17:51:23,  8.12s/it]

Epoch: 2083 | Training loss 2.868833191692829 | Validation loss 2.7909246683120728



 21%|███████████████                                                         | 2085/10000 [4:45:13<17:45:40,  8.08s/it]

Epoch: 2084 | Training loss 2.8693512082099915 | Validation loss 2.7868237495422363



 21%|███████████████                                                         | 2086/10000 [4:45:21<17:48:24,  8.10s/it]

Epoch: 2085 | Training loss 2.8714807257056236 | Validation loss 2.7941031754016876



 21%|███████████████                                                         | 2087/10000 [4:45:29<17:49:04,  8.11s/it]

Epoch: 2086 | Training loss 2.865030951797962 | Validation loss 2.791612207889557



 21%|███████████████                                                         | 2088/10000 [4:45:37<17:47:45,  8.10s/it]

Epoch: 2087 | Training loss 2.8706462383270264 | Validation loss 2.7918196618556976



 21%|███████████████                                                         | 2089/10000 [4:45:45<17:42:17,  8.06s/it]

Epoch: 2088 | Training loss 2.867225155234337 | Validation loss 2.786024510860443



 21%|███████████████                                                         | 2090/10000 [4:45:53<17:44:01,  8.07s/it]

Epoch: 2089 | Training loss 2.8659548833966255 | Validation loss 2.7909559309482574



 21%|███████████████                                                         | 2091/10000 [4:46:01<17:44:31,  8.08s/it]

Epoch: 2090 | Training loss 2.866649381816387 | Validation loss 2.794041007757187



 21%|███████████████                                                         | 2092/10000 [4:46:10<17:45:27,  8.08s/it]

Epoch: 2091 | Training loss 2.8691655546426773 | Validation loss 2.794490098953247



 21%|███████████████                                                         | 2093/10000 [4:46:18<17:44:52,  8.08s/it]

Epoch: 2092 | Training loss 2.8681660890579224 | Validation loss 2.792968362569809



 21%|███████████████                                                         | 2094/10000 [4:46:26<17:46:45,  8.10s/it]

Epoch: 2093 | Training loss 2.871572509407997 | Validation loss 2.7931055426597595



 21%|███████████████                                                         | 2095/10000 [4:46:34<17:48:42,  8.11s/it]

Epoch: 2094 | Training loss 2.8688334226608276 | Validation loss 2.7927187979221344



 21%|███████████████                                                         | 2096/10000 [4:46:42<17:47:30,  8.10s/it]

Epoch: 2095 | Training loss 2.8658319637179375 | Validation loss 2.791520893573761



 21%|███████████████                                                         | 2097/10000 [4:46:50<17:45:25,  8.09s/it]

Epoch: 2096 | Training loss 2.864151708781719 | Validation loss 2.788351356983185



 21%|███████████████                                                         | 2098/10000 [4:46:58<17:47:11,  8.10s/it]

Epoch: 2097 | Training loss 2.8631200343370438 | Validation loss 2.787481963634491



 21%|███████████████                                                         | 2099/10000 [4:47:06<17:46:26,  8.10s/it]

Epoch: 2098 | Training loss 2.8654689341783524 | Validation loss 2.792283445596695



 21%|███████████████                                                         | 2100/10000 [4:47:14<17:43:57,  8.08s/it]

Epoch: 2099 | Training loss 2.8640163466334343 | Validation loss 2.797135144472122



 21%|███████████████▏                                                        | 2101/10000 [4:47:22<17:41:38,  8.06s/it]

Epoch: 2100 | Training loss 2.8629598766565323 | Validation loss 2.7891843616962433



 21%|███████████████▏                                                        | 2102/10000 [4:47:31<17:45:10,  8.09s/it]

Epoch: 2101 | Training loss 2.86738283932209 | Validation loss 2.7893921434879303



 21%|███████████████▏                                                        | 2103/10000 [4:47:39<17:48:47,  8.12s/it]

Epoch: 2102 | Training loss 2.8724022805690765 | Validation loss 2.795406609773636



 21%|███████████████▏                                                        | 2104/10000 [4:47:47<17:51:00,  8.14s/it]

Epoch: 2103 | Training loss 2.8618658408522606 | Validation loss 2.797395795583725



 21%|███████████████▏                                                        | 2105/10000 [4:47:55<17:50:41,  8.14s/it]

Epoch: 2104 | Training loss 2.8632346838712692 | Validation loss 2.800610691308975



 21%|███████████████▏                                                        | 2106/10000 [4:48:03<17:51:07,  8.14s/it]

Epoch: 2105 | Training loss 2.8676538318395615 | Validation loss 2.7956970632076263



 21%|███████████████▏                                                        | 2107/10000 [4:48:11<17:47:02,  8.11s/it]

Epoch: 2106 | Training loss 2.859248049557209 | Validation loss 2.789360463619232



 21%|███████████████▏                                                        | 2108/10000 [4:48:19<17:50:15,  8.14s/it]

Epoch: 2107 | Training loss 2.860936440527439 | Validation loss 2.7906707525253296



 21%|███████████████▏                                                        | 2109/10000 [4:48:28<17:48:46,  8.13s/it]

Epoch: 2108 | Training loss 2.863213025033474 | Validation loss 2.7905038595199585



 21%|███████████████▏                                                        | 2110/10000 [4:48:36<17:50:09,  8.14s/it]

Epoch: 2109 | Training loss 2.8658743128180504 | Validation loss 2.791203022003174



 21%|███████████████▏                                                        | 2111/10000 [4:48:44<17:47:41,  8.12s/it]

Epoch: 2110 | Training loss 2.8662387281656265 | Validation loss 2.793576091527939



 21%|███████████████▏                                                        | 2112/10000 [4:48:52<17:45:49,  8.11s/it]

Epoch: 2111 | Training loss 2.8695603013038635 | Validation loss 2.789455771446228



 21%|███████████████▏                                                        | 2113/10000 [4:49:00<17:48:49,  8.13s/it]

Epoch: 2112 | Training loss 2.86628407984972 | Validation loss 2.7898877561092377



 21%|███████████████▏                                                        | 2114/10000 [4:49:08<17:45:41,  8.11s/it]

Epoch: 2113 | Training loss 2.8722372949123383 | Validation loss 2.7896260619163513



 21%|███████████████▏                                                        | 2115/10000 [4:49:16<17:47:09,  8.12s/it]

Epoch: 2114 | Training loss 2.8641360625624657 | Validation loss 2.792748987674713



 21%|███████████████▏                                                        | 2116/10000 [4:49:24<17:45:06,  8.11s/it]

Epoch: 2115 | Training loss 2.864354468882084 | Validation loss 2.789603501558304



 21%|███████████████▏                                                        | 2117/10000 [4:49:32<17:46:03,  8.11s/it]

Epoch: 2116 | Training loss 2.8681154400110245 | Validation loss 2.7898477613925934



 21%|███████████████▏                                                        | 2118/10000 [4:49:41<17:45:58,  8.11s/it]

Epoch: 2117 | Training loss 2.870085932314396 | Validation loss 2.796219527721405



 21%|███████████████▎                                                        | 2119/10000 [4:49:49<17:49:03,  8.14s/it]

Epoch: 2118 | Training loss 2.864491321146488 | Validation loss 2.7903129756450653



 21%|███████████████▎                                                        | 2120/10000 [4:49:57<17:53:38,  8.17s/it]

Epoch: 2119 | Training loss 2.8623637929558754 | Validation loss 2.787620395421982



 21%|███████████████▎                                                        | 2121/10000 [4:50:05<17:54:23,  8.18s/it]

Epoch: 2120 | Training loss 2.864939108490944 | Validation loss 2.7922653257846832



 21%|███████████████▎                                                        | 2122/10000 [4:50:13<17:53:00,  8.17s/it]

Epoch: 2121 | Training loss 2.8629543259739876 | Validation loss 2.7902443408966064



 21%|███████████████▎                                                        | 2123/10000 [4:50:21<17:51:22,  8.16s/it]

Epoch: 2122 | Training loss 2.865061230957508 | Validation loss 2.789315491914749



 21%|███████████████▎                                                        | 2124/10000 [4:50:30<17:48:14,  8.14s/it]

Epoch: 2123 | Training loss 2.872353956103325 | Validation loss 2.792458087205887



 21%|███████████████▎                                                        | 2125/10000 [4:50:38<17:49:10,  8.15s/it]

Epoch: 2124 | Training loss 2.8677374720573425 | Validation loss 2.788704425096512



 21%|███████████████▎                                                        | 2126/10000 [4:50:46<17:52:05,  8.17s/it]

Epoch: 2125 | Training loss 2.8617728278040886 | Validation loss 2.7917361855506897



 21%|███████████████▎                                                        | 2127/10000 [4:50:54<17:42:36,  8.10s/it]

Epoch: 2126 | Training loss 2.867405891418457 | Validation loss 2.7984397411346436



 21%|███████████████▎                                                        | 2128/10000 [4:51:02<17:45:40,  8.12s/it]

Epoch: 2127 | Training loss 2.8682054728269577 | Validation loss 2.7908016443252563



 21%|███████████████▎                                                        | 2129/10000 [4:51:10<17:52:53,  8.18s/it]

Epoch: 2128 | Training loss 2.865888886153698 | Validation loss 2.7930491864681244



 21%|███████████████▎                                                        | 2130/10000 [4:51:19<17:53:59,  8.19s/it]

Epoch: 2129 | Training loss 2.866020381450653 | Validation loss 2.795764684677124



 21%|███████████████▎                                                        | 2131/10000 [4:51:27<17:48:34,  8.15s/it]

Epoch: 2130 | Training loss 2.8676126524806023 | Validation loss 2.7946414947509766



 21%|███████████████▎                                                        | 2132/10000 [4:51:35<17:49:59,  8.16s/it]

Epoch: 2131 | Training loss 2.8728658854961395 | Validation loss 2.8030226826667786



 21%|███████████████▎                                                        | 2133/10000 [4:51:43<17:49:25,  8.16s/it]

Epoch: 2132 | Training loss 2.86755583435297 | Validation loss 2.794296681880951



 21%|███████████████▎                                                        | 2134/10000 [4:51:51<17:47:03,  8.14s/it]

Epoch: 2133 | Training loss 2.863321639597416 | Validation loss 2.789517968893051



 21%|███████████████▎                                                        | 2135/10000 [4:51:59<17:46:04,  8.13s/it]

Epoch: 2134 | Training loss 2.862385965883732 | Validation loss 2.7880991995334625



 21%|███████████████▍                                                        | 2136/10000 [4:52:07<17:42:49,  8.11s/it]

Epoch: 2135 | Training loss 2.868818335235119 | Validation loss 2.789665162563324



 21%|███████████████▍                                                        | 2137/10000 [4:52:15<17:43:34,  8.12s/it]

Epoch: 2136 | Training loss 2.86895077675581 | Validation loss 2.7943911254405975



 21%|███████████████▍                                                        | 2138/10000 [4:52:23<17:41:40,  8.10s/it]

Epoch: 2137 | Training loss 2.864815041422844 | Validation loss 2.7937360405921936



 21%|███████████████▍                                                        | 2139/10000 [4:52:32<17:40:57,  8.10s/it]

Epoch: 2138 | Training loss 2.8621964007616043 | Validation loss 2.7941245436668396



 21%|███████████████▍                                                        | 2140/10000 [4:52:40<17:39:40,  8.09s/it]

Epoch: 2139 | Training loss 2.8668934106826782 | Validation loss 2.7899264097213745



 21%|███████████████▍                                                        | 2141/10000 [4:52:48<17:43:05,  8.12s/it]

Epoch: 2140 | Training loss 2.868857257068157 | Validation loss 2.7872338593006134



 21%|███████████████▍                                                        | 2142/10000 [4:52:56<17:45:56,  8.14s/it]

Epoch: 2141 | Training loss 2.8636744618415833 | Validation loss 2.7883937060832977



 21%|███████████████▍                                                        | 2143/10000 [4:53:04<17:42:31,  8.11s/it]

Epoch: 2142 | Training loss 2.862223297357559 | Validation loss 2.785938084125519



 21%|███████████████▍                                                        | 2144/10000 [4:53:12<17:45:32,  8.14s/it]

Epoch: 2143 | Training loss 2.8624308928847313 | Validation loss 2.7914504408836365



 21%|███████████████▍                                                        | 2145/10000 [4:53:20<17:45:46,  8.14s/it]

Epoch: 2144 | Training loss 2.8686822280287743 | Validation loss 2.79410582780838



 21%|███████████████▍                                                        | 2146/10000 [4:53:29<17:45:27,  8.14s/it]

Epoch: 2145 | Training loss 2.870790146291256 | Validation loss 2.7955131232738495



 21%|███████████████▍                                                        | 2147/10000 [4:53:37<17:41:24,  8.11s/it]

Epoch: 2146 | Training loss 2.866046719253063 | Validation loss 2.7898488342761993



 21%|███████████████▍                                                        | 2148/10000 [4:53:45<17:40:20,  8.10s/it]

Epoch: 2147 | Training loss 2.865459978580475 | Validation loss 2.794895887374878



 21%|███████████████▍                                                        | 2149/10000 [4:53:53<17:41:29,  8.11s/it]

Epoch: 2148 | Training loss 2.86652909219265 | Validation loss 2.7895089089870453



 22%|███████████████▍                                                        | 2150/10000 [4:54:01<17:42:10,  8.12s/it]

Epoch: 2149 | Training loss 2.8669639974832535 | Validation loss 2.797202855348587



 22%|███████████████▍                                                        | 2151/10000 [4:54:09<17:44:20,  8.14s/it]

Epoch: 2150 | Training loss 2.868028722703457 | Validation loss 2.789070188999176



 22%|███████████████▍                                                        | 2152/10000 [4:54:17<17:45:14,  8.14s/it]

Epoch: 2151 | Training loss 2.867111958563328 | Validation loss 2.793714612722397



 22%|███████████████▌                                                        | 2153/10000 [4:54:25<17:46:36,  8.16s/it]

Epoch: 2152 | Training loss 2.865530848503113 | Validation loss 2.7906474471092224



 22%|███████████████▌                                                        | 2154/10000 [4:54:34<17:49:48,  8.18s/it]

Epoch: 2153 | Training loss 2.863589458167553 | Validation loss 2.791205018758774



 22%|███████████████▌                                                        | 2155/10000 [4:54:42<17:50:15,  8.19s/it]

Epoch: 2154 | Training loss 2.8654448986053467 | Validation loss 2.788549304008484



 22%|███████████████▌                                                        | 2156/10000 [4:54:50<17:53:45,  8.21s/it]

Epoch: 2155 | Training loss 2.866896316409111 | Validation loss 2.787161409854889



 22%|███████████████▌                                                        | 2157/10000 [4:54:58<17:45:44,  8.15s/it]

Epoch: 2156 | Training loss 2.8698635026812553 | Validation loss 2.7947695553302765



 22%|███████████████▌                                                        | 2158/10000 [4:55:06<17:46:00,  8.16s/it]

Epoch: 2157 | Training loss 2.864586681127548 | Validation loss 2.79043048620224



 22%|███████████████▌                                                        | 2159/10000 [4:55:14<17:44:57,  8.15s/it]

Epoch: 2158 | Training loss 2.866192080080509 | Validation loss 2.7874964475631714



 22%|███████████████▌                                                        | 2160/10000 [4:55:23<17:45:21,  8.15s/it]

Epoch: 2159 | Training loss 2.865345314145088 | Validation loss 2.7886744141578674



 22%|███████████████▌                                                        | 2161/10000 [4:55:31<17:46:20,  8.16s/it]

Epoch: 2160 | Training loss 2.867712587118149 | Validation loss 2.8119837939739227



 22%|███████████████▌                                                        | 2162/10000 [4:55:39<17:44:28,  8.15s/it]

Epoch: 2161 | Training loss 2.8638199865818024 | Validation loss 2.787163496017456



 22%|███████████████▌                                                        | 2163/10000 [4:55:47<17:42:30,  8.13s/it]

Epoch: 2162 | Training loss 2.866481713950634 | Validation loss 2.790712982416153



 22%|███████████████▌                                                        | 2164/10000 [4:55:55<17:42:27,  8.14s/it]

Epoch: 2163 | Training loss 2.8652033135294914 | Validation loss 2.789826363325119



 22%|███████████████▌                                                        | 2165/10000 [4:56:03<17:38:03,  8.10s/it]

Epoch: 2164 | Training loss 2.8666624948382378 | Validation loss 2.7873157262802124



 22%|███████████████▌                                                        | 2166/10000 [4:56:11<17:32:18,  8.06s/it]

Epoch: 2165 | Training loss 2.8647551760077477 | Validation loss 2.787838339805603



 22%|███████████████▌                                                        | 2167/10000 [4:56:19<17:32:20,  8.06s/it]

Epoch: 2166 | Training loss 2.8676485642790794 | Validation loss 2.7897513806819916



 22%|███████████████▌                                                        | 2168/10000 [4:56:27<17:34:14,  8.08s/it]

Epoch: 2167 | Training loss 2.8628495261073112 | Validation loss 2.789204388856888



 22%|███████████████▌                                                        | 2169/10000 [4:56:35<17:32:36,  8.06s/it]

Epoch: 2168 | Training loss 2.8737696558237076 | Validation loss 2.794189989566803



 22%|███████████████▌                                                        | 2170/10000 [4:56:43<17:33:17,  8.07s/it]

Epoch: 2169 | Training loss 2.863895498216152 | Validation loss 2.786620408296585



 22%|███████████████▋                                                        | 2171/10000 [4:56:52<17:32:39,  8.07s/it]

Epoch: 2170 | Training loss 2.8659484460949898 | Validation loss 2.7902135252952576



 22%|███████████████▋                                                        | 2172/10000 [4:57:00<17:36:16,  8.10s/it]

Epoch: 2171 | Training loss 2.8615218698978424 | Validation loss 2.7888323068618774



 22%|███████████████▋                                                        | 2173/10000 [4:57:08<17:38:52,  8.12s/it]

Epoch: 2172 | Training loss 2.867155522108078 | Validation loss 2.7919824719429016



 22%|███████████████▋                                                        | 2174/10000 [4:57:16<17:39:17,  8.12s/it]

Epoch: 2173 | Training loss 2.8657098338007927 | Validation loss 2.7887985706329346



 22%|███████████████▋                                                        | 2175/10000 [4:57:24<17:35:16,  8.09s/it]

Epoch: 2174 | Training loss 2.868116520345211 | Validation loss 2.7880553901195526



 22%|███████████████▋                                                        | 2176/10000 [4:57:32<17:33:32,  8.08s/it]

Epoch: 2175 | Training loss 2.86533360183239 | Validation loss 2.7916489243507385



 22%|███████████████▋                                                        | 2177/10000 [4:57:40<17:37:16,  8.11s/it]

Epoch: 2176 | Training loss 2.8700065687298775 | Validation loss 2.790352910757065



 22%|███████████████▋                                                        | 2178/10000 [4:57:48<17:36:44,  8.11s/it]

Epoch: 2177 | Training loss 2.861327610909939 | Validation loss 2.7919723093509674



 22%|███████████████▋                                                        | 2179/10000 [4:57:57<17:40:32,  8.14s/it]

Epoch: 2178 | Training loss 2.870805822312832 | Validation loss 2.794465035200119



 22%|███████████████▋                                                        | 2180/10000 [4:58:05<17:43:55,  8.16s/it]

Epoch: 2179 | Training loss 2.872228167951107 | Validation loss 2.7896940410137177



 22%|███████████████▋                                                        | 2181/10000 [4:58:13<17:40:41,  8.14s/it]

Epoch: 2180 | Training loss 2.8730637058615685 | Validation loss 2.793798953294754



 22%|███████████████▋                                                        | 2182/10000 [4:58:21<17:39:37,  8.13s/it]

Epoch: 2181 | Training loss 2.8656270876526833 | Validation loss 2.786779910326004



 22%|███████████████▋                                                        | 2183/10000 [4:58:29<17:38:52,  8.13s/it]

Epoch: 2182 | Training loss 2.8697123900055885 | Validation loss 2.787025213241577



 22%|███████████████▋                                                        | 2184/10000 [4:58:37<17:38:00,  8.12s/it]

Epoch: 2183 | Training loss 2.8651312217116356 | Validation loss 2.7859354317188263



 22%|███████████████▋                                                        | 2185/10000 [4:58:45<17:42:06,  8.15s/it]

Epoch: 2184 | Training loss 2.865561544895172 | Validation loss 2.7890526950359344



 22%|███████████████▋                                                        | 2186/10000 [4:58:54<17:43:29,  8.17s/it]

Epoch: 2185 | Training loss 2.867027848958969 | Validation loss 2.7897357046604156



 22%|███████████████▋                                                        | 2187/10000 [4:59:02<17:44:00,  8.17s/it]

Epoch: 2186 | Training loss 2.8654900416731834 | Validation loss 2.7904891669750214



 22%|███████████████▊                                                        | 2188/10000 [4:59:10<17:41:07,  8.15s/it]

Epoch: 2187 | Training loss 2.86355609446764 | Validation loss 2.7867888808250427



 22%|███████████████▊                                                        | 2189/10000 [4:59:18<17:37:51,  8.13s/it]

Epoch: 2188 | Training loss 2.8603968992829323 | Validation loss 2.785908490419388



 22%|███████████████▊                                                        | 2190/10000 [4:59:26<17:34:29,  8.10s/it]

Epoch: 2189 | Training loss 2.863447904586792 | Validation loss 2.7840700447559357



 22%|███████████████▊                                                        | 2191/10000 [4:59:34<17:34:17,  8.10s/it]

Epoch: 2190 | Training loss 2.862678311765194 | Validation loss 2.78667876124382



 22%|███████████████▊                                                        | 2192/10000 [4:59:42<17:33:36,  8.10s/it]

Epoch: 2191 | Training loss 2.8677515909075737 | Validation loss 2.7862906754016876



 22%|███████████████▊                                                        | 2193/10000 [4:59:50<17:30:00,  8.07s/it]

Epoch: 2192 | Training loss 2.865143768489361 | Validation loss 2.787593722343445



 22%|███████████████▊                                                        | 2194/10000 [4:59:58<17:30:00,  8.07s/it]

Epoch: 2193 | Training loss 2.871597647666931 | Validation loss 2.7926180362701416



 22%|███████████████▊                                                        | 2195/10000 [5:00:06<17:29:17,  8.07s/it]

Epoch: 2194 | Training loss 2.866760455071926 | Validation loss 2.7858802676200867



 22%|███████████████▊                                                        | 2196/10000 [5:00:14<17:29:54,  8.07s/it]

Epoch: 2195 | Training loss 2.871523231267929 | Validation loss 2.7963958978652954



 22%|███████████████▊                                                        | 2197/10000 [5:00:23<17:32:43,  8.09s/it]

Epoch: 2196 | Training loss 2.8720095679163933 | Validation loss 2.786410003900528



 22%|███████████████▊                                                        | 2198/10000 [5:00:31<17:37:43,  8.13s/it]

Epoch: 2197 | Training loss 2.871210604906082 | Validation loss 2.7910940051078796



 22%|███████████████▊                                                        | 2199/10000 [5:00:39<17:38:04,  8.14s/it]

Epoch: 2198 | Training loss 2.8645743802189827 | Validation loss 2.791917234659195



 22%|███████████████▊                                                        | 2200/10000 [5:00:47<17:39:37,  8.15s/it]

Epoch: 2199 | Training loss 2.867317780852318 | Validation loss 2.7849315404891968



 22%|███████████████▊                                                        | 2201/10000 [5:00:55<17:35:08,  8.12s/it]

Epoch: 2200 | Training loss 2.8626426979899406 | Validation loss 2.79215806722641



 22%|███████████████▊                                                        | 2202/10000 [5:01:03<17:34:20,  8.11s/it]

Epoch: 2201 | Training loss 2.8698751255869865 | Validation loss 2.790950834751129



 22%|███████████████▊                                                        | 2203/10000 [5:01:11<17:34:29,  8.11s/it]

Epoch: 2202 | Training loss 2.863001026213169 | Validation loss 2.7925989627838135



 22%|███████████████▊                                                        | 2204/10000 [5:01:20<17:35:35,  8.12s/it]

Epoch: 2203 | Training loss 2.8613404035568237 | Validation loss 2.7901492714881897



 22%|███████████████▉                                                        | 2205/10000 [5:01:28<17:38:14,  8.15s/it]

Epoch: 2204 | Training loss 2.8690012469887733 | Validation loss 2.7869094610214233



 22%|███████████████▉                                                        | 2206/10000 [5:01:36<17:35:40,  8.13s/it]

Epoch: 2205 | Training loss 2.869870327413082 | Validation loss 2.8009374737739563



 22%|███████████████▉                                                        | 2207/10000 [5:01:44<17:36:25,  8.13s/it]

Epoch: 2206 | Training loss 2.8636903762817383 | Validation loss 2.7937566936016083



 22%|███████████████▉                                                        | 2208/10000 [5:01:52<17:34:04,  8.12s/it]

Epoch: 2207 | Training loss 2.868921384215355 | Validation loss 2.797609269618988



 22%|███████████████▉                                                        | 2209/10000 [5:02:00<17:36:17,  8.13s/it]

Epoch: 2208 | Training loss 2.8653253689408302 | Validation loss 2.7936802804470062



 22%|███████████████▉                                                        | 2210/10000 [5:02:08<17:38:02,  8.15s/it]

Epoch: 2209 | Training loss 2.868277572095394 | Validation loss 2.7902086973190308



 22%|███████████████▉                                                        | 2211/10000 [5:02:17<17:37:25,  8.15s/it]

Epoch: 2210 | Training loss 2.868062414228916 | Validation loss 2.7951624393463135



 22%|███████████████▉                                                        | 2212/10000 [5:02:25<17:38:06,  8.15s/it]

Epoch: 2211 | Training loss 2.8708398044109344 | Validation loss 2.790783941745758



 22%|███████████████▉                                                        | 2213/10000 [5:02:33<17:34:12,  8.12s/it]

Epoch: 2212 | Training loss 2.8647749200463295 | Validation loss 2.7913766503334045



 22%|███████████████▉                                                        | 2214/10000 [5:02:41<17:36:56,  8.14s/it]

Epoch: 2213 | Training loss 2.8627610951662064 | Validation loss 2.7861852049827576



 22%|███████████████▉                                                        | 2215/10000 [5:02:49<17:36:53,  8.15s/it]

Epoch: 2214 | Training loss 2.8660536035895348 | Validation loss 2.789814829826355



 22%|███████████████▉                                                        | 2216/10000 [5:02:58<17:50:41,  8.25s/it]

Epoch: 2215 | Training loss 2.8660580068826675 | Validation loss 2.7879364788532257



 22%|███████████████▉                                                        | 2217/10000 [5:03:06<17:46:21,  8.22s/it]

Epoch: 2216 | Training loss 2.86587942391634 | Validation loss 2.7877396643161774



 22%|███████████████▉                                                        | 2218/10000 [5:03:14<17:44:06,  8.20s/it]

Epoch: 2217 | Training loss 2.869887135922909 | Validation loss 2.791803002357483



 22%|███████████████▉                                                        | 2219/10000 [5:03:22<17:43:36,  8.20s/it]

Epoch: 2218 | Training loss 2.869226425886154 | Validation loss 2.793769419193268



 22%|███████████████▉                                                        | 2220/10000 [5:03:30<17:42:31,  8.19s/it]

Epoch: 2219 | Training loss 2.8631762340664864 | Validation loss 2.7940126955509186



 22%|███████████████▉                                                        | 2221/10000 [5:03:38<17:43:35,  8.20s/it]

Epoch: 2220 | Training loss 2.8616325110197067 | Validation loss 2.7901522517204285



 22%|███████████████▉                                                        | 2222/10000 [5:03:47<17:40:44,  8.18s/it]

Epoch: 2221 | Training loss 2.8648333996534348 | Validation loss 2.7880382239818573



 22%|████████████████                                                        | 2223/10000 [5:03:55<17:32:36,  8.12s/it]

Epoch: 2222 | Training loss 2.869669370353222 | Validation loss 2.7930466532707214



 22%|████████████████                                                        | 2224/10000 [5:04:03<17:29:20,  8.10s/it]

Epoch: 2223 | Training loss 2.8671915605664253 | Validation loss 2.793255090713501



 22%|████████████████                                                        | 2225/10000 [5:04:11<17:32:27,  8.12s/it]

Epoch: 2224 | Training loss 2.872183009982109 | Validation loss 2.793939858675003



 22%|████████████████                                                        | 2226/10000 [5:04:19<17:28:34,  8.09s/it]

Epoch: 2225 | Training loss 2.8729467168450356 | Validation loss 2.7921891510486603



 22%|████████████████                                                        | 2227/10000 [5:04:27<17:26:17,  8.08s/it]

Epoch: 2226 | Training loss 2.867812894284725 | Validation loss 2.7927217185497284



 22%|████████████████                                                        | 2228/10000 [5:04:35<17:25:22,  8.07s/it]

Epoch: 2227 | Training loss 2.867615096271038 | Validation loss 2.7893084287643433



 22%|████████████████                                                        | 2229/10000 [5:04:43<17:22:17,  8.05s/it]

Epoch: 2228 | Training loss 2.8667165115475655 | Validation loss 2.7910077273845673



 22%|████████████████                                                        | 2230/10000 [5:04:51<17:25:00,  8.07s/it]

Epoch: 2229 | Training loss 2.8686425611376762 | Validation loss 2.7874861359596252



 22%|████████████████                                                        | 2231/10000 [5:04:59<17:26:42,  8.08s/it]

Epoch: 2230 | Training loss 2.870989553630352 | Validation loss 2.7902593314647675



 22%|████████████████                                                        | 2232/10000 [5:05:07<17:26:06,  8.08s/it]

Epoch: 2231 | Training loss 2.8671826496720314 | Validation loss 2.7928967773914337



 22%|████████████████                                                        | 2233/10000 [5:05:15<17:25:00,  8.07s/it]

Epoch: 2232 | Training loss 2.865274928510189 | Validation loss 2.7892958223819733



 22%|████████████████                                                        | 2234/10000 [5:05:23<17:25:58,  8.08s/it]

Epoch: 2233 | Training loss 2.8593291267752647 | Validation loss 2.784598171710968



 22%|████████████████                                                        | 2235/10000 [5:05:31<17:21:06,  8.04s/it]

Epoch: 2234 | Training loss 2.865879088640213 | Validation loss 2.786082834005356



 22%|████████████████                                                        | 2236/10000 [5:05:39<17:21:24,  8.05s/it]

Epoch: 2235 | Training loss 2.864766702055931 | Validation loss 2.786540597677231



 22%|████████████████                                                        | 2237/10000 [5:05:47<17:22:06,  8.05s/it]

Epoch: 2236 | Training loss 2.868005372583866 | Validation loss 2.790087938308716



 22%|████████████████                                                        | 2238/10000 [5:05:56<17:22:40,  8.06s/it]

Epoch: 2237 | Training loss 2.8693677112460136 | Validation loss 2.7936939001083374



 22%|████████████████                                                        | 2239/10000 [5:06:04<17:28:20,  8.10s/it]

Epoch: 2238 | Training loss 2.8668297603726387 | Validation loss 2.791961580514908



 22%|████████████████▏                                                       | 2240/10000 [5:06:12<17:24:25,  8.08s/it]

Epoch: 2239 | Training loss 2.8685202226042747 | Validation loss 2.7949670255184174



 22%|████████████████▏                                                       | 2241/10000 [5:06:20<17:23:29,  8.07s/it]

Epoch: 2240 | Training loss 2.869007259607315 | Validation loss 2.7997573614120483



 22%|████████████████▏                                                       | 2242/10000 [5:06:28<17:28:18,  8.11s/it]

Epoch: 2241 | Training loss 2.864417038857937 | Validation loss 2.793282777070999



 22%|████████████████▏                                                       | 2243/10000 [5:06:36<17:29:15,  8.12s/it]

Epoch: 2242 | Training loss 2.8680164366960526 | Validation loss 2.7850598990917206



 22%|████████████████▏                                                       | 2244/10000 [5:06:44<17:30:18,  8.13s/it]

Epoch: 2243 | Training loss 2.8685214295983315 | Validation loss 2.788527488708496



 22%|████████████████▏                                                       | 2245/10000 [5:06:52<17:28:37,  8.11s/it]

Epoch: 2244 | Training loss 2.8662278577685356 | Validation loss 2.7976252138614655



 22%|████████████████▏                                                       | 2246/10000 [5:07:01<17:31:01,  8.13s/it]

Epoch: 2245 | Training loss 2.8627410754561424 | Validation loss 2.7964028418064117



 22%|████████████████▏                                                       | 2247/10000 [5:07:09<17:29:40,  8.12s/it]

Epoch: 2246 | Training loss 2.8715108782052994 | Validation loss 2.7957204282283783



 22%|████████████████▏                                                       | 2248/10000 [5:07:17<17:26:58,  8.10s/it]

Epoch: 2247 | Training loss 2.86653308570385 | Validation loss 2.7944169342517853



 22%|████████████████▏                                                       | 2249/10000 [5:07:25<17:28:53,  8.12s/it]

Epoch: 2248 | Training loss 2.8695635721087456 | Validation loss 2.7914717197418213



 22%|████████████████▏                                                       | 2250/10000 [5:07:33<17:27:32,  8.11s/it]

Epoch: 2249 | Training loss 2.862438701093197 | Validation loss 2.7889159619808197



 23%|████████████████▏                                                       | 2251/10000 [5:07:41<17:28:26,  8.12s/it]

Epoch: 2250 | Training loss 2.8663542568683624 | Validation loss 2.789730966091156



 23%|████████████████▏                                                       | 2252/10000 [5:07:49<17:25:59,  8.10s/it]

Epoch: 2251 | Training loss 2.8710875436663628 | Validation loss 2.7913122475147247



 23%|████████████████▏                                                       | 2253/10000 [5:07:57<17:27:18,  8.11s/it]

Epoch: 2252 | Training loss 2.8685653060674667 | Validation loss 2.7947388887405396



 23%|████████████████▏                                                       | 2254/10000 [5:08:05<17:23:46,  8.09s/it]

Epoch: 2253 | Training loss 2.865854136645794 | Validation loss 2.789930075407028



 23%|████████████████▏                                                       | 2255/10000 [5:08:13<17:21:17,  8.07s/it]

Epoch: 2254 | Training loss 2.8676564693450928 | Validation loss 2.788089841604233



 23%|████████████████▏                                                       | 2256/10000 [5:08:21<17:22:02,  8.07s/it]

Epoch: 2255 | Training loss 2.8687674701213837 | Validation loss 2.790189176797867



 23%|████████████████▎                                                       | 2257/10000 [5:08:30<17:25:21,  8.10s/it]

Epoch: 2256 | Training loss 2.8664355278015137 | Validation loss 2.790692150592804



 23%|████████████████▎                                                       | 2258/10000 [5:08:38<17:23:26,  8.09s/it]

Epoch: 2257 | Training loss 2.8659343272447586 | Validation loss 2.789205551147461



 23%|████████████████▎                                                       | 2259/10000 [5:08:46<17:22:38,  8.08s/it]

Epoch: 2258 | Training loss 2.864472880959511 | Validation loss 2.787226378917694



 23%|████████████████▎                                                       | 2260/10000 [5:08:54<17:20:19,  8.06s/it]

Epoch: 2259 | Training loss 2.8688217848539352 | Validation loss 2.787277102470398



 23%|████████████████▎                                                       | 2261/10000 [5:09:02<17:20:28,  8.07s/it]

Epoch: 2260 | Training loss 2.869237795472145 | Validation loss 2.7949086725711823



 23%|████████████████▎                                                       | 2262/10000 [5:09:10<17:19:49,  8.06s/it]

Epoch: 2261 | Training loss 2.862324945628643 | Validation loss 2.7967799305915833



 23%|████████████████▎                                                       | 2263/10000 [5:09:18<17:23:33,  8.09s/it]

Epoch: 2262 | Training loss 2.861323393881321 | Validation loss 2.7912118434906006



 23%|████████████████▎                                                       | 2264/10000 [5:09:26<17:21:21,  8.08s/it]

Epoch: 2263 | Training loss 2.8704957962036133 | Validation loss 2.790356695652008



 23%|████████████████▎                                                       | 2265/10000 [5:09:34<17:21:30,  8.08s/it]

Epoch: 2264 | Training loss 2.865159310400486 | Validation loss 2.7911965548992157



 23%|████████████████▎                                                       | 2266/10000 [5:09:42<17:22:46,  8.09s/it]

Epoch: 2265 | Training loss 2.8658164963126183 | Validation loss 2.7871895134449005



 23%|████████████████▎                                                       | 2267/10000 [5:09:50<17:23:02,  8.09s/it]

Epoch: 2266 | Training loss 2.8639506697654724 | Validation loss 2.7923320531845093



 23%|████████████████▎                                                       | 2268/10000 [5:09:59<17:25:32,  8.11s/it]

Epoch: 2267 | Training loss 2.8667430132627487 | Validation loss 2.791911244392395



 23%|████████████████▎                                                       | 2269/10000 [5:10:07<17:28:47,  8.14s/it]

Epoch: 2268 | Training loss 2.8650956973433495 | Validation loss 2.786767542362213



 23%|████████████████▎                                                       | 2270/10000 [5:10:15<17:27:09,  8.13s/it]

Epoch: 2269 | Training loss 2.8686319664120674 | Validation loss 2.790525436401367



 23%|████████████████▎                                                       | 2271/10000 [5:10:23<17:27:52,  8.13s/it]

Epoch: 2270 | Training loss 2.8650314807891846 | Validation loss 2.7951991260051727



 23%|████████████████▎                                                       | 2272/10000 [5:10:31<17:25:15,  8.12s/it]

Epoch: 2271 | Training loss 2.8652012646198273 | Validation loss 2.789674073457718



 23%|████████████████▎                                                       | 2273/10000 [5:10:39<17:26:58,  8.13s/it]

Epoch: 2272 | Training loss 2.8635925576090813 | Validation loss 2.7863408029079437



 23%|████████████████▎                                                       | 2274/10000 [5:10:47<17:28:40,  8.14s/it]

Epoch: 2273 | Training loss 2.867360807955265 | Validation loss 2.7921996116638184



 23%|████████████████▍                                                       | 2275/10000 [5:10:56<17:29:14,  8.15s/it]

Epoch: 2274 | Training loss 2.8647466227412224 | Validation loss 2.794357031583786



 23%|████████████████▍                                                       | 2276/10000 [5:11:04<17:30:17,  8.16s/it]

Epoch: 2275 | Training loss 2.8682016730308533 | Validation loss 2.7897388339042664



 23%|████████████████▍                                                       | 2277/10000 [5:11:12<17:29:16,  8.15s/it]

Epoch: 2276 | Training loss 2.8639978766441345 | Validation loss 2.797705829143524



 23%|████████████████▍                                                       | 2278/10000 [5:11:20<17:28:32,  8.15s/it]

Epoch: 2277 | Training loss 2.8684399873018265 | Validation loss 2.7903306782245636



 23%|████████████████▍                                                       | 2279/10000 [5:11:28<17:29:02,  8.15s/it]

Epoch: 2278 | Training loss 2.8608523532748222 | Validation loss 2.79154434800148



 23%|████████████████▍                                                       | 2280/10000 [5:11:36<17:26:20,  8.13s/it]

Epoch: 2279 | Training loss 2.8589164912700653 | Validation loss 2.793901711702347



 23%|████████████████▍                                                       | 2281/10000 [5:11:44<17:22:34,  8.10s/it]

Epoch: 2280 | Training loss 2.863689139485359 | Validation loss 2.7888056337833405



 23%|████████████████▍                                                       | 2282/10000 [5:11:52<17:24:04,  8.12s/it]

Epoch: 2281 | Training loss 2.863760806620121 | Validation loss 2.7869902849197388



 23%|████████████████▍                                                       | 2283/10000 [5:12:01<17:24:30,  8.12s/it]

Epoch: 2282 | Training loss 2.8685381039977074 | Validation loss 2.8000691533088684



 23%|████████████████▍                                                       | 2284/10000 [5:12:09<17:24:13,  8.12s/it]

Epoch: 2283 | Training loss 2.8652375787496567 | Validation loss 2.7913765609264374



 23%|████████████████▍                                                       | 2285/10000 [5:12:17<17:25:10,  8.13s/it]

Epoch: 2284 | Training loss 2.8597165420651436 | Validation loss 2.7862071990966797



 23%|████████████████▍                                                       | 2286/10000 [5:12:25<17:22:42,  8.11s/it]

Epoch: 2285 | Training loss 2.8706319257616997 | Validation loss 2.7953712940216064



 23%|████████████████▍                                                       | 2287/10000 [5:12:33<17:19:54,  8.09s/it]

Epoch: 2286 | Training loss 2.872123382985592 | Validation loss 2.79879093170166



 23%|████████████████▍                                                       | 2288/10000 [5:12:41<17:23:12,  8.12s/it]

Epoch: 2287 | Training loss 2.868763655424118 | Validation loss 2.7904222309589386



 23%|████████████████▍                                                       | 2289/10000 [5:12:49<17:25:37,  8.14s/it]

Epoch: 2288 | Training loss 2.8672410994768143 | Validation loss 2.792341470718384



 23%|████████████████▍                                                       | 2290/10000 [5:12:57<17:25:02,  8.13s/it]

Epoch: 2289 | Training loss 2.863313779234886 | Validation loss 2.795091062784195



 23%|████████████████▍                                                       | 2291/10000 [5:13:06<17:27:54,  8.16s/it]

Epoch: 2290 | Training loss 2.8607620298862457 | Validation loss 2.7911146581172943



 23%|████████████████▌                                                       | 2292/10000 [5:13:14<17:22:38,  8.12s/it]

Epoch: 2291 | Training loss 2.8594898730516434 | Validation loss 2.7857094407081604



 23%|████████████████▌                                                       | 2293/10000 [5:13:22<17:21:54,  8.11s/it]

Epoch: 2292 | Training loss 2.8719057515263557 | Validation loss 2.7927767038345337



 23%|████████████████▌                                                       | 2294/10000 [5:13:30<17:23:14,  8.12s/it]

Epoch: 2293 | Training loss 2.868695989251137 | Validation loss 2.7886442244052887



 23%|████████████████▌                                                       | 2295/10000 [5:13:38<17:22:54,  8.12s/it]

Epoch: 2294 | Training loss 2.8722440898418427 | Validation loss 2.78854039311409



 23%|████████████████▌                                                       | 2296/10000 [5:13:46<17:20:46,  8.11s/it]

Epoch: 2295 | Training loss 2.871016353368759 | Validation loss 2.7976385056972504



 23%|████████████████▌                                                       | 2297/10000 [5:13:54<17:23:27,  8.13s/it]

Epoch: 2296 | Training loss 2.8651511818170547 | Validation loss 2.791853815317154



 23%|████████████████▌                                                       | 2298/10000 [5:14:02<17:22:19,  8.12s/it]

Epoch: 2297 | Training loss 2.8663302212953568 | Validation loss 2.7900314033031464



 23%|████████████████▌                                                       | 2299/10000 [5:14:10<17:18:30,  8.09s/it]

Epoch: 2298 | Training loss 2.869059681892395 | Validation loss 2.79450923204422



 23%|████████████████▌                                                       | 2300/10000 [5:14:19<17:18:50,  8.09s/it]

Epoch: 2299 | Training loss 2.868293412029743 | Validation loss 2.7988347113132477



 23%|████████████████▌                                                       | 2301/10000 [5:14:27<17:20:48,  8.11s/it]

Epoch: 2300 | Training loss 2.8662658110260963 | Validation loss 2.7875665724277496



 23%|████████████████▌                                                       | 2302/10000 [5:14:35<17:23:14,  8.13s/it]

Epoch: 2301 | Training loss 2.8712978661060333 | Validation loss 2.7906169295310974



 23%|████████████████▌                                                       | 2303/10000 [5:14:43<17:23:12,  8.13s/it]

Epoch: 2302 | Training loss 2.86928378790617 | Validation loss 2.7957977652549744



 23%|████████████████▌                                                       | 2304/10000 [5:14:51<17:24:49,  8.15s/it]

Epoch: 2303 | Training loss 2.8692083805799484 | Validation loss 2.7949447333812714



 23%|████████████████▌                                                       | 2305/10000 [5:14:59<17:24:09,  8.14s/it]

Epoch: 2304 | Training loss 2.86904539167881 | Validation loss 2.7943532168865204



 23%|████████████████▌                                                       | 2306/10000 [5:15:08<17:27:28,  8.17s/it]

Epoch: 2305 | Training loss 2.8636148497462273 | Validation loss 2.7851211428642273



 23%|████████████████▌                                                       | 2307/10000 [5:15:16<17:26:36,  8.16s/it]

Epoch: 2306 | Training loss 2.870935007929802 | Validation loss 2.790474832057953



 23%|████████████████▌                                                       | 2308/10000 [5:15:24<17:23:02,  8.14s/it]

Epoch: 2307 | Training loss 2.8689776211977005 | Validation loss 2.8043225407600403



 23%|████████████████▌                                                       | 2309/10000 [5:15:32<17:24:34,  8.15s/it]

Epoch: 2308 | Training loss 2.868186369538307 | Validation loss 2.789988875389099



 23%|████████████████▋                                                       | 2310/10000 [5:15:40<17:20:14,  8.12s/it]

Epoch: 2309 | Training loss 2.8615382462739944 | Validation loss 2.789815902709961



 23%|████████████████▋                                                       | 2311/10000 [5:15:48<17:20:12,  8.12s/it]

Epoch: 2310 | Training loss 2.8616103753447533 | Validation loss 2.787243515253067



 23%|████████████████▋                                                       | 2312/10000 [5:15:56<17:17:41,  8.10s/it]

Epoch: 2311 | Training loss 2.8686583936214447 | Validation loss 2.7952442169189453



 23%|████████████████▋                                                       | 2313/10000 [5:16:04<17:18:52,  8.11s/it]

Epoch: 2312 | Training loss 2.863220453262329 | Validation loss 2.793518364429474



 23%|████████████████▋                                                       | 2314/10000 [5:16:12<17:19:07,  8.11s/it]

Epoch: 2313 | Training loss 2.8659928664565086 | Validation loss 2.7936408817768097



 23%|████████████████▋                                                       | 2315/10000 [5:16:21<17:21:34,  8.13s/it]

Epoch: 2314 | Training loss 2.8609616830945015 | Validation loss 2.7921056151390076



 23%|████████████████▋                                                       | 2316/10000 [5:16:29<17:25:07,  8.16s/it]

Epoch: 2315 | Training loss 2.864552579820156 | Validation loss 2.7959248423576355



 23%|████████████████▋                                                       | 2317/10000 [5:16:37<17:24:03,  8.15s/it]

Epoch: 2316 | Training loss 2.8664808869361877 | Validation loss 2.7906315326690674



 23%|████████████████▋                                                       | 2318/10000 [5:16:45<17:21:15,  8.13s/it]

Epoch: 2317 | Training loss 2.867034524679184 | Validation loss 2.79358246922493



 23%|████████████████▋                                                       | 2319/10000 [5:16:53<17:21:45,  8.14s/it]

Epoch: 2318 | Training loss 2.8692902997136116 | Validation loss 2.7968238294124603



 23%|████████████████▋                                                       | 2320/10000 [5:17:01<17:20:30,  8.13s/it]

Epoch: 2319 | Training loss 2.862749069929123 | Validation loss 2.795994520187378



 23%|████████████████▋                                                       | 2321/10000 [5:17:09<17:17:31,  8.11s/it]

Epoch: 2320 | Training loss 2.871360592544079 | Validation loss 2.7912321984767914



 23%|████████████████▋                                                       | 2322/10000 [5:17:17<17:18:32,  8.12s/it]

Epoch: 2321 | Training loss 2.8614720702171326 | Validation loss 2.7859542965888977



 23%|████████████████▋                                                       | 2323/10000 [5:17:26<17:17:50,  8.11s/it]

Epoch: 2322 | Training loss 2.8633162453770638 | Validation loss 2.7877442240715027



 23%|████████████████▋                                                       | 2324/10000 [5:17:34<17:12:30,  8.07s/it]

Epoch: 2323 | Training loss 2.8669233173131943 | Validation loss 2.7889938056468964



 23%|████████████████▋                                                       | 2325/10000 [5:17:42<17:12:15,  8.07s/it]

Epoch: 2324 | Training loss 2.866034008562565 | Validation loss 2.785051167011261



 23%|████████████████▋                                                       | 2326/10000 [5:17:50<17:11:38,  8.07s/it]

Epoch: 2325 | Training loss 2.8636050149798393 | Validation loss 2.792934536933899



 23%|████████████████▊                                                       | 2327/10000 [5:17:58<17:11:49,  8.07s/it]

Epoch: 2326 | Training loss 2.8706177845597267 | Validation loss 2.79067924618721



 23%|████████████████▊                                                       | 2328/10000 [5:18:06<17:12:23,  8.07s/it]

Epoch: 2327 | Training loss 2.8640800416469574 | Validation loss 2.7905271649360657



 23%|████████████████▊                                                       | 2329/10000 [5:18:14<17:13:17,  8.08s/it]

Epoch: 2328 | Training loss 2.8617022708058357 | Validation loss 2.7916741371154785



 23%|████████████████▊                                                       | 2330/10000 [5:18:22<17:15:53,  8.10s/it]

Epoch: 2329 | Training loss 2.8677338510751724 | Validation loss 2.7931834161281586



 23%|████████████████▊                                                       | 2331/10000 [5:18:30<17:14:27,  8.09s/it]

Epoch: 2330 | Training loss 2.871028982102871 | Validation loss 2.792254388332367



 23%|████████████████▊                                                       | 2332/10000 [5:18:38<17:13:29,  8.09s/it]

Epoch: 2331 | Training loss 2.8699795082211494 | Validation loss 2.7872636020183563



 23%|████████████████▊                                                       | 2333/10000 [5:18:47<17:21:18,  8.15s/it]

Epoch: 2332 | Training loss 2.8659948483109474 | Validation loss 2.78533798456192



 23%|████████████████▊                                                       | 2334/10000 [5:18:55<17:21:46,  8.15s/it]

Epoch: 2333 | Training loss 2.8713768050074577 | Validation loss 2.7873476445674896



 23%|████████████████▊                                                       | 2335/10000 [5:19:03<17:19:42,  8.14s/it]

Epoch: 2334 | Training loss 2.866954132914543 | Validation loss 2.7872171700000763



 23%|████████████████▊                                                       | 2336/10000 [5:19:11<17:18:03,  8.13s/it]

Epoch: 2335 | Training loss 2.868361175060272 | Validation loss 2.7937257289886475



 23%|████████████████▊                                                       | 2337/10000 [5:19:19<17:15:40,  8.11s/it]

Epoch: 2336 | Training loss 2.8669181168079376 | Validation loss 2.7863985002040863



 23%|████████████████▊                                                       | 2338/10000 [5:19:27<17:15:56,  8.11s/it]

Epoch: 2337 | Training loss 2.8726849108934402 | Validation loss 2.7853888273239136



 23%|████████████████▊                                                       | 2339/10000 [5:19:35<17:15:26,  8.11s/it]

Epoch: 2338 | Training loss 2.86592535674572 | Validation loss 2.785342752933502



 23%|████████████████▊                                                       | 2340/10000 [5:19:43<17:16:07,  8.12s/it]

Epoch: 2339 | Training loss 2.8738929256796837 | Validation loss 2.795220732688904



 23%|████████████████▊                                                       | 2341/10000 [5:19:51<17:17:11,  8.13s/it]

Epoch: 2340 | Training loss 2.8631196469068527 | Validation loss 2.791946440935135



 23%|████████████████▊                                                       | 2342/10000 [5:20:00<17:14:56,  8.11s/it]

Epoch: 2341 | Training loss 2.8659387677907944 | Validation loss 2.7884106934070587



 23%|████████████████▊                                                       | 2343/10000 [5:20:08<17:09:39,  8.07s/it]

Epoch: 2342 | Training loss 2.8655840530991554 | Validation loss 2.794193595647812



 23%|████████████████▉                                                       | 2344/10000 [5:20:16<17:10:54,  8.08s/it]

Epoch: 2343 | Training loss 2.8689223378896713 | Validation loss 2.789755553007126



 23%|████████████████▉                                                       | 2345/10000 [5:20:24<17:13:25,  8.10s/it]

Epoch: 2344 | Training loss 2.8700181916356087 | Validation loss 2.7931578755378723



 23%|████████████████▉                                                       | 2346/10000 [5:20:32<17:13:52,  8.10s/it]

Epoch: 2345 | Training loss 2.86425544321537 | Validation loss 2.7856062948703766



 23%|████████████████▉                                                       | 2347/10000 [5:20:40<17:13:43,  8.10s/it]

Epoch: 2346 | Training loss 2.86154668033123 | Validation loss 2.7890973687171936



 23%|████████████████▉                                                       | 2348/10000 [5:20:48<17:17:41,  8.14s/it]

Epoch: 2347 | Training loss 2.8668392673134804 | Validation loss 2.788935035467148



 23%|████████████████▉                                                       | 2349/10000 [5:20:56<17:18:55,  8.15s/it]

Epoch: 2348 | Training loss 2.8649522587656975 | Validation loss 2.789302945137024



 24%|████████████████▉                                                       | 2350/10000 [5:21:05<17:20:32,  8.16s/it]

Epoch: 2349 | Training loss 2.8602822199463844 | Validation loss 2.7885302305221558



 24%|████████████████▉                                                       | 2351/10000 [5:21:13<17:17:33,  8.14s/it]

Epoch: 2350 | Training loss 2.8675184175372124 | Validation loss 2.7891984283924103



 24%|████████████████▉                                                       | 2352/10000 [5:21:21<17:19:40,  8.16s/it]

Epoch: 2351 | Training loss 2.8687838464975357 | Validation loss 2.793232887983322



 24%|████████████████▉                                                       | 2353/10000 [5:21:29<17:19:50,  8.16s/it]

Epoch: 2352 | Training loss 2.867685243487358 | Validation loss 2.7975257337093353



 24%|████████████████▉                                                       | 2354/10000 [5:21:37<17:18:06,  8.15s/it]

Epoch: 2353 | Training loss 2.8741381242871284 | Validation loss 2.805616617202759



 24%|████████████████▉                                                       | 2355/10000 [5:21:45<17:18:06,  8.15s/it]

Epoch: 2354 | Training loss 2.865919627249241 | Validation loss 2.7978500723838806



 24%|████████████████▉                                                       | 2356/10000 [5:21:54<17:22:55,  8.19s/it]

Epoch: 2355 | Training loss 2.8695135936141014 | Validation loss 2.8065930902957916



 24%|████████████████▉                                                       | 2357/10000 [5:22:02<17:19:30,  8.16s/it]

Epoch: 2356 | Training loss 2.865550182759762 | Validation loss 2.7933413088321686



 24%|████████████████▉                                                       | 2358/10000 [5:22:10<17:20:17,  8.17s/it]

Epoch: 2357 | Training loss 2.8621119558811188 | Validation loss 2.789848417043686



 24%|████████████████▉                                                       | 2359/10000 [5:22:18<17:13:58,  8.12s/it]

Epoch: 2358 | Training loss 2.8692493736743927 | Validation loss 2.7884062826633453



 24%|████████████████▉                                                       | 2360/10000 [5:22:26<17:13:20,  8.12s/it]

Epoch: 2359 | Training loss 2.869720220565796 | Validation loss 2.7933702170848846



 24%|████████████████▉                                                       | 2361/10000 [5:22:34<17:11:58,  8.11s/it]

Epoch: 2360 | Training loss 2.869182512164116 | Validation loss 2.7944988012313843



 24%|█████████████████                                                       | 2362/10000 [5:22:42<17:12:12,  8.11s/it]

Epoch: 2361 | Training loss 2.867597281932831 | Validation loss 2.7934675216674805



 24%|█████████████████                                                       | 2363/10000 [5:22:50<17:09:22,  8.09s/it]

Epoch: 2362 | Training loss 2.8644944429397583 | Validation loss 2.7934423983097076



 24%|█████████████████                                                       | 2364/10000 [5:22:58<17:10:55,  8.10s/it]

Epoch: 2363 | Training loss 2.866681605577469 | Validation loss 2.7915311753749847



 24%|█████████████████                                                       | 2365/10000 [5:23:06<17:10:41,  8.10s/it]

Epoch: 2364 | Training loss 2.8669310957193375 | Validation loss 2.7873367071151733



 24%|█████████████████                                                       | 2366/10000 [5:23:14<17:09:35,  8.09s/it]

Epoch: 2365 | Training loss 2.8669063970446587 | Validation loss 2.787008911371231



 24%|█████████████████                                                       | 2367/10000 [5:23:23<17:12:08,  8.11s/it]

Epoch: 2366 | Training loss 2.8631050139665604 | Validation loss 2.790007621049881



 24%|█████████████████                                                       | 2368/10000 [5:23:31<17:08:31,  8.09s/it]

Epoch: 2367 | Training loss 2.8691305816173553 | Validation loss 2.79751855134964



 24%|█████████████████                                                       | 2369/10000 [5:23:39<17:09:09,  8.09s/it]

Epoch: 2368 | Training loss 2.8690908700227737 | Validation loss 2.797135204076767



 24%|█████████████████                                                       | 2370/10000 [5:23:47<17:10:33,  8.10s/it]

Epoch: 2369 | Training loss 2.862111084163189 | Validation loss 2.7932026088237762



 24%|█████████████████                                                       | 2371/10000 [5:23:55<17:11:04,  8.11s/it]

Epoch: 2370 | Training loss 2.866147503256798 | Validation loss 2.786428302526474



 24%|█████████████████                                                       | 2372/10000 [5:24:03<17:07:05,  8.08s/it]

Epoch: 2371 | Training loss 2.862695463001728 | Validation loss 2.7905543744564056



 24%|█████████████████                                                       | 2373/10000 [5:24:11<17:05:25,  8.07s/it]

Epoch: 2372 | Training loss 2.86703472584486 | Validation loss 2.787754476070404



 24%|█████████████████                                                       | 2374/10000 [5:24:19<17:08:19,  8.09s/it]

Epoch: 2373 | Training loss 2.866630792617798 | Validation loss 2.7938022017478943



 24%|█████████████████                                                       | 2375/10000 [5:24:27<17:10:27,  8.11s/it]

Epoch: 2374 | Training loss 2.86702349036932 | Validation loss 2.7921320497989655



 24%|█████████████████                                                       | 2376/10000 [5:24:35<17:08:53,  8.10s/it]

Epoch: 2375 | Training loss 2.8651273995637894 | Validation loss 2.798042744398117



 24%|█████████████████                                                       | 2377/10000 [5:24:44<17:08:19,  8.09s/it]

Epoch: 2376 | Training loss 2.8638524264097214 | Validation loss 2.791409343481064



 24%|█████████████████                                                       | 2378/10000 [5:24:52<17:13:45,  8.14s/it]

Epoch: 2377 | Training loss 2.869189575314522 | Validation loss 2.796688735485077



 24%|█████████████████▏                                                      | 2379/10000 [5:25:00<17:14:03,  8.14s/it]

Epoch: 2378 | Training loss 2.8655385598540306 | Validation loss 2.791776716709137



 24%|█████████████████▏                                                      | 2380/10000 [5:25:08<17:12:26,  8.13s/it]

Epoch: 2379 | Training loss 2.8656604811549187 | Validation loss 2.7985754013061523



 24%|█████████████████▏                                                      | 2381/10000 [5:25:16<17:10:40,  8.12s/it]

Epoch: 2380 | Training loss 2.866010196506977 | Validation loss 2.7936426997184753



 24%|█████████████████▏                                                      | 2382/10000 [5:25:24<17:11:05,  8.12s/it]

Epoch: 2381 | Training loss 2.8691239953041077 | Validation loss 2.7931028008461



 24%|█████████████████▏                                                      | 2383/10000 [5:25:32<17:13:09,  8.14s/it]

Epoch: 2382 | Training loss 2.863610878586769 | Validation loss 2.790495842695236



 24%|█████████████████▏                                                      | 2384/10000 [5:25:41<17:14:05,  8.15s/it]

Epoch: 2383 | Training loss 2.871014676988125 | Validation loss 2.7961238026618958



 24%|█████████████████▏                                                      | 2385/10000 [5:25:49<17:14:34,  8.15s/it]

Epoch: 2384 | Training loss 2.8674829304218292 | Validation loss 2.7905848622322083



 24%|█████████████████▏                                                      | 2386/10000 [5:25:57<17:17:11,  8.17s/it]

Epoch: 2385 | Training loss 2.8641021847724915 | Validation loss 2.7904520332813263



 24%|█████████████████▏                                                      | 2387/10000 [5:26:05<17:14:51,  8.16s/it]

Epoch: 2386 | Training loss 2.866552881896496 | Validation loss 2.7975585758686066



 24%|█████████████████▏                                                      | 2388/10000 [5:26:13<17:13:48,  8.15s/it]

Epoch: 2387 | Training loss 2.8712523579597473 | Validation loss 2.7954419553279877



 24%|█████████████████▏                                                      | 2389/10000 [5:26:21<17:11:54,  8.13s/it]

Epoch: 2388 | Training loss 2.871220037341118 | Validation loss 2.796443849802017



 24%|█████████████████▏                                                      | 2390/10000 [5:26:30<17:13:58,  8.15s/it]

Epoch: 2389 | Training loss 2.863316908478737 | Validation loss 2.7890028059482574



 24%|█████████████████▏                                                      | 2391/10000 [5:26:38<17:14:00,  8.15s/it]

Epoch: 2390 | Training loss 2.8643298745155334 | Validation loss 2.791896641254425



 24%|█████████████████▏                                                      | 2392/10000 [5:26:46<17:10:47,  8.13s/it]

Epoch: 2391 | Training loss 2.8704227209091187 | Validation loss 2.7936235070228577



 24%|█████████████████▏                                                      | 2393/10000 [5:26:54<17:09:35,  8.12s/it]

Epoch: 2392 | Training loss 2.8645010516047478 | Validation loss 2.7974709570407867



 24%|█████████████████▏                                                      | 2394/10000 [5:27:02<17:15:17,  8.17s/it]

Epoch: 2393 | Training loss 2.866655223071575 | Validation loss 2.792492300271988



 24%|█████████████████▏                                                      | 2395/10000 [5:27:10<17:14:30,  8.16s/it]

Epoch: 2394 | Training loss 2.8659382089972496 | Validation loss 2.795508325099945



 24%|█████████████████▎                                                      | 2396/10000 [5:27:18<17:10:16,  8.13s/it]

Epoch: 2395 | Training loss 2.8669989481568336 | Validation loss 2.7862748503684998



 24%|█████████████████▎                                                      | 2397/10000 [5:27:26<17:12:02,  8.14s/it]

Epoch: 2396 | Training loss 2.8676401004195213 | Validation loss 2.803652346134186



 24%|█████████████████▎                                                      | 2398/10000 [5:27:35<17:09:03,  8.12s/it]

Epoch: 2397 | Training loss 2.8700920566916466 | Validation loss 2.790341764688492



 24%|█████████████████▎                                                      | 2399/10000 [5:27:44<17:57:24,  8.50s/it]

Epoch: 2398 | Training loss 2.868695393204689 | Validation loss 2.8009521663188934



 24%|█████████████████▎                                                      | 2400/10000 [5:27:53<18:13:56,  8.64s/it]

Epoch: 2399 | Training loss 2.8591747283935547 | Validation loss 2.7874172627925873



 24%|█████████████████▎                                                      | 2401/10000 [5:28:02<18:17:34,  8.67s/it]

Epoch: 2400 | Training loss 2.869125649333 | Validation loss 2.7914907932281494



 24%|█████████████████▎                                                      | 2402/10000 [5:28:13<20:12:22,  9.57s/it]

Epoch: 2401 | Training loss 2.8648239374160767 | Validation loss 2.791356533765793



 24%|█████████████████▎                                                      | 2403/10000 [5:28:22<19:25:26,  9.20s/it]

Epoch: 2402 | Training loss 2.8719200789928436 | Validation loss 2.7936782836914062



 24%|█████████████████▎                                                      | 2404/10000 [5:28:30<18:50:44,  8.93s/it]

Epoch: 2403 | Training loss 2.8666632398962975 | Validation loss 2.7867851555347443



 24%|█████████████████▎                                                      | 2405/10000 [5:28:38<18:28:57,  8.76s/it]

Epoch: 2404 | Training loss 2.86989589035511 | Validation loss 2.7909691631793976



 24%|█████████████████▎                                                      | 2406/10000 [5:28:47<18:10:18,  8.61s/it]

Epoch: 2405 | Training loss 2.8655753806233406 | Validation loss 2.791293054819107



 24%|█████████████████▎                                                      | 2407/10000 [5:28:55<17:50:39,  8.46s/it]

Epoch: 2406 | Training loss 2.8660091683268547 | Validation loss 2.7847644984722137



 24%|█████████████████▎                                                      | 2408/10000 [5:29:03<17:43:12,  8.40s/it]

Epoch: 2407 | Training loss 2.86753498762846 | Validation loss 2.793700784444809



 24%|█████████████████▎                                                      | 2409/10000 [5:29:11<17:37:07,  8.36s/it]

Epoch: 2408 | Training loss 2.8637585490942 | Validation loss 2.792940676212311



 24%|█████████████████▎                                                      | 2410/10000 [5:29:19<17:29:04,  8.29s/it]

Epoch: 2409 | Training loss 2.873709909617901 | Validation loss 2.7903210520744324



 24%|█████████████████▎                                                      | 2411/10000 [5:29:27<17:22:15,  8.24s/it]

Epoch: 2410 | Training loss 2.8640290647745132 | Validation loss 2.7836900651454926



 24%|█████████████████▎                                                      | 2412/10000 [5:29:36<17:20:32,  8.23s/it]

Epoch: 2411 | Training loss 2.8657125532627106 | Validation loss 2.789111167192459



 24%|█████████████████▎                                                      | 2413/10000 [5:29:44<17:22:03,  8.24s/it]

Epoch: 2412 | Training loss 2.8672083392739296 | Validation loss 2.7925549745559692



 24%|█████████████████▍                                                      | 2414/10000 [5:29:52<17:20:09,  8.23s/it]

Epoch: 2413 | Training loss 2.8674157932400703 | Validation loss 2.793593615293503



 24%|█████████████████▍                                                      | 2415/10000 [5:30:00<17:17:04,  8.20s/it]

Epoch: 2414 | Training loss 2.860449180006981 | Validation loss 2.7965697944164276



 24%|█████████████████▍                                                      | 2416/10000 [5:30:08<17:15:30,  8.19s/it]

Epoch: 2415 | Training loss 2.871196761727333 | Validation loss 2.7946361303329468



 24%|█████████████████▍                                                      | 2417/10000 [5:30:17<17:21:18,  8.24s/it]

Epoch: 2416 | Training loss 2.864638425409794 | Validation loss 2.7885240614414215



 24%|█████████████████▍                                                      | 2418/10000 [5:30:25<17:18:18,  8.22s/it]

Epoch: 2417 | Training loss 2.8681652769446373 | Validation loss 2.7934648394584656



 24%|█████████████████▍                                                      | 2419/10000 [5:30:33<17:17:09,  8.21s/it]

Epoch: 2418 | Training loss 2.867700845003128 | Validation loss 2.7863754332065582



 24%|█████████████████▍                                                      | 2420/10000 [5:30:41<17:18:49,  8.22s/it]

Epoch: 2419 | Training loss 2.8587647303938866 | Validation loss 2.7914201617240906



 24%|█████████████████▍                                                      | 2421/10000 [5:30:50<17:14:34,  8.19s/it]

Epoch: 2420 | Training loss 2.8661049231886864 | Validation loss 2.789264887571335



 24%|█████████████████▍                                                      | 2422/10000 [5:30:58<17:18:15,  8.22s/it]

Epoch: 2421 | Training loss 2.86618585139513 | Validation loss 2.797056347131729



 24%|█████████████████▍                                                      | 2423/10000 [5:31:06<17:14:42,  8.19s/it]

Epoch: 2422 | Training loss 2.870565913617611 | Validation loss 2.791159212589264



 24%|█████████████████▍                                                      | 2424/10000 [5:31:14<17:15:49,  8.20s/it]

Epoch: 2423 | Training loss 2.86924035102129 | Validation loss 2.7921416461467743



 24%|█████████████████▍                                                      | 2425/10000 [5:31:22<17:11:10,  8.17s/it]

Epoch: 2424 | Training loss 2.8678337186574936 | Validation loss 2.792039215564728



 24%|█████████████████▍                                                      | 2426/10000 [5:31:30<17:09:38,  8.16s/it]

Epoch: 2425 | Training loss 2.8641039356589317 | Validation loss 2.7910013794898987



 24%|█████████████████▍                                                      | 2427/10000 [5:31:39<17:12:05,  8.18s/it]

Epoch: 2426 | Training loss 2.85627081990242 | Validation loss 2.784071683883667



 24%|█████████████████▍                                                      | 2428/10000 [5:31:47<17:14:59,  8.20s/it]

Epoch: 2427 | Training loss 2.873133033514023 | Validation loss 2.7917976081371307



 24%|█████████████████▍                                                      | 2429/10000 [5:31:55<17:15:46,  8.21s/it]

Epoch: 2428 | Training loss 2.8567707762122154 | Validation loss 2.790055274963379



 24%|█████████████████▍                                                      | 2430/10000 [5:32:03<17:17:26,  8.22s/it]

Epoch: 2429 | Training loss 2.8631394132971764 | Validation loss 2.786439061164856



 24%|█████████████████▌                                                      | 2431/10000 [5:32:12<17:20:18,  8.25s/it]

Epoch: 2430 | Training loss 2.8630634620785713 | Validation loss 2.7935926020145416



 24%|█████████████████▌                                                      | 2432/10000 [5:32:20<17:19:26,  8.24s/it]

Epoch: 2431 | Training loss 2.865373656153679 | Validation loss 2.789443463087082



 24%|█████████████████▌                                                      | 2433/10000 [5:32:28<17:16:56,  8.22s/it]

Epoch: 2432 | Training loss 2.866756424307823 | Validation loss 2.7943760454654694



 24%|█████████████████▌                                                      | 2434/10000 [5:32:36<17:14:35,  8.20s/it]

Epoch: 2433 | Training loss 2.870576359331608 | Validation loss 2.789295017719269



 24%|█████████████████▌                                                      | 2435/10000 [5:32:44<17:11:04,  8.18s/it]

Epoch: 2434 | Training loss 2.86166575551033 | Validation loss 2.794792801141739



 24%|█████████████████▌                                                      | 2436/10000 [5:32:53<17:12:40,  8.19s/it]

Epoch: 2435 | Training loss 2.8691040873527527 | Validation loss 2.789485663175583



 24%|█████████████████▌                                                      | 2437/10000 [5:33:01<17:14:25,  8.21s/it]

Epoch: 2436 | Training loss 2.8658749759197235 | Validation loss 2.7880252301692963



 24%|█████████████████▌                                                      | 2438/10000 [5:33:09<17:15:04,  8.21s/it]

Epoch: 2437 | Training loss 2.859882891178131 | Validation loss 2.7882440090179443



 24%|█████████████████▌                                                      | 2439/10000 [5:33:17<17:15:27,  8.22s/it]

Epoch: 2438 | Training loss 2.868792623281479 | Validation loss 2.7898177802562714



 24%|█████████████████▌                                                      | 2440/10000 [5:33:25<17:14:27,  8.21s/it]

Epoch: 2439 | Training loss 2.867282062768936 | Validation loss 2.7876903116703033



 24%|█████████████████▌                                                      | 2441/10000 [5:33:34<17:11:20,  8.19s/it]

Epoch: 2440 | Training loss 2.8651210367679596 | Validation loss 2.7871335446834564



 24%|█████████████████▌                                                      | 2442/10000 [5:33:42<17:06:50,  8.15s/it]

Epoch: 2441 | Training loss 2.868770517408848 | Validation loss 2.789836138486862



 24%|█████████████████▌                                                      | 2443/10000 [5:33:50<17:03:01,  8.12s/it]

Epoch: 2442 | Training loss 2.863061472773552 | Validation loss 2.7867890894412994



 24%|█████████████████▌                                                      | 2444/10000 [5:33:58<16:54:24,  8.06s/it]

Epoch: 2443 | Training loss 2.860981397330761 | Validation loss 2.789424806833267



 24%|█████████████████▌                                                      | 2445/10000 [5:34:06<16:57:14,  8.08s/it]

Epoch: 2444 | Training loss 2.8612918704748154 | Validation loss 2.788046181201935



 24%|█████████████████▌                                                      | 2446/10000 [5:34:14<17:01:27,  8.11s/it]

Epoch: 2445 | Training loss 2.871688336133957 | Validation loss 2.791694939136505



 24%|█████████████████▌                                                      | 2447/10000 [5:34:22<17:04:33,  8.14s/it]

Epoch: 2446 | Training loss 2.8690100759267807 | Validation loss 2.7887070775032043



 24%|█████████████████▋                                                      | 2448/10000 [5:34:30<16:59:15,  8.10s/it]

Epoch: 2447 | Training loss 2.864818900823593 | Validation loss 2.79101100564003



 24%|█████████████████▋                                                      | 2449/10000 [5:34:38<17:01:39,  8.12s/it]

Epoch: 2448 | Training loss 2.866417594254017 | Validation loss 2.804632306098938



 24%|█████████████████▋                                                      | 2450/10000 [5:34:46<17:03:48,  8.14s/it]

Epoch: 2449 | Training loss 2.8681974709033966 | Validation loss 2.790495842695236



 25%|█████████████████▋                                                      | 2451/10000 [5:34:55<17:08:45,  8.18s/it]

Epoch: 2450 | Training loss 2.865111045539379 | Validation loss 2.790683299303055



 25%|█████████████████▋                                                      | 2452/10000 [5:35:03<17:06:58,  8.16s/it]

Epoch: 2451 | Training loss 2.8659453317523003 | Validation loss 2.7856436669826508



 25%|█████████████████▋                                                      | 2453/10000 [5:35:11<17:03:50,  8.14s/it]

Epoch: 2452 | Training loss 2.8730257600545883 | Validation loss 2.7906698882579803



 25%|█████████████████▋                                                      | 2454/10000 [5:35:19<17:01:36,  8.12s/it]

Epoch: 2453 | Training loss 2.865514285862446 | Validation loss 2.794080913066864



 25%|█████████████████▋                                                      | 2455/10000 [5:35:27<17:01:47,  8.13s/it]

Epoch: 2454 | Training loss 2.8600014597177505 | Validation loss 2.7940019369125366



 25%|█████████████████▋                                                      | 2456/10000 [5:35:35<17:00:15,  8.11s/it]

Epoch: 2455 | Training loss 2.8633714392781258 | Validation loss 2.793371856212616



 25%|█████████████████▋                                                      | 2457/10000 [5:35:43<16:58:28,  8.10s/it]

Epoch: 2456 | Training loss 2.8682771623134613 | Validation loss 2.804371476173401



 25%|█████████████████▋                                                      | 2458/10000 [5:35:51<16:59:01,  8.11s/it]

Epoch: 2457 | Training loss 2.8678179010748863 | Validation loss 2.8001175224781036



 25%|█████████████████▋                                                      | 2459/10000 [5:36:00<17:02:20,  8.13s/it]

Epoch: 2458 | Training loss 2.869667664170265 | Validation loss 2.785431832075119



 25%|█████████████████▋                                                      | 2460/10000 [5:36:08<17:03:17,  8.14s/it]

Epoch: 2459 | Training loss 2.8696655109524727 | Validation loss 2.7960003912448883



 25%|█████████████████▋                                                      | 2461/10000 [5:36:16<17:06:50,  8.17s/it]

Epoch: 2460 | Training loss 2.859824076294899 | Validation loss 2.79461869597435



 25%|█████████████████▋                                                      | 2462/10000 [5:36:24<17:02:49,  8.14s/it]

Epoch: 2461 | Training loss 2.86492557823658 | Validation loss 2.7981275022029877



 25%|█████████████████▋                                                      | 2463/10000 [5:36:32<17:02:58,  8.14s/it]

Epoch: 2462 | Training loss 2.8675424605607986 | Validation loss 2.7965122759342194



 25%|█████████████████▋                                                      | 2464/10000 [5:36:40<17:04:42,  8.16s/it]

Epoch: 2463 | Training loss 2.8597992658615112 | Validation loss 2.7884148359298706



 25%|█████████████████▋                                                      | 2465/10000 [5:36:49<17:01:12,  8.13s/it]

Epoch: 2464 | Training loss 2.865770362317562 | Validation loss 2.78676238656044



 25%|█████████████████▊                                                      | 2466/10000 [5:36:57<17:01:12,  8.13s/it]

Epoch: 2465 | Training loss 2.8651252686977386 | Validation loss 2.792745977640152



 25%|█████████████████▊                                                      | 2467/10000 [5:37:05<16:58:48,  8.11s/it]

Epoch: 2466 | Training loss 2.865268625319004 | Validation loss 2.791548103094101



 25%|█████████████████▊                                                      | 2468/10000 [5:37:13<17:01:04,  8.13s/it]

Epoch: 2467 | Training loss 2.8667232990264893 | Validation loss 2.790908008813858



 25%|█████████████████▊                                                      | 2469/10000 [5:37:21<16:56:50,  8.10s/it]

Epoch: 2468 | Training loss 2.865016169846058 | Validation loss 2.8025055825710297



 25%|█████████████████▊                                                      | 2470/10000 [5:37:29<16:59:05,  8.12s/it]

Epoch: 2469 | Training loss 2.8689165338873863 | Validation loss 2.794528067111969



 25%|█████████████████▊                                                      | 2471/10000 [5:37:37<16:53:52,  8.08s/it]

Epoch: 2470 | Training loss 2.8635403886437416 | Validation loss 2.7841529846191406



 25%|█████████████████▊                                                      | 2472/10000 [5:37:45<16:57:27,  8.11s/it]

Epoch: 2471 | Training loss 2.866016775369644 | Validation loss 2.7870795726776123



 25%|█████████████████▊                                                      | 2473/10000 [5:37:53<16:54:03,  8.08s/it]

Epoch: 2472 | Training loss 2.8689237758517265 | Validation loss 2.7923066318035126



 25%|█████████████████▊                                                      | 2474/10000 [5:38:01<16:55:25,  8.10s/it]

Epoch: 2473 | Training loss 2.8686325699090958 | Validation loss 2.7921575605869293



 25%|█████████████████▊                                                      | 2475/10000 [5:38:09<16:52:35,  8.07s/it]

Epoch: 2474 | Training loss 2.87040151655674 | Validation loss 2.7918097376823425



 25%|█████████████████▊                                                      | 2476/10000 [5:38:18<16:51:43,  8.07s/it]

Epoch: 2475 | Training loss 2.8655762374401093 | Validation loss 2.790205419063568



 25%|█████████████████▊                                                      | 2477/10000 [5:38:26<17:01:39,  8.15s/it]

Epoch: 2476 | Training loss 2.868573896586895 | Validation loss 2.7876007556915283



 25%|█████████████████▊                                                      | 2478/10000 [5:38:34<17:01:40,  8.15s/it]

Epoch: 2477 | Training loss 2.869014173746109 | Validation loss 2.792009860277176



 25%|█████████████████▊                                                      | 2479/10000 [5:38:42<16:59:01,  8.13s/it]

Epoch: 2478 | Training loss 2.864820659160614 | Validation loss 2.7922514975070953



 25%|█████████████████▊                                                      | 2480/10000 [5:38:50<16:55:33,  8.10s/it]

Epoch: 2479 | Training loss 2.863857075572014 | Validation loss 2.7888329327106476



 25%|█████████████████▊                                                      | 2481/10000 [5:38:58<16:51:16,  8.07s/it]

Epoch: 2480 | Training loss 2.8704669550061226 | Validation loss 2.7966192066669464



 25%|█████████████████▊                                                      | 2482/10000 [5:39:06<16:52:23,  8.08s/it]

Epoch: 2481 | Training loss 2.8647052720189095 | Validation loss 2.79221373796463



 25%|█████████████████▉                                                      | 2483/10000 [5:39:14<16:55:59,  8.11s/it]

Epoch: 2482 | Training loss 2.8650304079055786 | Validation loss 2.7887089550495148



 25%|█████████████████▉                                                      | 2484/10000 [5:39:23<16:57:51,  8.13s/it]

Epoch: 2483 | Training loss 2.872107781469822 | Validation loss 2.7904770374298096



 25%|█████████████████▉                                                      | 2485/10000 [5:39:31<17:00:56,  8.15s/it]

Epoch: 2484 | Training loss 2.868909776210785 | Validation loss 2.7875705659389496



 25%|█████████████████▉                                                      | 2486/10000 [5:39:39<16:57:39,  8.13s/it]

Epoch: 2485 | Training loss 2.8688864037394524 | Validation loss 2.7933932542800903



 25%|█████████████████▉                                                      | 2487/10000 [5:39:47<16:55:57,  8.11s/it]

Epoch: 2486 | Training loss 2.863086648285389 | Validation loss 2.7898152470588684



 25%|█████████████████▉                                                      | 2488/10000 [5:39:55<16:54:43,  8.10s/it]

Epoch: 2487 | Training loss 2.8708133921027184 | Validation loss 2.787725567817688



 25%|█████████████████▉                                                      | 2489/10000 [5:40:03<16:55:13,  8.11s/it]

Epoch: 2488 | Training loss 2.8629128858447075 | Validation loss 2.7886746525764465



 25%|█████████████████▉                                                      | 2490/10000 [5:40:11<16:58:28,  8.14s/it]

Epoch: 2489 | Training loss 2.8716432377696037 | Validation loss 2.798247307538986



 25%|█████████████████▉                                                      | 2491/10000 [5:40:20<16:59:23,  8.15s/it]

Epoch: 2490 | Training loss 2.864993676543236 | Validation loss 2.7935753762722015



 25%|█████████████████▉                                                      | 2492/10000 [5:40:28<16:57:31,  8.13s/it]

Epoch: 2491 | Training loss 2.871440999209881 | Validation loss 2.790601462125778



 25%|█████████████████▉                                                      | 2493/10000 [5:40:36<16:55:40,  8.12s/it]

Epoch: 2492 | Training loss 2.8642852306365967 | Validation loss 2.7938839197158813



 25%|█████████████████▉                                                      | 2494/10000 [5:40:44<16:55:30,  8.12s/it]

Epoch: 2493 | Training loss 2.865438997745514 | Validation loss 2.789371579885483



 25%|█████████████████▉                                                      | 2495/10000 [5:40:52<16:55:55,  8.12s/it]

Epoch: 2494 | Training loss 2.8602264523506165 | Validation loss 2.7880017459392548



 25%|█████████████████▉                                                      | 2496/10000 [5:41:00<16:56:16,  8.13s/it]

Epoch: 2495 | Training loss 2.864676296710968 | Validation loss 2.7867299914360046



 25%|█████████████████▉                                                      | 2497/10000 [5:41:08<16:55:43,  8.12s/it]

Epoch: 2496 | Training loss 2.8692634105682373 | Validation loss 2.7926928400993347



 25%|█████████████████▉                                                      | 2498/10000 [5:41:16<16:55:26,  8.12s/it]

Epoch: 2497 | Training loss 2.8666025325655937 | Validation loss 2.79843470454216



 25%|█████████████████▉                                                      | 2499/10000 [5:41:24<16:53:55,  8.11s/it]

Epoch: 2498 | Training loss 2.8609037920832634 | Validation loss 2.7880159318447113



 25%|██████████████████                                                      | 2500/10000 [5:41:33<16:54:08,  8.11s/it]

Epoch: 2499 | Training loss 2.8657703921198845 | Validation loss 2.7949827015399933



 25%|██████████████████                                                      | 2501/10000 [5:41:41<16:57:00,  8.14s/it]

Epoch: 2500 | Training loss 2.861113451421261 | Validation loss 2.788568615913391



 25%|██████████████████                                                      | 2502/10000 [5:41:49<16:54:58,  8.12s/it]

Epoch: 2501 | Training loss 2.8643080592155457 | Validation loss 2.7902530431747437



 25%|██████████████████                                                      | 2503/10000 [5:41:57<16:55:13,  8.12s/it]

Epoch: 2502 | Training loss 2.8665828332304955 | Validation loss 2.791335701942444



 25%|██████████████████                                                      | 2504/10000 [5:42:05<16:55:22,  8.13s/it]

Epoch: 2503 | Training loss 2.8723352774977684 | Validation loss 2.7939285933971405



 25%|██████████████████                                                      | 2505/10000 [5:42:13<16:55:56,  8.13s/it]

Epoch: 2504 | Training loss 2.8654774203896523 | Validation loss 2.7934526801109314



 25%|██████████████████                                                      | 2506/10000 [5:42:21<16:55:16,  8.13s/it]

Epoch: 2505 | Training loss 2.8685726150870323 | Validation loss 2.7902643382549286



 25%|██████████████████                                                      | 2507/10000 [5:42:29<16:52:50,  8.11s/it]

Epoch: 2506 | Training loss 2.871880032122135 | Validation loss 2.789489984512329



 25%|██████████████████                                                      | 2508/10000 [5:42:38<16:54:42,  8.13s/it]

Epoch: 2507 | Training loss 2.870940938591957 | Validation loss 2.790387064218521



 25%|██████████████████                                                      | 2509/10000 [5:42:46<16:54:11,  8.12s/it]

Epoch: 2508 | Training loss 2.8651393353939056 | Validation loss 2.798761874437332



 25%|██████████████████                                                      | 2510/10000 [5:42:54<16:50:17,  8.09s/it]

Epoch: 2509 | Training loss 2.8653618469834328 | Validation loss 2.788552761077881



 25%|██████████████████                                                      | 2511/10000 [5:43:02<16:49:27,  8.09s/it]

Epoch: 2510 | Training loss 2.867284342646599 | Validation loss 2.7939814031124115



 25%|██████████████████                                                      | 2512/10000 [5:43:10<16:52:14,  8.11s/it]

Epoch: 2511 | Training loss 2.867065042257309 | Validation loss 2.8001824021339417



 25%|██████████████████                                                      | 2513/10000 [5:43:18<16:52:52,  8.12s/it]

Epoch: 2512 | Training loss 2.863594889640808 | Validation loss 2.7963452637195587



 25%|██████████████████                                                      | 2514/10000 [5:43:26<16:55:06,  8.14s/it]

Epoch: 2513 | Training loss 2.8674425929784775 | Validation loss 2.7877325117588043



 25%|██████████████████                                                      | 2515/10000 [5:43:34<16:53:10,  8.12s/it]

Epoch: 2514 | Training loss 2.8651545718312263 | Validation loss 2.7914693653583527



 25%|██████████████████                                                      | 2516/10000 [5:43:42<16:47:05,  8.07s/it]

Epoch: 2515 | Training loss 2.8590135350823402 | Validation loss 2.789451152086258



 25%|██████████████████                                                      | 2517/10000 [5:43:50<16:44:08,  8.05s/it]

Epoch: 2516 | Training loss 2.864249750971794 | Validation loss 2.7902646958827972



 25%|██████████████████▏                                                     | 2518/10000 [5:43:58<16:45:52,  8.07s/it]

Epoch: 2517 | Training loss 2.8668713346123695 | Validation loss 2.794901192188263



 25%|██████████████████▏                                                     | 2519/10000 [5:44:06<16:45:56,  8.07s/it]

Epoch: 2518 | Training loss 2.8615345433354378 | Validation loss 2.791067451238632



 25%|██████████████████▏                                                     | 2520/10000 [5:44:15<16:49:22,  8.10s/it]

Epoch: 2519 | Training loss 2.8673623874783516 | Validation loss 2.792255014181137



 25%|██████████████████▏                                                     | 2521/10000 [5:44:23<16:53:30,  8.13s/it]

Epoch: 2520 | Training loss 2.8663223311305046 | Validation loss 2.791988730430603



 25%|██████████████████▏                                                     | 2522/10000 [5:44:31<16:52:20,  8.12s/it]

Epoch: 2521 | Training loss 2.8597289994359016 | Validation loss 2.7861150205135345



 25%|██████████████████▏                                                     | 2523/10000 [5:44:39<16:50:58,  8.11s/it]

Epoch: 2522 | Training loss 2.8641641288995743 | Validation loss 2.788294494152069



 25%|██████████████████▏                                                     | 2524/10000 [5:44:47<16:46:50,  8.08s/it]

Epoch: 2523 | Training loss 2.867778919637203 | Validation loss 2.7848577797412872



 25%|██████████████████▏                                                     | 2525/10000 [5:44:55<16:48:39,  8.10s/it]

Epoch: 2524 | Training loss 2.871620923280716 | Validation loss 2.7897519171237946



 25%|██████████████████▏                                                     | 2526/10000 [5:45:03<16:50:30,  8.11s/it]

Epoch: 2525 | Training loss 2.868873320519924 | Validation loss 2.7956457138061523



 25%|██████████████████▏                                                     | 2527/10000 [5:45:11<16:52:19,  8.13s/it]

Epoch: 2526 | Training loss 2.864941328763962 | Validation loss 2.7963497042655945



 25%|██████████████████▏                                                     | 2528/10000 [5:45:20<16:55:23,  8.15s/it]

Epoch: 2527 | Training loss 2.867152862250805 | Validation loss 2.7947936952114105



 25%|██████████████████▏                                                     | 2529/10000 [5:45:28<16:50:24,  8.11s/it]

Epoch: 2528 | Training loss 2.8660050109028816 | Validation loss 2.792377144098282



 25%|██████████████████▏                                                     | 2530/10000 [5:45:36<16:52:15,  8.13s/it]

Epoch: 2529 | Training loss 2.86736898124218 | Validation loss 2.7896707952022552



 25%|██████████████████▏                                                     | 2531/10000 [5:45:44<16:54:30,  8.15s/it]

Epoch: 2530 | Training loss 2.8671884536743164 | Validation loss 2.8010226786136627



 25%|██████████████████▏                                                     | 2532/10000 [5:45:52<16:50:51,  8.12s/it]

Epoch: 2531 | Training loss 2.8694413602352142 | Validation loss 2.7838939428329468



 25%|██████████████████▏                                                     | 2533/10000 [5:46:00<16:45:20,  8.08s/it]

Epoch: 2532 | Training loss 2.867363914847374 | Validation loss 2.786065995693207



 25%|██████████████████▏                                                     | 2534/10000 [5:46:08<16:45:31,  8.08s/it]

Epoch: 2533 | Training loss 2.8733213618397713 | Validation loss 2.8036389350891113



 25%|██████████████████▎                                                     | 2535/10000 [5:46:16<16:46:24,  8.09s/it]

Epoch: 2534 | Training loss 2.8659015223383904 | Validation loss 2.7909112572669983



 25%|██████████████████▎                                                     | 2536/10000 [5:46:25<16:53:15,  8.15s/it]

Epoch: 2535 | Training loss 2.869448870420456 | Validation loss 2.7862851321697235



 25%|██████████████████▎                                                     | 2537/10000 [5:46:33<16:58:33,  8.19s/it]

Epoch: 2536 | Training loss 2.8676654398441315 | Validation loss 2.7922817170619965



 25%|██████████████████▎                                                     | 2538/10000 [5:46:41<17:00:07,  8.20s/it]

Epoch: 2537 | Training loss 2.8640832006931305 | Validation loss 2.797119289636612



 25%|██████████████████▎                                                     | 2539/10000 [5:46:49<16:58:31,  8.19s/it]

Epoch: 2538 | Training loss 2.8688643351197243 | Validation loss 2.7926268875598907



 25%|██████████████████▎                                                     | 2540/10000 [5:46:57<16:55:33,  8.17s/it]

Epoch: 2539 | Training loss 2.8594393357634544 | Validation loss 2.7853971421718597



 25%|██████████████████▎                                                     | 2541/10000 [5:47:05<16:51:42,  8.14s/it]

Epoch: 2540 | Training loss 2.8663825914263725 | Validation loss 2.786119520664215



 25%|██████████████████▎                                                     | 2542/10000 [5:47:14<16:48:37,  8.11s/it]

Epoch: 2541 | Training loss 2.868232451379299 | Validation loss 2.7954890429973602



 25%|██████████████████▎                                                     | 2543/10000 [5:47:22<16:50:49,  8.13s/it]

Epoch: 2542 | Training loss 2.8671672269701958 | Validation loss 2.7892005443573



 25%|██████████████████▎                                                     | 2544/10000 [5:47:30<16:51:13,  8.14s/it]

Epoch: 2543 | Training loss 2.8649861589074135 | Validation loss 2.7908812761306763



 25%|██████████████████▎                                                     | 2545/10000 [5:47:38<16:50:22,  8.13s/it]

Epoch: 2544 | Training loss 2.8640202060341835 | Validation loss 2.7895197570323944



 25%|██████████████████▎                                                     | 2546/10000 [5:47:46<16:50:50,  8.14s/it]

Epoch: 2545 | Training loss 2.8627246394753456 | Validation loss 2.7895889580249786



 25%|██████████████████▎                                                     | 2547/10000 [5:47:54<16:47:05,  8.11s/it]

Epoch: 2546 | Training loss 2.863397054374218 | Validation loss 2.7931338250637054



 25%|██████████████████▎                                                     | 2548/10000 [5:48:02<16:46:46,  8.11s/it]

Epoch: 2547 | Training loss 2.8704117760062218 | Validation loss 2.7936299443244934



 25%|██████████████████▎                                                     | 2549/10000 [5:48:10<16:51:04,  8.14s/it]

Epoch: 2548 | Training loss 2.864594839513302 | Validation loss 2.7989214062690735



 26%|██████████████████▎                                                     | 2550/10000 [5:48:19<16:50:43,  8.14s/it]

Epoch: 2549 | Training loss 2.8612272664904594 | Validation loss 2.789071559906006



 26%|██████████████████▎                                                     | 2551/10000 [5:48:27<16:49:09,  8.13s/it]

Epoch: 2550 | Training loss 2.8606704622507095 | Validation loss 2.7858644127845764



 26%|██████████████████▎                                                     | 2552/10000 [5:48:35<16:44:38,  8.09s/it]

Epoch: 2551 | Training loss 2.864546447992325 | Validation loss 2.8002243638038635



 26%|██████████████████▍                                                     | 2553/10000 [5:48:43<16:45:32,  8.10s/it]

Epoch: 2552 | Training loss 2.8715530037879944 | Validation loss 2.790253132581711



 26%|██████████████████▍                                                     | 2554/10000 [5:48:51<16:46:35,  8.11s/it]

Epoch: 2553 | Training loss 2.8652983382344246 | Validation loss 2.786984860897064



 26%|██████████████████▍                                                     | 2555/10000 [5:48:59<16:42:35,  8.08s/it]

Epoch: 2554 | Training loss 2.865022361278534 | Validation loss 2.7952109277248383



 26%|██████████████████▍                                                     | 2556/10000 [5:49:07<16:40:59,  8.07s/it]

Epoch: 2555 | Training loss 2.8673150166869164 | Validation loss 2.7947748601436615



 26%|██████████████████▍                                                     | 2557/10000 [5:49:15<16:41:35,  8.07s/it]

Epoch: 2556 | Training loss 2.8651497811079025 | Validation loss 2.7908652126789093



 26%|██████████████████▍                                                     | 2558/10000 [5:49:23<16:44:51,  8.10s/it]

Epoch: 2557 | Training loss 2.866655871272087 | Validation loss 2.7954597175121307



 26%|██████████████████▍                                                     | 2559/10000 [5:49:31<16:44:38,  8.10s/it]

Epoch: 2558 | Training loss 2.866309441626072 | Validation loss 2.790048986673355



 26%|██████████████████▍                                                     | 2560/10000 [5:49:39<16:41:37,  8.08s/it]

Epoch: 2559 | Training loss 2.8655010759830475 | Validation loss 2.788740396499634



 26%|██████████████████▍                                                     | 2561/10000 [5:49:48<16:42:57,  8.09s/it]

Epoch: 2560 | Training loss 2.869389683008194 | Validation loss 2.7862121164798737



 26%|██████████████████▍                                                     | 2562/10000 [5:49:56<16:43:15,  8.09s/it]

Epoch: 2561 | Training loss 2.8628422990441322 | Validation loss 2.7864763140678406



 26%|██████████████████▍                                                     | 2563/10000 [5:50:04<16:44:33,  8.10s/it]

Epoch: 2562 | Training loss 2.8689198940992355 | Validation loss 2.7858535647392273



 26%|██████████████████▍                                                     | 2564/10000 [5:50:12<16:43:05,  8.09s/it]

Epoch: 2563 | Training loss 2.8684150651097298 | Validation loss 2.7912240028381348



 26%|██████████████████▍                                                     | 2565/10000 [5:50:20<16:39:08,  8.06s/it]

Epoch: 2564 | Training loss 2.8672020733356476 | Validation loss 2.7889384329319



 26%|██████████████████▍                                                     | 2566/10000 [5:50:28<16:44:34,  8.11s/it]

Epoch: 2565 | Training loss 2.8703611567616463 | Validation loss 2.7905026972293854



 26%|██████████████████▍                                                     | 2567/10000 [5:50:36<16:43:34,  8.10s/it]

Epoch: 2566 | Training loss 2.869162045419216 | Validation loss 2.792284518480301



 26%|██████████████████▍                                                     | 2568/10000 [5:50:44<16:40:09,  8.07s/it]

Epoch: 2567 | Training loss 2.861333727836609 | Validation loss 2.789175122976303



 26%|██████████████████▍                                                     | 2569/10000 [5:50:52<16:36:55,  8.05s/it]

Epoch: 2568 | Training loss 2.866764470934868 | Validation loss 2.795839935541153



 26%|██████████████████▌                                                     | 2570/10000 [5:51:00<16:38:48,  8.07s/it]

Epoch: 2569 | Training loss 2.8670940175652504 | Validation loss 2.797684669494629



 26%|██████████████████▌                                                     | 2571/10000 [5:51:08<16:42:23,  8.10s/it]

Epoch: 2570 | Training loss 2.865172289311886 | Validation loss 2.793364405632019



 26%|██████████████████▌                                                     | 2572/10000 [5:51:16<16:42:30,  8.10s/it]

Epoch: 2571 | Training loss 2.861532151699066 | Validation loss 2.7913466691970825



 26%|██████████████████▌                                                     | 2573/10000 [5:51:25<16:46:38,  8.13s/it]

Epoch: 2572 | Training loss 2.8693948686122894 | Validation loss 2.797204852104187



 26%|██████████████████▌                                                     | 2574/10000 [5:51:33<16:45:26,  8.12s/it]

Epoch: 2573 | Training loss 2.8699960857629776 | Validation loss 2.795560210943222



 26%|██████████████████▌                                                     | 2575/10000 [5:51:41<16:42:02,  8.10s/it]

Epoch: 2574 | Training loss 2.8730815201997757 | Validation loss 2.7970658242702484



 26%|██████████████████▌                                                     | 2576/10000 [5:51:49<16:39:41,  8.08s/it]

Epoch: 2575 | Training loss 2.867575943470001 | Validation loss 2.7932191491127014



 26%|██████████████████▌                                                     | 2577/10000 [5:51:57<16:40:58,  8.09s/it]

Epoch: 2576 | Training loss 2.866801083087921 | Validation loss 2.797725349664688



 26%|██████████████████▌                                                     | 2578/10000 [5:52:05<16:39:32,  8.08s/it]

Epoch: 2577 | Training loss 2.8643912374973297 | Validation loss 2.79316246509552



 26%|██████████████████▌                                                     | 2579/10000 [5:52:13<16:41:26,  8.10s/it]

Epoch: 2578 | Training loss 2.86099973320961 | Validation loss 2.788670778274536



 26%|██████████████████▌                                                     | 2580/10000 [5:52:21<16:42:42,  8.11s/it]

Epoch: 2579 | Training loss 2.863212436437607 | Validation loss 2.79452982544899



 26%|██████████████████▌                                                     | 2581/10000 [5:52:29<16:42:54,  8.11s/it]

Epoch: 2580 | Training loss 2.872147776186466 | Validation loss 2.8066425919532776



 26%|██████████████████▌                                                     | 2582/10000 [5:52:37<16:40:43,  8.09s/it]

Epoch: 2581 | Training loss 2.8677674159407616 | Validation loss 2.801036924123764



 26%|██████████████████▌                                                     | 2583/10000 [5:52:46<16:42:37,  8.11s/it]

Epoch: 2582 | Training loss 2.868866078555584 | Validation loss 2.789700984954834



 26%|██████████████████▌                                                     | 2584/10000 [5:52:54<16:41:34,  8.10s/it]

Epoch: 2583 | Training loss 2.869122862815857 | Validation loss 2.7911503612995148



 26%|██████████████████▌                                                     | 2585/10000 [5:53:02<16:43:39,  8.12s/it]

Epoch: 2584 | Training loss 2.8677659779787064 | Validation loss 2.787457287311554



 26%|██████████████████▌                                                     | 2586/10000 [5:53:10<16:45:10,  8.13s/it]

Epoch: 2585 | Training loss 2.87140304595232 | Validation loss 2.796120136976242



 26%|██████████████████▋                                                     | 2587/10000 [5:53:18<16:39:09,  8.09s/it]

Epoch: 2586 | Training loss 2.875273711979389 | Validation loss 2.788448750972748



 26%|██████████████████▋                                                     | 2588/10000 [5:53:26<16:40:38,  8.10s/it]

Epoch: 2587 | Training loss 2.8670067116618156 | Validation loss 2.789758801460266



 26%|██████████████████▋                                                     | 2589/10000 [5:53:34<16:40:32,  8.10s/it]

Epoch: 2588 | Training loss 2.8654870092868805 | Validation loss 2.7904773354530334



 26%|██████████████████▋                                                     | 2590/10000 [5:53:42<16:39:52,  8.10s/it]

Epoch: 2589 | Training loss 2.8690738454461098 | Validation loss 2.792688935995102



 26%|██████████████████▋                                                     | 2591/10000 [5:53:51<16:42:49,  8.12s/it]

Epoch: 2590 | Training loss 2.867296166718006 | Validation loss 2.7920095026493073



 26%|██████████████████▋                                                     | 2592/10000 [5:53:59<16:38:31,  8.09s/it]

Epoch: 2591 | Training loss 2.865445353090763 | Validation loss 2.794651448726654



 26%|██████████████████▋                                                     | 2593/10000 [5:54:07<16:36:03,  8.07s/it]

Epoch: 2592 | Training loss 2.8623645454645157 | Validation loss 2.793938398361206



 26%|██████████████████▋                                                     | 2594/10000 [5:54:15<16:36:33,  8.07s/it]

Epoch: 2593 | Training loss 2.870341882109642 | Validation loss 2.7907135784626007



 26%|██████████████████▋                                                     | 2595/10000 [5:54:23<16:36:23,  8.07s/it]

Epoch: 2594 | Training loss 2.871958799660206 | Validation loss 2.7970406115055084



 26%|██████████████████▋                                                     | 2596/10000 [5:54:31<16:37:23,  8.08s/it]

Epoch: 2595 | Training loss 2.868446297943592 | Validation loss 2.7876497507095337



 26%|██████████████████▋                                                     | 2597/10000 [5:54:39<16:40:28,  8.11s/it]

Epoch: 2596 | Training loss 2.865247741341591 | Validation loss 2.8022923171520233



 26%|██████████████████▋                                                     | 2598/10000 [5:54:47<16:39:00,  8.10s/it]

Epoch: 2597 | Training loss 2.8650517240166664 | Validation loss 2.7948854863643646



 26%|██████████████████▋                                                     | 2599/10000 [5:54:55<16:33:47,  8.06s/it]

Epoch: 2598 | Training loss 2.8708801567554474 | Validation loss 2.789634793996811



 26%|██████████████████▋                                                     | 2600/10000 [5:55:03<16:30:38,  8.03s/it]

Epoch: 2599 | Training loss 2.865486428141594 | Validation loss 2.7883446514606476



 26%|██████████████████▋                                                     | 2601/10000 [5:55:11<16:33:45,  8.06s/it]

Epoch: 2600 | Training loss 2.8632525876164436 | Validation loss 2.788575232028961



 26%|██████████████████▋                                                     | 2602/10000 [5:55:19<16:34:37,  8.07s/it]

Epoch: 2601 | Training loss 2.869143567979336 | Validation loss 2.7906338274478912



 26%|██████████████████▋                                                     | 2603/10000 [5:55:27<16:38:06,  8.10s/it]

Epoch: 2602 | Training loss 2.8692585229873657 | Validation loss 2.7854780852794647



 26%|██████████████████▋                                                     | 2604/10000 [5:55:36<16:45:05,  8.15s/it]

Epoch: 2603 | Training loss 2.8626235350966454 | Validation loss 2.783527284860611



 26%|██████████████████▊                                                     | 2605/10000 [5:55:44<16:41:52,  8.13s/it]

Epoch: 2604 | Training loss 2.8617466539144516 | Validation loss 2.7924402356147766



 26%|██████████████████▊                                                     | 2606/10000 [5:55:52<16:42:25,  8.13s/it]

Epoch: 2605 | Training loss 2.8609213083982468 | Validation loss 2.7864357829093933



 26%|██████████████████▊                                                     | 2607/10000 [5:56:00<16:41:40,  8.13s/it]

Epoch: 2606 | Training loss 2.8703655302524567 | Validation loss 2.796649992465973



 26%|██████████████████▊                                                     | 2608/10000 [5:56:08<16:39:52,  8.12s/it]

Epoch: 2607 | Training loss 2.8614693209528923 | Validation loss 2.7895370423793793



 26%|██████████████████▊                                                     | 2609/10000 [5:56:16<16:38:23,  8.10s/it]

Epoch: 2608 | Training loss 2.864550441503525 | Validation loss 2.7930392622947693



 26%|██████████████████▊                                                     | 2610/10000 [5:56:24<16:41:08,  8.13s/it]

Epoch: 2609 | Training loss 2.8625494837760925 | Validation loss 2.7911983132362366



 26%|██████████████████▊                                                     | 2611/10000 [5:56:32<16:40:39,  8.13s/it]

Epoch: 2610 | Training loss 2.8657510057091713 | Validation loss 2.7960338592529297



 26%|██████████████████▊                                                     | 2612/10000 [5:56:41<16:38:33,  8.11s/it]

Epoch: 2611 | Training loss 2.861340247094631 | Validation loss 2.793978303670883



 26%|██████████████████▊                                                     | 2613/10000 [5:56:49<16:39:17,  8.12s/it]

Epoch: 2612 | Training loss 2.866910435259342 | Validation loss 2.7956760227680206



 26%|██████████████████▊                                                     | 2614/10000 [5:56:57<16:36:51,  8.10s/it]

Epoch: 2613 | Training loss 2.8697341307997704 | Validation loss 2.8022329807281494



 26%|██████████████████▊                                                     | 2615/10000 [5:57:05<16:40:23,  8.13s/it]

Epoch: 2614 | Training loss 2.8724908530712128 | Validation loss 2.7907304167747498



 26%|██████████████████▊                                                     | 2616/10000 [5:57:13<16:40:56,  8.13s/it]

Epoch: 2615 | Training loss 2.8639791309833527 | Validation loss 2.793702006340027



 26%|██████████████████▊                                                     | 2617/10000 [5:57:21<16:36:10,  8.10s/it]

Epoch: 2616 | Training loss 2.867743395268917 | Validation loss 2.792545199394226



 26%|██████████████████▊                                                     | 2618/10000 [5:57:29<16:33:55,  8.08s/it]

Epoch: 2617 | Training loss 2.865569956600666 | Validation loss 2.7865655720233917



 26%|██████████████████▊                                                     | 2619/10000 [5:57:37<16:34:03,  8.08s/it]

Epoch: 2618 | Training loss 2.8674712255597115 | Validation loss 2.788583517074585



 26%|██████████████████▊                                                     | 2620/10000 [5:57:45<16:39:21,  8.12s/it]

Epoch: 2619 | Training loss 2.8603993132710457 | Validation loss 2.7872447669506073



 26%|██████████████████▊                                                     | 2621/10000 [5:57:54<16:45:20,  8.17s/it]

Epoch: 2620 | Training loss 2.8707769364118576 | Validation loss 2.7916091084480286



 26%|██████████████████▉                                                     | 2622/10000 [5:58:02<16:43:08,  8.16s/it]

Epoch: 2621 | Training loss 2.8654295206069946 | Validation loss 2.7956531941890717



 26%|██████████████████▉                                                     | 2623/10000 [5:58:10<16:39:51,  8.13s/it]

Epoch: 2622 | Training loss 2.8631190061569214 | Validation loss 2.789607048034668



 26%|██████████████████▉                                                     | 2624/10000 [5:58:18<16:40:52,  8.14s/it]

Epoch: 2623 | Training loss 2.8684091567993164 | Validation loss 2.8010199666023254



 26%|██████████████████▉                                                     | 2625/10000 [5:58:26<16:40:28,  8.14s/it]

Epoch: 2624 | Training loss 2.871208906173706 | Validation loss 2.793013483285904



 26%|██████████████████▉                                                     | 2626/10000 [5:58:34<16:39:36,  8.13s/it]

Epoch: 2625 | Training loss 2.8656942769885063 | Validation loss 2.7897538542747498



 26%|██████████████████▉                                                     | 2627/10000 [5:58:42<16:37:05,  8.11s/it]

Epoch: 2626 | Training loss 2.86726725846529 | Validation loss 2.788271427154541



 26%|██████████████████▉                                                     | 2628/10000 [5:58:51<16:37:31,  8.12s/it]

Epoch: 2627 | Training loss 2.8642254769802094 | Validation loss 2.799018621444702



 26%|██████████████████▉                                                     | 2629/10000 [5:58:59<16:34:22,  8.09s/it]

Epoch: 2628 | Training loss 2.863581120967865 | Validation loss 2.795309603214264



 26%|██████████████████▉                                                     | 2630/10000 [5:59:07<16:34:30,  8.10s/it]

Epoch: 2629 | Training loss 2.8653982058167458 | Validation loss 2.7962741255760193



 26%|██████████████████▉                                                     | 2631/10000 [5:59:15<16:35:17,  8.10s/it]

Epoch: 2630 | Training loss 2.8696310073137283 | Validation loss 2.793161153793335



 26%|██████████████████▉                                                     | 2632/10000 [5:59:23<16:32:51,  8.09s/it]

Epoch: 2631 | Training loss 2.8723312988877296 | Validation loss 2.791097640991211



 26%|██████████████████▉                                                     | 2633/10000 [5:59:31<16:35:04,  8.10s/it]

Epoch: 2632 | Training loss 2.8709267377853394 | Validation loss 2.79120334982872



 26%|██████████████████▉                                                     | 2634/10000 [5:59:39<16:37:38,  8.13s/it]

Epoch: 2633 | Training loss 2.866428792476654 | Validation loss 2.797136962413788



 26%|██████████████████▉                                                     | 2635/10000 [5:59:47<16:36:03,  8.11s/it]

Epoch: 2634 | Training loss 2.8667981326580048 | Validation loss 2.791917085647583



 26%|██████████████████▉                                                     | 2636/10000 [5:59:55<16:37:43,  8.13s/it]

Epoch: 2635 | Training loss 2.8650379553437233 | Validation loss 2.795957237482071



 26%|██████████████████▉                                                     | 2637/10000 [6:00:04<16:37:44,  8.13s/it]

Epoch: 2636 | Training loss 2.8617342859506607 | Validation loss 2.788637012243271



 26%|██████████████████▉                                                     | 2638/10000 [6:00:12<16:36:29,  8.12s/it]

Epoch: 2637 | Training loss 2.863603226840496 | Validation loss 2.7910732328891754



 26%|███████████████████                                                     | 2639/10000 [6:00:20<16:36:46,  8.12s/it]

Epoch: 2638 | Training loss 2.8634810000658035 | Validation loss 2.7891763150691986



 26%|███████████████████                                                     | 2640/10000 [6:00:28<16:34:42,  8.11s/it]

Epoch: 2639 | Training loss 2.863098092377186 | Validation loss 2.7909575700759888



 26%|███████████████████                                                     | 2641/10000 [6:00:36<16:30:51,  8.08s/it]

Epoch: 2640 | Training loss 2.865915834903717 | Validation loss 2.7867853939533234



 26%|███████████████████                                                     | 2642/10000 [6:00:44<16:29:12,  8.07s/it]

Epoch: 2641 | Training loss 2.8659996017813683 | Validation loss 2.7888825237751007



 26%|███████████████████                                                     | 2643/10000 [6:00:52<16:29:45,  8.07s/it]

Epoch: 2642 | Training loss 2.8702852055430412 | Validation loss 2.789816051721573



 26%|███████████████████                                                     | 2644/10000 [6:01:00<16:30:49,  8.08s/it]

Epoch: 2643 | Training loss 2.8670337051153183 | Validation loss 2.801737129688263



 26%|███████████████████                                                     | 2645/10000 [6:01:08<16:30:54,  8.08s/it]

Epoch: 2644 | Training loss 2.870383307337761 | Validation loss 2.7903852462768555



 26%|███████████████████                                                     | 2646/10000 [6:01:16<16:30:47,  8.08s/it]

Epoch: 2645 | Training loss 2.867027923464775 | Validation loss 2.7901816964149475



 26%|███████████████████                                                     | 2647/10000 [6:01:24<16:28:25,  8.07s/it]

Epoch: 2646 | Training loss 2.8699864372611046 | Validation loss 2.8011708557605743



 26%|███████████████████                                                     | 2648/10000 [6:01:32<16:27:56,  8.06s/it]

Epoch: 2647 | Training loss 2.8658668473362923 | Validation loss 2.7876577377319336



 26%|███████████████████                                                     | 2649/10000 [6:01:40<16:30:50,  8.09s/it]

Epoch: 2648 | Training loss 2.868218071758747 | Validation loss 2.79982927441597



 26%|███████████████████                                                     | 2650/10000 [6:01:49<16:31:12,  8.09s/it]

Epoch: 2649 | Training loss 2.8705926164984703 | Validation loss 2.791005313396454



 27%|███████████████████                                                     | 2651/10000 [6:01:57<16:33:35,  8.11s/it]

Epoch: 2650 | Training loss 2.863221749663353 | Validation loss 2.7888014018535614



 27%|███████████████████                                                     | 2652/10000 [6:02:05<16:35:53,  8.13s/it]

Epoch: 2651 | Training loss 2.868758797645569 | Validation loss 2.7904661297798157



 27%|███████████████████                                                     | 2653/10000 [6:02:13<16:36:22,  8.14s/it]

Epoch: 2652 | Training loss 2.9164090901613235 | Validation loss 2.8008140325546265



 27%|███████████████████                                                     | 2654/10000 [6:02:21<16:33:16,  8.11s/it]

Epoch: 2653 | Training loss 2.8686327636241913 | Validation loss 2.7899803817272186



 27%|███████████████████                                                     | 2655/10000 [6:02:29<16:33:56,  8.12s/it]

Epoch: 2654 | Training loss 2.867501750588417 | Validation loss 2.7904603481292725



 27%|███████████████████                                                     | 2656/10000 [6:02:37<16:34:16,  8.12s/it]

Epoch: 2655 | Training loss 2.8668514788150787 | Validation loss 2.7929199635982513



 27%|███████████████████▏                                                    | 2657/10000 [6:02:45<16:31:13,  8.10s/it]

Epoch: 2656 | Training loss 2.871158391237259 | Validation loss 2.793198436498642



 27%|███████████████████▏                                                    | 2658/10000 [6:02:54<16:30:36,  8.10s/it]

Epoch: 2657 | Training loss 2.8614969551563263 | Validation loss 2.798542022705078



 27%|███████████████████▏                                                    | 2659/10000 [6:03:02<16:31:13,  8.10s/it]

Epoch: 2658 | Training loss 2.8608167320489883 | Validation loss 2.7864344120025635



 27%|███████████████████▏                                                    | 2660/10000 [6:03:10<16:39:09,  8.17s/it]

Epoch: 2659 | Training loss 2.8722261860966682 | Validation loss 2.7905115485191345



 27%|███████████████████▏                                                    | 2661/10000 [6:03:18<16:42:19,  8.19s/it]

Epoch: 2660 | Training loss 2.863970771431923 | Validation loss 2.7883185744285583



 27%|███████████████████▏                                                    | 2662/10000 [6:03:26<16:40:31,  8.18s/it]

Epoch: 2661 | Training loss 2.870356447994709 | Validation loss 2.791977673768997



 27%|███████████████████▏                                                    | 2663/10000 [6:03:34<16:35:44,  8.14s/it]

Epoch: 2662 | Training loss 2.8702001869678497 | Validation loss 2.794477343559265



 27%|███████████████████▏                                                    | 2664/10000 [6:03:43<16:36:23,  8.15s/it]

Epoch: 2663 | Training loss 2.8648878186941147 | Validation loss 2.7925489842891693



 27%|███████████████████▏                                                    | 2665/10000 [6:03:51<16:34:32,  8.14s/it]

Epoch: 2664 | Training loss 2.8673534989356995 | Validation loss 2.7906617522239685



 27%|███████████████████▏                                                    | 2666/10000 [6:03:59<16:30:12,  8.10s/it]

Epoch: 2665 | Training loss 2.861177980899811 | Validation loss 2.789850175380707



 27%|███████████████████▏                                                    | 2667/10000 [6:04:07<16:29:35,  8.10s/it]

Epoch: 2666 | Training loss 2.8673262894153595 | Validation loss 2.790585696697235



 27%|███████████████████▏                                                    | 2668/10000 [6:04:15<16:22:46,  8.04s/it]

Epoch: 2667 | Training loss 2.8706178665161133 | Validation loss 2.787695288658142



 27%|███████████████████▏                                                    | 2669/10000 [6:04:23<16:25:21,  8.06s/it]

Epoch: 2668 | Training loss 2.8670983985066414 | Validation loss 2.7888867557048798



 27%|███████████████████▏                                                    | 2670/10000 [6:04:31<16:24:18,  8.06s/it]

Epoch: 2669 | Training loss 2.871188633143902 | Validation loss 2.793699234724045



 27%|███████████████████▏                                                    | 2671/10000 [6:04:39<16:23:31,  8.05s/it]

Epoch: 2670 | Training loss 2.868861399590969 | Validation loss 2.7936845421791077



 27%|███████████████████▏                                                    | 2672/10000 [6:04:47<16:21:14,  8.03s/it]

Epoch: 2671 | Training loss 2.8661003038287163 | Validation loss 2.790057122707367



 27%|███████████████████▏                                                    | 2673/10000 [6:04:55<16:23:34,  8.05s/it]

Epoch: 2672 | Training loss 2.8650365620851517 | Validation loss 2.7919013500213623



 27%|███████████████████▎                                                    | 2674/10000 [6:05:03<16:23:29,  8.05s/it]

Epoch: 2673 | Training loss 2.864944964647293 | Validation loss 2.7964381277561188



 27%|███████████████████▎                                                    | 2675/10000 [6:05:11<16:28:32,  8.10s/it]

Epoch: 2674 | Training loss 2.864804483950138 | Validation loss 2.798208236694336



 27%|███████████████████▎                                                    | 2676/10000 [6:05:19<16:24:32,  8.07s/it]

Epoch: 2675 | Training loss 2.8710351958870888 | Validation loss 2.806342452764511



 27%|███████████████████▎                                                    | 2677/10000 [6:05:27<16:23:58,  8.06s/it]

Epoch: 2676 | Training loss 2.8681795373559 | Validation loss 2.789381891489029



 27%|███████████████████▎                                                    | 2678/10000 [6:05:35<16:24:09,  8.06s/it]

Epoch: 2677 | Training loss 2.8638698011636734 | Validation loss 2.790952831506729



 27%|███████████████████▎                                                    | 2679/10000 [6:05:43<16:25:50,  8.08s/it]

Epoch: 2678 | Training loss 2.8617910891771317 | Validation loss 2.7878926396369934



 27%|███████████████████▎                                                    | 2680/10000 [6:05:52<16:26:27,  8.09s/it]

Epoch: 2679 | Training loss 2.867995597422123 | Validation loss 2.7951298356056213



 27%|███████████████████▎                                                    | 2681/10000 [6:06:00<16:25:42,  8.08s/it]

Epoch: 2680 | Training loss 2.8667039945721626 | Validation loss 2.793196976184845



 27%|███████████████████▎                                                    | 2682/10000 [6:06:08<16:27:27,  8.10s/it]

Epoch: 2681 | Training loss 2.8663052767515182 | Validation loss 2.7933008074760437



 27%|███████████████████▎                                                    | 2683/10000 [6:06:16<16:26:24,  8.09s/it]

Epoch: 2682 | Training loss 2.864514149725437 | Validation loss 2.79078209400177



 27%|███████████████████▎                                                    | 2684/10000 [6:06:24<16:29:05,  8.11s/it]

Epoch: 2683 | Training loss 2.8632346764206886 | Validation loss 2.797195613384247



 27%|███████████████████▎                                                    | 2685/10000 [6:06:32<16:26:57,  8.10s/it]

Epoch: 2684 | Training loss 2.8674828931689262 | Validation loss 2.791208803653717



 27%|███████████████████▎                                                    | 2686/10000 [6:06:40<16:25:25,  8.08s/it]

Epoch: 2685 | Training loss 2.8698052912950516 | Validation loss 2.799298882484436



 27%|███████████████████▎                                                    | 2687/10000 [6:06:48<16:26:25,  8.09s/it]

Epoch: 2686 | Training loss 2.868431992828846 | Validation loss 2.7863620221614838



 27%|███████████████████▎                                                    | 2688/10000 [6:06:56<16:25:33,  8.09s/it]

Epoch: 2687 | Training loss 2.8697845339775085 | Validation loss 2.797255277633667



 27%|███████████████████▎                                                    | 2689/10000 [6:07:05<16:29:40,  8.12s/it]

Epoch: 2688 | Training loss 2.862451024353504 | Validation loss 2.791427344083786



 27%|███████████████████▎                                                    | 2690/10000 [6:07:13<16:26:23,  8.10s/it]

Epoch: 2689 | Training loss 2.8664781600236893 | Validation loss 2.7905229330062866



 27%|███████████████████▍                                                    | 2691/10000 [6:07:21<16:28:13,  8.11s/it]

Epoch: 2690 | Training loss 2.8692119270563126 | Validation loss 2.7911531925201416



 27%|███████████████████▍                                                    | 2692/10000 [6:07:29<16:27:44,  8.11s/it]

Epoch: 2691 | Training loss 2.8623141050338745 | Validation loss 2.7910562455654144



 27%|███████████████████▍                                                    | 2693/10000 [6:07:37<16:26:44,  8.10s/it]

Epoch: 2692 | Training loss 2.8687343895435333 | Validation loss 2.796222358942032



 27%|███████████████████▍                                                    | 2694/10000 [6:07:45<16:26:29,  8.10s/it]

Epoch: 2693 | Training loss 2.8704234063625336 | Validation loss 2.7886722683906555



 27%|███████████████████▍                                                    | 2695/10000 [6:07:53<16:21:51,  8.06s/it]

Epoch: 2694 | Training loss 2.871726095676422 | Validation loss 2.7943387031555176



 27%|███████████████████▍                                                    | 2696/10000 [6:08:01<16:23:03,  8.08s/it]

Epoch: 2695 | Training loss 2.867653325200081 | Validation loss 2.7917410731315613



 27%|███████████████████▍                                                    | 2697/10000 [6:08:09<16:22:40,  8.07s/it]

Epoch: 2696 | Training loss 2.8619474098086357 | Validation loss 2.7952361404895782



 27%|███████████████████▍                                                    | 2698/10000 [6:08:17<16:24:05,  8.09s/it]

Epoch: 2697 | Training loss 2.8642616868019104 | Validation loss 2.7900134921073914



 27%|███████████████████▍                                                    | 2699/10000 [6:08:25<16:25:13,  8.10s/it]

Epoch: 2698 | Training loss 2.8680139407515526 | Validation loss 2.7869779467582703



 27%|███████████████████▍                                                    | 2700/10000 [6:08:34<16:25:55,  8.10s/it]

Epoch: 2699 | Training loss 2.8647366389632225 | Validation loss 2.791225850582123



 27%|███████████████████▍                                                    | 2701/10000 [6:08:42<16:29:15,  8.13s/it]

Epoch: 2700 | Training loss 2.8658555075526237 | Validation loss 2.791715145111084



 27%|███████████████████▍                                                    | 2702/10000 [6:08:50<16:28:37,  8.13s/it]

Epoch: 2701 | Training loss 2.8710057139396667 | Validation loss 2.7930581271648407



 27%|███████████████████▍                                                    | 2703/10000 [6:08:58<16:27:00,  8.12s/it]

Epoch: 2702 | Training loss 2.862238608300686 | Validation loss 2.789805620908737



 27%|███████████████████▍                                                    | 2704/10000 [6:09:06<16:24:42,  8.10s/it]

Epoch: 2703 | Training loss 2.8655612841248512 | Validation loss 2.7888289988040924



 27%|███████████████████▍                                                    | 2705/10000 [6:09:14<16:25:51,  8.11s/it]

Epoch: 2704 | Training loss 2.865479066967964 | Validation loss 2.7937654554843903



 27%|███████████████████▍                                                    | 2706/10000 [6:09:22<16:23:43,  8.09s/it]

Epoch: 2705 | Training loss 2.86279658973217 | Validation loss 2.796888679265976



 27%|███████████████████▍                                                    | 2707/10000 [6:09:30<16:22:47,  8.09s/it]

Epoch: 2706 | Training loss 2.864578701555729 | Validation loss 2.7865860760211945



 27%|███████████████████▍                                                    | 2708/10000 [6:09:38<16:28:29,  8.13s/it]

Epoch: 2707 | Training loss 2.8684237226843834 | Validation loss 2.795923203229904



 27%|███████████████████▌                                                    | 2709/10000 [6:09:47<16:26:38,  8.12s/it]

Epoch: 2708 | Training loss 2.865763060748577 | Validation loss 2.8019719421863556



 27%|███████████████████▌                                                    | 2710/10000 [6:09:55<16:23:04,  8.09s/it]

Epoch: 2709 | Training loss 2.8664386868476868 | Validation loss 2.789529025554657



 27%|███████████████████▌                                                    | 2711/10000 [6:10:03<16:22:48,  8.09s/it]

Epoch: 2710 | Training loss 2.86508971452713 | Validation loss 2.790706127882004



 27%|███████████████████▌                                                    | 2712/10000 [6:10:11<16:22:25,  8.09s/it]

Epoch: 2711 | Training loss 2.8669268637895584 | Validation loss 2.78898224234581



 27%|███████████████████▌                                                    | 2713/10000 [6:10:19<16:25:13,  8.11s/it]

Epoch: 2712 | Training loss 2.8645249903202057 | Validation loss 2.783811002969742



 27%|███████████████████▌                                                    | 2714/10000 [6:10:27<16:26:59,  8.13s/it]

Epoch: 2713 | Training loss 2.8682975992560387 | Validation loss 2.7882584929466248



 27%|███████████████████▌                                                    | 2715/10000 [6:10:35<16:24:12,  8.11s/it]

Epoch: 2714 | Training loss 2.8628126084804535 | Validation loss 2.7867058515548706



 27%|███████████████████▌                                                    | 2716/10000 [6:10:43<16:21:03,  8.08s/it]

Epoch: 2715 | Training loss 2.865648902952671 | Validation loss 2.792916923761368



 27%|███████████████████▌                                                    | 2717/10000 [6:10:51<16:21:46,  8.09s/it]

Epoch: 2716 | Training loss 2.871686600148678 | Validation loss 2.7913809418678284



 27%|███████████████████▌                                                    | 2718/10000 [6:10:59<16:21:08,  8.08s/it]

Epoch: 2717 | Training loss 2.866276040673256 | Validation loss 2.7965195178985596



 27%|███████████████████▌                                                    | 2719/10000 [6:11:07<16:17:40,  8.06s/it]

Epoch: 2718 | Training loss 2.8681653141975403 | Validation loss 2.79353067278862



 27%|███████████████████▌                                                    | 2720/10000 [6:11:15<16:17:23,  8.06s/it]

Epoch: 2719 | Training loss 2.8700194358825684 | Validation loss 2.7911869883537292



 27%|███████████████████▌                                                    | 2721/10000 [6:11:23<16:17:42,  8.06s/it]

Epoch: 2720 | Training loss 2.8627873435616493 | Validation loss 2.791598320007324



 27%|███████████████████▌                                                    | 2722/10000 [6:11:32<16:18:34,  8.07s/it]

Epoch: 2721 | Training loss 2.8699436336755753 | Validation loss 2.7993454039096832



 27%|███████████████████▌                                                    | 2723/10000 [6:11:40<16:19:15,  8.07s/it]

Epoch: 2722 | Training loss 2.8666206151247025 | Validation loss 2.7916868925094604



 27%|███████████████████▌                                                    | 2724/10000 [6:11:48<16:19:02,  8.07s/it]

Epoch: 2723 | Training loss 2.8688171580433846 | Validation loss 2.795406460762024



 27%|███████████████████▌                                                    | 2725/10000 [6:11:56<16:14:14,  8.03s/it]

Epoch: 2724 | Training loss 2.867482341825962 | Validation loss 2.7945140302181244



 27%|███████████████████▋                                                    | 2726/10000 [6:12:04<16:15:23,  8.05s/it]

Epoch: 2725 | Training loss 2.862785443663597 | Validation loss 2.7861467599868774



 27%|███████████████████▋                                                    | 2727/10000 [6:12:12<16:15:47,  8.05s/it]

Epoch: 2726 | Training loss 2.8645632043480873 | Validation loss 2.7873238921165466



 27%|███████████████████▋                                                    | 2728/10000 [6:12:20<16:20:58,  8.09s/it]

Epoch: 2727 | Training loss 2.8617771193385124 | Validation loss 2.784722626209259



 27%|███████████████████▋                                                    | 2729/10000 [6:12:28<16:23:51,  8.12s/it]

Epoch: 2728 | Training loss 2.865360476076603 | Validation loss 2.7864849865436554



 27%|███████████████████▋                                                    | 2730/10000 [6:12:36<16:21:15,  8.10s/it]

Epoch: 2729 | Training loss 2.8662061765789986 | Validation loss 2.7904269993305206



 27%|███████████████████▋                                                    | 2731/10000 [6:12:44<16:20:15,  8.09s/it]

Epoch: 2730 | Training loss 2.8746009841561317 | Validation loss 2.7891778647899628



 27%|███████████████████▋                                                    | 2732/10000 [6:12:52<16:20:03,  8.09s/it]

Epoch: 2731 | Training loss 2.867882013320923 | Validation loss 2.7880928218364716



 27%|███████████████████▋                                                    | 2733/10000 [6:13:01<16:22:37,  8.11s/it]

Epoch: 2732 | Training loss 2.8691041991114616 | Validation loss 2.788722723722458



 27%|███████████████████▋                                                    | 2734/10000 [6:13:09<16:22:40,  8.11s/it]

Epoch: 2733 | Training loss 2.868816837668419 | Validation loss 2.7944194972515106



 27%|███████████████████▋                                                    | 2735/10000 [6:13:17<16:20:56,  8.10s/it]

Epoch: 2734 | Training loss 2.869651347398758 | Validation loss 2.7893743813037872



 27%|███████████████████▋                                                    | 2736/10000 [6:13:25<16:20:15,  8.10s/it]

Epoch: 2735 | Training loss 2.8615701869130135 | Validation loss 2.791712373495102



 27%|███████████████████▋                                                    | 2737/10000 [6:13:33<16:21:24,  8.11s/it]

Epoch: 2736 | Training loss 2.8677987828850746 | Validation loss 2.7911376357078552



 27%|███████████████████▋                                                    | 2738/10000 [6:13:41<16:24:30,  8.13s/it]

Epoch: 2737 | Training loss 2.864007294178009 | Validation loss 2.8048572540283203



 27%|███████████████████▋                                                    | 2739/10000 [6:13:49<16:19:47,  8.10s/it]

Epoch: 2738 | Training loss 2.8658435940742493 | Validation loss 2.793237566947937



 27%|███████████████████▋                                                    | 2740/10000 [6:13:57<16:23:30,  8.13s/it]

Epoch: 2739 | Training loss 2.8714040219783783 | Validation loss 2.795679658651352



 27%|███████████████████▋                                                    | 2741/10000 [6:14:05<16:20:46,  8.11s/it]

Epoch: 2740 | Training loss 2.865089640021324 | Validation loss 2.7900039553642273



 27%|███████████████████▋                                                    | 2742/10000 [6:14:14<16:24:40,  8.14s/it]

Epoch: 2741 | Training loss 2.8634655997157097 | Validation loss 2.7916784286499023



 27%|███████████████████▋                                                    | 2743/10000 [6:14:22<16:24:54,  8.14s/it]

Epoch: 2742 | Training loss 2.867026701569557 | Validation loss 2.794812351465225



 27%|███████████████████▊                                                    | 2744/10000 [6:14:30<16:24:20,  8.14s/it]

Epoch: 2743 | Training loss 2.8691424503922462 | Validation loss 2.7944984436035156



 27%|███████████████████▊                                                    | 2745/10000 [6:14:38<16:20:30,  8.11s/it]

Epoch: 2744 | Training loss 2.871585428714752 | Validation loss 2.7894455194473267



 27%|███████████████████▊                                                    | 2746/10000 [6:14:46<16:21:46,  8.12s/it]

Epoch: 2745 | Training loss 2.864746704697609 | Validation loss 2.7874216437339783



 27%|███████████████████▊                                                    | 2747/10000 [6:14:54<16:23:52,  8.14s/it]

Epoch: 2746 | Training loss 2.8673208728432655 | Validation loss 2.7910483479499817



 27%|███████████████████▊                                                    | 2748/10000 [6:15:02<16:25:17,  8.15s/it]

Epoch: 2747 | Training loss 2.869177170097828 | Validation loss 2.7928444743156433



 27%|███████████████████▊                                                    | 2749/10000 [6:15:11<16:27:28,  8.17s/it]

Epoch: 2748 | Training loss 2.868845045566559 | Validation loss 2.797450602054596



 28%|███████████████████▊                                                    | 2750/10000 [6:15:19<16:31:12,  8.20s/it]

Epoch: 2749 | Training loss 2.8610756024718285 | Validation loss 2.7940543591976166



 28%|███████████████████▊                                                    | 2751/10000 [6:15:27<16:27:57,  8.18s/it]

Epoch: 2750 | Training loss 2.863452546298504 | Validation loss 2.7862596809864044



 28%|███████████████████▊                                                    | 2752/10000 [6:15:35<16:27:53,  8.18s/it]

Epoch: 2751 | Training loss 2.8658700957894325 | Validation loss 2.793651968240738



 28%|███████████████████▊                                                    | 2753/10000 [6:15:43<16:27:43,  8.18s/it]

Epoch: 2752 | Training loss 2.8704636096954346 | Validation loss 2.794130116701126



 28%|███████████████████▊                                                    | 2754/10000 [6:15:52<16:24:47,  8.15s/it]

Epoch: 2753 | Training loss 2.8705343157052994 | Validation loss 2.7912426590919495



 28%|███████████████████▊                                                    | 2755/10000 [6:16:00<16:28:02,  8.18s/it]

Epoch: 2754 | Training loss 2.86811164021492 | Validation loss 2.79205858707428



 28%|███████████████████▊                                                    | 2756/10000 [6:16:08<16:25:38,  8.16s/it]

Epoch: 2755 | Training loss 2.868185505270958 | Validation loss 2.788532704114914



 28%|███████████████████▊                                                    | 2757/10000 [6:16:16<16:23:43,  8.15s/it]

Epoch: 2756 | Training loss 2.87052845954895 | Validation loss 2.790071487426758



 28%|███████████████████▊                                                    | 2758/10000 [6:16:24<16:24:39,  8.16s/it]

Epoch: 2757 | Training loss 2.8634657338261604 | Validation loss 2.790982484817505



 28%|███████████████████▊                                                    | 2759/10000 [6:16:32<16:21:58,  8.14s/it]

Epoch: 2758 | Training loss 2.8615844026207924 | Validation loss 2.7890601456165314



 28%|███████████████████▊                                                    | 2760/10000 [6:16:40<16:21:38,  8.14s/it]

Epoch: 2759 | Training loss 2.8653797805309296 | Validation loss 2.7881287932395935



 28%|███████████████████▉                                                    | 2761/10000 [6:16:49<16:21:36,  8.14s/it]

Epoch: 2760 | Training loss 2.863732136785984 | Validation loss 2.7904394567012787



 28%|███████████████████▉                                                    | 2762/10000 [6:16:57<16:23:03,  8.15s/it]

Epoch: 2761 | Training loss 2.8629849329590797 | Validation loss 2.791495531797409



 28%|███████████████████▉                                                    | 2763/10000 [6:17:05<16:22:17,  8.14s/it]

Epoch: 2762 | Training loss 2.872835196554661 | Validation loss 2.795928806066513



 28%|███████████████████▉                                                    | 2764/10000 [6:17:13<16:27:11,  8.19s/it]

Epoch: 2763 | Training loss 2.8621550798416138 | Validation loss 2.7884961664676666



 28%|███████████████████▉                                                    | 2765/10000 [6:17:21<16:27:46,  8.19s/it]

Epoch: 2764 | Training loss 2.872805118560791 | Validation loss 2.7897409200668335



 28%|███████████████████▉                                                    | 2766/10000 [6:17:29<16:23:49,  8.16s/it]

Epoch: 2765 | Training loss 2.868473596870899 | Validation loss 2.7936456203460693



 28%|███████████████████▉                                                    | 2767/10000 [6:17:38<16:24:19,  8.17s/it]

Epoch: 2766 | Training loss 2.8674432560801506 | Validation loss 2.7922205924987793



 28%|███████████████████▉                                                    | 2768/10000 [6:17:46<16:29:03,  8.21s/it]

Epoch: 2767 | Training loss 2.868444189429283 | Validation loss 2.793167382478714



 28%|███████████████████▉                                                    | 2769/10000 [6:17:54<16:34:01,  8.25s/it]

Epoch: 2768 | Training loss 2.864079713821411 | Validation loss 2.794360488653183



 28%|███████████████████▉                                                    | 2770/10000 [6:18:12<22:29:57, 11.20s/it]

Epoch: 2769 | Training loss 2.866248771548271 | Validation loss 2.795289784669876



 28%|███████████████████▉                                                    | 2771/10000 [6:18:24<22:43:18, 11.32s/it]

Epoch: 2770 | Training loss 2.863910585641861 | Validation loss 2.7903278172016144



 28%|███████████████████▉                                                    | 2772/10000 [6:18:32<20:45:20, 10.34s/it]

Epoch: 2771 | Training loss 2.8673510774970055 | Validation loss 2.7930448949337006



 28%|███████████████████▉                                                    | 2773/10000 [6:18:40<19:24:51,  9.67s/it]

Epoch: 2772 | Training loss 2.862602472305298 | Validation loss 2.7849701941013336



 28%|███████████████████▉                                                    | 2774/10000 [6:18:48<18:28:31,  9.20s/it]

Epoch: 2773 | Training loss 2.8654484525322914 | Validation loss 2.7958887219429016



 28%|███████████████████▉                                                    | 2775/10000 [6:18:56<17:49:36,  8.88s/it]

Epoch: 2774 | Training loss 2.865100622177124 | Validation loss 2.7882220447063446



 28%|███████████████████▉                                                    | 2776/10000 [6:19:04<17:19:32,  8.63s/it]

Epoch: 2775 | Training loss 2.866570867598057 | Validation loss 2.7905484437942505



 28%|███████████████████▉                                                    | 2777/10000 [6:19:13<17:01:19,  8.48s/it]

Epoch: 2776 | Training loss 2.8677298054099083 | Validation loss 2.793317496776581



 28%|████████████████████                                                    | 2778/10000 [6:19:21<16:45:09,  8.35s/it]

Epoch: 2777 | Training loss 2.8632436245679855 | Validation loss 2.7942326962947845



 28%|████████████████████                                                    | 2779/10000 [6:19:29<16:36:29,  8.28s/it]

Epoch: 2778 | Training loss 2.868846245110035 | Validation loss 2.7892293632030487



 28%|████████████████████                                                    | 2780/10000 [6:19:37<16:29:19,  8.22s/it]

Epoch: 2779 | Training loss 2.8627296686172485 | Validation loss 2.786668360233307



 28%|████████████████████                                                    | 2781/10000 [6:19:45<16:25:58,  8.19s/it]

Epoch: 2780 | Training loss 2.864288479089737 | Validation loss 2.7894991636276245



 28%|████████████████████                                                    | 2782/10000 [6:19:53<16:23:37,  8.18s/it]

Epoch: 2781 | Training loss 2.868814170360565 | Validation loss 2.7906313240528107



 28%|████████████████████                                                    | 2783/10000 [6:20:01<16:24:07,  8.18s/it]

Epoch: 2782 | Training loss 2.8645138144493103 | Validation loss 2.795595407485962



 28%|████████████████████                                                    | 2784/10000 [6:20:09<16:22:48,  8.17s/it]

Epoch: 2783 | Training loss 2.8632033839821815 | Validation loss 2.7906370162963867



 28%|████████████████████                                                    | 2785/10000 [6:20:17<16:15:06,  8.11s/it]

Epoch: 2784 | Training loss 2.862992115318775 | Validation loss 2.7911714911460876



 28%|████████████████████                                                    | 2786/10000 [6:20:25<16:15:22,  8.11s/it]

Epoch: 2785 | Training loss 2.8677196726202965 | Validation loss 2.790588676929474



 28%|████████████████████                                                    | 2787/10000 [6:20:33<16:12:36,  8.09s/it]

Epoch: 2786 | Training loss 2.8648005947470665 | Validation loss 2.788441240787506



 28%|████████████████████                                                    | 2788/10000 [6:20:42<16:10:02,  8.07s/it]

Epoch: 2787 | Training loss 2.8688408359885216 | Validation loss 2.7900502383708954



 28%|████████████████████                                                    | 2789/10000 [6:20:50<16:10:27,  8.07s/it]

Epoch: 2788 | Training loss 2.867658346891403 | Validation loss 2.7962996661663055



 28%|████████████████████                                                    | 2790/10000 [6:20:58<16:12:26,  8.09s/it]

Epoch: 2789 | Training loss 2.865333564579487 | Validation loss 2.8000973761081696



 28%|████████████████████                                                    | 2791/10000 [6:21:06<16:09:47,  8.07s/it]

Epoch: 2790 | Training loss 2.866717629134655 | Validation loss 2.7931888103485107



 28%|████████████████████                                                    | 2792/10000 [6:21:14<16:08:26,  8.06s/it]

Epoch: 2791 | Training loss 2.8668113574385643 | Validation loss 2.793442666530609



 28%|████████████████████                                                    | 2793/10000 [6:21:22<16:10:57,  8.08s/it]

Epoch: 2792 | Training loss 2.8682444095611572 | Validation loss 2.795263707637787



 28%|████████████████████                                                    | 2794/10000 [6:21:30<16:08:36,  8.07s/it]

Epoch: 2793 | Training loss 2.872759409248829 | Validation loss 2.7921850383281708



 28%|████████████████████                                                    | 2795/10000 [6:21:38<16:08:44,  8.07s/it]

Epoch: 2794 | Training loss 2.8646723926067352 | Validation loss 2.790572226047516



 28%|████████████████████▏                                                   | 2796/10000 [6:21:46<16:08:41,  8.07s/it]

Epoch: 2795 | Training loss 2.8676173985004425 | Validation loss 2.790447175502777



 28%|████████████████████▏                                                   | 2797/10000 [6:21:54<16:10:28,  8.08s/it]

Epoch: 2796 | Training loss 2.864407978951931 | Validation loss 2.7900308668613434



 28%|████████████████████▏                                                   | 2798/10000 [6:22:02<16:11:30,  8.09s/it]

Epoch: 2797 | Training loss 2.8629987314343452 | Validation loss 2.7900335788726807



 28%|████████████████████▏                                                   | 2799/10000 [6:22:10<16:12:13,  8.10s/it]

Epoch: 2798 | Training loss 2.8684967160224915 | Validation loss 2.7952526807785034



 28%|████████████████████▏                                                   | 2800/10000 [6:22:19<16:10:23,  8.09s/it]

Epoch: 2799 | Training loss 2.8650492429733276 | Validation loss 2.785600185394287



 28%|████████████████████▏                                                   | 2801/10000 [6:22:27<16:09:38,  8.08s/it]

Epoch: 2800 | Training loss 2.8695124238729477 | Validation loss 2.793297529220581



 28%|████████████████████▏                                                   | 2802/10000 [6:22:35<16:11:25,  8.10s/it]

Epoch: 2801 | Training loss 2.869579717516899 | Validation loss 2.793183356523514



 28%|████████████████████▏                                                   | 2803/10000 [6:22:43<16:14:14,  8.12s/it]

Epoch: 2802 | Training loss 2.8650833889842033 | Validation loss 2.787142902612686



 28%|████████████████████▏                                                   | 2804/10000 [6:22:51<16:12:13,  8.11s/it]

Epoch: 2803 | Training loss 2.8673154041171074 | Validation loss 2.7889691293239594



 28%|████████████████████▏                                                   | 2805/10000 [6:22:59<16:14:12,  8.12s/it]

Epoch: 2804 | Training loss 2.867922693490982 | Validation loss 2.7966293692588806



 28%|████████████████████▏                                                   | 2806/10000 [6:23:07<16:12:07,  8.11s/it]

Epoch: 2805 | Training loss 2.869600012898445 | Validation loss 2.796021342277527



 28%|████████████████████▏                                                   | 2807/10000 [6:23:15<16:11:45,  8.11s/it]

Epoch: 2806 | Training loss 2.8683619126677513 | Validation loss 2.7951561510562897



 28%|████████████████████▏                                                   | 2808/10000 [6:23:23<16:12:31,  8.11s/it]

Epoch: 2807 | Training loss 2.8685373216867447 | Validation loss 2.791970431804657



 28%|████████████████████▏                                                   | 2809/10000 [6:23:32<16:14:28,  8.13s/it]

Epoch: 2808 | Training loss 2.866344526410103 | Validation loss 2.7896877825260162



 28%|████████████████████▏                                                   | 2810/10000 [6:23:40<16:16:37,  8.15s/it]

Epoch: 2809 | Training loss 2.867645025253296 | Validation loss 2.7905677258968353



 28%|████████████████████▏                                                   | 2811/10000 [6:23:48<16:15:17,  8.14s/it]

Epoch: 2810 | Training loss 2.8615915179252625 | Validation loss 2.7887336015701294



 28%|████████████████████▏                                                   | 2812/10000 [6:23:56<16:13:16,  8.12s/it]

Epoch: 2811 | Training loss 2.864693343639374 | Validation loss 2.7916444838047028



 28%|████████████████████▎                                                   | 2813/10000 [6:24:04<16:12:18,  8.12s/it]

Epoch: 2812 | Training loss 2.8639204129576683 | Validation loss 2.788647025823593



 28%|████████████████████▎                                                   | 2814/10000 [6:24:12<16:11:36,  8.11s/it]

Epoch: 2813 | Training loss 2.867291286587715 | Validation loss 2.7907424867153168



 28%|████████████████████▎                                                   | 2815/10000 [6:24:20<16:11:04,  8.11s/it]

Epoch: 2814 | Training loss 2.8649340718984604 | Validation loss 2.790725201368332



 28%|████████████████████▎                                                   | 2816/10000 [6:24:28<16:13:31,  8.13s/it]

Epoch: 2815 | Training loss 2.8681878745555878 | Validation loss 2.7927649319171906



 28%|████████████████████▎                                                   | 2817/10000 [6:24:37<16:13:23,  8.13s/it]

Epoch: 2816 | Training loss 2.865503504872322 | Validation loss 2.792991667985916



 28%|████████████████████▎                                                   | 2818/10000 [6:24:45<16:12:12,  8.12s/it]

Epoch: 2817 | Training loss 2.8672016263008118 | Validation loss 2.7926042079925537



 28%|████████████████████▎                                                   | 2819/10000 [6:24:53<16:07:23,  8.08s/it]

Epoch: 2818 | Training loss 2.864659145474434 | Validation loss 2.792526990175247



 28%|████████████████████▎                                                   | 2820/10000 [6:25:01<16:10:26,  8.11s/it]

Epoch: 2819 | Training loss 2.86164353787899 | Validation loss 2.7966015338897705



 28%|████████████████████▎                                                   | 2821/10000 [6:25:09<16:06:14,  8.08s/it]

Epoch: 2820 | Training loss 2.866124838590622 | Validation loss 2.785901755094528



 28%|████████████████████▎                                                   | 2822/10000 [6:25:17<16:06:01,  8.07s/it]

Epoch: 2821 | Training loss 2.8685960471630096 | Validation loss 2.7894670367240906



 28%|████████████████████▎                                                   | 2823/10000 [6:25:25<16:04:02,  8.06s/it]

Epoch: 2822 | Training loss 2.866381451487541 | Validation loss 2.7854093611240387



 28%|████████████████████▎                                                   | 2824/10000 [6:25:33<16:04:51,  8.07s/it]

Epoch: 2823 | Training loss 2.860052041709423 | Validation loss 2.788421928882599



 28%|████████████████████▎                                                   | 2825/10000 [6:25:41<16:00:20,  8.03s/it]

Epoch: 2824 | Training loss 2.864104561507702 | Validation loss 2.787661999464035



 28%|████████████████████▎                                                   | 2826/10000 [6:25:49<16:10:01,  8.11s/it]

Epoch: 2825 | Training loss 2.8655367121100426 | Validation loss 2.786236673593521



 28%|████████████████████▎                                                   | 2827/10000 [6:25:57<16:10:36,  8.12s/it]

Epoch: 2826 | Training loss 2.8631080836057663 | Validation loss 2.7895466089248657



 28%|████████████████████▎                                                   | 2828/10000 [6:26:06<16:09:17,  8.11s/it]

Epoch: 2827 | Training loss 2.8607963621616364 | Validation loss 2.793711096048355



 28%|████████████████████▎                                                   | 2829/10000 [6:26:14<16:08:19,  8.10s/it]

Epoch: 2828 | Training loss 2.866211451590061 | Validation loss 2.7913202047348022



 28%|████████████████████▍                                                   | 2830/10000 [6:26:22<16:07:34,  8.10s/it]

Epoch: 2829 | Training loss 2.863379091024399 | Validation loss 2.7936208844184875



 28%|████████████████████▍                                                   | 2831/10000 [6:26:30<16:07:40,  8.10s/it]

Epoch: 2830 | Training loss 2.863435223698616 | Validation loss 2.7884558141231537



 28%|████████████████████▍                                                   | 2832/10000 [6:26:38<16:08:50,  8.11s/it]

Epoch: 2831 | Training loss 2.864796906709671 | Validation loss 2.7923955619335175



 28%|████████████████████▍                                                   | 2833/10000 [6:26:46<16:08:25,  8.11s/it]

Epoch: 2832 | Training loss 2.8592454195022583 | Validation loss 2.7842327654361725



 28%|████████████████████▍                                                   | 2834/10000 [6:26:54<16:05:44,  8.09s/it]

Epoch: 2833 | Training loss 2.8695074021816254 | Validation loss 2.788662374019623



 28%|████████████████████▍                                                   | 2835/10000 [6:27:02<16:03:56,  8.07s/it]

Epoch: 2834 | Training loss 2.8698989152908325 | Validation loss 2.7940004765987396



 28%|████████████████████▍                                                   | 2836/10000 [6:27:10<16:04:54,  8.08s/it]

Epoch: 2835 | Training loss 2.8690702617168427 | Validation loss 2.793192893266678



 28%|████████████████████▍                                                   | 2837/10000 [6:27:18<16:03:14,  8.07s/it]

Epoch: 2836 | Training loss 2.867881514132023 | Validation loss 2.792188435792923



 28%|████████████████████▍                                                   | 2838/10000 [6:27:26<16:00:18,  8.05s/it]

Epoch: 2837 | Training loss 2.8668913543224335 | Validation loss 2.783989131450653



 28%|████████████████████▍                                                   | 2839/10000 [6:27:34<15:59:27,  8.04s/it]

Epoch: 2838 | Training loss 2.8655522540211678 | Validation loss 2.792157769203186



 28%|████████████████████▍                                                   | 2840/10000 [6:27:42<16:03:40,  8.08s/it]

Epoch: 2839 | Training loss 2.8679657876491547 | Validation loss 2.7919639348983765



 28%|████████████████████▍                                                   | 2841/10000 [6:27:51<16:15:06,  8.17s/it]

Epoch: 2840 | Training loss 2.863979294896126 | Validation loss 2.7901699542999268



 28%|████████████████████▍                                                   | 2842/10000 [6:27:59<16:22:12,  8.23s/it]

Epoch: 2841 | Training loss 2.8688700944185257 | Validation loss 2.7884634733200073



 28%|████████████████████▍                                                   | 2843/10000 [6:28:15<20:43:50, 10.43s/it]

Epoch: 2842 | Training loss 2.8646899834275246 | Validation loss 2.7865844070911407



 28%|████████████████████▍                                                   | 2844/10000 [6:28:29<23:02:58, 11.60s/it]

Epoch: 2843 | Training loss 2.8673364520072937 | Validation loss 2.7872881293296814



 28%|████████████████████▍                                                   | 2845/10000 [6:28:37<20:58:32, 10.55s/it]

Epoch: 2844 | Training loss 2.8637785613536835 | Validation loss 2.7900234162807465



 28%|████████████████████▍                                                   | 2846/10000 [6:28:45<19:35:15,  9.86s/it]

Epoch: 2845 | Training loss 2.864750899374485 | Validation loss 2.8003172874450684



 28%|████████████████████▍                                                   | 2847/10000 [6:28:53<18:31:09,  9.32s/it]

Epoch: 2846 | Training loss 2.866955727338791 | Validation loss 2.7890957295894623



 28%|████████████████████▌                                                   | 2848/10000 [6:29:02<17:47:23,  8.95s/it]

Epoch: 2847 | Training loss 2.8660357668995857 | Validation loss 2.7887991070747375



 28%|████████████████████▌                                                   | 2849/10000 [6:29:10<17:15:39,  8.69s/it]

Epoch: 2848 | Training loss 2.859638176858425 | Validation loss 2.7901974618434906



 28%|████████████████████▌                                                   | 2850/10000 [6:29:18<16:56:05,  8.53s/it]

Epoch: 2849 | Training loss 2.869351252913475 | Validation loss 2.7971943020820618



 29%|████████████████████▌                                                   | 2851/10000 [6:29:26<16:41:57,  8.41s/it]

Epoch: 2850 | Training loss 2.865961365401745 | Validation loss 2.790646582841873



 29%|████████████████████▌                                                   | 2852/10000 [6:29:34<16:31:15,  8.32s/it]

Epoch: 2851 | Training loss 2.869414061307907 | Validation loss 2.7983565032482147



 29%|████████████████████▌                                                   | 2853/10000 [6:29:42<16:22:11,  8.25s/it]

Epoch: 2852 | Training loss 2.865385837852955 | Validation loss 2.7906011044979095



 29%|████████████████████▌                                                   | 2854/10000 [6:29:50<16:16:16,  8.20s/it]

Epoch: 2853 | Training loss 2.8631832897663116 | Validation loss 2.790022075176239



 29%|████████████████████▌                                                   | 2855/10000 [6:29:58<16:11:03,  8.15s/it]

Epoch: 2854 | Training loss 2.8698252514004707 | Validation loss 2.7887949645519257



 29%|████████████████████▌                                                   | 2856/10000 [6:30:06<16:11:13,  8.16s/it]

Epoch: 2855 | Training loss 2.8673104867339134 | Validation loss 2.7924426198005676



 29%|████████████████████▌                                                   | 2857/10000 [6:30:15<16:10:17,  8.15s/it]

Epoch: 2856 | Training loss 2.865190513432026 | Validation loss 2.7961166501045227



 29%|████████████████████▌                                                   | 2858/10000 [6:30:23<16:07:15,  8.13s/it]

Epoch: 2857 | Training loss 2.8584011122584343 | Validation loss 2.7882650196552277



 29%|████████████████████▌                                                   | 2859/10000 [6:30:31<16:09:36,  8.15s/it]

Epoch: 2858 | Training loss 2.8632412180304527 | Validation loss 2.7889180779457092



 29%|████████████████████▌                                                   | 2860/10000 [6:30:39<16:09:29,  8.15s/it]

Epoch: 2859 | Training loss 2.869535230100155 | Validation loss 2.789377748966217



 29%|████████████████████▌                                                   | 2861/10000 [6:30:47<16:06:02,  8.12s/it]

Epoch: 2860 | Training loss 2.8630367144942284 | Validation loss 2.790812909603119



 29%|████████████████████▌                                                   | 2862/10000 [6:30:55<16:03:00,  8.09s/it]

Epoch: 2861 | Training loss 2.8662508204579353 | Validation loss 2.7887279987335205



 29%|████████████████████▌                                                   | 2863/10000 [6:31:03<16:04:17,  8.11s/it]

Epoch: 2862 | Training loss 2.8709949776530266 | Validation loss 2.793775051832199



 29%|████████████████████▌                                                   | 2864/10000 [6:31:11<16:05:04,  8.11s/it]

Epoch: 2863 | Training loss 2.859472341835499 | Validation loss 2.7902242839336395



 29%|████████████████████▋                                                   | 2865/10000 [6:31:19<16:03:57,  8.11s/it]

Epoch: 2864 | Training loss 2.8583366572856903 | Validation loss 2.790500819683075



 29%|████████████████████▋                                                   | 2866/10000 [6:31:28<16:08:49,  8.15s/it]

Epoch: 2865 | Training loss 2.868861958384514 | Validation loss 2.790592610836029



 29%|████████████████████▋                                                   | 2867/10000 [6:31:36<16:06:01,  8.13s/it]

Epoch: 2866 | Training loss 2.8663922399282455 | Validation loss 2.79342982172966



 29%|████████████████████▋                                                   | 2868/10000 [6:31:44<16:05:41,  8.12s/it]

Epoch: 2867 | Training loss 2.861099287867546 | Validation loss 2.7935268878936768



 29%|████████████████████▋                                                   | 2869/10000 [6:31:52<16:09:53,  8.16s/it]

Epoch: 2868 | Training loss 2.8668413758277893 | Validation loss 2.791572481393814



 29%|████████████████████▋                                                   | 2870/10000 [6:32:00<16:12:05,  8.18s/it]

Epoch: 2869 | Training loss 2.8666680678725243 | Validation loss 2.7931685745716095



 29%|████████████████████▋                                                   | 2871/10000 [6:32:09<16:11:35,  8.18s/it]

Epoch: 2870 | Training loss 2.865503638982773 | Validation loss 2.7908813059329987



 29%|████████████████████▋                                                   | 2872/10000 [6:32:17<16:05:56,  8.13s/it]

Epoch: 2871 | Training loss 2.862660773098469 | Validation loss 2.7907892167568207



 29%|████████████████████▋                                                   | 2873/10000 [6:32:25<16:01:58,  8.10s/it]

Epoch: 2872 | Training loss 2.8614473417401314 | Validation loss 2.7890787720680237



 29%|████████████████████▋                                                   | 2874/10000 [6:32:33<15:57:26,  8.06s/it]

Epoch: 2873 | Training loss 2.8633313700556755 | Validation loss 2.788162887096405



 29%|████████████████████▋                                                   | 2875/10000 [6:32:41<15:56:29,  8.05s/it]

Epoch: 2874 | Training loss 2.8708069398999214 | Validation loss 2.785958409309387



 29%|████████████████████▋                                                   | 2876/10000 [6:32:49<15:58:41,  8.07s/it]

Epoch: 2875 | Training loss 2.872112311422825 | Validation loss 2.7901995182037354



 29%|████████████████████▋                                                   | 2877/10000 [6:32:57<16:00:41,  8.09s/it]

Epoch: 2876 | Training loss 2.8720662593841553 | Validation loss 2.791961133480072



 29%|████████████████████▋                                                   | 2878/10000 [6:33:05<16:02:34,  8.11s/it]

Epoch: 2877 | Training loss 2.8614825159311295 | Validation loss 2.7872570157051086



 29%|████████████████████▋                                                   | 2879/10000 [6:33:13<16:04:31,  8.13s/it]

Epoch: 2878 | Training loss 2.867645740509033 | Validation loss 2.792230784893036



 29%|████████████████████▋                                                   | 2880/10000 [6:33:21<16:04:41,  8.13s/it]

Epoch: 2879 | Training loss 2.8678333684802055 | Validation loss 2.7896455824375153



 29%|████████████████████▋                                                   | 2881/10000 [6:33:29<16:04:40,  8.13s/it]

Epoch: 2880 | Training loss 2.8660827055573463 | Validation loss 2.7887129187583923



 29%|████████████████████▊                                                   | 2882/10000 [6:33:38<16:03:33,  8.12s/it]

Epoch: 2881 | Training loss 2.866677761077881 | Validation loss 2.7880889177322388



 29%|████████████████████▊                                                   | 2883/10000 [6:33:46<16:01:34,  8.11s/it]

Epoch: 2882 | Training loss 2.862994432449341 | Validation loss 2.785566031932831



 29%|████████████████████▊                                                   | 2884/10000 [6:33:54<16:02:52,  8.12s/it]

Epoch: 2883 | Training loss 2.858063891530037 | Validation loss 2.7891623973846436



 29%|████████████████████▊                                                   | 2885/10000 [6:34:02<16:01:36,  8.11s/it]

Epoch: 2884 | Training loss 2.8647035285830498 | Validation loss 2.7892321348190308



 29%|████████████████████▊                                                   | 2886/10000 [6:34:10<16:00:55,  8.10s/it]

Epoch: 2885 | Training loss 2.8688787072896957 | Validation loss 2.7900830507278442



 29%|████████████████████▊                                                   | 2887/10000 [6:34:18<15:56:12,  8.07s/it]

Epoch: 2886 | Training loss 2.8698877096176147 | Validation loss 2.795391470193863



 29%|████████████████████▊                                                   | 2888/10000 [6:34:26<15:57:22,  8.08s/it]

Epoch: 2887 | Training loss 2.860615275800228 | Validation loss 2.7967871129512787



 29%|████████████████████▊                                                   | 2889/10000 [6:34:34<15:54:45,  8.06s/it]

Epoch: 2888 | Training loss 2.8677968233823776 | Validation loss 2.792237550020218



 29%|████████████████████▊                                                   | 2890/10000 [6:34:42<15:56:20,  8.07s/it]

Epoch: 2889 | Training loss 2.871873877942562 | Validation loss 2.801577150821686



 29%|████████████████████▊                                                   | 2891/10000 [6:34:50<15:57:51,  8.08s/it]

Epoch: 2890 | Training loss 2.8684965148568153 | Validation loss 2.78991162776947



 29%|████████████████████▊                                                   | 2892/10000 [6:34:58<15:57:49,  8.09s/it]

Epoch: 2891 | Training loss 2.864847280085087 | Validation loss 2.7939863801002502



 29%|████████████████████▊                                                   | 2893/10000 [6:35:06<16:01:05,  8.11s/it]

Epoch: 2892 | Training loss 2.868263877928257 | Validation loss 2.7903246879577637



 29%|████████████████████▊                                                   | 2894/10000 [6:35:15<16:01:06,  8.12s/it]

Epoch: 2893 | Training loss 2.8669665828347206 | Validation loss 2.78914937376976



 29%|████████████████████▊                                                   | 2895/10000 [6:35:23<16:00:01,  8.11s/it]

Epoch: 2894 | Training loss 2.869059383869171 | Validation loss 2.790675699710846



 29%|████████████████████▊                                                   | 2896/10000 [6:35:31<15:59:03,  8.10s/it]

Epoch: 2895 | Training loss 2.869272857904434 | Validation loss 2.789831906557083



 29%|████████████████████▊                                                   | 2897/10000 [6:35:39<15:59:01,  8.10s/it]

Epoch: 2896 | Training loss 2.8694103956222534 | Validation loss 2.790328234434128



 29%|████████████████████▊                                                   | 2898/10000 [6:35:47<15:57:47,  8.09s/it]

Epoch: 2897 | Training loss 2.8675666898489 | Validation loss 2.7896879613399506



 29%|████████████████████▊                                                   | 2899/10000 [6:35:55<15:58:36,  8.10s/it]

Epoch: 2898 | Training loss 2.862600952386856 | Validation loss 2.7904452085494995



 29%|████████████████████▉                                                   | 2900/10000 [6:36:03<15:58:28,  8.10s/it]

Epoch: 2899 | Training loss 2.8637414649128914 | Validation loss 2.7877455949783325



 29%|████████████████████▉                                                   | 2901/10000 [6:36:11<15:57:19,  8.09s/it]

Epoch: 2900 | Training loss 2.867822729051113 | Validation loss 2.787317931652069



 29%|████████████████████▉                                                   | 2902/10000 [6:36:19<15:59:14,  8.11s/it]

Epoch: 2901 | Training loss 2.863707296550274 | Validation loss 2.7912335991859436



 29%|████████████████████▉                                                   | 2903/10000 [6:36:27<15:57:44,  8.10s/it]

Epoch: 2902 | Training loss 2.8672766387462616 | Validation loss 2.795813888311386



 29%|████████████████████▉                                                   | 2904/10000 [6:36:36<16:01:12,  8.13s/it]

Epoch: 2903 | Training loss 2.8665097504854202 | Validation loss 2.7927319705486298



 29%|████████████████████▉                                                   | 2905/10000 [6:36:44<15:58:02,  8.10s/it]

Epoch: 2904 | Training loss 2.8686533495783806 | Validation loss 2.7879964113235474



 29%|████████████████████▉                                                   | 2906/10000 [6:36:52<15:58:25,  8.11s/it]

Epoch: 2905 | Training loss 2.8634623885154724 | Validation loss 2.7931593358516693



 29%|████████████████████▉                                                   | 2907/10000 [6:37:00<15:58:46,  8.11s/it]

Epoch: 2906 | Training loss 2.862494222819805 | Validation loss 2.7872173488140106



 29%|████████████████████▉                                                   | 2908/10000 [6:37:08<15:59:23,  8.12s/it]

Epoch: 2907 | Training loss 2.8631772473454475 | Validation loss 2.7918564081192017



 29%|████████████████████▉                                                   | 2909/10000 [6:37:16<15:58:43,  8.11s/it]

Epoch: 2908 | Training loss 2.8668191209435463 | Validation loss 2.7889342308044434



 29%|████████████████████▉                                                   | 2910/10000 [6:37:24<15:58:47,  8.11s/it]

Epoch: 2909 | Training loss 2.8602716103196144 | Validation loss 2.7941397726535797



 29%|████████████████████▉                                                   | 2911/10000 [6:37:32<15:57:04,  8.10s/it]

Epoch: 2910 | Training loss 2.8661003485322 | Validation loss 2.7941981852054596



 29%|████████████████████▉                                                   | 2912/10000 [6:37:40<15:56:56,  8.10s/it]

Epoch: 2911 | Training loss 2.868223950266838 | Validation loss 2.7964221835136414



 29%|████████████████████▉                                                   | 2913/10000 [6:37:49<15:55:41,  8.09s/it]

Epoch: 2912 | Training loss 2.869821421802044 | Validation loss 2.786727786064148



 29%|████████████████████▉                                                   | 2914/10000 [6:37:57<15:58:08,  8.11s/it]

Epoch: 2913 | Training loss 2.8638218343257904 | Validation loss 2.792549639940262



 29%|████████████████████▉                                                   | 2915/10000 [6:38:05<15:56:32,  8.10s/it]

Epoch: 2914 | Training loss 2.86640964448452 | Validation loss 2.791000336408615



 29%|████████████████████▉                                                   | 2916/10000 [6:38:13<15:57:34,  8.11s/it]

Epoch: 2915 | Training loss 2.858147218823433 | Validation loss 2.7848668098449707



 29%|█████████████████████                                                   | 2917/10000 [6:38:21<15:58:19,  8.12s/it]

Epoch: 2916 | Training loss 2.8703977689146996 | Validation loss 2.790914088487625



 29%|█████████████████████                                                   | 2918/10000 [6:38:29<16:00:25,  8.14s/it]

Epoch: 2917 | Training loss 2.866019085049629 | Validation loss 2.7932064831256866



 29%|█████████████████████                                                   | 2919/10000 [6:38:37<16:00:45,  8.14s/it]

Epoch: 2918 | Training loss 2.863875649869442 | Validation loss 2.788882225751877



 29%|█████████████████████                                                   | 2920/10000 [6:38:45<15:58:41,  8.12s/it]

Epoch: 2919 | Training loss 2.8666467666625977 | Validation loss 2.7985923886299133



 29%|█████████████████████                                                   | 2921/10000 [6:38:53<15:53:57,  8.09s/it]

Epoch: 2920 | Training loss 2.8714524880051613 | Validation loss 2.7901872396469116



 29%|█████████████████████                                                   | 2922/10000 [6:39:02<15:55:58,  8.10s/it]

Epoch: 2921 | Training loss 2.8668821156024933 | Validation loss 2.790051579475403



 29%|█████████████████████                                                   | 2923/10000 [6:39:10<15:53:35,  8.08s/it]

Epoch: 2922 | Training loss 2.86531949788332 | Validation loss 2.7919357120990753



 29%|█████████████████████                                                   | 2924/10000 [6:39:18<15:55:44,  8.10s/it]

Epoch: 2923 | Training loss 2.858902543783188 | Validation loss 2.800498604774475



 29%|█████████████████████                                                   | 2925/10000 [6:39:26<15:53:44,  8.09s/it]

Epoch: 2924 | Training loss 2.86565949767828 | Validation loss 2.795697510242462



 29%|█████████████████████                                                   | 2926/10000 [6:39:34<15:56:30,  8.11s/it]

Epoch: 2925 | Training loss 2.8640274479985237 | Validation loss 2.791592538356781



 29%|█████████████████████                                                   | 2927/10000 [6:39:42<15:53:43,  8.09s/it]

Epoch: 2926 | Training loss 2.8676121681928635 | Validation loss 2.7924872040748596



 29%|█████████████████████                                                   | 2928/10000 [6:39:50<15:55:01,  8.10s/it]

Epoch: 2927 | Training loss 2.8673003166913986 | Validation loss 2.7943006455898285



 29%|█████████████████████                                                   | 2929/10000 [6:39:58<15:52:20,  8.08s/it]

Epoch: 2928 | Training loss 2.862578362226486 | Validation loss 2.791215419769287



 29%|█████████████████████                                                   | 2930/10000 [6:40:06<15:53:38,  8.09s/it]

Epoch: 2929 | Training loss 2.865150287747383 | Validation loss 2.7936238050460815



 29%|█████████████████████                                                   | 2931/10000 [6:40:14<15:53:15,  8.09s/it]

Epoch: 2930 | Training loss 2.8646797090768814 | Validation loss 2.791845202445984



 29%|█████████████████████                                                   | 2932/10000 [6:40:23<15:54:40,  8.10s/it]

Epoch: 2931 | Training loss 2.8657205402851105 | Validation loss 2.7946999967098236



 29%|█████████████████████                                                   | 2933/10000 [6:40:31<15:52:56,  8.09s/it]

Epoch: 2932 | Training loss 2.868442729115486 | Validation loss 2.7900613248348236



 29%|█████████████████████                                                   | 2934/10000 [6:40:39<15:52:09,  8.09s/it]

Epoch: 2933 | Training loss 2.865323893725872 | Validation loss 2.798023045063019



 29%|█████████████████████▏                                                  | 2935/10000 [6:40:47<15:54:25,  8.11s/it]

Epoch: 2934 | Training loss 2.866641588509083 | Validation loss 2.7948199808597565



 29%|█████████████████████▏                                                  | 2936/10000 [6:40:55<15:54:45,  8.11s/it]

Epoch: 2935 | Training loss 2.870384432375431 | Validation loss 2.794868439435959



 29%|█████████████████████▏                                                  | 2937/10000 [6:41:03<15:56:04,  8.12s/it]

Epoch: 2936 | Training loss 2.870126001536846 | Validation loss 2.7888329923152924



 29%|█████████████████████▏                                                  | 2938/10000 [6:41:11<15:56:18,  8.12s/it]

Epoch: 2937 | Training loss 2.862975999712944 | Validation loss 2.783355414867401



 29%|█████████████████████▏                                                  | 2939/10000 [6:41:19<15:55:51,  8.12s/it]

Epoch: 2938 | Training loss 2.8669203519821167 | Validation loss 2.789980560541153



 29%|█████████████████████▏                                                  | 2940/10000 [6:41:27<15:54:32,  8.11s/it]

Epoch: 2939 | Training loss 2.8653702586889267 | Validation loss 2.794110119342804



 29%|█████████████████████▏                                                  | 2941/10000 [6:41:36<15:53:28,  8.10s/it]

Epoch: 2940 | Training loss 2.8674315735697746 | Validation loss 2.798816442489624



 29%|█████████████████████▏                                                  | 2942/10000 [6:41:44<15:54:57,  8.12s/it]

Epoch: 2941 | Training loss 2.8631010204553604 | Validation loss 2.794252783060074



 29%|█████████████████████▏                                                  | 2943/10000 [6:41:52<15:57:34,  8.14s/it]

Epoch: 2942 | Training loss 2.8672799170017242 | Validation loss 2.788321763277054



 29%|█████████████████████▏                                                  | 2944/10000 [6:42:00<15:57:09,  8.14s/it]

Epoch: 2943 | Training loss 2.869414232671261 | Validation loss 2.7917909026145935



 29%|█████████████████████▏                                                  | 2945/10000 [6:42:08<15:56:13,  8.13s/it]

Epoch: 2944 | Training loss 2.8671433180570602 | Validation loss 2.7949667870998383



 29%|█████████████████████▏                                                  | 2946/10000 [6:42:16<15:54:30,  8.12s/it]

Epoch: 2945 | Training loss 2.867650620639324 | Validation loss 2.7896887362003326



 29%|█████████████████████▏                                                  | 2947/10000 [6:42:24<15:50:28,  8.09s/it]

Epoch: 2946 | Training loss 2.8670936301350594 | Validation loss 2.7854163348674774



 29%|█████████████████████▏                                                  | 2948/10000 [6:42:32<15:50:21,  8.09s/it]

Epoch: 2947 | Training loss 2.8659714683890343 | Validation loss 2.7957901060581207



 29%|█████████████████████▏                                                  | 2949/10000 [6:42:40<15:47:26,  8.06s/it]

Epoch: 2948 | Training loss 2.867612898349762 | Validation loss 2.79100438952446



 30%|█████████████████████▏                                                  | 2950/10000 [6:42:48<15:48:39,  8.07s/it]

Epoch: 2949 | Training loss 2.8622006624937057 | Validation loss 2.7865907549858093



 30%|█████████████████████▏                                                  | 2951/10000 [6:42:57<15:52:12,  8.11s/it]

Epoch: 2950 | Training loss 2.8760118260979652 | Validation loss 2.791084051132202



 30%|█████████████████████▎                                                  | 2952/10000 [6:43:05<15:53:46,  8.12s/it]

Epoch: 2951 | Training loss 2.8633075058460236 | Validation loss 2.788590908050537



 30%|█████████████████████▎                                                  | 2953/10000 [6:43:13<15:51:49,  8.10s/it]

Epoch: 2952 | Training loss 2.8665134608745575 | Validation loss 2.789639115333557



 30%|█████████████████████▎                                                  | 2954/10000 [6:43:21<15:51:42,  8.10s/it]

Epoch: 2953 | Training loss 2.8681834265589714 | Validation loss 2.789032995700836



 30%|█████████████████████▎                                                  | 2955/10000 [6:43:29<15:54:47,  8.13s/it]

Epoch: 2954 | Training loss 2.866086430847645 | Validation loss 2.7884930670261383



 30%|█████████████████████▎                                                  | 2956/10000 [6:43:37<15:51:56,  8.11s/it]

Epoch: 2955 | Training loss 2.865349255502224 | Validation loss 2.79007750749588



 30%|█████████████████████▎                                                  | 2957/10000 [6:43:45<15:54:48,  8.13s/it]

Epoch: 2956 | Training loss 2.8646392226219177 | Validation loss 2.8067034780979156



 30%|█████████████████████▎                                                  | 2958/10000 [6:43:54<15:55:11,  8.14s/it]

Epoch: 2957 | Training loss 2.8710603937506676 | Validation loss 2.7890550196170807



 30%|█████████████████████▎                                                  | 2959/10000 [6:44:02<15:55:22,  8.14s/it]

Epoch: 2958 | Training loss 2.8647163435816765 | Validation loss 2.7893235981464386



 30%|█████████████████████▎                                                  | 2960/10000 [6:44:10<15:52:24,  8.12s/it]

Epoch: 2959 | Training loss 2.866630844771862 | Validation loss 2.794877052307129



 30%|█████████████████████▎                                                  | 2961/10000 [6:44:18<15:50:07,  8.10s/it]

Epoch: 2960 | Training loss 2.8682687282562256 | Validation loss 2.79243603348732



 30%|█████████████████████▎                                                  | 2962/10000 [6:44:26<15:48:21,  8.08s/it]

Epoch: 2961 | Training loss 2.8682354167103767 | Validation loss 2.792327642440796



 30%|█████████████████████▎                                                  | 2963/10000 [6:44:34<15:48:52,  8.09s/it]

Epoch: 2962 | Training loss 2.8674612045288086 | Validation loss 2.788384258747101



 30%|█████████████████████▎                                                  | 2964/10000 [6:44:42<15:50:06,  8.10s/it]

Epoch: 2963 | Training loss 2.860568977892399 | Validation loss 2.7946276664733887



 30%|█████████████████████▎                                                  | 2965/10000 [6:44:50<15:48:17,  8.09s/it]

Epoch: 2964 | Training loss 2.8655019849538803 | Validation loss 2.7872432470321655



 30%|█████████████████████▎                                                  | 2966/10000 [6:44:58<15:46:10,  8.07s/it]

Epoch: 2965 | Training loss 2.8595933690667152 | Validation loss 2.7864579558372498



 30%|█████████████████████▎                                                  | 2967/10000 [6:45:06<15:46:03,  8.07s/it]

Epoch: 2966 | Training loss 2.8680165335536003 | Validation loss 2.7889456748962402



 30%|█████████████████████▎                                                  | 2968/10000 [6:45:14<15:46:57,  8.08s/it]

Epoch: 2967 | Training loss 2.868708223104477 | Validation loss 2.7892058193683624



 30%|█████████████████████▍                                                  | 2969/10000 [6:45:22<15:48:03,  8.09s/it]

Epoch: 2968 | Training loss 2.8662515953183174 | Validation loss 2.7907460927963257



 30%|█████████████████████▍                                                  | 2970/10000 [6:45:30<15:46:43,  8.08s/it]

Epoch: 2969 | Training loss 2.8663319125771523 | Validation loss 2.7941051721572876



 30%|█████████████████████▍                                                  | 2971/10000 [6:45:39<15:48:25,  8.10s/it]

Epoch: 2970 | Training loss 2.871795803308487 | Validation loss 2.788323163986206



 30%|█████████████████████▍                                                  | 2972/10000 [6:45:47<15:47:20,  8.09s/it]

Epoch: 2971 | Training loss 2.8698790669441223 | Validation loss 2.7905884385108948



 30%|█████████████████████▍                                                  | 2973/10000 [6:45:55<15:45:31,  8.07s/it]

Epoch: 2972 | Training loss 2.8593194782733917 | Validation loss 2.7856322824954987



 30%|█████████████████████▍                                                  | 2974/10000 [6:46:03<15:45:49,  8.08s/it]

Epoch: 2973 | Training loss 2.8681942373514175 | Validation loss 2.792629688978195



 30%|█████████████████████▍                                                  | 2975/10000 [6:46:11<15:46:35,  8.08s/it]

Epoch: 2974 | Training loss 2.8593894466757774 | Validation loss 2.7855736017227173



 30%|█████████████████████▍                                                  | 2976/10000 [6:46:19<15:43:41,  8.06s/it]

Epoch: 2975 | Training loss 2.871933728456497 | Validation loss 2.789120316505432



 30%|█████████████████████▍                                                  | 2977/10000 [6:46:27<15:44:24,  8.07s/it]

Epoch: 2976 | Training loss 2.8676480501890182 | Validation loss 2.789991706609726



 30%|█████████████████████▍                                                  | 2978/10000 [6:46:35<15:44:56,  8.07s/it]

Epoch: 2977 | Training loss 2.865915007889271 | Validation loss 2.7888077199459076



 30%|█████████████████████▍                                                  | 2979/10000 [6:46:43<15:47:19,  8.10s/it]

Epoch: 2978 | Training loss 2.8644722923636436 | Validation loss 2.7913024723529816



 30%|█████████████████████▍                                                  | 2980/10000 [6:46:51<15:46:22,  8.09s/it]

Epoch: 2979 | Training loss 2.8596726655960083 | Validation loss 2.7873271703720093



 30%|█████████████████████▍                                                  | 2981/10000 [6:46:59<15:46:45,  8.09s/it]

Epoch: 2980 | Training loss 2.8697759583592415 | Validation loss 2.787780314683914



 30%|█████████████████████▍                                                  | 2982/10000 [6:47:08<15:47:26,  8.10s/it]

Epoch: 2981 | Training loss 2.864486575126648 | Validation loss 2.790713518857956



 30%|█████████████████████▍                                                  | 2983/10000 [6:47:16<15:51:44,  8.14s/it]

Epoch: 2982 | Training loss 2.8644205778837204 | Validation loss 2.789484441280365



 30%|█████████████████████▍                                                  | 2984/10000 [6:47:24<15:51:22,  8.14s/it]

Epoch: 2983 | Training loss 2.861857555806637 | Validation loss 2.787489116191864



 30%|█████████████████████▍                                                  | 2985/10000 [6:47:32<15:46:10,  8.09s/it]

Epoch: 2984 | Training loss 2.866335816681385 | Validation loss 2.7910152673721313



 30%|█████████████████████▍                                                  | 2986/10000 [6:47:40<15:45:53,  8.09s/it]

Epoch: 2985 | Training loss 2.8688723370432854 | Validation loss 2.789917916059494



 30%|█████████████████████▌                                                  | 2987/10000 [6:47:48<15:43:20,  8.07s/it]

Epoch: 2986 | Training loss 2.865686573088169 | Validation loss 2.789723664522171



 30%|█████████████████████▌                                                  | 2988/10000 [6:47:56<15:45:23,  8.09s/it]

Epoch: 2987 | Training loss 2.877287745475769 | Validation loss 2.7885908484458923



 30%|█████████████████████▌                                                  | 2989/10000 [6:48:04<15:52:21,  8.15s/it]

Epoch: 2988 | Training loss 2.8685474917292595 | Validation loss 2.7932312190532684



 30%|█████████████████████▌                                                  | 2990/10000 [6:48:13<15:54:41,  8.17s/it]

Epoch: 2989 | Training loss 2.8642038628458977 | Validation loss 2.7921043932437897



 30%|█████████████████████▌                                                  | 2991/10000 [6:48:21<15:54:59,  8.18s/it]

Epoch: 2990 | Training loss 2.8631696328520775 | Validation loss 2.7922772467136383



 30%|█████████████████████▌                                                  | 2992/10000 [6:48:29<15:52:54,  8.16s/it]

Epoch: 2991 | Training loss 2.8626203909516335 | Validation loss 2.7904863953590393



 30%|█████████████████████▌                                                  | 2993/10000 [6:48:37<15:52:29,  8.16s/it]

Epoch: 2992 | Training loss 2.864216774702072 | Validation loss 2.794455885887146



 30%|█████████████████████▌                                                  | 2994/10000 [6:48:45<15:54:14,  8.17s/it]

Epoch: 2993 | Training loss 2.8629332333803177 | Validation loss 2.79210364818573



 30%|█████████████████████▌                                                  | 2995/10000 [6:48:53<15:52:43,  8.16s/it]

Epoch: 2994 | Training loss 2.8716816678643227 | Validation loss 2.793733090162277



 30%|█████████████████████▌                                                  | 2996/10000 [6:49:02<15:51:07,  8.15s/it]

Epoch: 2995 | Training loss 2.8657505437731743 | Validation loss 2.7897589206695557



 30%|█████████████████████▌                                                  | 2997/10000 [6:49:10<15:48:26,  8.13s/it]

Epoch: 2996 | Training loss 2.860773041844368 | Validation loss 2.791194826364517



 30%|█████████████████████▌                                                  | 2998/10000 [6:49:18<15:53:27,  8.17s/it]

Epoch: 2997 | Training loss 2.8662725538015366 | Validation loss 2.7875135838985443



 30%|█████████████████████▌                                                  | 2999/10000 [6:49:26<15:50:55,  8.15s/it]

Epoch: 2998 | Training loss 2.8662327751517296 | Validation loss 2.7856182754039764



 30%|█████████████████████▌                                                  | 3000/10000 [6:49:34<15:46:19,  8.11s/it]

Epoch: 2999 | Training loss 2.868835859000683 | Validation loss 2.788062244653702



 30%|█████████████████████▌                                                  | 3001/10000 [6:49:42<15:45:20,  8.10s/it]

Epoch: 3000 | Training loss 2.863791801035404 | Validation loss 2.7868294417858124



 30%|█████████████████████▌                                                  | 3002/10000 [6:49:50<15:44:35,  8.10s/it]

Epoch: 3001 | Training loss 2.866760604083538 | Validation loss 2.7947577238082886



 30%|█████████████████████▌                                                  | 3003/10000 [6:49:58<15:43:59,  8.09s/it]

Epoch: 3002 | Training loss 2.8702773973345757 | Validation loss 2.797039747238159



 30%|█████████████████████▋                                                  | 3004/10000 [6:50:06<15:42:23,  8.08s/it]

Epoch: 3003 | Training loss 2.864621214568615 | Validation loss 2.796192914247513



 30%|█████████████████████▋                                                  | 3005/10000 [6:50:14<15:40:43,  8.07s/it]

Epoch: 3004 | Training loss 2.8608389422297478 | Validation loss 2.8030365705490112



 30%|█████████████████████▋                                                  | 3006/10000 [6:50:23<15:43:20,  8.09s/it]

Epoch: 3005 | Training loss 2.866543598473072 | Validation loss 2.7895600497722626



 30%|█████████████████████▋                                                  | 3007/10000 [6:50:31<15:43:04,  8.09s/it]

Epoch: 3006 | Training loss 2.8707874715328217 | Validation loss 2.795307219028473



 30%|█████████████████████▋                                                  | 3008/10000 [6:50:39<15:45:58,  8.12s/it]

Epoch: 3007 | Training loss 2.8729345574975014 | Validation loss 2.791075348854065



 30%|█████████████████████▋                                                  | 3009/10000 [6:50:47<15:43:42,  8.10s/it]

Epoch: 3008 | Training loss 2.8639728128910065 | Validation loss 2.798758566379547



 30%|█████████████████████▋                                                  | 3010/10000 [6:50:55<15:42:37,  8.09s/it]

Epoch: 3009 | Training loss 2.862389288842678 | Validation loss 2.806070476770401



 30%|█████████████████████▋                                                  | 3011/10000 [6:51:03<15:42:19,  8.09s/it]

Epoch: 3010 | Training loss 2.8684895411133766 | Validation loss 2.7909543216228485



 30%|█████████████████████▋                                                  | 3012/10000 [6:51:11<15:38:10,  8.06s/it]

Epoch: 3011 | Training loss 2.857325367629528 | Validation loss 2.78999787569046



 30%|█████████████████████▋                                                  | 3013/10000 [6:51:19<15:39:13,  8.07s/it]

Epoch: 3012 | Training loss 2.8709787651896477 | Validation loss 2.796335369348526



 30%|█████████████████████▋                                                  | 3014/10000 [6:51:27<15:41:21,  8.09s/it]

Epoch: 3013 | Training loss 2.864552579820156 | Validation loss 2.7972421050071716



 30%|█████████████████████▋                                                  | 3015/10000 [6:51:35<15:41:14,  8.09s/it]

Epoch: 3014 | Training loss 2.875031791627407 | Validation loss 2.794868588447571



 30%|█████████████████████▋                                                  | 3016/10000 [6:51:43<15:41:24,  8.09s/it]

Epoch: 3015 | Training loss 2.864999607205391 | Validation loss 2.7909576296806335



 30%|█████████████████████▋                                                  | 3017/10000 [6:51:51<15:38:59,  8.07s/it]

Epoch: 3016 | Training loss 2.8680640310049057 | Validation loss 2.7985019087791443



 30%|█████████████████████▋                                                  | 3018/10000 [6:51:59<15:37:42,  8.06s/it]

Epoch: 3017 | Training loss 2.865872398018837 | Validation loss 2.7893517315387726



 30%|█████████████████████▋                                                  | 3019/10000 [6:52:08<15:40:17,  8.08s/it]

Epoch: 3018 | Training loss 2.862465664744377 | Validation loss 2.789738565683365



 30%|█████████████████████▋                                                  | 3020/10000 [6:52:16<15:38:36,  8.07s/it]

Epoch: 3019 | Training loss 2.8646849915385246 | Validation loss 2.7934608161449432



 30%|█████████████████████▊                                                  | 3021/10000 [6:52:24<15:41:46,  8.10s/it]

Epoch: 3020 | Training loss 2.8676752224564552 | Validation loss 2.7912768125534058



 30%|█████████████████████▊                                                  | 3022/10000 [6:52:32<15:38:01,  8.07s/it]

Epoch: 3021 | Training loss 2.867310993373394 | Validation loss 2.798625946044922



 30%|█████████████████████▊                                                  | 3023/10000 [6:52:40<15:35:17,  8.04s/it]

Epoch: 3022 | Training loss 2.862826593220234 | Validation loss 2.786520630121231



 30%|█████████████████████▊                                                  | 3024/10000 [6:52:48<15:36:39,  8.06s/it]

Epoch: 3023 | Training loss 2.8666495233774185 | Validation loss 2.7909249663352966



 30%|█████████████████████▊                                                  | 3025/10000 [6:52:56<15:37:29,  8.06s/it]

Epoch: 3024 | Training loss 2.8645278066396713 | Validation loss 2.789903402328491



 30%|█████████████████████▊                                                  | 3026/10000 [6:53:04<15:39:18,  8.08s/it]

Epoch: 3025 | Training loss 2.8605599403381348 | Validation loss 2.7857426404953003



 30%|█████████████████████▊                                                  | 3027/10000 [6:53:12<15:40:31,  8.09s/it]

Epoch: 3026 | Training loss 2.864398293197155 | Validation loss 2.789131760597229



 30%|█████████████████████▊                                                  | 3028/10000 [6:53:20<15:40:02,  8.09s/it]

Epoch: 3027 | Training loss 2.863238625228405 | Validation loss 2.7874620258808136



 30%|█████████████████████▊                                                  | 3029/10000 [6:53:28<15:38:07,  8.07s/it]

Epoch: 3028 | Training loss 2.8556017205119133 | Validation loss 2.7867111265659332



 30%|█████████████████████▊                                                  | 3030/10000 [6:53:36<15:37:18,  8.07s/it]

Epoch: 3029 | Training loss 2.8669616281986237 | Validation loss 2.7847617864608765



 30%|█████████████████████▊                                                  | 3031/10000 [6:53:44<15:39:28,  8.09s/it]

Epoch: 3030 | Training loss 2.8662020415067673 | Validation loss 2.784762978553772



 30%|█████████████████████▊                                                  | 3032/10000 [6:53:53<15:39:15,  8.09s/it]

Epoch: 3031 | Training loss 2.874236397445202 | Validation loss 2.788307249546051



 30%|█████████████████████▊                                                  | 3033/10000 [6:54:01<15:45:07,  8.14s/it]

Epoch: 3032 | Training loss 2.8673573434352875 | Validation loss 2.785513609647751



 30%|█████████████████████▊                                                  | 3034/10000 [6:54:09<15:44:21,  8.13s/it]

Epoch: 3033 | Training loss 2.864107258617878 | Validation loss 2.787246733903885



 30%|█████████████████████▊                                                  | 3035/10000 [6:54:17<15:41:28,  8.11s/it]

Epoch: 3034 | Training loss 2.8650458455085754 | Validation loss 2.787662595510483



 30%|█████████████████████▊                                                  | 3036/10000 [6:54:25<15:43:10,  8.13s/it]

Epoch: 3035 | Training loss 2.861586719751358 | Validation loss 2.789994239807129



 30%|█████████████████████▊                                                  | 3037/10000 [6:54:33<15:41:37,  8.11s/it]

Epoch: 3036 | Training loss 2.8679983094334602 | Validation loss 2.7853230237960815



 30%|█████████████████████▊                                                  | 3038/10000 [6:54:41<15:39:20,  8.10s/it]

Epoch: 3037 | Training loss 2.8678635582327843 | Validation loss 2.808082729578018



 30%|█████████████████████▉                                                  | 3039/10000 [6:54:49<15:38:54,  8.09s/it]

Epoch: 3038 | Training loss 2.8717496171593666 | Validation loss 2.7939948737621307



 30%|█████████████████████▉                                                  | 3040/10000 [6:54:58<15:41:14,  8.11s/it]

Epoch: 3039 | Training loss 2.8653426095843315 | Validation loss 2.7910746037960052



 30%|█████████████████████▉                                                  | 3041/10000 [6:55:06<15:36:20,  8.07s/it]

Epoch: 3040 | Training loss 2.866027593612671 | Validation loss 2.790442109107971



 30%|█████████████████████▉                                                  | 3042/10000 [6:55:14<15:36:37,  8.08s/it]

Epoch: 3041 | Training loss 2.8647862002253532 | Validation loss 2.786400616168976



 30%|█████████████████████▉                                                  | 3043/10000 [6:55:22<15:41:08,  8.12s/it]

Epoch: 3042 | Training loss 2.86301326751709 | Validation loss 2.790261924266815



 30%|█████████████████████▉                                                  | 3044/10000 [6:55:30<15:41:34,  8.12s/it]

Epoch: 3043 | Training loss 2.8656213581562042 | Validation loss 2.7882016003131866



 30%|█████████████████████▉                                                  | 3045/10000 [6:55:38<15:42:21,  8.13s/it]

Epoch: 3044 | Training loss 2.867325223982334 | Validation loss 2.798012614250183



 30%|█████████████████████▉                                                  | 3046/10000 [6:55:46<15:41:25,  8.12s/it]

Epoch: 3045 | Training loss 2.861465446650982 | Validation loss 2.7844503223896027



 30%|█████████████████████▉                                                  | 3047/10000 [6:55:54<15:40:30,  8.12s/it]

Epoch: 3046 | Training loss 2.8641923367977142 | Validation loss 2.786319375038147



 30%|█████████████████████▉                                                  | 3048/10000 [6:56:02<15:40:37,  8.12s/it]

Epoch: 3047 | Training loss 2.8671053126454353 | Validation loss 2.7939031422138214



 30%|█████████████████████▉                                                  | 3049/10000 [6:56:11<15:44:14,  8.15s/it]

Epoch: 3048 | Training loss 2.8651042133569717 | Validation loss 2.79735204577446



 30%|█████████████████████▉                                                  | 3050/10000 [6:56:19<15:41:20,  8.13s/it]

Epoch: 3049 | Training loss 2.868998669087887 | Validation loss 2.7965577840805054



 31%|█████████████████████▉                                                  | 3051/10000 [6:56:27<15:42:33,  8.14s/it]

Epoch: 3050 | Training loss 2.8625099062919617 | Validation loss 2.793011486530304



 31%|█████████████████████▉                                                  | 3052/10000 [6:56:35<15:42:44,  8.14s/it]

Epoch: 3051 | Training loss 2.864600382745266 | Validation loss 2.786915957927704



 31%|█████████████████████▉                                                  | 3053/10000 [6:56:43<15:43:56,  8.15s/it]

Epoch: 3052 | Training loss 2.871246449649334 | Validation loss 2.790170818567276



 31%|█████████████████████▉                                                  | 3054/10000 [6:56:51<15:42:35,  8.14s/it]

Epoch: 3053 | Training loss 2.867474727332592 | Validation loss 2.7927039861679077



 31%|█████████████████████▉                                                  | 3055/10000 [6:56:59<15:38:26,  8.11s/it]

Epoch: 3054 | Training loss 2.860898770391941 | Validation loss 2.7905368506908417



 31%|██████████████████████                                                  | 3056/10000 [6:57:07<15:38:12,  8.11s/it]

Epoch: 3055 | Training loss 2.867721363902092 | Validation loss 2.793835163116455



 31%|██████████████████████                                                  | 3057/10000 [6:57:15<15:35:13,  8.08s/it]

Epoch: 3056 | Training loss 2.8683483004570007 | Validation loss 2.799064815044403



 31%|██████████████████████                                                  | 3058/10000 [6:57:24<15:34:13,  8.07s/it]

Epoch: 3057 | Training loss 2.864063896238804 | Validation loss 2.7932853400707245



 31%|██████████████████████                                                  | 3059/10000 [6:57:32<15:32:52,  8.06s/it]

Epoch: 3058 | Training loss 2.862157754600048 | Validation loss 2.788842588663101



 31%|██████████████████████                                                  | 3060/10000 [6:57:40<15:32:24,  8.06s/it]

Epoch: 3059 | Training loss 2.8666985481977463 | Validation loss 2.8015776574611664



 31%|██████████████████████                                                  | 3061/10000 [6:57:48<15:33:55,  8.08s/it]

Epoch: 3060 | Training loss 2.868180699646473 | Validation loss 2.7907831966876984



 31%|██████████████████████                                                  | 3062/10000 [6:57:56<15:37:52,  8.11s/it]

Epoch: 3061 | Training loss 2.866453357040882 | Validation loss 2.78683003783226



 31%|██████████████████████                                                  | 3063/10000 [6:58:04<15:34:40,  8.08s/it]

Epoch: 3062 | Training loss 2.867671273648739 | Validation loss 2.7929427921772003



 31%|██████████████████████                                                  | 3064/10000 [6:58:12<15:36:52,  8.10s/it]

Epoch: 3063 | Training loss 2.8657456412911415 | Validation loss 2.79450723528862



 31%|██████████████████████                                                  | 3065/10000 [6:58:20<15:39:18,  8.13s/it]

Epoch: 3064 | Training loss 2.866360627114773 | Validation loss 2.7980973422527313



 31%|██████████████████████                                                  | 3066/10000 [6:58:29<15:42:39,  8.16s/it]

Epoch: 3065 | Training loss 2.8692020773887634 | Validation loss 2.7921564280986786



 31%|██████████████████████                                                  | 3067/10000 [6:58:37<15:41:03,  8.14s/it]

Epoch: 3066 | Training loss 2.8630058839917183 | Validation loss 2.791544198989868



 31%|██████████████████████                                                  | 3068/10000 [6:58:45<15:41:08,  8.15s/it]

Epoch: 3067 | Training loss 2.8688636869192123 | Validation loss 2.7916159331798553



 31%|██████████████████████                                                  | 3069/10000 [6:58:53<15:40:01,  8.14s/it]

Epoch: 3068 | Training loss 2.864742673933506 | Validation loss 2.7921960651874542



 31%|██████████████████████                                                  | 3070/10000 [6:59:01<15:43:30,  8.17s/it]

Epoch: 3069 | Training loss 2.861171454191208 | Validation loss 2.7868713438510895



 31%|██████████████████████                                                  | 3071/10000 [6:59:09<15:43:29,  8.17s/it]

Epoch: 3070 | Training loss 2.8615211993455887 | Validation loss 2.787076771259308



 31%|██████████████████████                                                  | 3072/10000 [6:59:17<15:42:33,  8.16s/it]

Epoch: 3071 | Training loss 2.8670352548360825 | Validation loss 2.789988547563553



 31%|██████████████████████▏                                                 | 3073/10000 [6:59:26<15:38:05,  8.13s/it]

Epoch: 3072 | Training loss 2.869917005300522 | Validation loss 2.7880699038505554



 31%|██████████████████████▏                                                 | 3074/10000 [6:59:34<15:37:11,  8.12s/it]

Epoch: 3073 | Training loss 2.8686976805329323 | Validation loss 2.7943904101848602



 31%|██████████████████████▏                                                 | 3075/10000 [6:59:42<15:35:26,  8.10s/it]

Epoch: 3074 | Training loss 2.86573788523674 | Validation loss 2.787420481443405



 31%|██████████████████████▏                                                 | 3076/10000 [6:59:50<15:35:43,  8.11s/it]

Epoch: 3075 | Training loss 2.864464156329632 | Validation loss 2.7928531169891357



 31%|██████████████████████▏                                                 | 3077/10000 [6:59:58<15:32:56,  8.09s/it]

Epoch: 3076 | Training loss 2.8633429631590843 | Validation loss 2.7892834842205048



 31%|██████████████████████▏                                                 | 3078/10000 [7:00:06<15:32:15,  8.08s/it]

Epoch: 3077 | Training loss 2.8682417273521423 | Validation loss 2.791029214859009



 31%|██████████████████████▏                                                 | 3079/10000 [7:00:14<15:34:33,  8.10s/it]

Epoch: 3078 | Training loss 2.8641262501478195 | Validation loss 2.798112839460373



 31%|██████████████████████▏                                                 | 3080/10000 [7:00:22<15:36:05,  8.12s/it]

Epoch: 3079 | Training loss 2.8662664964795113 | Validation loss 2.7851923406124115



 31%|██████████████████████▏                                                 | 3081/10000 [7:00:30<15:32:12,  8.08s/it]

Epoch: 3080 | Training loss 2.864708885550499 | Validation loss 2.788438707590103



 31%|██████████████████████▏                                                 | 3082/10000 [7:00:38<15:33:46,  8.10s/it]

Epoch: 3081 | Training loss 2.863567538559437 | Validation loss 2.7892662286758423



 31%|██████████████████████▏                                                 | 3083/10000 [7:00:46<15:33:45,  8.10s/it]

Epoch: 3082 | Training loss 2.8678934425115585 | Validation loss 2.7926674485206604



 31%|██████████████████████▏                                                 | 3084/10000 [7:00:55<15:36:52,  8.13s/it]

Epoch: 3083 | Training loss 2.8658879548311234 | Validation loss 2.786570757627487



 31%|██████████████████████▏                                                 | 3085/10000 [7:01:03<15:35:48,  8.12s/it]

Epoch: 3084 | Training loss 2.8653529807925224 | Validation loss 2.7865521013736725



 31%|██████████████████████▏                                                 | 3086/10000 [7:01:11<15:32:51,  8.10s/it]

Epoch: 3085 | Training loss 2.8677989691495895 | Validation loss 2.787296861410141



 31%|██████████████████████▏                                                 | 3087/10000 [7:01:19<15:32:53,  8.10s/it]

Epoch: 3086 | Training loss 2.8646971583366394 | Validation loss 2.785249501466751



 31%|██████████████████████▏                                                 | 3088/10000 [7:01:27<15:33:38,  8.10s/it]

Epoch: 3087 | Training loss 2.8680674880743027 | Validation loss 2.7953843474388123



 31%|██████████████████████▏                                                 | 3089/10000 [7:01:35<15:29:36,  8.07s/it]

Epoch: 3088 | Training loss 2.8669192790985107 | Validation loss 2.792283982038498



 31%|██████████████████████▏                                                 | 3090/10000 [7:01:43<15:30:36,  8.08s/it]

Epoch: 3089 | Training loss 2.8665341064333916 | Validation loss 2.7919023036956787



 31%|██████████████████████▎                                                 | 3091/10000 [7:01:51<15:31:14,  8.09s/it]

Epoch: 3090 | Training loss 2.8680192828178406 | Validation loss 2.791332334280014



 31%|██████████████████████▎                                                 | 3092/10000 [7:01:59<15:33:03,  8.10s/it]

Epoch: 3091 | Training loss 2.865434393286705 | Validation loss 2.7940794825553894



 31%|██████████████████████▎                                                 | 3093/10000 [7:02:07<15:34:12,  8.12s/it]

Epoch: 3092 | Training loss 2.8700648322701454 | Validation loss 2.7984311282634735



 31%|██████████████████████▎                                                 | 3094/10000 [7:02:16<15:36:14,  8.13s/it]

Epoch: 3093 | Training loss 2.8647872135043144 | Validation loss 2.8010504841804504



 31%|██████████████████████▎                                                 | 3095/10000 [7:02:24<15:37:41,  8.15s/it]

Epoch: 3094 | Training loss 2.866705022752285 | Validation loss 2.7919837832450867



 31%|██████████████████████▎                                                 | 3096/10000 [7:02:32<15:37:36,  8.15s/it]

Epoch: 3095 | Training loss 2.8631173372268677 | Validation loss 2.7921064496040344



 31%|██████████████████████▎                                                 | 3097/10000 [7:02:40<15:37:56,  8.15s/it]

Epoch: 3096 | Training loss 2.869296468794346 | Validation loss 2.7949977219104767



 31%|██████████████████████▎                                                 | 3098/10000 [7:02:48<15:34:26,  8.12s/it]

Epoch: 3097 | Training loss 2.8573707416653633 | Validation loss 2.7904535233974457



 31%|██████████████████████▎                                                 | 3099/10000 [7:02:56<15:33:07,  8.11s/it]

Epoch: 3098 | Training loss 2.8664084002375603 | Validation loss 2.7958561182022095



 31%|██████████████████████▎                                                 | 3100/10000 [7:03:04<15:34:46,  8.13s/it]

Epoch: 3099 | Training loss 2.8697984516620636 | Validation loss 2.7894093096256256



 31%|██████████████████████▎                                                 | 3101/10000 [7:03:13<15:36:28,  8.14s/it]

Epoch: 3100 | Training loss 2.868462882936001 | Validation loss 2.7860671877861023



 31%|██████████████████████▎                                                 | 3102/10000 [7:03:21<15:34:49,  8.13s/it]

Epoch: 3101 | Training loss 2.8696377277374268 | Validation loss 2.7961893379688263



 31%|██████████████████████▎                                                 | 3103/10000 [7:03:29<15:37:29,  8.16s/it]

Epoch: 3102 | Training loss 2.8717766627669334 | Validation loss 2.793634057044983



 31%|██████████████████████▎                                                 | 3104/10000 [7:03:37<15:33:20,  8.12s/it]

Epoch: 3103 | Training loss 2.8627916052937508 | Validation loss 2.7900739312171936



 31%|██████████████████████▎                                                 | 3105/10000 [7:03:45<15:31:18,  8.10s/it]

Epoch: 3104 | Training loss 2.8633972108364105 | Validation loss 2.7892938554286957



 31%|██████████████████████▎                                                 | 3106/10000 [7:03:53<15:31:35,  8.11s/it]

Epoch: 3105 | Training loss 2.857788659632206 | Validation loss 2.787530928850174



 31%|██████████████████████▎                                                 | 3107/10000 [7:04:01<15:34:01,  8.13s/it]

Epoch: 3106 | Training loss 2.86871088296175 | Validation loss 2.788849800825119



 31%|██████████████████████▍                                                 | 3108/10000 [7:04:09<15:31:50,  8.11s/it]

Epoch: 3107 | Training loss 2.8674252033233643 | Validation loss 2.798216789960861



 31%|██████████████████████▍                                                 | 3109/10000 [7:04:18<15:32:59,  8.12s/it]

Epoch: 3108 | Training loss 2.8644948974251747 | Validation loss 2.792761743068695



 31%|██████████████████████▍                                                 | 3110/10000 [7:04:26<15:33:08,  8.13s/it]

Epoch: 3109 | Training loss 2.8622387498617172 | Validation loss 2.795789510011673



 31%|██████████████████████▍                                                 | 3111/10000 [7:04:34<15:32:19,  8.12s/it]

Epoch: 3110 | Training loss 2.8676226660609245 | Validation loss 2.7891127169132233



 31%|██████████████████████▍                                                 | 3112/10000 [7:04:42<15:30:28,  8.11s/it]

Epoch: 3111 | Training loss 2.867867738008499 | Validation loss 2.7895186841487885



 31%|██████████████████████▍                                                 | 3113/10000 [7:04:50<15:28:32,  8.09s/it]

Epoch: 3112 | Training loss 2.8631921261548996 | Validation loss 2.7924669682979584



 31%|██████████████████████▍                                                 | 3114/10000 [7:04:58<15:27:39,  8.08s/it]

Epoch: 3113 | Training loss 2.8694014623761177 | Validation loss 2.8010521829128265



 31%|██████████████████████▍                                                 | 3115/10000 [7:05:06<15:32:31,  8.13s/it]

Epoch: 3114 | Training loss 2.8624972477555275 | Validation loss 2.7870593070983887



 31%|██████████████████████▍                                                 | 3116/10000 [7:05:14<15:31:00,  8.11s/it]

Epoch: 3115 | Training loss 2.8649028912186623 | Validation loss 2.7881205081939697



 31%|██████████████████████▍                                                 | 3117/10000 [7:05:22<15:28:44,  8.10s/it]

Epoch: 3116 | Training loss 2.8637885227799416 | Validation loss 2.7885727882385254



 31%|██████████████████████▍                                                 | 3118/10000 [7:05:30<15:27:14,  8.08s/it]

Epoch: 3117 | Training loss 2.8660728335380554 | Validation loss 2.793160140514374



 31%|██████████████████████▍                                                 | 3119/10000 [7:05:39<15:28:13,  8.09s/it]

Epoch: 3118 | Training loss 2.867055043578148 | Validation loss 2.7887379229068756



 31%|██████████████████████▍                                                 | 3120/10000 [7:05:47<15:30:29,  8.11s/it]

Epoch: 3119 | Training loss 2.861034743487835 | Validation loss 2.7912977933883667



 31%|██████████████████████▍                                                 | 3121/10000 [7:05:55<15:31:57,  8.13s/it]

Epoch: 3120 | Training loss 2.8703158795833588 | Validation loss 2.7888409793376923



 31%|██████████████████████▍                                                 | 3122/10000 [7:06:03<15:30:50,  8.12s/it]

Epoch: 3121 | Training loss 2.8703213036060333 | Validation loss 2.7911681532859802



 31%|██████████████████████▍                                                 | 3123/10000 [7:06:11<15:31:15,  8.12s/it]

Epoch: 3122 | Training loss 2.8665333837270737 | Validation loss 2.792928248643875



 31%|██████████████████████▍                                                 | 3124/10000 [7:06:19<15:28:10,  8.10s/it]

Epoch: 3123 | Training loss 2.8679329007864 | Validation loss 2.7896033823490143



 31%|██████████████████████▌                                                 | 3125/10000 [7:06:27<15:32:56,  8.14s/it]

Epoch: 3124 | Training loss 2.8651046380400658 | Validation loss 2.787308305501938



 31%|██████████████████████▌                                                 | 3126/10000 [7:06:35<15:30:23,  8.12s/it]

Epoch: 3125 | Training loss 2.8659544810652733 | Validation loss 2.7920084595680237



 31%|██████████████████████▌                                                 | 3127/10000 [7:06:44<15:30:34,  8.12s/it]

Epoch: 3126 | Training loss 2.871297135949135 | Validation loss 2.792739152908325



 31%|██████████████████████▌                                                 | 3128/10000 [7:06:52<15:29:27,  8.12s/it]

Epoch: 3127 | Training loss 2.867657706141472 | Validation loss 2.7907695174217224



 31%|██████████████████████▌                                                 | 3129/10000 [7:07:00<15:28:45,  8.11s/it]

Epoch: 3128 | Training loss 2.8657670319080353 | Validation loss 2.789496421813965



 31%|██████████████████████▌                                                 | 3130/10000 [7:07:08<15:30:26,  8.13s/it]

Epoch: 3129 | Training loss 2.868182525038719 | Validation loss 2.7930822372436523



 31%|██████████████████████▌                                                 | 3131/10000 [7:07:16<15:28:53,  8.11s/it]

Epoch: 3130 | Training loss 2.8716724514961243 | Validation loss 2.787560999393463



 31%|██████████████████████▌                                                 | 3132/10000 [7:07:24<15:28:51,  8.11s/it]

Epoch: 3131 | Training loss 2.869489587843418 | Validation loss 2.796279639005661



 31%|██████████████████████▌                                                 | 3133/10000 [7:07:32<15:28:53,  8.12s/it]

Epoch: 3132 | Training loss 2.867994010448456 | Validation loss 2.7944556176662445



 31%|██████████████████████▌                                                 | 3134/10000 [7:07:40<15:28:54,  8.12s/it]

Epoch: 3133 | Training loss 2.865208216011524 | Validation loss 2.7929245829582214



 31%|██████████████████████▌                                                 | 3135/10000 [7:07:48<15:26:41,  8.10s/it]

Epoch: 3134 | Training loss 2.872698374092579 | Validation loss 2.79702490568161



 31%|██████████████████████▌                                                 | 3136/10000 [7:07:57<15:25:35,  8.09s/it]

Epoch: 3135 | Training loss 2.865257814526558 | Validation loss 2.8005206882953644



 31%|██████████████████████▌                                                 | 3137/10000 [7:08:05<15:27:24,  8.11s/it]

Epoch: 3136 | Training loss 2.8650362715125084 | Validation loss 2.7911638021469116



 31%|██████████████████████▌                                                 | 3138/10000 [7:08:13<15:23:45,  8.08s/it]

Epoch: 3137 | Training loss 2.8622049912810326 | Validation loss 2.7907465994358063



 31%|██████████████████████▌                                                 | 3139/10000 [7:08:21<15:20:10,  8.05s/it]

Epoch: 3138 | Training loss 2.863499790430069 | Validation loss 2.7855782210826874



 31%|██████████████████████▌                                                 | 3140/10000 [7:08:29<15:28:52,  8.12s/it]

Epoch: 3139 | Training loss 2.864332877099514 | Validation loss 2.7891268134117126



 31%|██████████████████████▌                                                 | 3141/10000 [7:08:37<15:28:27,  8.12s/it]

Epoch: 3140 | Training loss 2.865904323756695 | Validation loss 2.7910876274108887



 31%|██████████████████████▌                                                 | 3142/10000 [7:08:45<15:28:41,  8.13s/it]

Epoch: 3141 | Training loss 2.8641167879104614 | Validation loss 2.7909793853759766



 31%|██████████████████████▋                                                 | 3143/10000 [7:08:53<15:30:58,  8.15s/it]

Epoch: 3142 | Training loss 2.8673927262425423 | Validation loss 2.7925018668174744



 31%|██████████████████████▋                                                 | 3144/10000 [7:09:02<15:30:23,  8.14s/it]

Epoch: 3143 | Training loss 2.866881884634495 | Validation loss 2.798444986343384



 31%|██████████████████████▋                                                 | 3145/10000 [7:09:10<15:27:23,  8.12s/it]

Epoch: 3144 | Training loss 2.866819880902767 | Validation loss 2.8008317351341248



 31%|██████████████████████▋                                                 | 3146/10000 [7:09:18<15:27:28,  8.12s/it]

Epoch: 3145 | Training loss 2.872918054461479 | Validation loss 2.788844794034958



 31%|██████████████████████▋                                                 | 3147/10000 [7:09:26<15:26:44,  8.11s/it]

Epoch: 3146 | Training loss 2.8669417276978493 | Validation loss 2.7888828814029694



 31%|██████████████████████▋                                                 | 3148/10000 [7:09:34<15:31:02,  8.15s/it]

Epoch: 3147 | Training loss 2.859635181725025 | Validation loss 2.7843123078346252



 31%|██████████████████████▋                                                 | 3149/10000 [7:09:42<15:30:43,  8.15s/it]

Epoch: 3148 | Training loss 2.859354205429554 | Validation loss 2.7869379222393036



 32%|██████████████████████▋                                                 | 3150/10000 [7:09:50<15:31:40,  8.16s/it]

Epoch: 3149 | Training loss 2.865078553557396 | Validation loss 2.782505363225937



 32%|██████████████████████▋                                                 | 3151/10000 [7:09:58<15:24:46,  8.10s/it]

Epoch: 3150 | Training loss 2.8672309666872025 | Validation loss 2.783852845430374



 32%|██████████████████████▋                                                 | 3152/10000 [7:10:06<15:22:29,  8.08s/it]

Epoch: 3151 | Training loss 2.870283178985119 | Validation loss 2.78776553273201



 32%|██████████████████████▋                                                 | 3153/10000 [7:10:14<15:20:24,  8.07s/it]

Epoch: 3152 | Training loss 2.867768205702305 | Validation loss 2.787076324224472



 32%|██████████████████████▋                                                 | 3154/10000 [7:10:23<15:22:04,  8.08s/it]

Epoch: 3153 | Training loss 2.8684073984622955 | Validation loss 2.7884280681610107



 32%|██████████████████████▋                                                 | 3155/10000 [7:10:31<15:22:07,  8.08s/it]

Epoch: 3154 | Training loss 2.8671410009264946 | Validation loss 2.7934785187244415



 32%|██████████████████████▋                                                 | 3156/10000 [7:10:39<15:22:05,  8.08s/it]

Epoch: 3155 | Training loss 2.8673944547772408 | Validation loss 2.790527731180191



 32%|██████████████████████▋                                                 | 3157/10000 [7:10:47<15:22:11,  8.09s/it]

Epoch: 3156 | Training loss 2.8585767969489098 | Validation loss 2.790897697210312



 32%|██████████████████████▋                                                 | 3158/10000 [7:10:55<15:21:04,  8.08s/it]

Epoch: 3157 | Training loss 2.8714545592665672 | Validation loss 2.7921675741672516



 32%|██████████████████████▋                                                 | 3159/10000 [7:11:03<15:25:39,  8.12s/it]

Epoch: 3158 | Training loss 2.8681859374046326 | Validation loss 2.787272661924362



 32%|██████████████████████▊                                                 | 3160/10000 [7:11:11<15:24:53,  8.11s/it]

Epoch: 3159 | Training loss 2.86717576533556 | Validation loss 2.786349803209305



 32%|██████████████████████▊                                                 | 3161/10000 [7:11:19<15:25:28,  8.12s/it]

Epoch: 3160 | Training loss 2.869772434234619 | Validation loss 2.788923442363739



 32%|██████████████████████▊                                                 | 3162/10000 [7:11:27<15:24:13,  8.11s/it]

Epoch: 3161 | Training loss 2.870932474732399 | Validation loss 2.795744448900223



 32%|██████████████████████▊                                                 | 3163/10000 [7:11:35<15:22:41,  8.10s/it]

Epoch: 3162 | Training loss 2.8694748878479004 | Validation loss 2.7929538786411285



 32%|██████████████████████▊                                                 | 3164/10000 [7:11:44<15:22:41,  8.10s/it]

Epoch: 3163 | Training loss 2.8706099465489388 | Validation loss 2.796950042247772



 32%|██████████████████████▊                                                 | 3165/10000 [7:11:52<15:23:10,  8.10s/it]

Epoch: 3164 | Training loss 2.8617071136832237 | Validation loss 2.800543040037155



 32%|██████████████████████▊                                                 | 3166/10000 [7:12:00<15:24:28,  8.12s/it]

Epoch: 3165 | Training loss 2.863500475883484 | Validation loss 2.795246124267578



 32%|██████████████████████▊                                                 | 3167/10000 [7:12:08<15:20:03,  8.08s/it]

Epoch: 3166 | Training loss 2.861598327755928 | Validation loss 2.79171621799469



 32%|██████████████████████▊                                                 | 3168/10000 [7:12:16<15:20:24,  8.08s/it]

Epoch: 3167 | Training loss 2.8761336356401443 | Validation loss 2.790629655122757



 32%|██████████████████████▊                                                 | 3169/10000 [7:12:24<15:24:18,  8.12s/it]

Epoch: 3168 | Training loss 2.8637092635035515 | Validation loss 2.789327561855316



 32%|██████████████████████▊                                                 | 3170/10000 [7:12:32<15:27:10,  8.14s/it]

Epoch: 3169 | Training loss 2.8617580011487007 | Validation loss 2.789493352174759



 32%|██████████████████████▊                                                 | 3171/10000 [7:12:40<15:23:26,  8.11s/it]

Epoch: 3170 | Training loss 2.8699900433421135 | Validation loss 2.7931494414806366



 32%|██████████████████████▊                                                 | 3172/10000 [7:12:48<15:20:44,  8.09s/it]

Epoch: 3171 | Training loss 2.8668454587459564 | Validation loss 2.785053014755249



 32%|██████████████████████▊                                                 | 3173/10000 [7:12:57<15:21:01,  8.09s/it]

Epoch: 3172 | Training loss 2.866525463759899 | Validation loss 2.789235472679138



 32%|██████████████████████▊                                                 | 3174/10000 [7:13:05<15:20:00,  8.09s/it]

Epoch: 3173 | Training loss 2.8679457679390907 | Validation loss 2.794264853000641



 32%|██████████████████████▊                                                 | 3175/10000 [7:13:13<15:18:47,  8.08s/it]

Epoch: 3174 | Training loss 2.8654527738690376 | Validation loss 2.7876358032226562



 32%|██████████████████████▊                                                 | 3176/10000 [7:13:21<15:22:08,  8.11s/it]

Epoch: 3175 | Training loss 2.870594285428524 | Validation loss 2.790765255689621



 32%|██████████████████████▊                                                 | 3177/10000 [7:13:29<15:22:49,  8.12s/it]

Epoch: 3176 | Training loss 2.8699200823903084 | Validation loss 2.7896145582199097



 32%|██████████████████████▉                                                 | 3178/10000 [7:13:37<15:25:37,  8.14s/it]

Epoch: 3177 | Training loss 2.8658432587981224 | Validation loss 2.7909510135650635



 32%|██████████████████████▉                                                 | 3179/10000 [7:13:45<15:25:43,  8.14s/it]

Epoch: 3178 | Training loss 2.872615806758404 | Validation loss 2.7902301251888275



 32%|██████████████████████▉                                                 | 3180/10000 [7:13:53<15:25:57,  8.15s/it]

Epoch: 3179 | Training loss 2.8665378987789154 | Validation loss 2.789891541004181



 32%|██████████████████████▉                                                 | 3181/10000 [7:14:02<15:28:07,  8.17s/it]

Epoch: 3180 | Training loss 2.8637705892324448 | Validation loss 2.7867052257061005



 32%|██████████████████████▉                                                 | 3182/10000 [7:14:10<15:27:41,  8.16s/it]

Epoch: 3181 | Training loss 2.8600901439785957 | Validation loss 2.7950699627399445



 32%|██████████████████████▉                                                 | 3183/10000 [7:14:18<15:26:32,  8.15s/it]

Epoch: 3182 | Training loss 2.869040496647358 | Validation loss 2.7964850664138794



 32%|██████████████████████▉                                                 | 3184/10000 [7:14:26<15:32:32,  8.21s/it]

Epoch: 3183 | Training loss 2.8643313199281693 | Validation loss 2.794540375471115



 32%|██████████████████████▉                                                 | 3185/10000 [7:14:34<15:29:14,  8.18s/it]

Epoch: 3184 | Training loss 2.86579916626215 | Validation loss 2.7944796085357666



 32%|██████████████████████▉                                                 | 3186/10000 [7:14:43<15:26:52,  8.16s/it]

Epoch: 3185 | Training loss 2.8692920729517937 | Validation loss 2.7938384115695953



 32%|██████████████████████▉                                                 | 3187/10000 [7:14:51<15:29:15,  8.18s/it]

Epoch: 3186 | Training loss 2.865746423602104 | Validation loss 2.792492836713791



 32%|██████████████████████▉                                                 | 3188/10000 [7:14:59<15:27:24,  8.17s/it]

Epoch: 3187 | Training loss 2.8668249770998955 | Validation loss 2.789578080177307



 32%|██████████████████████▉                                                 | 3189/10000 [7:15:07<15:27:11,  8.17s/it]

Epoch: 3188 | Training loss 2.871425747871399 | Validation loss 2.791729152202606



 32%|██████████████████████▉                                                 | 3190/10000 [7:15:15<15:23:48,  8.14s/it]

Epoch: 3189 | Training loss 2.868206702172756 | Validation loss 2.794288605451584



 32%|██████████████████████▉                                                 | 3191/10000 [7:15:23<15:21:52,  8.12s/it]

Epoch: 3190 | Training loss 2.86064101010561 | Validation loss 2.7960928976535797



 32%|██████████████████████▉                                                 | 3192/10000 [7:15:31<15:19:24,  8.10s/it]

Epoch: 3191 | Training loss 2.8686747401952744 | Validation loss 2.794412463903427



 32%|██████████████████████▉                                                 | 3193/10000 [7:15:39<15:18:42,  8.10s/it]

Epoch: 3192 | Training loss 2.8612420186400414 | Validation loss 2.7883249819278717



 32%|██████████████████████▉                                                 | 3194/10000 [7:15:47<15:13:46,  8.06s/it]

Epoch: 3193 | Training loss 2.8637878745794296 | Validation loss 2.7939931750297546



 32%|███████████████████████                                                 | 3195/10000 [7:15:55<15:12:32,  8.05s/it]

Epoch: 3194 | Training loss 2.868330329656601 | Validation loss 2.7889062762260437



 32%|███████████████████████                                                 | 3196/10000 [7:16:03<15:10:03,  8.03s/it]

Epoch: 3195 | Training loss 2.8653973564505577 | Validation loss 2.7925983369350433



 32%|███████████████████████                                                 | 3197/10000 [7:16:11<15:14:02,  8.06s/it]

Epoch: 3196 | Training loss 2.8611924946308136 | Validation loss 2.786662757396698



 32%|███████████████████████                                                 | 3198/10000 [7:16:19<15:12:35,  8.05s/it]

Epoch: 3197 | Training loss 2.8667789921164513 | Validation loss 2.788761019706726



 32%|███████████████████████                                                 | 3199/10000 [7:16:28<15:17:24,  8.09s/it]

Epoch: 3198 | Training loss 2.870082587003708 | Validation loss 2.790390908718109



 32%|███████████████████████                                                 | 3200/10000 [7:16:36<15:15:53,  8.08s/it]

Epoch: 3199 | Training loss 2.8649197965860367 | Validation loss 2.7892752587795258



 32%|███████████████████████                                                 | 3201/10000 [7:16:44<15:17:32,  8.10s/it]

Epoch: 3200 | Training loss 2.8662058115005493 | Validation loss 2.791633039712906



 32%|███████████████████████                                                 | 3202/10000 [7:16:52<15:15:28,  8.08s/it]

Epoch: 3201 | Training loss 2.8629726096987724 | Validation loss 2.7900554537773132



 32%|███████████████████████                                                 | 3203/10000 [7:17:00<15:15:33,  8.08s/it]

Epoch: 3202 | Training loss 2.8669025748968124 | Validation loss 2.7975858449935913



 32%|███████████████████████                                                 | 3204/10000 [7:17:08<15:16:05,  8.09s/it]

Epoch: 3203 | Training loss 2.866821087896824 | Validation loss 2.792219400405884



 32%|███████████████████████                                                 | 3205/10000 [7:17:16<15:16:58,  8.10s/it]

Epoch: 3204 | Training loss 2.8646672442555428 | Validation loss 2.791453003883362



 32%|███████████████████████                                                 | 3206/10000 [7:17:24<15:17:32,  8.10s/it]

Epoch: 3205 | Training loss 2.8673401847481728 | Validation loss 2.7913572788238525



 32%|███████████████████████                                                 | 3207/10000 [7:17:32<15:16:19,  8.09s/it]

Epoch: 3206 | Training loss 2.8682255148887634 | Validation loss 2.790628731250763



 32%|███████████████████████                                                 | 3208/10000 [7:17:40<15:16:24,  8.10s/it]

Epoch: 3207 | Training loss 2.8674913719296455 | Validation loss 2.7949429154396057



 32%|███████████████████████                                                 | 3209/10000 [7:17:49<15:18:58,  8.12s/it]

Epoch: 3208 | Training loss 2.8674452006816864 | Validation loss 2.794727861881256



 32%|███████████████████████                                                 | 3210/10000 [7:17:57<15:17:38,  8.11s/it]

Epoch: 3209 | Training loss 2.862657956779003 | Validation loss 2.7925851345062256



 32%|███████████████████████                                                 | 3211/10000 [7:18:05<15:15:06,  8.09s/it]

Epoch: 3210 | Training loss 2.8680419251322746 | Validation loss 2.7895827889442444



 32%|███████████████████████▏                                                | 3212/10000 [7:18:13<15:14:19,  8.08s/it]

Epoch: 3211 | Training loss 2.8714646697044373 | Validation loss 2.7911100685596466



 32%|███████████████████████▏                                                | 3213/10000 [7:18:21<15:11:14,  8.06s/it]

Epoch: 3212 | Training loss 2.8689903989434242 | Validation loss 2.799630284309387



 32%|███████████████████████▏                                                | 3214/10000 [7:18:29<15:16:19,  8.10s/it]

Epoch: 3213 | Training loss 2.8614244535565376 | Validation loss 2.7912005484104156



 32%|███████████████████████▏                                                | 3215/10000 [7:18:37<15:19:48,  8.13s/it]

Epoch: 3214 | Training loss 2.8657614812254906 | Validation loss 2.789313167333603



 32%|███████████████████████▏                                                | 3216/10000 [7:18:45<15:17:39,  8.12s/it]

Epoch: 3215 | Training loss 2.86591649800539 | Validation loss 2.789622187614441



 32%|███████████████████████▏                                                | 3217/10000 [7:18:53<15:17:41,  8.12s/it]

Epoch: 3216 | Training loss 2.869592361152172 | Validation loss 2.79063081741333



 32%|███████████████████████▏                                                | 3218/10000 [7:19:02<15:17:32,  8.12s/it]

Epoch: 3217 | Training loss 2.8693596944212914 | Validation loss 2.787537395954132



 32%|███████████████████████▏                                                | 3219/10000 [7:19:10<15:15:47,  8.10s/it]

Epoch: 3218 | Training loss 2.8707995787262917 | Validation loss 2.7902979254722595



 32%|███████████████████████▏                                                | 3220/10000 [7:19:18<15:11:54,  8.07s/it]

Epoch: 3219 | Training loss 2.8705969527363777 | Validation loss 2.793793350458145



 32%|███████████████████████▏                                                | 3221/10000 [7:19:26<15:12:51,  8.08s/it]

Epoch: 3220 | Training loss 2.8653822019696236 | Validation loss 2.796671152114868



 32%|███████████████████████▏                                                | 3222/10000 [7:19:34<15:14:32,  8.10s/it]

Epoch: 3221 | Training loss 2.866606116294861 | Validation loss 2.793312042951584



 32%|███████████████████████▏                                                | 3223/10000 [7:19:42<15:15:43,  8.11s/it]

Epoch: 3222 | Training loss 2.8709601908922195 | Validation loss 2.7893869876861572



 32%|███████████████████████▏                                                | 3224/10000 [7:19:50<15:15:21,  8.11s/it]

Epoch: 3223 | Training loss 2.8685896173119545 | Validation loss 2.7928569316864014



 32%|███████████████████████▏                                                | 3225/10000 [7:19:58<15:10:56,  8.07s/it]

Epoch: 3224 | Training loss 2.870184823870659 | Validation loss 2.8011230528354645



 32%|███████████████████████▏                                                | 3226/10000 [7:20:06<15:12:26,  8.08s/it]

Epoch: 3225 | Training loss 2.8661433085799217 | Validation loss 2.792845129966736



 32%|███████████████████████▏                                                | 3227/10000 [7:20:14<15:13:29,  8.09s/it]

Epoch: 3226 | Training loss 2.8745096176862717 | Validation loss 2.7926630079746246



 32%|███████████████████████▏                                                | 3228/10000 [7:20:23<15:16:49,  8.12s/it]

Epoch: 3227 | Training loss 2.863131083548069 | Validation loss 2.7839092910289764



 32%|███████████████████████▏                                                | 3229/10000 [7:20:31<15:15:29,  8.11s/it]

Epoch: 3228 | Training loss 2.8699934259057045 | Validation loss 2.7869085371494293



 32%|███████████████████████▎                                                | 3230/10000 [7:20:39<15:17:06,  8.13s/it]

Epoch: 3229 | Training loss 2.864557735621929 | Validation loss 2.789797693490982



 32%|███████████████████████▎                                                | 3231/10000 [7:20:47<15:15:01,  8.11s/it]

Epoch: 3230 | Training loss 2.867190793156624 | Validation loss 2.7888570725917816



 32%|███████████████████████▎                                                | 3232/10000 [7:20:55<15:11:21,  8.08s/it]

Epoch: 3231 | Training loss 2.8660379201173782 | Validation loss 2.7962958812713623



 32%|███████████████████████▎                                                | 3233/10000 [7:21:03<15:09:25,  8.06s/it]

Epoch: 3232 | Training loss 2.862936310470104 | Validation loss 2.7893702685832977



 32%|███████████████████████▎                                                | 3234/10000 [7:21:11<15:05:12,  8.03s/it]

Epoch: 3233 | Training loss 2.8655512407422066 | Validation loss 2.790294647216797



 32%|███████████████████████▎                                                | 3235/10000 [7:21:19<15:10:50,  8.08s/it]

Epoch: 3234 | Training loss 2.871092453598976 | Validation loss 2.7936832308769226



 32%|███████████████████████▎                                                | 3236/10000 [7:21:27<15:11:25,  8.08s/it]

Epoch: 3235 | Training loss 2.8661682456731796 | Validation loss 2.792742520570755



 32%|███████████████████████▎                                                | 3237/10000 [7:21:35<15:11:49,  8.09s/it]

Epoch: 3236 | Training loss 2.858634799718857 | Validation loss 2.792578399181366



 32%|███████████████████████▎                                                | 3238/10000 [7:21:43<15:13:10,  8.10s/it]

Epoch: 3237 | Training loss 2.8671208396553993 | Validation loss 2.790019929409027



 32%|███████████████████████▎                                                | 3239/10000 [7:21:51<15:12:27,  8.10s/it]

Epoch: 3238 | Training loss 2.8665637746453285 | Validation loss 2.7946481704711914



 32%|███████████████████████▎                                                | 3240/10000 [7:21:59<15:09:15,  8.07s/it]

Epoch: 3239 | Training loss 2.86695359647274 | Validation loss 2.7867129743099213



 32%|███████████████████████▎                                                | 3241/10000 [7:22:07<15:03:50,  8.02s/it]

Epoch: 3240 | Training loss 2.8659606128931046 | Validation loss 2.788573771715164



 32%|███████████████████████▎                                                | 3242/10000 [7:22:15<15:04:18,  8.03s/it]

Epoch: 3241 | Training loss 2.872251234948635 | Validation loss 2.7918260395526886



 32%|███████████████████████▎                                                | 3243/10000 [7:22:23<15:06:12,  8.05s/it]

Epoch: 3242 | Training loss 2.8637326061725616 | Validation loss 2.7907391488552094



 32%|███████████████████████▎                                                | 3244/10000 [7:22:32<15:10:38,  8.09s/it]

Epoch: 3243 | Training loss 2.866765081882477 | Validation loss 2.7888356745243073



 32%|███████████████████████▎                                                | 3245/10000 [7:22:40<15:10:59,  8.09s/it]

Epoch: 3244 | Training loss 2.864125281572342 | Validation loss 2.7910644710063934



 32%|███████████████████████▎                                                | 3246/10000 [7:22:48<15:12:17,  8.10s/it]

Epoch: 3245 | Training loss 2.865627497434616 | Validation loss 2.7949404418468475



 32%|███████████████████████▍                                                | 3247/10000 [7:22:56<15:08:50,  8.07s/it]

Epoch: 3246 | Training loss 2.8644725307822227 | Validation loss 2.7890352308750153



 32%|███████████████████████▍                                                | 3248/10000 [7:23:04<15:07:30,  8.06s/it]

Epoch: 3247 | Training loss 2.8627360984683037 | Validation loss 2.792428135871887



 32%|███████████████████████▍                                                | 3249/10000 [7:23:12<15:08:49,  8.08s/it]

Epoch: 3248 | Training loss 2.8585197627544403 | Validation loss 2.787889391183853



 32%|███████████████████████▍                                                | 3250/10000 [7:23:20<15:08:26,  8.07s/it]

Epoch: 3249 | Training loss 2.8640151247382164 | Validation loss 2.797110617160797



 33%|███████████████████████▍                                                | 3251/10000 [7:23:28<15:07:35,  8.07s/it]

Epoch: 3250 | Training loss 2.866269461810589 | Validation loss 2.7885637283325195



 33%|███████████████████████▍                                                | 3252/10000 [7:23:36<15:07:33,  8.07s/it]

Epoch: 3251 | Training loss 2.868866130709648 | Validation loss 2.785854399204254



 33%|███████████████████████▍                                                | 3253/10000 [7:23:44<15:10:45,  8.10s/it]

Epoch: 3252 | Training loss 2.8715777918696404 | Validation loss 2.8021343052387238



 33%|███████████████████████▍                                                | 3254/10000 [7:23:52<15:05:32,  8.05s/it]

Epoch: 3253 | Training loss 2.8671781420707703 | Validation loss 2.7909678518772125



 33%|███████████████████████▍                                                | 3255/10000 [7:24:00<15:04:22,  8.04s/it]

Epoch: 3254 | Training loss 2.8650732710957527 | Validation loss 2.7939402759075165



 33%|███████████████████████▍                                                | 3256/10000 [7:24:09<15:06:39,  8.07s/it]

Epoch: 3255 | Training loss 2.865451082587242 | Validation loss 2.786954402923584



 33%|███████████████████████▍                                                | 3257/10000 [7:24:17<15:05:05,  8.05s/it]

Epoch: 3256 | Training loss 2.8690507113933563 | Validation loss 2.7893139719963074



 33%|███████████████████████▍                                                | 3258/10000 [7:24:25<15:03:54,  8.04s/it]

Epoch: 3257 | Training loss 2.8670697435736656 | Validation loss 2.7928412556648254



 33%|███████████████████████▍                                                | 3259/10000 [7:24:33<15:07:19,  8.08s/it]

Epoch: 3258 | Training loss 2.8664083629846573 | Validation loss 2.7850738167762756



 33%|███████████████████████▍                                                | 3260/10000 [7:24:41<15:04:53,  8.06s/it]

Epoch: 3259 | Training loss 2.86890609562397 | Validation loss 2.796623706817627



 33%|███████████████████████▍                                                | 3261/10000 [7:24:49<15:09:00,  8.09s/it]

Epoch: 3260 | Training loss 2.8640443235635757 | Validation loss 2.7974112927913666



 33%|███████████████████████▍                                                | 3262/10000 [7:24:57<15:07:30,  8.08s/it]

Epoch: 3261 | Training loss 2.8683495596051216 | Validation loss 2.797954022884369



 33%|███████████████████████▍                                                | 3263/10000 [7:25:05<15:06:30,  8.07s/it]

Epoch: 3262 | Training loss 2.8606261014938354 | Validation loss 2.788375824689865



 33%|███████████████████████▌                                                | 3264/10000 [7:25:13<15:05:15,  8.06s/it]

Epoch: 3263 | Training loss 2.8675022646784782 | Validation loss 2.788510650396347



 33%|███████████████████████▌                                                | 3265/10000 [7:25:21<15:05:50,  8.07s/it]

Epoch: 3264 | Training loss 2.867518797516823 | Validation loss 2.7971752882003784



 33%|███████████████████████▌                                                | 3266/10000 [7:25:29<15:10:00,  8.11s/it]

Epoch: 3265 | Training loss 2.870866760611534 | Validation loss 2.79929918050766



 33%|███████████████████████▌                                                | 3267/10000 [7:25:37<15:08:06,  8.09s/it]

Epoch: 3266 | Training loss 2.866776682436466 | Validation loss 2.786596804857254



 33%|███████████████████████▌                                                | 3268/10000 [7:25:46<15:10:54,  8.12s/it]

Epoch: 3267 | Training loss 2.8691373094916344 | Validation loss 2.790603905916214



 33%|███████████████████████▌                                                | 3269/10000 [7:25:54<15:07:33,  8.09s/it]

Epoch: 3268 | Training loss 2.8588752672076225 | Validation loss 2.7940813302993774



 33%|███████████████████████▌                                                | 3270/10000 [7:26:02<15:07:17,  8.09s/it]

Epoch: 3269 | Training loss 2.863850958645344 | Validation loss 2.7856196761131287



 33%|███████████████████████▌                                                | 3271/10000 [7:26:10<15:06:00,  8.08s/it]

Epoch: 3270 | Training loss 2.8627807423472404 | Validation loss 2.7882218956947327



 33%|███████████████████████▌                                                | 3272/10000 [7:26:18<15:07:17,  8.09s/it]

Epoch: 3271 | Training loss 2.872910387814045 | Validation loss 2.788726270198822



 33%|███████████████████████▌                                                | 3273/10000 [7:26:26<15:08:49,  8.11s/it]

Epoch: 3272 | Training loss 2.8618816062808037 | Validation loss 2.786771446466446



 33%|███████████████████████▌                                                | 3274/10000 [7:26:34<15:12:09,  8.14s/it]

Epoch: 3273 | Training loss 2.8672541826963425 | Validation loss 2.8040353059768677



 33%|███████████████████████▌                                                | 3275/10000 [7:26:42<15:12:23,  8.14s/it]

Epoch: 3274 | Training loss 2.858133666217327 | Validation loss 2.792074292898178



 33%|███████████████████████▌                                                | 3276/10000 [7:26:51<15:14:10,  8.16s/it]

Epoch: 3275 | Training loss 2.866381362080574 | Validation loss 2.788548320531845



 33%|███████████████████████▌                                                | 3277/10000 [7:26:59<15:15:47,  8.17s/it]

Epoch: 3276 | Training loss 2.8712794929742813 | Validation loss 2.792245537042618



 33%|███████████████████████▌                                                | 3278/10000 [7:27:07<15:14:56,  8.17s/it]

Epoch: 3277 | Training loss 2.868294596672058 | Validation loss 2.7928958237171173



 33%|███████████████████████▌                                                | 3279/10000 [7:27:15<15:13:14,  8.15s/it]

Epoch: 3278 | Training loss 2.864929035305977 | Validation loss 2.792886793613434



 33%|███████████████████████▌                                                | 3280/10000 [7:27:23<15:09:56,  8.12s/it]

Epoch: 3279 | Training loss 2.8679496720433235 | Validation loss 2.7996010780334473



 33%|███████████████████████▌                                                | 3281/10000 [7:27:31<15:09:58,  8.13s/it]

Epoch: 3280 | Training loss 2.871675580739975 | Validation loss 2.791246086359024



 33%|███████████████████████▋                                                | 3282/10000 [7:27:39<15:08:59,  8.12s/it]

Epoch: 3281 | Training loss 2.8683641478419304 | Validation loss 2.7911159992218018



 33%|███████████████████████▋                                                | 3283/10000 [7:27:48<15:11:25,  8.14s/it]

Epoch: 3282 | Training loss 2.871110260486603 | Validation loss 2.7965624928474426



 33%|███████████████████████▋                                                | 3284/10000 [7:27:56<15:12:00,  8.15s/it]

Epoch: 3283 | Training loss 2.868422657251358 | Validation loss 2.78865322470665



 33%|███████████████████████▋                                                | 3285/10000 [7:28:04<15:10:12,  8.13s/it]

Epoch: 3284 | Training loss 2.868579350411892 | Validation loss 2.7959735989570618



 33%|███████████████████████▋                                                | 3286/10000 [7:28:12<15:08:43,  8.12s/it]

Epoch: 3285 | Training loss 2.865496516227722 | Validation loss 2.797510027885437



 33%|███████████████████████▋                                                | 3287/10000 [7:28:20<15:05:48,  8.10s/it]

Epoch: 3286 | Training loss 2.867478221654892 | Validation loss 2.8007939755916595



 33%|███████████████████████▋                                                | 3288/10000 [7:28:28<15:02:40,  8.07s/it]

Epoch: 3287 | Training loss 2.868826702237129 | Validation loss 2.7854455411434174



 33%|███████████████████████▋                                                | 3289/10000 [7:28:36<15:02:15,  8.07s/it]

Epoch: 3288 | Training loss 2.8660579845309258 | Validation loss 2.789015829563141



 33%|███████████████████████▋                                                | 3290/10000 [7:28:44<15:03:50,  8.08s/it]

Epoch: 3289 | Training loss 2.8675258681178093 | Validation loss 2.7933409512043



 33%|███████████████████████▋                                                | 3291/10000 [7:28:52<14:59:37,  8.05s/it]

Epoch: 3290 | Training loss 2.865012414753437 | Validation loss 2.7874037623405457



 33%|███████████████████████▋                                                | 3292/10000 [7:29:00<15:00:50,  8.06s/it]

Epoch: 3291 | Training loss 2.8642599806189537 | Validation loss 2.79016575217247



 33%|███████████████████████▋                                                | 3293/10000 [7:29:08<15:03:12,  8.08s/it]

Epoch: 3292 | Training loss 2.8679858073592186 | Validation loss 2.789679318666458



 33%|███████████████████████▋                                                | 3294/10000 [7:29:16<15:06:01,  8.11s/it]

Epoch: 3293 | Training loss 2.8661932423710823 | Validation loss 2.791293889284134



 33%|███████████████████████▋                                                | 3295/10000 [7:29:24<15:04:34,  8.09s/it]

Epoch: 3294 | Training loss 2.8629961758852005 | Validation loss 2.7885803878307343



 33%|███████████████████████▋                                                | 3296/10000 [7:29:33<15:05:12,  8.10s/it]

Epoch: 3295 | Training loss 2.8635771721601486 | Validation loss 2.787354290485382



 33%|███████████████████████▋                                                | 3297/10000 [7:29:41<15:05:05,  8.10s/it]

Epoch: 3296 | Training loss 2.866204656660557 | Validation loss 2.796501487493515



 33%|███████████████████████▋                                                | 3298/10000 [7:29:49<15:07:03,  8.12s/it]

Epoch: 3297 | Training loss 2.8662536665797234 | Validation loss 2.791755199432373



 33%|███████████████████████▊                                                | 3299/10000 [7:29:57<15:04:15,  8.10s/it]

Epoch: 3298 | Training loss 2.864904709160328 | Validation loss 2.7863758504390717



 33%|███████████████████████▊                                                | 3300/10000 [7:30:05<15:04:14,  8.10s/it]

Epoch: 3299 | Training loss 2.8628634065389633 | Validation loss 2.79020819067955



 33%|███████████████████████▊                                                | 3301/10000 [7:30:13<15:02:36,  8.08s/it]

Epoch: 3300 | Training loss 2.866476185619831 | Validation loss 2.7871735990047455



 33%|███████████████████████▊                                                | 3302/10000 [7:30:21<15:07:09,  8.13s/it]

Epoch: 3301 | Training loss 2.866109274327755 | Validation loss 2.785121977329254



 33%|███████████████████████▊                                                | 3303/10000 [7:30:29<15:07:43,  8.13s/it]

Epoch: 3302 | Training loss 2.8721907660365105 | Validation loss 2.7883318960666656



 33%|███████████████████████▊                                                | 3304/10000 [7:30:38<15:05:59,  8.12s/it]

Epoch: 3303 | Training loss 2.874906860291958 | Validation loss 2.8115417659282684



 33%|███████████████████████▊                                                | 3305/10000 [7:30:46<15:05:20,  8.11s/it]

Epoch: 3304 | Training loss 2.8697702065110207 | Validation loss 2.7959223091602325



 33%|███████████████████████▊                                                | 3306/10000 [7:30:54<15:04:51,  8.11s/it]

Epoch: 3305 | Training loss 2.868941009044647 | Validation loss 2.7880907356739044



 33%|███████████████████████▊                                                | 3307/10000 [7:31:02<15:03:59,  8.10s/it]

Epoch: 3306 | Training loss 2.8660180494189262 | Validation loss 2.7905673682689667



 33%|███████████████████████▊                                                | 3308/10000 [7:31:10<15:05:52,  8.12s/it]

Epoch: 3307 | Training loss 2.865219369530678 | Validation loss 2.7905086278915405



 33%|███████████████████████▊                                                | 3309/10000 [7:31:18<15:03:27,  8.10s/it]

Epoch: 3308 | Training loss 2.864934541285038 | Validation loss 2.7958134710788727



 33%|███████████████████████▊                                                | 3310/10000 [7:31:26<15:00:31,  8.08s/it]

Epoch: 3309 | Training loss 2.871050514280796 | Validation loss 2.786576271057129



 33%|███████████████████████▊                                                | 3311/10000 [7:31:34<15:02:46,  8.10s/it]

Epoch: 3310 | Training loss 2.86760700494051 | Validation loss 2.786146730184555



 33%|███████████████████████▊                                                | 3312/10000 [7:31:42<15:05:52,  8.13s/it]

Epoch: 3311 | Training loss 2.867418795824051 | Validation loss 2.794123947620392



 33%|███████████████████████▊                                                | 3313/10000 [7:31:51<15:07:03,  8.14s/it]

Epoch: 3312 | Training loss 2.8717186599969864 | Validation loss 2.80018350481987



 33%|███████████████████████▊                                                | 3314/10000 [7:31:59<15:09:22,  8.16s/it]

Epoch: 3313 | Training loss 2.863144263625145 | Validation loss 2.795686513185501



 33%|███████████████████████▊                                                | 3315/10000 [7:32:07<15:08:26,  8.15s/it]

Epoch: 3314 | Training loss 2.861707516014576 | Validation loss 2.789596378803253



 33%|███████████████████████▉                                                | 3316/10000 [7:32:15<15:10:45,  8.18s/it]

Epoch: 3315 | Training loss 2.859221152961254 | Validation loss 2.7872798144817352



 33%|███████████████████████▉                                                | 3317/10000 [7:32:23<15:10:29,  8.17s/it]

Epoch: 3316 | Training loss 2.8705067336559296 | Validation loss 2.7857525050640106



 33%|███████████████████████▉                                                | 3318/10000 [7:32:31<15:05:47,  8.13s/it]

Epoch: 3317 | Training loss 2.8675356060266495 | Validation loss 2.79050475358963



 33%|███████████████████████▉                                                | 3319/10000 [7:32:39<15:03:34,  8.11s/it]

Epoch: 3318 | Training loss 2.871812365949154 | Validation loss 2.7970415353775024



 33%|███████████████████████▉                                                | 3320/10000 [7:32:48<15:03:27,  8.11s/it]

Epoch: 3319 | Training loss 2.8656418323516846 | Validation loss 2.7940138578414917



 33%|███████████████████████▉                                                | 3321/10000 [7:32:56<15:00:46,  8.09s/it]

Epoch: 3320 | Training loss 2.8747205808758736 | Validation loss 2.7894316017627716



 33%|███████████████████████▉                                                | 3322/10000 [7:33:04<14:58:58,  8.08s/it]

Epoch: 3321 | Training loss 2.8638825938105583 | Validation loss 2.7921526432037354



 33%|███████████████████████▉                                                | 3323/10000 [7:33:12<14:58:39,  8.08s/it]

Epoch: 3322 | Training loss 2.8645420968532562 | Validation loss 2.7906642258167267



 33%|███████████████████████▉                                                | 3324/10000 [7:33:20<14:58:51,  8.08s/it]

Epoch: 3323 | Training loss 2.8667837977409363 | Validation loss 2.7893627285957336



 33%|███████████████████████▉                                                | 3325/10000 [7:33:28<14:57:25,  8.07s/it]

Epoch: 3324 | Training loss 2.863387107849121 | Validation loss 2.7919208109378815



 33%|███████████████████████▉                                                | 3326/10000 [7:33:36<14:55:57,  8.05s/it]

Epoch: 3325 | Training loss 2.867809481918812 | Validation loss 2.791007548570633



 33%|███████████████████████▉                                                | 3327/10000 [7:33:44<14:57:32,  8.07s/it]

Epoch: 3326 | Training loss 2.867059826850891 | Validation loss 2.7944085001945496



 33%|███████████████████████▉                                                | 3328/10000 [7:33:52<14:59:28,  8.09s/it]

Epoch: 3327 | Training loss 2.8682151064276695 | Validation loss 2.7874434888362885



 33%|███████████████████████▉                                                | 3329/10000 [7:34:00<14:56:22,  8.06s/it]

Epoch: 3328 | Training loss 2.87037805467844 | Validation loss 2.790512055158615



 33%|███████████████████████▉                                                | 3330/10000 [7:34:08<14:56:58,  8.07s/it]

Epoch: 3329 | Training loss 2.86370687186718 | Validation loss 2.787734091281891



 33%|███████████████████████▉                                                | 3331/10000 [7:34:16<14:53:21,  8.04s/it]

Epoch: 3330 | Training loss 2.8666127175092697 | Validation loss 2.785482883453369



 33%|███████████████████████▉                                                | 3332/10000 [7:34:24<14:59:09,  8.09s/it]

Epoch: 3331 | Training loss 2.8622979149222374 | Validation loss 2.7995002269744873



 33%|███████████████████████▉                                                | 3333/10000 [7:34:32<14:59:24,  8.09s/it]

Epoch: 3332 | Training loss 2.8635197430849075 | Validation loss 2.79397314786911



 33%|████████████████████████                                                | 3334/10000 [7:34:41<14:59:32,  8.10s/it]

Epoch: 3333 | Training loss 2.8617113083601 | Validation loss 2.7917086482048035



 33%|████████████████████████                                                | 3335/10000 [7:34:49<14:58:01,  8.08s/it]

Epoch: 3334 | Training loss 2.8692963793873787 | Validation loss 2.7886212170124054



 33%|████████████████████████                                                | 3336/10000 [7:34:57<14:56:55,  8.08s/it]

Epoch: 3335 | Training loss 2.8695319667458534 | Validation loss 2.791746586561203



 33%|████████████████████████                                                | 3337/10000 [7:35:05<14:51:25,  8.03s/it]

Epoch: 3336 | Training loss 2.866220772266388 | Validation loss 2.7854557633399963



 33%|████████████████████████                                                | 3338/10000 [7:35:13<14:51:40,  8.03s/it]

Epoch: 3337 | Training loss 2.86942832916975 | Validation loss 2.794333040714264



 33%|████████████████████████                                                | 3339/10000 [7:35:21<14:51:48,  8.03s/it]

Epoch: 3338 | Training loss 2.865552730858326 | Validation loss 2.7915757298469543



 33%|████████████████████████                                                | 3340/10000 [7:35:29<14:50:24,  8.02s/it]

Epoch: 3339 | Training loss 2.871022589504719 | Validation loss 2.792462706565857



 33%|████████████████████████                                                | 3341/10000 [7:35:37<14:52:53,  8.05s/it]

Epoch: 3340 | Training loss 2.8666501194238663 | Validation loss 2.794830322265625



 33%|████████████████████████                                                | 3342/10000 [7:35:45<14:55:40,  8.07s/it]

Epoch: 3341 | Training loss 2.8712275102734566 | Validation loss 2.800792545080185



 33%|████████████████████████                                                | 3343/10000 [7:35:53<14:57:42,  8.09s/it]

Epoch: 3342 | Training loss 2.8632486164569855 | Validation loss 2.793348401784897



 33%|████████████████████████                                                | 3344/10000 [7:36:01<14:55:19,  8.07s/it]

Epoch: 3343 | Training loss 2.86728148907423 | Validation loss 2.7884817123413086



 33%|████████████████████████                                                | 3345/10000 [7:36:09<14:53:04,  8.05s/it]

Epoch: 3344 | Training loss 2.8700794354081154 | Validation loss 2.7923054099082947



 33%|████████████████████████                                                | 3346/10000 [7:36:17<14:52:33,  8.05s/it]

Epoch: 3345 | Training loss 2.864641599357128 | Validation loss 2.7896110117435455



 33%|████████████████████████                                                | 3347/10000 [7:36:25<14:56:24,  8.08s/it]

Epoch: 3346 | Training loss 2.8650917187333107 | Validation loss 2.7956585586071014



 33%|████████████████████████                                                | 3348/10000 [7:36:33<14:57:24,  8.09s/it]

Epoch: 3347 | Training loss 2.8655520230531693 | Validation loss 2.7946099042892456



 33%|████████████████████████                                                | 3349/10000 [7:36:41<14:52:45,  8.05s/it]

Epoch: 3348 | Training loss 2.866014428436756 | Validation loss 2.7898618280887604



 34%|████████████████████████                                                | 3350/10000 [7:36:49<14:54:07,  8.07s/it]

Epoch: 3349 | Training loss 2.8644351586699486 | Validation loss 2.790614753961563



 34%|████████████████████████▏                                               | 3351/10000 [7:36:58<14:55:36,  8.08s/it]

Epoch: 3350 | Training loss 2.865953251719475 | Validation loss 2.7926678359508514



 34%|████████████████████████▏                                               | 3352/10000 [7:37:06<15:00:22,  8.13s/it]

Epoch: 3351 | Training loss 2.8653224408626556 | Validation loss 2.7891824543476105



 34%|████████████████████████▏                                               | 3353/10000 [7:37:14<15:00:26,  8.13s/it]

Epoch: 3352 | Training loss 2.873221457004547 | Validation loss 2.7956071197986603



 34%|████████████████████████▏                                               | 3354/10000 [7:37:22<15:01:27,  8.14s/it]

Epoch: 3353 | Training loss 2.8630520403385162 | Validation loss 2.792287588119507



 34%|████████████████████████▏                                               | 3355/10000 [7:37:30<14:58:01,  8.11s/it]

Epoch: 3354 | Training loss 2.8678307980298996 | Validation loss 2.796540230512619



 34%|████████████████████████▏                                               | 3356/10000 [7:37:38<14:56:32,  8.10s/it]

Epoch: 3355 | Training loss 2.868115559220314 | Validation loss 2.7946687042713165



 34%|████████████████████████▏                                               | 3357/10000 [7:37:46<14:56:37,  8.10s/it]

Epoch: 3356 | Training loss 2.8650851771235466 | Validation loss 2.796819895505905



 34%|████████████████████████▏                                               | 3358/10000 [7:37:54<14:57:01,  8.10s/it]

Epoch: 3357 | Training loss 2.8670611158013344 | Validation loss 2.787065714597702



 34%|████████████████████████▏                                               | 3359/10000 [7:38:02<14:55:46,  8.09s/it]

Epoch: 3358 | Training loss 2.862035073339939 | Validation loss 2.787211000919342



 34%|████████████████████████▏                                               | 3360/10000 [7:38:11<14:57:58,  8.11s/it]

Epoch: 3359 | Training loss 2.8710507601499557 | Validation loss 2.789200186729431



 34%|████████████████████████▏                                               | 3361/10000 [7:38:19<14:55:18,  8.09s/it]

Epoch: 3360 | Training loss 2.8651878088712692 | Validation loss 2.790413409471512



 34%|████████████████████████▏                                               | 3362/10000 [7:38:27<14:56:34,  8.10s/it]

Epoch: 3361 | Training loss 2.859434850513935 | Validation loss 2.7932918071746826



 34%|████████████████████████▏                                               | 3363/10000 [7:38:35<14:58:59,  8.13s/it]

Epoch: 3362 | Training loss 2.8680669888854027 | Validation loss 2.790648430585861



 34%|████████████████████████▏                                               | 3364/10000 [7:38:43<14:58:09,  8.12s/it]

Epoch: 3363 | Training loss 2.8696315810084343 | Validation loss 2.7866047620773315



 34%|████████████████████████▏                                               | 3365/10000 [7:38:51<14:56:51,  8.11s/it]

Epoch: 3364 | Training loss 2.8692547082901 | Validation loss 2.7912504076957703



 34%|████████████████████████▏                                               | 3366/10000 [7:38:59<15:00:01,  8.14s/it]

Epoch: 3365 | Training loss 2.863000549376011 | Validation loss 2.790398746728897



 34%|████████████████████████▏                                               | 3367/10000 [7:39:07<14:56:36,  8.11s/it]

Epoch: 3366 | Training loss 2.872552566230297 | Validation loss 2.7963435351848602



 34%|████████████████████████▏                                               | 3368/10000 [7:39:16<14:57:11,  8.12s/it]

Epoch: 3367 | Training loss 2.8656096383929253 | Validation loss 2.7887580692768097



 34%|████████████████████████▎                                               | 3369/10000 [7:39:24<15:00:44,  8.15s/it]

Epoch: 3368 | Training loss 2.864268518984318 | Validation loss 2.7922135293483734



 34%|████████████████████████▎                                               | 3370/10000 [7:39:32<14:56:06,  8.11s/it]

Epoch: 3369 | Training loss 2.864697739481926 | Validation loss 2.79008287191391



 34%|████████████████████████▎                                               | 3371/10000 [7:39:40<14:56:43,  8.12s/it]

Epoch: 3370 | Training loss 2.869899347424507 | Validation loss 2.7959083020687103



 34%|████████████████████████▎                                               | 3372/10000 [7:39:48<14:56:37,  8.12s/it]

Epoch: 3371 | Training loss 2.8662924394011497 | Validation loss 2.7902311980724335



 34%|████████████████████████▎                                               | 3373/10000 [7:39:56<14:56:35,  8.12s/it]

Epoch: 3372 | Training loss 2.8645936027169228 | Validation loss 2.7878777384757996



 34%|████████████████████████▎                                               | 3374/10000 [7:40:04<14:59:36,  8.15s/it]

Epoch: 3373 | Training loss 2.8695067167282104 | Validation loss 2.7869426012039185



 34%|████████████████████████▎                                               | 3375/10000 [7:40:13<14:59:32,  8.15s/it]

Epoch: 3374 | Training loss 2.8672604858875275 | Validation loss 2.7906004786491394



 34%|████████████████████████▎                                               | 3376/10000 [7:40:21<14:56:19,  8.12s/it]

Epoch: 3375 | Training loss 2.8634586110711098 | Validation loss 2.7911892533302307



 34%|████████████████████████▎                                               | 3377/10000 [7:40:29<14:59:14,  8.15s/it]

Epoch: 3376 | Training loss 2.861028715968132 | Validation loss 2.7899963557720184



 34%|████████████████████████▎                                               | 3378/10000 [7:40:37<14:59:04,  8.15s/it]

Epoch: 3377 | Training loss 2.867907002568245 | Validation loss 2.7908277213573456



 34%|████████████████████████▎                                               | 3379/10000 [7:40:45<14:55:55,  8.12s/it]

Epoch: 3378 | Training loss 2.866690017282963 | Validation loss 2.7836712300777435



 34%|████████████████████████▎                                               | 3380/10000 [7:40:53<14:53:08,  8.09s/it]

Epoch: 3379 | Training loss 2.8618290051817894 | Validation loss 2.788264811038971



 34%|████████████████████████▎                                               | 3381/10000 [7:41:01<14:52:40,  8.09s/it]

Epoch: 3380 | Training loss 2.865636594593525 | Validation loss 2.7928368747234344



 34%|████████████████████████▎                                               | 3382/10000 [7:41:09<14:55:23,  8.12s/it]

Epoch: 3381 | Training loss 2.8648461773991585 | Validation loss 2.7901377081871033



 34%|████████████████████████▎                                               | 3383/10000 [7:41:17<14:55:43,  8.12s/it]

Epoch: 3382 | Training loss 2.865747705101967 | Validation loss 2.7908332645893097



 34%|████████████████████████▎                                               | 3384/10000 [7:41:26<14:57:04,  8.14s/it]

Epoch: 3383 | Training loss 2.86528367549181 | Validation loss 2.792547643184662



 34%|████████████████████████▎                                               | 3385/10000 [7:41:34<14:55:16,  8.12s/it]

Epoch: 3384 | Training loss 2.865278258919716 | Validation loss 2.7884647846221924



 34%|████████████████████████▍                                               | 3386/10000 [7:41:42<14:56:31,  8.13s/it]

Epoch: 3385 | Training loss 2.86626710742712 | Validation loss 2.792642056941986



 34%|████████████████████████▍                                               | 3387/10000 [7:41:50<14:56:55,  8.14s/it]

Epoch: 3386 | Training loss 2.8591349124908447 | Validation loss 2.788232296705246



 34%|████████████████████████▍                                               | 3388/10000 [7:41:58<14:58:43,  8.16s/it]

Epoch: 3387 | Training loss 2.8686269968748093 | Validation loss 2.78831747174263



 34%|████████████████████████▍                                               | 3389/10000 [7:42:06<14:58:04,  8.15s/it]

Epoch: 3388 | Training loss 2.868984915316105 | Validation loss 2.791384905576706



 34%|████████████████████████▍                                               | 3390/10000 [7:42:14<14:53:13,  8.11s/it]

Epoch: 3389 | Training loss 2.865655839443207 | Validation loss 2.790728896856308



 34%|████████████████████████▍                                               | 3391/10000 [7:42:22<14:52:22,  8.10s/it]

Epoch: 3390 | Training loss 2.8655208125710487 | Validation loss 2.792185068130493



 34%|████████████████████████▍                                               | 3392/10000 [7:42:30<14:50:12,  8.08s/it]

Epoch: 3391 | Training loss 2.864007331430912 | Validation loss 2.7902697920799255



 34%|████████████████████████▍                                               | 3393/10000 [7:42:39<14:55:55,  8.14s/it]

Epoch: 3392 | Training loss 2.870615340769291 | Validation loss 2.7924643754959106



 34%|████████████████████████▍                                               | 3394/10000 [7:42:47<14:54:42,  8.13s/it]

Epoch: 3393 | Training loss 2.8698184490203857 | Validation loss 2.7912056148052216



 34%|████████████████████████▍                                               | 3395/10000 [7:42:55<14:55:17,  8.13s/it]

Epoch: 3394 | Training loss 2.8681371062994003 | Validation loss 2.7941560447216034



 34%|████████████████████████▍                                               | 3396/10000 [7:43:03<14:51:35,  8.10s/it]

Epoch: 3395 | Training loss 2.866046406328678 | Validation loss 2.7890555262565613



 34%|████████████████████████▍                                               | 3397/10000 [7:43:11<14:49:54,  8.09s/it]

Epoch: 3396 | Training loss 2.8628211617469788 | Validation loss 2.789820283651352



 34%|████████████████████████▍                                               | 3398/10000 [7:43:19<14:48:41,  8.08s/it]

Epoch: 3397 | Training loss 2.864398017525673 | Validation loss 2.7893667817115784



 34%|████████████████████████▍                                               | 3399/10000 [7:43:27<14:53:27,  8.12s/it]

Epoch: 3398 | Training loss 2.8652253299951553 | Validation loss 2.7981106340885162



 34%|████████████████████████▍                                               | 3400/10000 [7:43:35<14:51:09,  8.10s/it]

Epoch: 3399 | Training loss 2.8635611459612846 | Validation loss 2.7914216220378876



 34%|████████████████████████▍                                               | 3401/10000 [7:43:43<14:49:29,  8.09s/it]

Epoch: 3400 | Training loss 2.865443468093872 | Validation loss 2.788838267326355



 34%|████████████████████████▍                                               | 3402/10000 [7:43:52<14:51:54,  8.11s/it]

Epoch: 3401 | Training loss 2.8639692068099976 | Validation loss 2.7883183360099792



 34%|████████████████████████▌                                               | 3403/10000 [7:44:00<14:49:23,  8.09s/it]

Epoch: 3402 | Training loss 2.867443196475506 | Validation loss 2.784901410341263



 34%|████████████████████████▌                                               | 3404/10000 [7:44:08<14:50:21,  8.10s/it]

Epoch: 3403 | Training loss 2.8645980060100555 | Validation loss 2.7929937541484833



 34%|████████████████████████▌                                               | 3405/10000 [7:44:16<14:53:53,  8.13s/it]

Epoch: 3404 | Training loss 2.8663066253066063 | Validation loss 2.7910839319229126



 34%|████████████████████████▌                                               | 3406/10000 [7:44:24<14:54:37,  8.14s/it]

Epoch: 3405 | Training loss 2.8731731697916985 | Validation loss 2.788374572992325



 34%|████████████████████████▌                                               | 3407/10000 [7:44:32<14:50:07,  8.10s/it]

Epoch: 3406 | Training loss 2.8675149753689766 | Validation loss 2.7889853417873383



 34%|████████████████████████▌                                               | 3408/10000 [7:44:40<14:46:54,  8.07s/it]

Epoch: 3407 | Training loss 2.865821197628975 | Validation loss 2.7996377050876617



 34%|████████████████████████▌                                               | 3409/10000 [7:44:48<14:48:44,  8.09s/it]

Epoch: 3408 | Training loss 2.8715612441301346 | Validation loss 2.7919687628746033



 34%|████████████████████████▌                                               | 3410/10000 [7:44:56<14:48:27,  8.09s/it]

Epoch: 3409 | Training loss 2.8706103190779686 | Validation loss 2.795171618461609



 34%|████████████████████████▌                                               | 3411/10000 [7:45:04<14:49:14,  8.10s/it]

Epoch: 3410 | Training loss 2.8680806383490562 | Validation loss 2.7899425625801086



 34%|████████████████████████▌                                               | 3412/10000 [7:45:13<14:46:38,  8.08s/it]

Epoch: 3411 | Training loss 2.8624524027109146 | Validation loss 2.7878507375717163



 34%|████████████████████████▌                                               | 3413/10000 [7:45:21<14:47:24,  8.08s/it]

Epoch: 3412 | Training loss 2.861720532178879 | Validation loss 2.7902361154556274



 34%|████████████████████████▌                                               | 3414/10000 [7:45:29<14:47:16,  8.08s/it]

Epoch: 3413 | Training loss 2.8674846664071083 | Validation loss 2.795410066843033



 34%|████████████████████████▌                                               | 3415/10000 [7:45:37<14:47:06,  8.08s/it]

Epoch: 3414 | Training loss 2.8603373169898987 | Validation loss 2.789879024028778



 34%|████████████████████████▌                                               | 3416/10000 [7:45:45<14:48:07,  8.09s/it]

Epoch: 3415 | Training loss 2.867168553173542 | Validation loss 2.79259717464447



 34%|████████████████████████▌                                               | 3417/10000 [7:45:53<14:49:50,  8.11s/it]

Epoch: 3416 | Training loss 2.863293133676052 | Validation loss 2.7917662858963013



 34%|████████████████████████▌                                               | 3418/10000 [7:46:01<14:49:55,  8.11s/it]

Epoch: 3417 | Training loss 2.870749816298485 | Validation loss 2.795539677143097



 34%|████████████████████████▌                                               | 3419/10000 [7:46:09<14:51:56,  8.13s/it]

Epoch: 3418 | Training loss 2.8687711656093597 | Validation loss 2.7918396294116974



 34%|████████████████████████▌                                               | 3420/10000 [7:46:17<14:51:27,  8.13s/it]

Epoch: 3419 | Training loss 2.8693411350250244 | Validation loss 2.7890421748161316



 34%|████████████████████████▋                                               | 3421/10000 [7:46:26<14:50:25,  8.12s/it]

Epoch: 3420 | Training loss 2.85972361266613 | Validation loss 2.7927019894123077



 34%|████████████████████████▋                                               | 3422/10000 [7:46:34<14:51:42,  8.13s/it]

Epoch: 3421 | Training loss 2.866692528128624 | Validation loss 2.7911728024482727



 34%|████████████████████████▋                                               | 3423/10000 [7:46:42<14:46:36,  8.09s/it]

Epoch: 3422 | Training loss 2.871076874434948 | Validation loss 2.7877837419509888



 34%|████████████████████████▋                                               | 3424/10000 [7:46:50<14:47:57,  8.10s/it]

Epoch: 3423 | Training loss 2.8715817779302597 | Validation loss 2.7946397960186005



 34%|████████████████████████▋                                               | 3425/10000 [7:46:58<14:46:20,  8.09s/it]

Epoch: 3424 | Training loss 2.8711370155215263 | Validation loss 2.797389328479767



 34%|████████████████████████▋                                               | 3426/10000 [7:47:06<14:47:09,  8.10s/it]

Epoch: 3425 | Training loss 2.8659484535455704 | Validation loss 2.7886375784873962



 34%|████████████████████████▋                                               | 3427/10000 [7:47:14<14:48:14,  8.11s/it]

Epoch: 3426 | Training loss 2.8665282800793648 | Validation loss 2.790754020214081



 34%|████████████████████████▋                                               | 3428/10000 [7:47:22<14:43:15,  8.06s/it]

Epoch: 3427 | Training loss 2.8707805052399635 | Validation loss 2.7907784283161163



 34%|████████████████████████▋                                               | 3429/10000 [7:47:30<14:40:41,  8.04s/it]

Epoch: 3428 | Training loss 2.867726854979992 | Validation loss 2.7939893901348114



 34%|████████████████████████▋                                               | 3430/10000 [7:47:38<14:42:29,  8.06s/it]

Epoch: 3429 | Training loss 2.8669007793068886 | Validation loss 2.7896840274333954



 34%|████████████████████████▋                                               | 3431/10000 [7:47:46<14:42:14,  8.06s/it]

Epoch: 3430 | Training loss 2.87583015114069 | Validation loss 2.7943268418312073



 34%|████████████████████████▋                                               | 3432/10000 [7:47:54<14:38:27,  8.02s/it]

Epoch: 3431 | Training loss 2.864673115313053 | Validation loss 2.786619633436203



 34%|████████████████████████▋                                               | 3433/10000 [7:48:02<14:40:21,  8.04s/it]

Epoch: 3432 | Training loss 2.8671207204461098 | Validation loss 2.7871610522270203



 34%|████████████████████████▋                                               | 3434/10000 [7:48:10<14:42:07,  8.06s/it]

Epoch: 3433 | Training loss 2.866146646440029 | Validation loss 2.7910082638263702



 34%|████████████████████████▋                                               | 3435/10000 [7:48:18<14:41:21,  8.06s/it]

Epoch: 3434 | Training loss 2.8656946048140526 | Validation loss 2.785821110010147



 34%|████████████████████████▋                                               | 3436/10000 [7:48:27<14:45:54,  8.10s/it]

Epoch: 3435 | Training loss 2.865734785795212 | Validation loss 2.7898994386196136



 34%|████████████████████████▋                                               | 3437/10000 [7:48:35<14:44:18,  8.08s/it]

Epoch: 3436 | Training loss 2.869909405708313 | Validation loss 2.792140245437622



 34%|████████████████████████▊                                               | 3438/10000 [7:48:43<14:47:54,  8.12s/it]

Epoch: 3437 | Training loss 2.8655087426304817 | Validation loss 2.7875031232833862



 34%|████████████████████████▊                                               | 3439/10000 [7:48:51<14:46:37,  8.11s/it]

Epoch: 3438 | Training loss 2.8690674379467964 | Validation loss 2.793482929468155



 34%|████████████████████████▊                                               | 3440/10000 [7:48:59<14:46:17,  8.11s/it]

Epoch: 3439 | Training loss 2.8642980083823204 | Validation loss 2.7933094203472137



 34%|████████████████████████▊                                               | 3441/10000 [7:49:07<14:45:00,  8.10s/it]

Epoch: 3440 | Training loss 2.868371292948723 | Validation loss 2.7877106368541718



 34%|████████████████████████▊                                               | 3442/10000 [7:49:15<14:44:37,  8.09s/it]

Epoch: 3441 | Training loss 2.8636483773589134 | Validation loss 2.7878062427043915



 34%|████████████████████████▊                                               | 3443/10000 [7:49:23<14:45:29,  8.10s/it]

Epoch: 3442 | Training loss 2.864690437912941 | Validation loss 2.787986606359482



 34%|████████████████████████▊                                               | 3444/10000 [7:49:32<14:49:00,  8.14s/it]

Epoch: 3443 | Training loss 2.8689531683921814 | Validation loss 2.7962256371974945



 34%|████████████████████████▊                                               | 3445/10000 [7:49:40<14:52:19,  8.17s/it]

Epoch: 3444 | Training loss 2.8661010935902596 | Validation loss 2.787536710500717



 34%|████████████████████████▊                                               | 3446/10000 [7:49:48<14:53:33,  8.18s/it]

Epoch: 3445 | Training loss 2.8659596145153046 | Validation loss 2.799114942550659



 34%|████████████████████████▊                                               | 3447/10000 [7:49:56<14:51:48,  8.17s/it]

Epoch: 3446 | Training loss 2.8662857562303543 | Validation loss 2.7892061471939087



 34%|████████████████████████▊                                               | 3448/10000 [7:50:04<14:49:29,  8.15s/it]

Epoch: 3447 | Training loss 2.867951326072216 | Validation loss 2.790791869163513



 34%|████████████████████████▊                                               | 3449/10000 [7:50:12<14:46:19,  8.12s/it]

Epoch: 3448 | Training loss 2.8643844202160835 | Validation loss 2.7947909832000732



 34%|████████████████████████▊                                               | 3450/10000 [7:50:20<14:46:10,  8.12s/it]

Epoch: 3449 | Training loss 2.8694151416420937 | Validation loss 2.787416487932205



 35%|████████████████████████▊                                               | 3451/10000 [7:50:29<14:44:59,  8.11s/it]

Epoch: 3450 | Training loss 2.8662373423576355 | Validation loss 2.795754700899124



 35%|████████████████████████▊                                               | 3452/10000 [7:50:36<14:40:01,  8.06s/it]

Epoch: 3451 | Training loss 2.869379013776779 | Validation loss 2.793269604444504



 35%|████████████████████████▊                                               | 3453/10000 [7:50:45<14:40:39,  8.07s/it]

Epoch: 3452 | Training loss 2.8613680824637413 | Validation loss 2.784984529018402



 35%|████████████████████████▊                                               | 3454/10000 [7:50:53<14:40:31,  8.07s/it]

Epoch: 3453 | Training loss 2.865617409348488 | Validation loss 2.7955337464809418



 35%|████████████████████████▉                                               | 3455/10000 [7:51:01<14:35:42,  8.03s/it]

Epoch: 3454 | Training loss 2.8637572303414345 | Validation loss 2.7919622659683228



 35%|████████████████████████▉                                               | 3456/10000 [7:51:09<14:40:30,  8.07s/it]

Epoch: 3455 | Training loss 2.8636609315872192 | Validation loss 2.7955626249313354



 35%|████████████████████████▉                                               | 3457/10000 [7:51:17<14:46:49,  8.13s/it]

Epoch: 3456 | Training loss 2.8649975806474686 | Validation loss 2.7899398505687714



 35%|████████████████████████▉                                               | 3458/10000 [7:51:25<14:44:48,  8.12s/it]

Epoch: 3457 | Training loss 2.8676213920116425 | Validation loss 2.7932671904563904



 35%|████████████████████████▉                                               | 3459/10000 [7:51:33<14:50:30,  8.17s/it]

Epoch: 3458 | Training loss 2.8676868453621864 | Validation loss 2.7963947653770447



 35%|████████████████████████▉                                               | 3460/10000 [7:51:41<14:48:41,  8.15s/it]

Epoch: 3459 | Training loss 2.8667894154787064 | Validation loss 2.7916646003723145



 35%|████████████████████████▉                                               | 3461/10000 [7:51:50<14:49:30,  8.16s/it]

Epoch: 3460 | Training loss 2.8713368475437164 | Validation loss 2.796178102493286



 35%|████████████████████████▉                                               | 3462/10000 [7:51:58<14:46:52,  8.14s/it]

Epoch: 3461 | Training loss 2.8662576004862785 | Validation loss 2.7911773920059204



 35%|████████████████████████▉                                               | 3463/10000 [7:52:06<14:47:34,  8.15s/it]

Epoch: 3462 | Training loss 2.8652228340506554 | Validation loss 2.789287656545639



 35%|████████████████████████▉                                               | 3464/10000 [7:52:14<14:45:29,  8.13s/it]

Epoch: 3463 | Training loss 2.867504969239235 | Validation loss 2.7879391610622406



 35%|████████████████████████▉                                               | 3465/10000 [7:52:22<14:43:29,  8.11s/it]

Epoch: 3464 | Training loss 2.8661425784230232 | Validation loss 2.78774955868721



 35%|████████████████████████▉                                               | 3466/10000 [7:52:30<14:45:01,  8.13s/it]

Epoch: 3465 | Training loss 2.870697893202305 | Validation loss 2.791526108980179



 35%|████████████████████████▉                                               | 3467/10000 [7:52:38<14:45:34,  8.13s/it]

Epoch: 3466 | Training loss 2.8715402856469154 | Validation loss 2.7935942709445953



 35%|████████████████████████▉                                               | 3468/10000 [7:52:46<14:41:49,  8.10s/it]

Epoch: 3467 | Training loss 2.8646190464496613 | Validation loss 2.78943407535553



 35%|████████████████████████▉                                               | 3469/10000 [7:52:55<14:41:21,  8.10s/it]

Epoch: 3468 | Training loss 2.8640774115920067 | Validation loss 2.78774031996727



 35%|████████████████████████▉                                               | 3470/10000 [7:53:03<14:41:26,  8.10s/it]

Epoch: 3469 | Training loss 2.8719886019825935 | Validation loss 2.7929870188236237



 35%|████████████████████████▉                                               | 3471/10000 [7:53:11<14:38:22,  8.07s/it]

Epoch: 3470 | Training loss 2.864781677722931 | Validation loss 2.787728726863861



 35%|████████████████████████▉                                               | 3472/10000 [7:53:19<14:39:10,  8.08s/it]

Epoch: 3471 | Training loss 2.8631768450140953 | Validation loss 2.787266284227371



 35%|█████████████████████████                                               | 3473/10000 [7:53:27<14:40:49,  8.10s/it]

Epoch: 3472 | Training loss 2.8625366538763046 | Validation loss 2.7900668680667877



 35%|█████████████████████████                                               | 3474/10000 [7:53:35<14:39:53,  8.09s/it]

Epoch: 3473 | Training loss 2.8583072274923325 | Validation loss 2.789766013622284



 35%|█████████████████████████                                               | 3475/10000 [7:53:43<14:42:21,  8.11s/it]

Epoch: 3474 | Training loss 2.8661680668592453 | Validation loss 2.7963720858097076



 35%|█████████████████████████                                               | 3476/10000 [7:53:51<14:44:21,  8.13s/it]

Epoch: 3475 | Training loss 2.862295389175415 | Validation loss 2.7909304797649384



 35%|█████████████████████████                                               | 3477/10000 [7:53:59<14:46:42,  8.16s/it]

Epoch: 3476 | Training loss 2.867895781993866 | Validation loss 2.7891914546489716



 35%|█████████████████████████                                               | 3478/10000 [7:54:08<14:47:50,  8.17s/it]

Epoch: 3477 | Training loss 2.8650658577680588 | Validation loss 2.7901633381843567



 35%|█████████████████████████                                               | 3479/10000 [7:54:16<14:46:31,  8.16s/it]

Epoch: 3478 | Training loss 2.8613645434379578 | Validation loss 2.7855245769023895



 35%|█████████████████████████                                               | 3480/10000 [7:54:24<14:48:36,  8.18s/it]

Epoch: 3479 | Training loss 2.8690590038895607 | Validation loss 2.787309467792511



 35%|█████████████████████████                                               | 3481/10000 [7:54:32<14:47:02,  8.16s/it]

Epoch: 3480 | Training loss 2.8691898435354233 | Validation loss 2.788083791732788



 35%|█████████████████████████                                               | 3482/10000 [7:54:40<14:43:54,  8.14s/it]

Epoch: 3481 | Training loss 2.867840677499771 | Validation loss 2.7902101278305054



 35%|█████████████████████████                                               | 3483/10000 [7:54:48<14:43:34,  8.13s/it]

Epoch: 3482 | Training loss 2.870850831270218 | Validation loss 2.7956221997737885



 35%|█████████████████████████                                               | 3484/10000 [7:54:57<14:49:37,  8.19s/it]

Epoch: 3483 | Training loss 2.865596979856491 | Validation loss 2.797465056180954



 35%|█████████████████████████                                               | 3485/10000 [7:55:05<14:46:47,  8.17s/it]

Epoch: 3484 | Training loss 2.8702579513192177 | Validation loss 2.7924867272377014



 35%|█████████████████████████                                               | 3486/10000 [7:55:13<14:45:00,  8.15s/it]

Epoch: 3485 | Training loss 2.8659869357943535 | Validation loss 2.787802070379257



 35%|█████████████████████████                                               | 3487/10000 [7:55:21<14:41:11,  8.12s/it]

Epoch: 3486 | Training loss 2.8632839918136597 | Validation loss 2.790099948644638



 35%|█████████████████████████                                               | 3488/10000 [7:55:29<14:42:29,  8.13s/it]

Epoch: 3487 | Training loss 2.8664776608347893 | Validation loss 2.786230891942978



 35%|█████████████████████████                                               | 3489/10000 [7:55:37<14:39:53,  8.11s/it]

Epoch: 3488 | Training loss 2.8692814484238625 | Validation loss 2.787941634654999



 35%|█████████████████████████▏                                              | 3490/10000 [7:55:45<14:41:13,  8.12s/it]

Epoch: 3489 | Training loss 2.8695789873600006 | Validation loss 2.787421226501465



 35%|█████████████████████████▏                                              | 3491/10000 [7:55:53<14:41:57,  8.13s/it]

Epoch: 3490 | Training loss 2.8688967302441597 | Validation loss 2.788082867860794



 35%|█████████████████████████▏                                              | 3492/10000 [7:56:02<14:40:55,  8.12s/it]

Epoch: 3491 | Training loss 2.859340824186802 | Validation loss 2.785853087902069



 35%|█████████████████████████▏                                              | 3493/10000 [7:56:10<14:42:22,  8.14s/it]

Epoch: 3492 | Training loss 2.8641066551208496 | Validation loss 2.789814680814743



 35%|█████████████████████████▏                                              | 3494/10000 [7:56:18<14:43:40,  8.15s/it]

Epoch: 3493 | Training loss 2.862931065261364 | Validation loss 2.79248109459877



 35%|█████████████████████████▏                                              | 3495/10000 [7:56:26<14:39:56,  8.12s/it]

Epoch: 3494 | Training loss 2.869735062122345 | Validation loss 2.794439733028412



 35%|█████████████████████████▏                                              | 3496/10000 [7:56:34<14:41:20,  8.13s/it]

Epoch: 3495 | Training loss 2.864619918167591 | Validation loss 2.788685381412506



 35%|█████████████████████████▏                                              | 3497/10000 [7:56:42<14:38:08,  8.10s/it]

Epoch: 3496 | Training loss 2.8614995554089546 | Validation loss 2.786583751440048



 35%|█████████████████████████▏                                              | 3498/10000 [7:56:50<14:33:55,  8.06s/it]

Epoch: 3497 | Training loss 2.8681148290634155 | Validation loss 2.797710031270981



 35%|█████████████████████████▏                                              | 3499/10000 [7:56:58<14:31:00,  8.04s/it]

Epoch: 3498 | Training loss 2.8644315004348755 | Validation loss 2.7882450222969055



 35%|█████████████████████████▏                                              | 3500/10000 [7:57:06<14:33:59,  8.07s/it]

Epoch: 3499 | Training loss 2.8658039048314095 | Validation loss 2.7877509593963623



 35%|█████████████████████████▏                                              | 3501/10000 [7:57:14<14:34:27,  8.07s/it]

Epoch: 3500 | Training loss 2.868353731930256 | Validation loss 2.7904487550258636



 35%|█████████████████████████▏                                              | 3502/10000 [7:57:22<14:34:15,  8.07s/it]

Epoch: 3501 | Training loss 2.8660394102334976 | Validation loss 2.7986127734184265



 35%|█████████████████████████▏                                              | 3503/10000 [7:57:31<14:35:45,  8.09s/it]

Epoch: 3502 | Training loss 2.865692190825939 | Validation loss 2.7941467463970184



 35%|█████████████████████████▏                                              | 3504/10000 [7:57:39<14:34:01,  8.07s/it]

Epoch: 3503 | Training loss 2.8694181367754936 | Validation loss 2.794381767511368



 35%|█████████████████████████▏                                              | 3505/10000 [7:57:47<14:37:55,  8.11s/it]

Epoch: 3504 | Training loss 2.866973541676998 | Validation loss 2.7923851013183594



 35%|█████████████████████████▏                                              | 3506/10000 [7:57:55<14:42:03,  8.15s/it]

Epoch: 3505 | Training loss 2.8648147508502007 | Validation loss 2.7873555719852448



 35%|█████████████████████████▎                                              | 3507/10000 [7:58:03<14:42:52,  8.16s/it]

Epoch: 3506 | Training loss 2.8672375977039337 | Validation loss 2.78905126452446



 35%|█████████████████████████▎                                              | 3508/10000 [7:58:11<14:45:50,  8.19s/it]

Epoch: 3507 | Training loss 2.8673358112573624 | Validation loss 2.7890409231185913



 35%|█████████████████████████▎                                              | 3509/10000 [7:58:20<14:41:54,  8.15s/it]

Epoch: 3508 | Training loss 2.864212177693844 | Validation loss 2.790158122777939



 35%|█████████████████████████▎                                              | 3510/10000 [7:58:28<14:40:09,  8.14s/it]

Epoch: 3509 | Training loss 2.862201452255249 | Validation loss 2.7839952409267426



 35%|█████████████████████████▎                                              | 3511/10000 [7:58:36<14:41:53,  8.15s/it]

Epoch: 3510 | Training loss 2.8710775300860405 | Validation loss 2.792776733636856



 35%|█████████████████████████▎                                              | 3512/10000 [7:58:44<14:43:05,  8.17s/it]

Epoch: 3511 | Training loss 2.8697633892297745 | Validation loss 2.7917368412017822



 35%|█████████████████████████▎                                              | 3513/10000 [7:58:52<14:43:19,  8.17s/it]

Epoch: 3512 | Training loss 2.8622947707772255 | Validation loss 2.7897558510303497



 35%|█████████████████████████▎                                              | 3514/10000 [7:59:00<14:43:28,  8.17s/it]

Epoch: 3513 | Training loss 2.8667591139674187 | Validation loss 2.7905363738536835



 35%|█████████████████████████▎                                              | 3515/10000 [7:59:09<14:43:36,  8.18s/it]

Epoch: 3514 | Training loss 2.868549033999443 | Validation loss 2.7893109023571014



 35%|█████████████████████████▎                                              | 3516/10000 [7:59:17<14:40:05,  8.14s/it]

Epoch: 3515 | Training loss 2.869343876838684 | Validation loss 2.799830883741379



 35%|█████████████████████████▎                                              | 3517/10000 [7:59:25<14:41:44,  8.16s/it]

Epoch: 3516 | Training loss 2.8670411109924316 | Validation loss 2.7960781157016754



 35%|█████████████████████████▎                                              | 3518/10000 [7:59:33<14:40:12,  8.15s/it]

Epoch: 3517 | Training loss 2.870510458946228 | Validation loss 2.796098053455353



 35%|█████████████████████████▎                                              | 3519/10000 [7:59:41<14:37:35,  8.12s/it]

Epoch: 3518 | Training loss 2.8653595224022865 | Validation loss 2.7985122203826904



 35%|█████████████████████████▎                                              | 3520/10000 [7:59:49<14:34:39,  8.10s/it]

Epoch: 3519 | Training loss 2.870521694421768 | Validation loss 2.7950530350208282



 35%|█████████████████████████▎                                              | 3521/10000 [7:59:57<14:35:08,  8.10s/it]

Epoch: 3520 | Training loss 2.8693146482110023 | Validation loss 2.7881462574005127



 35%|█████████████████████████▎                                              | 3522/10000 [8:00:05<14:35:23,  8.11s/it]

Epoch: 3521 | Training loss 2.8698721304535866 | Validation loss 2.7952170968055725



 35%|█████████████████████████▎                                              | 3523/10000 [8:00:13<14:34:30,  8.10s/it]

Epoch: 3522 | Training loss 2.865761712193489 | Validation loss 2.7905473709106445



 35%|█████████████████████████▎                                              | 3524/10000 [8:00:21<14:34:21,  8.10s/it]

Epoch: 3523 | Training loss 2.8639915362000465 | Validation loss 2.788632810115814



 35%|█████████████████████████▍                                              | 3525/10000 [8:00:30<14:35:01,  8.11s/it]

Epoch: 3524 | Training loss 2.8703485429286957 | Validation loss 2.794862389564514



 35%|█████████████████████████▍                                              | 3526/10000 [8:00:38<14:34:12,  8.10s/it]

Epoch: 3525 | Training loss 2.866149753332138 | Validation loss 2.7911181151866913



 35%|█████████████████████████▍                                              | 3527/10000 [8:00:46<14:35:03,  8.11s/it]

Epoch: 3526 | Training loss 2.8682824298739433 | Validation loss 2.7906562089920044



 35%|█████████████████████████▍                                              | 3528/10000 [8:00:54<14:33:03,  8.09s/it]

Epoch: 3527 | Training loss 2.871613562107086 | Validation loss 2.792436510324478



 35%|█████████████████████████▍                                              | 3529/10000 [8:01:02<14:33:45,  8.10s/it]

Epoch: 3528 | Training loss 2.8680146411061287 | Validation loss 2.790794163942337



 35%|█████████████████████████▍                                              | 3530/10000 [8:01:10<14:33:40,  8.10s/it]

Epoch: 3529 | Training loss 2.8585859835147858 | Validation loss 2.7869434356689453



 35%|█████████████████████████▍                                              | 3531/10000 [8:01:18<14:35:28,  8.12s/it]

Epoch: 3530 | Training loss 2.8686481043696404 | Validation loss 2.7897171080112457



 35%|█████████████████████████▍                                              | 3532/10000 [8:01:26<14:37:23,  8.14s/it]

Epoch: 3531 | Training loss 2.8654982447624207 | Validation loss 2.796545535326004



 35%|█████████████████████████▍                                              | 3533/10000 [8:01:35<14:37:03,  8.14s/it]

Epoch: 3532 | Training loss 2.8675899878144264 | Validation loss 2.7940316200256348



 35%|█████████████████████████▍                                              | 3534/10000 [8:01:43<14:35:13,  8.12s/it]

Epoch: 3533 | Training loss 2.861745499074459 | Validation loss 2.7875950932502747



 35%|█████████████████████████▍                                              | 3535/10000 [8:01:51<14:34:53,  8.12s/it]

Epoch: 3534 | Training loss 2.8703777492046356 | Validation loss 2.790864884853363



 35%|█████████████████████████▍                                              | 3536/10000 [8:01:59<14:37:12,  8.14s/it]

Epoch: 3535 | Training loss 2.8651679307222366 | Validation loss 2.7863325774669647



 35%|█████████████████████████▍                                              | 3537/10000 [8:02:07<14:35:49,  8.13s/it]

Epoch: 3536 | Training loss 2.8653268814086914 | Validation loss 2.7881460189819336



 35%|█████████████████████████▍                                              | 3538/10000 [8:02:15<14:34:55,  8.12s/it]

Epoch: 3537 | Training loss 2.864295795559883 | Validation loss 2.7935384809970856



 35%|█████████████████████████▍                                              | 3539/10000 [8:02:23<14:36:37,  8.14s/it]

Epoch: 3538 | Training loss 2.868831217288971 | Validation loss 2.792679399251938



 35%|█████████████████████████▍                                              | 3540/10000 [8:02:31<14:36:08,  8.14s/it]

Epoch: 3539 | Training loss 2.8670724481344223 | Validation loss 2.7904110848903656



 35%|█████████████████████████▍                                              | 3541/10000 [8:02:40<14:37:23,  8.15s/it]

Epoch: 3540 | Training loss 2.862964391708374 | Validation loss 2.7930977046489716



 35%|█████████████████████████▌                                              | 3542/10000 [8:02:48<14:36:43,  8.15s/it]

Epoch: 3541 | Training loss 2.867994010448456 | Validation loss 2.7925265729427338



 35%|█████████████████████████▌                                              | 3543/10000 [8:02:56<14:35:09,  8.13s/it]

Epoch: 3542 | Training loss 2.8653975501656532 | Validation loss 2.7931955456733704



 35%|█████████████████████████▌                                              | 3544/10000 [8:03:04<14:31:26,  8.10s/it]

Epoch: 3543 | Training loss 2.8662344440817833 | Validation loss 2.790610760450363



 35%|█████████████████████████▌                                              | 3545/10000 [8:03:12<14:35:57,  8.14s/it]

Epoch: 3544 | Training loss 2.8656074181199074 | Validation loss 2.788980633020401



 35%|█████████████████████████▌                                              | 3546/10000 [8:03:20<14:32:34,  8.11s/it]

Epoch: 3545 | Training loss 2.870044596493244 | Validation loss 2.788911670446396



 35%|█████████████████████████▌                                              | 3547/10000 [8:03:28<14:37:07,  8.16s/it]

Epoch: 3546 | Training loss 2.8678292706608772 | Validation loss 2.7872822284698486



 35%|█████████████████████████▌                                              | 3548/10000 [8:03:37<14:38:18,  8.17s/it]

Epoch: 3547 | Training loss 2.86372110247612 | Validation loss 2.788840413093567



 35%|█████████████████████████▌                                              | 3549/10000 [8:03:45<14:33:57,  8.13s/it]

Epoch: 3548 | Training loss 2.8686370253562927 | Validation loss 2.7959645986557007



 36%|█████████████████████████▌                                              | 3550/10000 [8:03:53<14:34:06,  8.13s/it]

Epoch: 3549 | Training loss 2.862993896007538 | Validation loss 2.7833933234214783



 36%|█████████████████████████▌                                              | 3551/10000 [8:04:01<14:35:04,  8.14s/it]

Epoch: 3550 | Training loss 2.87033761292696 | Validation loss 2.8046934008598328



 36%|█████████████████████████▌                                              | 3552/10000 [8:04:09<14:34:06,  8.13s/it]

Epoch: 3551 | Training loss 2.863837294280529 | Validation loss 2.794548898935318



 36%|█████████████████████████▌                                              | 3553/10000 [8:04:17<14:32:52,  8.12s/it]

Epoch: 3552 | Training loss 2.865339018404484 | Validation loss 2.7922757267951965



 36%|█████████████████████████▌                                              | 3554/10000 [8:04:25<14:33:07,  8.13s/it]

Epoch: 3553 | Training loss 2.866110771894455 | Validation loss 2.78777214884758



 36%|█████████████████████████▌                                              | 3555/10000 [8:04:33<14:30:08,  8.10s/it]

Epoch: 3554 | Training loss 2.862388327717781 | Validation loss 2.7922136783599854



 36%|█████████████████████████▌                                              | 3556/10000 [8:04:42<14:33:09,  8.13s/it]

Epoch: 3555 | Training loss 2.86896201223135 | Validation loss 2.8023913502693176



 36%|█████████████████████████▌                                              | 3557/10000 [8:04:50<14:32:39,  8.13s/it]

Epoch: 3556 | Training loss 2.866978608071804 | Validation loss 2.7930603325366974



 36%|█████████████████████████▌                                              | 3558/10000 [8:04:58<14:35:14,  8.15s/it]

Epoch: 3557 | Training loss 2.867270343005657 | Validation loss 2.792346715927124



 36%|█████████████████████████▌                                              | 3559/10000 [8:05:06<14:35:07,  8.15s/it]

Epoch: 3558 | Training loss 2.864771693944931 | Validation loss 2.7917048037052155



 36%|█████████████████████████▋                                              | 3560/10000 [8:05:14<14:33:20,  8.14s/it]

Epoch: 3559 | Training loss 2.8690628185868263 | Validation loss 2.800246000289917



 36%|█████████████████████████▋                                              | 3561/10000 [8:05:22<14:30:18,  8.11s/it]

Epoch: 3560 | Training loss 2.8643621504306793 | Validation loss 2.7953990399837494



 36%|█████████████████████████▋                                              | 3562/10000 [8:05:30<14:28:57,  8.10s/it]

Epoch: 3561 | Training loss 2.861662417650223 | Validation loss 2.7915218770504



 36%|█████████████████████████▋                                              | 3563/10000 [8:05:38<14:29:25,  8.10s/it]

Epoch: 3562 | Training loss 2.8650708943605423 | Validation loss 2.793339788913727



 36%|█████████████████████████▋                                              | 3564/10000 [8:05:47<14:29:28,  8.11s/it]

Epoch: 3563 | Training loss 2.865526147186756 | Validation loss 2.7863976657390594



 36%|█████████████████████████▋                                              | 3565/10000 [8:05:55<14:30:24,  8.12s/it]

Epoch: 3564 | Training loss 2.8692856058478355 | Validation loss 2.7879888713359833



 36%|█████████████████████████▋                                              | 3566/10000 [8:06:03<14:29:21,  8.11s/it]

Epoch: 3565 | Training loss 2.859924837946892 | Validation loss 2.7910969853401184



 36%|█████████████████████████▋                                              | 3567/10000 [8:06:11<14:31:02,  8.12s/it]

Epoch: 3566 | Training loss 2.8572469353675842 | Validation loss 2.7864349484443665



 36%|█████████████████████████▋                                              | 3568/10000 [8:06:19<14:35:46,  8.17s/it]

Epoch: 3567 | Training loss 2.8653257191181183 | Validation loss 2.7924466133117676



 36%|█████████████████████████▋                                              | 3569/10000 [8:06:27<14:36:02,  8.17s/it]

Epoch: 3568 | Training loss 2.8657092973589897 | Validation loss 2.791386127471924



 36%|█████████████████████████▋                                              | 3570/10000 [8:06:35<14:34:10,  8.16s/it]

Epoch: 3569 | Training loss 2.8734185323119164 | Validation loss 2.7931870222091675



 36%|█████████████████████████▋                                              | 3571/10000 [8:06:44<14:31:14,  8.13s/it]

Epoch: 3570 | Training loss 2.867734916508198 | Validation loss 2.7911518216133118



 36%|█████████████████████████▋                                              | 3572/10000 [8:06:52<14:31:03,  8.13s/it]

Epoch: 3571 | Training loss 2.8709103167057037 | Validation loss 2.788635790348053



 36%|█████████████████████████▋                                              | 3573/10000 [8:07:00<14:28:01,  8.10s/it]

Epoch: 3572 | Training loss 2.86471139639616 | Validation loss 2.791699379682541



 36%|█████████████████████████▋                                              | 3574/10000 [8:07:08<14:36:27,  8.18s/it]

Epoch: 3573 | Training loss 2.8625254780054092 | Validation loss 2.7905675768852234



 36%|█████████████████████████▋                                              | 3575/10000 [8:07:16<14:32:44,  8.15s/it]

Epoch: 3574 | Training loss 2.8692386746406555 | Validation loss 2.789874702692032



 36%|█████████████████████████▋                                              | 3576/10000 [8:07:24<14:33:33,  8.16s/it]

Epoch: 3575 | Training loss 2.871816076338291 | Validation loss 2.787826895713806



 36%|█████████████████████████▊                                              | 3577/10000 [8:07:32<14:33:04,  8.16s/it]

Epoch: 3576 | Training loss 2.8707442209124565 | Validation loss 2.7908391058444977



 36%|█████████████████████████▊                                              | 3578/10000 [8:07:41<14:29:53,  8.13s/it]

Epoch: 3577 | Training loss 2.867474891245365 | Validation loss 2.7902278304100037



 36%|█████████████████████████▊                                              | 3579/10000 [8:07:49<14:30:24,  8.13s/it]

Epoch: 3578 | Training loss 2.859650135040283 | Validation loss 2.7897725105285645



 36%|█████████████████████████▊                                              | 3580/10000 [8:07:57<14:31:41,  8.15s/it]

Epoch: 3579 | Training loss 2.8707555681467056 | Validation loss 2.794680207967758



 36%|█████████████████████████▊                                              | 3581/10000 [8:08:05<14:31:36,  8.15s/it]

Epoch: 3580 | Training loss 2.8655209690332413 | Validation loss 2.7848973274230957



 36%|█████████████████████████▊                                              | 3582/10000 [8:08:13<14:34:00,  8.17s/it]

Epoch: 3581 | Training loss 2.870695747435093 | Validation loss 2.7889519035816193



 36%|█████████████████████████▊                                              | 3583/10000 [8:08:21<14:31:40,  8.15s/it]

Epoch: 3582 | Training loss 2.863953225314617 | Validation loss 2.8062849938869476



 36%|█████████████████████████▊                                              | 3584/10000 [8:08:29<14:29:45,  8.13s/it]

Epoch: 3583 | Training loss 2.8634529188275337 | Validation loss 2.7897897362709045



 36%|█████████████████████████▊                                              | 3585/10000 [8:08:38<14:29:13,  8.13s/it]

Epoch: 3584 | Training loss 2.8646231591701508 | Validation loss 2.795079320669174



 36%|█████████████████████████▊                                              | 3586/10000 [8:08:46<14:28:37,  8.13s/it]

Epoch: 3585 | Training loss 2.8661738857626915 | Validation loss 2.7997661530971527



 36%|█████████████████████████▊                                              | 3587/10000 [8:08:54<14:25:14,  8.10s/it]

Epoch: 3586 | Training loss 2.865908570587635 | Validation loss 2.7922251522541046



 36%|█████████████████████████▊                                              | 3588/10000 [8:09:02<14:28:22,  8.13s/it]

Epoch: 3587 | Training loss 2.860839568078518 | Validation loss 2.793820172548294



 36%|█████████████████████████▊                                              | 3589/10000 [8:09:10<14:31:57,  8.16s/it]

Epoch: 3588 | Training loss 2.866053931415081 | Validation loss 2.7855665385723114



 36%|█████████████████████████▊                                              | 3590/10000 [8:09:18<14:29:24,  8.14s/it]

Epoch: 3589 | Training loss 2.8673339411616325 | Validation loss 2.795370489358902



 36%|█████████████████████████▊                                              | 3591/10000 [8:09:26<14:27:39,  8.12s/it]

Epoch: 3590 | Training loss 2.8654419630765915 | Validation loss 2.7937204837799072



 36%|█████████████████████████▊                                              | 3592/10000 [8:09:34<14:25:22,  8.10s/it]

Epoch: 3591 | Training loss 2.8667358607053757 | Validation loss 2.795502930879593



 36%|█████████████████████████▊                                              | 3593/10000 [8:09:43<14:26:42,  8.12s/it]

Epoch: 3592 | Training loss 2.8678587898612022 | Validation loss 2.7916372418403625



 36%|█████████████████████████▉                                              | 3594/10000 [8:09:51<14:28:39,  8.14s/it]

Epoch: 3593 | Training loss 2.865789197385311 | Validation loss 2.785298466682434



 36%|█████████████████████████▉                                              | 3595/10000 [8:09:59<14:31:39,  8.17s/it]

Epoch: 3594 | Training loss 2.8656480461359024 | Validation loss 2.790112316608429



 36%|█████████████████████████▉                                              | 3596/10000 [8:10:07<14:30:05,  8.15s/it]

Epoch: 3595 | Training loss 2.8694948703050613 | Validation loss 2.7933197915554047



 36%|█████████████████████████▉                                              | 3597/10000 [8:10:15<14:28:43,  8.14s/it]

Epoch: 3596 | Training loss 2.8674575090408325 | Validation loss 2.796290546655655



 36%|█████████████████████████▉                                              | 3598/10000 [8:10:23<14:29:20,  8.15s/it]

Epoch: 3597 | Training loss 2.872229427099228 | Validation loss 2.793189585208893



 36%|█████████████████████████▉                                              | 3599/10000 [8:10:32<14:30:42,  8.16s/it]

Epoch: 3598 | Training loss 2.8691917434334755 | Validation loss 2.8007161617279053



 36%|█████████████████████████▉                                              | 3600/10000 [8:10:40<14:30:42,  8.16s/it]

Epoch: 3599 | Training loss 2.867566727101803 | Validation loss 2.7891910672187805



 36%|█████████████████████████▉                                              | 3601/10000 [8:10:48<14:27:39,  8.14s/it]

Epoch: 3600 | Training loss 2.8696678057312965 | Validation loss 2.7917241156101227



 36%|█████████████████████████▉                                              | 3602/10000 [8:10:56<14:28:27,  8.14s/it]

Epoch: 3601 | Training loss 2.8625452667474747 | Validation loss 2.789233535528183



 36%|█████████████████████████▉                                              | 3603/10000 [8:11:04<14:29:54,  8.16s/it]

Epoch: 3602 | Training loss 2.8670132756233215 | Validation loss 2.795472890138626



 36%|█████████████████████████▉                                              | 3604/10000 [8:11:12<14:29:34,  8.16s/it]

Epoch: 3603 | Training loss 2.866462267935276 | Validation loss 2.794251948595047



 36%|█████████████████████████▉                                              | 3605/10000 [8:11:20<14:26:21,  8.13s/it]

Epoch: 3604 | Training loss 2.8647405430674553 | Validation loss 2.797159194946289



 36%|█████████████████████████▉                                              | 3606/10000 [8:11:28<14:26:26,  8.13s/it]

Epoch: 3605 | Training loss 2.8673215508461 | Validation loss 2.7891738414764404



 36%|█████████████████████████▉                                              | 3607/10000 [8:11:37<14:26:24,  8.13s/it]

Epoch: 3606 | Training loss 2.860116809606552 | Validation loss 2.7878209948539734



 36%|█████████████████████████▉                                              | 3608/10000 [8:11:45<14:24:17,  8.11s/it]

Epoch: 3607 | Training loss 2.8638804629445076 | Validation loss 2.790367990732193



 36%|█████████████████████████▉                                              | 3609/10000 [8:11:53<14:23:17,  8.10s/it]

Epoch: 3608 | Training loss 2.866542376577854 | Validation loss 2.7914605140686035



 36%|█████████████████████████▉                                              | 3610/10000 [8:12:01<14:23:29,  8.11s/it]

Epoch: 3609 | Training loss 2.8655430898070335 | Validation loss 2.7926622331142426



 36%|█████████████████████████▉                                              | 3611/10000 [8:12:09<14:26:10,  8.13s/it]

Epoch: 3610 | Training loss 2.8651446029543877 | Validation loss 2.7918954491615295



 36%|██████████████████████████                                              | 3612/10000 [8:12:17<14:29:32,  8.17s/it]

Epoch: 3611 | Training loss 2.8660832569003105 | Validation loss 2.788963258266449



 36%|██████████████████████████                                              | 3613/10000 [8:12:26<14:31:27,  8.19s/it]

Epoch: 3612 | Training loss 2.867479123175144 | Validation loss 2.791235566139221



 36%|██████████████████████████                                              | 3614/10000 [8:12:34<14:28:40,  8.16s/it]

Epoch: 3613 | Training loss 2.864441767334938 | Validation loss 2.7892160415649414



 36%|██████████████████████████                                              | 3615/10000 [8:12:42<14:29:06,  8.17s/it]

Epoch: 3614 | Training loss 2.871054023504257 | Validation loss 2.7942944169044495



 36%|██████████████████████████                                              | 3616/10000 [8:12:50<14:26:21,  8.14s/it]

Epoch: 3615 | Training loss 2.8594671860337257 | Validation loss 2.788428843021393



 36%|██████████████████████████                                              | 3617/10000 [8:12:58<14:26:29,  8.14s/it]

Epoch: 3616 | Training loss 2.8697700947523117 | Validation loss 2.7961381673812866



 36%|██████████████████████████                                              | 3618/10000 [8:13:06<14:29:29,  8.17s/it]

Epoch: 3617 | Training loss 2.871030665934086 | Validation loss 2.7934801876544952



 36%|██████████████████████████                                              | 3619/10000 [8:13:14<14:27:03,  8.15s/it]

Epoch: 3618 | Training loss 2.860020227730274 | Validation loss 2.7893721163272858



 36%|██████████████████████████                                              | 3620/10000 [8:13:23<14:31:49,  8.20s/it]

Epoch: 3619 | Training loss 2.8681960701942444 | Validation loss 2.7941605150699615



 36%|██████████████████████████                                              | 3621/10000 [8:13:31<14:32:31,  8.21s/it]

Epoch: 3620 | Training loss 2.8678475692868233 | Validation loss 2.7948673367500305



 36%|██████████████████████████                                              | 3622/10000 [8:13:39<14:32:33,  8.21s/it]

Epoch: 3621 | Training loss 2.8665938898921013 | Validation loss 2.7907676696777344



 36%|██████████████████████████                                              | 3623/10000 [8:13:47<14:33:58,  8.22s/it]

Epoch: 3622 | Training loss 2.866817496716976 | Validation loss 2.791368156671524



 36%|██████████████████████████                                              | 3624/10000 [8:13:56<14:35:01,  8.23s/it]

Epoch: 3623 | Training loss 2.8657747358083725 | Validation loss 2.7965829968452454



 36%|██████████████████████████                                              | 3625/10000 [8:14:04<14:33:13,  8.22s/it]

Epoch: 3624 | Training loss 2.865738533437252 | Validation loss 2.7956879436969757



 36%|██████████████████████████                                              | 3626/10000 [8:14:12<14:30:17,  8.19s/it]

Epoch: 3625 | Training loss 2.8677071258425713 | Validation loss 2.7935341000556946



 36%|██████████████████████████                                              | 3627/10000 [8:14:20<14:31:40,  8.21s/it]

Epoch: 3626 | Training loss 2.869089052081108 | Validation loss 2.7899712920188904



 36%|██████████████████████████                                              | 3628/10000 [8:14:29<14:33:37,  8.23s/it]

Epoch: 3627 | Training loss 2.8700234666466713 | Validation loss 2.7946244180202484



 36%|██████████████████████████▏                                             | 3629/10000 [8:14:37<14:32:58,  8.22s/it]

Epoch: 3628 | Training loss 2.8693938925862312 | Validation loss 2.787350207567215



 36%|██████████████████████████▏                                             | 3630/10000 [8:14:45<14:31:37,  8.21s/it]

Epoch: 3629 | Training loss 2.867907777428627 | Validation loss 2.7937821447849274



 36%|██████████████████████████▏                                             | 3631/10000 [8:14:53<14:29:32,  8.19s/it]

Epoch: 3630 | Training loss 2.8672401160001755 | Validation loss 2.7904937863349915



 36%|██████████████████████████▏                                             | 3632/10000 [8:15:01<14:29:33,  8.19s/it]

Epoch: 3631 | Training loss 2.866150178015232 | Validation loss 2.7959790229797363



 36%|██████████████████████████▏                                             | 3633/10000 [8:15:09<14:28:28,  8.18s/it]

Epoch: 3632 | Training loss 2.863206058740616 | Validation loss 2.78932648897171



 36%|██████████████████████████▏                                             | 3634/10000 [8:15:18<14:33:10,  8.23s/it]

Epoch: 3633 | Training loss 2.871219888329506 | Validation loss 2.7901426255702972



 36%|██████████████████████████▏                                             | 3635/10000 [8:15:26<14:30:30,  8.21s/it]

Epoch: 3634 | Training loss 2.868029274046421 | Validation loss 2.7889020144939423



 36%|██████████████████████████▏                                             | 3636/10000 [8:15:34<14:30:37,  8.21s/it]

Epoch: 3635 | Training loss 2.8722052797675133 | Validation loss 2.790818303823471



 36%|██████████████████████████▏                                             | 3637/10000 [8:15:42<14:26:34,  8.17s/it]

Epoch: 3636 | Training loss 2.865369401872158 | Validation loss 2.790294885635376



 36%|██████████████████████████▏                                             | 3638/10000 [8:15:50<14:23:13,  8.14s/it]

Epoch: 3637 | Training loss 2.8640593513846397 | Validation loss 2.7887721061706543



 36%|██████████████████████████▏                                             | 3639/10000 [8:15:58<14:22:20,  8.13s/it]

Epoch: 3638 | Training loss 2.8650918751955032 | Validation loss 2.7929430305957794



 36%|██████████████████████████▏                                             | 3640/10000 [8:16:07<14:24:12,  8.15s/it]

Epoch: 3639 | Training loss 2.8610137924551964 | Validation loss 2.7889655232429504



 36%|██████████████████████████▏                                             | 3641/10000 [8:16:15<14:24:56,  8.16s/it]

Epoch: 3640 | Training loss 2.862414427101612 | Validation loss 2.7861357629299164



 36%|██████████████████████████▏                                             | 3642/10000 [8:16:23<14:24:29,  8.16s/it]

Epoch: 3641 | Training loss 2.864660359919071 | Validation loss 2.7878777384757996



 36%|██████████████████████████▏                                             | 3643/10000 [8:16:31<14:25:36,  8.17s/it]

Epoch: 3642 | Training loss 2.8579765632748604 | Validation loss 2.7896737158298492



 36%|██████████████████████████▏                                             | 3644/10000 [8:16:39<14:24:58,  8.17s/it]

Epoch: 3643 | Training loss 2.8642197623848915 | Validation loss 2.7906602323055267



 36%|██████████████████████████▏                                             | 3645/10000 [8:16:47<14:24:01,  8.16s/it]

Epoch: 3644 | Training loss 2.860779255628586 | Validation loss 2.7870181798934937



 36%|██████████████████████████▎                                             | 3646/10000 [8:16:55<14:21:08,  8.13s/it]

Epoch: 3645 | Training loss 2.8622145280241966 | Validation loss 2.803708642721176



 36%|██████████████████████████▎                                             | 3647/10000 [8:17:04<14:20:30,  8.13s/it]

Epoch: 3646 | Training loss 2.8643350675702095 | Validation loss 2.794551730155945



 36%|██████████████████████████▎                                             | 3648/10000 [8:17:12<14:21:04,  8.13s/it]

Epoch: 3647 | Training loss 2.8687952533364296 | Validation loss 2.794497162103653



 36%|██████████████████████████▎                                             | 3649/10000 [8:17:20<14:23:26,  8.16s/it]

Epoch: 3648 | Training loss 2.86837350577116 | Validation loss 2.7976535856723785



 36%|██████████████████████████▎                                             | 3650/10000 [8:17:28<14:24:29,  8.17s/it]

Epoch: 3649 | Training loss 2.8647257685661316 | Validation loss 2.792850583791733



 37%|██████████████████████████▎                                             | 3651/10000 [8:17:36<14:23:17,  8.16s/it]

Epoch: 3650 | Training loss 2.8642415553331375 | Validation loss 2.7923775911331177



 37%|██████████████████████████▎                                             | 3652/10000 [8:17:45<14:26:53,  8.19s/it]

Epoch: 3651 | Training loss 2.865927144885063 | Validation loss 2.7994767129421234



 37%|██████████████████████████▎                                             | 3653/10000 [8:17:53<14:24:57,  8.18s/it]

Epoch: 3652 | Training loss 2.865296393632889 | Validation loss 2.791124254465103



 37%|██████████████████████████▎                                             | 3654/10000 [8:18:01<14:24:35,  8.17s/it]

Epoch: 3653 | Training loss 2.86264468729496 | Validation loss 2.7936374247074127



 37%|██████████████████████████▎                                             | 3655/10000 [8:18:09<14:20:37,  8.14s/it]

Epoch: 3654 | Training loss 2.8720355108380318 | Validation loss 2.7919678688049316



 37%|██████████████████████████▎                                             | 3656/10000 [8:18:17<14:18:47,  8.12s/it]

Epoch: 3655 | Training loss 2.8659245520830154 | Validation loss 2.7923218607902527



 37%|██████████████████████████▎                                             | 3657/10000 [8:18:25<14:18:58,  8.13s/it]

Epoch: 3656 | Training loss 2.8698354214429855 | Validation loss 2.7896957099437714



 37%|██████████████████████████▎                                             | 3658/10000 [8:18:33<14:25:30,  8.19s/it]

Epoch: 3657 | Training loss 2.864377237856388 | Validation loss 2.7909580767154694



 37%|██████████████████████████▎                                             | 3659/10000 [8:18:42<14:28:18,  8.22s/it]

Epoch: 3658 | Training loss 2.864685148000717 | Validation loss 2.7953717708587646



 37%|██████████████████████████▎                                             | 3660/10000 [8:18:50<14:29:27,  8.23s/it]

Epoch: 3659 | Training loss 2.867019049823284 | Validation loss 2.787475496530533



 37%|██████████████████████████▎                                             | 3661/10000 [8:18:58<14:28:56,  8.22s/it]

Epoch: 3660 | Training loss 2.866359181702137 | Validation loss 2.7935588359832764



 37%|██████████████████████████▎                                             | 3662/10000 [8:19:06<14:28:26,  8.22s/it]

Epoch: 3661 | Training loss 2.8660438507795334 | Validation loss 2.794810026884079



 37%|██████████████████████████▎                                             | 3663/10000 [8:19:15<14:28:30,  8.22s/it]

Epoch: 3662 | Training loss 2.862729698419571 | Validation loss 2.7889568209648132



 37%|██████████████████████████▍                                             | 3664/10000 [8:19:23<14:27:30,  8.22s/it]

Epoch: 3663 | Training loss 2.8693137168884277 | Validation loss 2.7848927676677704



 37%|██████████████████████████▍                                             | 3665/10000 [8:19:31<14:26:50,  8.21s/it]

Epoch: 3664 | Training loss 2.868862897157669 | Validation loss 2.7971462309360504



 37%|██████████████████████████▍                                             | 3666/10000 [8:19:39<14:25:40,  8.20s/it]

Epoch: 3665 | Training loss 2.868506319820881 | Validation loss 2.7908504009246826



 37%|██████████████████████████▍                                             | 3667/10000 [8:19:47<14:26:50,  8.21s/it]

Epoch: 3666 | Training loss 2.862217180430889 | Validation loss 2.7939818799495697



 37%|██████████████████████████▍                                             | 3668/10000 [8:19:56<14:27:20,  8.22s/it]

Epoch: 3667 | Training loss 2.86954053491354 | Validation loss 2.796642243862152



 37%|██████████████████████████▍                                             | 3669/10000 [8:20:04<14:25:51,  8.21s/it]

Epoch: 3668 | Training loss 2.8614508658647537 | Validation loss 2.796879857778549



 37%|██████████████████████████▍                                             | 3670/10000 [8:20:12<14:26:50,  8.22s/it]

Epoch: 3669 | Training loss 2.8613850623369217 | Validation loss 2.795137494802475



 37%|██████████████████████████▍                                             | 3671/10000 [8:20:20<14:29:02,  8.24s/it]

Epoch: 3670 | Training loss 2.8645197972655296 | Validation loss 2.791157752275467



 37%|██████████████████████████▍                                             | 3672/10000 [8:20:29<14:30:29,  8.25s/it]

Epoch: 3671 | Training loss 2.8699463084340096 | Validation loss 2.795364588499069



 37%|██████████████████████████▍                                             | 3673/10000 [8:20:37<14:27:29,  8.23s/it]

Epoch: 3672 | Training loss 2.8606042563915253 | Validation loss 2.788351744413376



 37%|██████████████████████████▍                                             | 3674/10000 [8:20:45<14:26:25,  8.22s/it]

Epoch: 3673 | Training loss 2.8650420755147934 | Validation loss 2.788375109434128



 37%|██████████████████████████▍                                             | 3675/10000 [8:20:53<14:25:02,  8.21s/it]

Epoch: 3674 | Training loss 2.861542783677578 | Validation loss 2.7924323081970215



 37%|██████████████████████████▍                                             | 3676/10000 [8:21:01<14:20:05,  8.16s/it]

Epoch: 3675 | Training loss 2.860010512173176 | Validation loss 2.7949132919311523



 37%|██████████████████████████▍                                             | 3677/10000 [8:21:09<14:20:32,  8.17s/it]

Epoch: 3676 | Training loss 2.868687465786934 | Validation loss 2.7936067283153534



 37%|██████████████████████████▍                                             | 3678/10000 [8:21:18<14:21:17,  8.17s/it]

Epoch: 3677 | Training loss 2.865676559507847 | Validation loss 2.7892930805683136



 37%|██████████████████████████▍                                             | 3679/10000 [8:21:26<14:21:20,  8.18s/it]

Epoch: 3678 | Training loss 2.864127367734909 | Validation loss 2.7947016656398773



 37%|██████████████████████████▍                                             | 3680/10000 [8:21:34<14:23:55,  8.20s/it]

Epoch: 3679 | Training loss 2.8692876920104027 | Validation loss 2.797099769115448



 37%|██████████████████████████▌                                             | 3681/10000 [8:21:42<14:22:04,  8.19s/it]

Epoch: 3680 | Training loss 2.8625007048249245 | Validation loss 2.7906364798545837



 37%|██████████████████████████▌                                             | 3682/10000 [8:21:50<14:20:19,  8.17s/it]

Epoch: 3681 | Training loss 2.866269864141941 | Validation loss 2.789909452199936



 37%|██████████████████████████▌                                             | 3683/10000 [8:21:59<14:21:58,  8.19s/it]

Epoch: 3682 | Training loss 2.8688052520155907 | Validation loss 2.795789420604706



 37%|██████████████████████████▌                                             | 3684/10000 [8:22:07<14:20:57,  8.18s/it]

Epoch: 3683 | Training loss 2.8710443899035454 | Validation loss 2.7921895682811737



 37%|██████████████████████████▌                                             | 3685/10000 [8:22:15<14:20:37,  8.18s/it]

Epoch: 3684 | Training loss 2.865045979619026 | Validation loss 2.7908122539520264



 37%|██████████████████████████▌                                             | 3686/10000 [8:22:23<14:22:36,  8.20s/it]

Epoch: 3685 | Training loss 2.869124613702297 | Validation loss 2.7901925444602966



 37%|██████████████████████████▌                                             | 3687/10000 [8:22:31<14:23:26,  8.21s/it]

Epoch: 3686 | Training loss 2.8657715544104576 | Validation loss 2.794652611017227



 37%|██████████████████████████▌                                             | 3688/10000 [8:22:40<14:20:32,  8.18s/it]

Epoch: 3687 | Training loss 2.8709400221705437 | Validation loss 2.790942072868347



 37%|██████████████████████████▌                                             | 3689/10000 [8:22:48<14:18:28,  8.16s/it]

Epoch: 3688 | Training loss 2.8670555874705315 | Validation loss 2.798457145690918



 37%|██████████████████████████▌                                             | 3690/10000 [8:22:56<14:18:27,  8.16s/it]

Epoch: 3689 | Training loss 2.873475596308708 | Validation loss 2.796750396490097



 37%|██████████████████████████▌                                             | 3691/10000 [8:23:04<14:17:20,  8.15s/it]

Epoch: 3690 | Training loss 2.8635007441043854 | Validation loss 2.7920543551445007



 37%|██████████████████████████▌                                             | 3692/10000 [8:23:12<14:15:02,  8.13s/it]

Epoch: 3691 | Training loss 2.864982195198536 | Validation loss 2.790035456418991



 37%|██████████████████████████▌                                             | 3693/10000 [8:23:20<14:12:55,  8.11s/it]

Epoch: 3692 | Training loss 2.8704269528388977 | Validation loss 2.79400035738945



 37%|██████████████████████████▌                                             | 3694/10000 [8:23:28<14:17:17,  8.16s/it]

Epoch: 3693 | Training loss 2.8619164302945137 | Validation loss 2.790529191493988



 37%|██████████████████████████▌                                             | 3695/10000 [8:23:36<14:15:00,  8.14s/it]

Epoch: 3694 | Training loss 2.8667226135730743 | Validation loss 2.7950946390628815



 37%|██████████████████████████▌                                             | 3696/10000 [8:23:45<14:19:08,  8.18s/it]

Epoch: 3695 | Training loss 2.8568344712257385 | Validation loss 2.784284919500351



 37%|██████████████████████████▌                                             | 3697/10000 [8:23:53<14:20:35,  8.19s/it]

Epoch: 3696 | Training loss 2.8630498871207237 | Validation loss 2.789224147796631



 37%|██████████████████████████▋                                             | 3698/10000 [8:24:01<14:20:03,  8.19s/it]

Epoch: 3697 | Training loss 2.8695699870586395 | Validation loss 2.7987275421619415



 37%|██████████████████████████▋                                             | 3699/10000 [8:24:09<14:21:04,  8.20s/it]

Epoch: 3698 | Training loss 2.865156836807728 | Validation loss 2.7935386300086975



 37%|██████████████████████████▋                                             | 3700/10000 [8:24:18<14:19:17,  8.18s/it]

Epoch: 3699 | Training loss 2.8671249225735664 | Validation loss 2.792934834957123



 37%|██████████████████████████▋                                             | 3701/10000 [8:24:26<14:19:07,  8.18s/it]

Epoch: 3700 | Training loss 2.861213080585003 | Validation loss 2.790888875722885



 37%|██████████████████████████▋                                             | 3702/10000 [8:24:34<14:20:52,  8.20s/it]

Epoch: 3701 | Training loss 2.865213170647621 | Validation loss 2.7995645999908447



 37%|██████████████████████████▋                                             | 3703/10000 [8:24:42<14:19:39,  8.19s/it]

Epoch: 3702 | Training loss 2.8660164400935173 | Validation loss 2.7940456867218018



 37%|██████████████████████████▋                                             | 3704/10000 [8:24:50<14:19:36,  8.19s/it]

Epoch: 3703 | Training loss 2.8720207512378693 | Validation loss 2.7912744283676147



 37%|██████████████████████████▋                                             | 3705/10000 [8:24:59<14:19:36,  8.19s/it]

Epoch: 3704 | Training loss 2.868921548128128 | Validation loss 2.7915223240852356



 37%|██████████████████████████▋                                             | 3706/10000 [8:25:07<14:18:59,  8.19s/it]

Epoch: 3705 | Training loss 2.8643319830298424 | Validation loss 2.7891761362552643



 37%|██████████████████████████▋                                             | 3707/10000 [8:25:15<14:17:40,  8.18s/it]

Epoch: 3706 | Training loss 2.8623647540807724 | Validation loss 2.790879011154175



 37%|██████████████████████████▋                                             | 3708/10000 [8:25:23<14:16:37,  8.17s/it]

Epoch: 3707 | Training loss 2.8636995255947113 | Validation loss 2.7861113250255585



 37%|██████████████████████████▋                                             | 3709/10000 [8:25:31<14:13:26,  8.14s/it]

Epoch: 3708 | Training loss 2.8624398559331894 | Validation loss 2.7921645641326904



 37%|██████████████████████████▋                                             | 3710/10000 [8:25:39<14:10:07,  8.11s/it]

Epoch: 3709 | Training loss 2.864062637090683 | Validation loss 2.7939169108867645



 37%|██████████████████████████▋                                             | 3711/10000 [8:25:47<14:09:44,  8.11s/it]

Epoch: 3710 | Training loss 2.8666507974267006 | Validation loss 2.790744811296463



 37%|██████████████████████████▋                                             | 3712/10000 [8:25:55<14:07:30,  8.09s/it]

Epoch: 3711 | Training loss 2.86858306825161 | Validation loss 2.7912713289260864



 37%|██████████████████████████▋                                             | 3713/10000 [8:26:03<14:07:19,  8.09s/it]

Epoch: 3712 | Training loss 2.8635424822568893 | Validation loss 2.790588855743408



 37%|██████████████████████████▋                                             | 3714/10000 [8:26:11<14:08:44,  8.10s/it]

Epoch: 3713 | Training loss 2.872738502919674 | Validation loss 2.7975329160690308



 37%|██████████████████████████▋                                             | 3715/10000 [8:26:20<14:07:36,  8.09s/it]

Epoch: 3714 | Training loss 2.8628114089369774 | Validation loss 2.786464273929596



 37%|██████████████████████████▊                                             | 3716/10000 [8:26:28<14:09:16,  8.11s/it]

Epoch: 3715 | Training loss 2.8667806908488274 | Validation loss 2.791882038116455



 37%|██████████████████████████▊                                             | 3717/10000 [8:26:36<14:08:28,  8.10s/it]

Epoch: 3716 | Training loss 2.859966553747654 | Validation loss 2.791117638349533



 37%|██████████████████████████▊                                             | 3718/10000 [8:26:44<14:09:46,  8.12s/it]

Epoch: 3717 | Training loss 2.859837293624878 | Validation loss 2.7884241938591003



 37%|██████████████████████████▊                                             | 3719/10000 [8:26:52<14:15:04,  8.17s/it]

Epoch: 3718 | Training loss 2.8657136857509613 | Validation loss 2.795737087726593



 37%|██████████████████████████▊                                             | 3720/10000 [8:27:00<14:15:16,  8.17s/it]

Epoch: 3719 | Training loss 2.865873709321022 | Validation loss 2.788549780845642



 37%|██████████████████████████▊                                             | 3721/10000 [8:27:09<14:14:54,  8.17s/it]

Epoch: 3720 | Training loss 2.866499647498131 | Validation loss 2.79118674993515



 37%|██████████████████████████▊                                             | 3722/10000 [8:27:17<14:12:39,  8.15s/it]

Epoch: 3721 | Training loss 2.864830181002617 | Validation loss 2.7919530272483826



 37%|██████████████████████████▊                                             | 3723/10000 [8:27:25<14:12:58,  8.15s/it]

Epoch: 3722 | Training loss 2.8613840118050575 | Validation loss 2.7922914028167725



 37%|██████████████████████████▊                                             | 3724/10000 [8:27:33<14:12:13,  8.15s/it]

Epoch: 3723 | Training loss 2.8671642765402794 | Validation loss 2.7972131073474884



 37%|██████████████████████████▊                                             | 3725/10000 [8:27:41<14:08:37,  8.11s/it]

Epoch: 3724 | Training loss 2.865809842944145 | Validation loss 2.797610491514206



 37%|██████████████████████████▊                                             | 3726/10000 [8:27:49<14:05:31,  8.09s/it]

Epoch: 3725 | Training loss 2.860789269208908 | Validation loss 2.789247453212738



 37%|██████████████████████████▊                                             | 3727/10000 [8:27:57<14:07:23,  8.11s/it]

Epoch: 3726 | Training loss 2.8630110174417496 | Validation loss 2.7948439717292786



 37%|██████████████████████████▊                                             | 3728/10000 [8:28:05<14:13:31,  8.17s/it]

Epoch: 3727 | Training loss 2.8722512498497963 | Validation loss 2.7918477654457092



 37%|██████████████████████████▊                                             | 3729/10000 [8:28:14<14:14:05,  8.17s/it]

Epoch: 3728 | Training loss 2.867529481649399 | Validation loss 2.7910844683647156



 37%|██████████████████████████▊                                             | 3730/10000 [8:28:22<14:16:46,  8.20s/it]

Epoch: 3729 | Training loss 2.866115003824234 | Validation loss 2.7937138974666595



 37%|██████████████████████████▊                                             | 3731/10000 [8:28:30<14:17:00,  8.20s/it]

Epoch: 3730 | Training loss 2.86689455807209 | Validation loss 2.7900739908218384



 37%|██████████████████████████▊                                             | 3732/10000 [8:28:38<14:16:11,  8.20s/it]

Epoch: 3731 | Training loss 2.8633334264159203 | Validation loss 2.790839821100235



 37%|██████████████████████████▉                                             | 3733/10000 [8:28:47<14:17:34,  8.21s/it]

Epoch: 3732 | Training loss 2.8680833876132965 | Validation loss 2.794115573167801



 37%|██████████████████████████▉                                             | 3734/10000 [8:28:55<14:17:31,  8.21s/it]

Epoch: 3733 | Training loss 2.866895116865635 | Validation loss 2.789841741323471



 37%|██████████████████████████▉                                             | 3735/10000 [8:29:03<14:17:23,  8.21s/it]

Epoch: 3734 | Training loss 2.864857368171215 | Validation loss 2.7949127852916718



 37%|██████████████████████████▉                                             | 3736/10000 [8:29:11<14:19:10,  8.23s/it]

Epoch: 3735 | Training loss 2.863105997443199 | Validation loss 2.7948894798755646



 37%|██████████████████████████▉                                             | 3737/10000 [8:29:19<14:16:13,  8.20s/it]

Epoch: 3736 | Training loss 2.865579269826412 | Validation loss 2.7889318764209747



 37%|██████████████████████████▉                                             | 3738/10000 [8:29:28<14:15:27,  8.20s/it]

Epoch: 3737 | Training loss 2.860869936645031 | Validation loss 2.7887887060642242



 37%|██████████████████████████▉                                             | 3739/10000 [8:29:36<14:15:17,  8.20s/it]

Epoch: 3738 | Training loss 2.867948316037655 | Validation loss 2.7901711761951447



 37%|██████████████████████████▉                                             | 3740/10000 [8:29:44<14:19:29,  8.24s/it]

Epoch: 3739 | Training loss 2.8649234548211098 | Validation loss 2.791772574186325



 37%|██████████████████████████▉                                             | 3741/10000 [8:29:52<14:16:35,  8.21s/it]

Epoch: 3740 | Training loss 2.866929717361927 | Validation loss 2.7926486134529114



 37%|██████████████████████████▉                                             | 3742/10000 [8:30:00<14:14:53,  8.20s/it]

Epoch: 3741 | Training loss 2.8696566373109818 | Validation loss 2.7924086451530457



 37%|██████████████████████████▉                                             | 3743/10000 [8:30:09<14:12:44,  8.18s/it]

Epoch: 3742 | Training loss 2.8705650195479393 | Validation loss 2.7919431626796722



 37%|██████████████████████████▉                                             | 3744/10000 [8:30:17<14:13:44,  8.19s/it]

Epoch: 3743 | Training loss 2.866165593266487 | Validation loss 2.7885514199733734



 37%|██████████████████████████▉                                             | 3745/10000 [8:30:25<14:15:16,  8.20s/it]

Epoch: 3744 | Training loss 2.8666478991508484 | Validation loss 2.7919644713401794



 37%|██████████████████████████▉                                             | 3746/10000 [8:30:33<14:13:53,  8.19s/it]

Epoch: 3745 | Training loss 2.8641949892044067 | Validation loss 2.791205495595932



 37%|██████████████████████████▉                                             | 3747/10000 [8:30:41<14:13:33,  8.19s/it]

Epoch: 3746 | Training loss 2.8697736710309982 | Validation loss 2.7938857972621918



 37%|██████████████████████████▉                                             | 3748/10000 [8:30:50<14:16:31,  8.22s/it]

Epoch: 3747 | Training loss 2.8644420132040977 | Validation loss 2.7947515845298767



 37%|██████████████████████████▉                                             | 3749/10000 [8:30:58<14:15:34,  8.21s/it]

Epoch: 3748 | Training loss 2.8707567155361176 | Validation loss 2.7950359880924225



 38%|███████████████████████████                                             | 3750/10000 [8:31:06<14:12:05,  8.18s/it]

Epoch: 3749 | Training loss 2.874437801539898 | Validation loss 2.7909110486507416



 38%|███████████████████████████                                             | 3751/10000 [8:31:14<14:07:35,  8.14s/it]

Epoch: 3750 | Training loss 2.8658851236104965 | Validation loss 2.795358896255493



 38%|███████████████████████████                                             | 3752/10000 [8:31:22<14:07:46,  8.14s/it]

Epoch: 3751 | Training loss 2.8701144456863403 | Validation loss 2.7875407934188843



 38%|███████████████████████████                                             | 3753/10000 [8:31:30<14:05:55,  8.12s/it]

Epoch: 3752 | Training loss 2.8664832934737206 | Validation loss 2.797417402267456



 38%|███████████████████████████                                             | 3754/10000 [8:31:38<14:06:08,  8.13s/it]

Epoch: 3753 | Training loss 2.8676906153559685 | Validation loss 2.7922633290290833



 38%|███████████████████████████                                             | 3755/10000 [8:31:46<14:03:40,  8.11s/it]

Epoch: 3754 | Training loss 2.8641929402947426 | Validation loss 2.7897416949272156



 38%|███████████████████████████                                             | 3756/10000 [8:31:55<14:04:27,  8.11s/it]

Epoch: 3755 | Training loss 2.866331323981285 | Validation loss 2.7877886593341827



 38%|███████████████████████████                                             | 3757/10000 [8:32:03<14:08:21,  8.15s/it]

Epoch: 3756 | Training loss 2.866029866039753 | Validation loss 2.7890200912952423



 38%|███████████████████████████                                             | 3758/10000 [8:32:11<14:06:36,  8.14s/it]

Epoch: 3757 | Training loss 2.8609270974993706 | Validation loss 2.79229673743248



 38%|███████████████████████████                                             | 3759/10000 [8:32:19<14:11:47,  8.19s/it]

Epoch: 3758 | Training loss 2.870435431599617 | Validation loss 2.789068788290024



 38%|███████████████████████████                                             | 3760/10000 [8:32:27<14:13:23,  8.21s/it]

Epoch: 3759 | Training loss 2.8664979115128517 | Validation loss 2.787689298391342



 38%|███████████████████████████                                             | 3761/10000 [8:32:35<14:09:02,  8.17s/it]

Epoch: 3760 | Training loss 2.86604106426239 | Validation loss 2.7893713116645813



 38%|███████████████████████████                                             | 3762/10000 [8:32:44<14:06:53,  8.15s/it]

Epoch: 3761 | Training loss 2.862001247704029 | Validation loss 2.789465755224228



 38%|███████████████████████████                                             | 3763/10000 [8:32:52<14:07:50,  8.16s/it]

Epoch: 3762 | Training loss 2.864607162773609 | Validation loss 2.788012057542801



 38%|███████████████████████████                                             | 3764/10000 [8:33:00<14:09:38,  8.17s/it]

Epoch: 3763 | Training loss 2.8613113164901733 | Validation loss 2.7941958904266357



 38%|███████████████████████████                                             | 3765/10000 [8:33:08<14:08:05,  8.16s/it]

Epoch: 3764 | Training loss 2.8683101162314415 | Validation loss 2.7904451191425323



 38%|███████████████████████████                                             | 3766/10000 [8:33:16<14:01:15,  8.10s/it]

Epoch: 3765 | Training loss 2.861166827380657 | Validation loss 2.791202813386917



 38%|███████████████████████████                                             | 3767/10000 [8:33:24<14:00:50,  8.09s/it]

Epoch: 3766 | Training loss 2.8684287294745445 | Validation loss 2.794629752635956



 38%|███████████████████████████▏                                            | 3768/10000 [8:33:32<14:04:54,  8.13s/it]

Epoch: 3767 | Training loss 2.8706519082188606 | Validation loss 2.792575865983963



 38%|███████████████████████████▏                                            | 3769/10000 [8:33:41<14:04:15,  8.13s/it]

Epoch: 3768 | Training loss 2.8651958778500557 | Validation loss 2.7923462092876434



 38%|███████████████████████████▏                                            | 3770/10000 [8:33:49<14:03:43,  8.13s/it]

Epoch: 3769 | Training loss 2.8645179495215416 | Validation loss 2.7921652793884277



 38%|███████████████████████████▏                                            | 3771/10000 [8:33:57<14:04:15,  8.13s/it]

Epoch: 3770 | Training loss 2.8673379942774773 | Validation loss 2.7864086627960205



 38%|███████████████████████████▏                                            | 3772/10000 [8:34:05<14:03:36,  8.13s/it]

Epoch: 3771 | Training loss 2.864999033510685 | Validation loss 2.7950107753276825



 38%|███████████████████████████▏                                            | 3773/10000 [8:34:13<14:05:19,  8.15s/it]

Epoch: 3772 | Training loss 2.8660479560494423 | Validation loss 2.793275147676468



 38%|███████████████████████████▏                                            | 3774/10000 [8:34:21<14:08:42,  8.18s/it]

Epoch: 3773 | Training loss 2.8658984526991844 | Validation loss 2.794570416212082



 38%|███████████████████████████▏                                            | 3775/10000 [8:34:29<14:07:11,  8.17s/it]

Epoch: 3774 | Training loss 2.8722602501511574 | Validation loss 2.8024217188358307



 38%|███████████████████████████▏                                            | 3776/10000 [8:34:38<14:08:17,  8.18s/it]

Epoch: 3775 | Training loss 2.868992954492569 | Validation loss 2.793145567178726



 38%|███████████████████████████▏                                            | 3777/10000 [8:34:46<14:09:39,  8.19s/it]

Epoch: 3776 | Training loss 2.86299304664135 | Validation loss 2.793876588344574



 38%|███████████████████████████▏                                            | 3778/10000 [8:34:54<14:09:35,  8.19s/it]

Epoch: 3777 | Training loss 2.865286909043789 | Validation loss 2.786687135696411



 38%|███████████████████████████▏                                            | 3779/10000 [8:35:02<14:11:02,  8.21s/it]

Epoch: 3778 | Training loss 2.8588281497359276 | Validation loss 2.787704110145569



 38%|███████████████████████████▏                                            | 3780/10000 [8:35:11<14:12:29,  8.22s/it]

Epoch: 3779 | Training loss 2.8611044138669968 | Validation loss 2.7900979220867157



 38%|███████████████████████████▏                                            | 3781/10000 [8:35:19<14:09:36,  8.20s/it]

Epoch: 3780 | Training loss 2.8667320907115936 | Validation loss 2.7872924506664276



 38%|███████████████████████████▏                                            | 3782/10000 [8:35:27<14:10:26,  8.21s/it]

Epoch: 3781 | Training loss 2.8682795986533165 | Validation loss 2.7891843020915985



 38%|███████████████████████████▏                                            | 3783/10000 [8:35:35<14:09:26,  8.20s/it]

Epoch: 3782 | Training loss 2.8716394677758217 | Validation loss 2.788107752799988



 38%|███████████████████████████▏                                            | 3784/10000 [8:35:43<14:05:46,  8.16s/it]

Epoch: 3783 | Training loss 2.8592740073800087 | Validation loss 2.785433381795883



 38%|███████████████████████████▎                                            | 3785/10000 [8:35:51<14:01:47,  8.13s/it]

Epoch: 3784 | Training loss 2.8693953156471252 | Validation loss 2.7921567857265472



 38%|███████████████████████████▎                                            | 3786/10000 [8:35:59<14:01:50,  8.13s/it]

Epoch: 3785 | Training loss 2.8674624413251877 | Validation loss 2.7890553176403046



 38%|███████████████████████████▎                                            | 3787/10000 [8:36:08<14:02:18,  8.13s/it]

Epoch: 3786 | Training loss 2.869067519903183 | Validation loss 2.7975632548332214



 38%|███████████████████████████▎                                            | 3788/10000 [8:36:16<14:01:08,  8.12s/it]

Epoch: 3787 | Training loss 2.8703388273715973 | Validation loss 2.7965442836284637



 38%|███████████████████████████▎                                            | 3789/10000 [8:36:24<14:02:49,  8.14s/it]

Epoch: 3788 | Training loss 2.868670806288719 | Validation loss 2.7901646196842194



 38%|███████████████████████████▎                                            | 3790/10000 [8:36:32<14:04:18,  8.16s/it]

Epoch: 3789 | Training loss 2.863406218588352 | Validation loss 2.7898793518543243



 38%|███████████████████████████▎                                            | 3791/10000 [8:36:40<14:05:21,  8.17s/it]

Epoch: 3790 | Training loss 2.8697756081819534 | Validation loss 2.794537842273712



 38%|███████████████████████████▎                                            | 3792/10000 [8:36:48<14:08:00,  8.20s/it]

Epoch: 3791 | Training loss 2.864303432404995 | Validation loss 2.7968248426914215



 38%|███████████████████████████▎                                            | 3793/10000 [8:36:57<14:08:17,  8.20s/it]

Epoch: 3792 | Training loss 2.8622962012887 | Validation loss 2.7901789844036102



 38%|███████████████████████████▎                                            | 3794/10000 [8:37:05<14:10:03,  8.22s/it]

Epoch: 3793 | Training loss 2.865788884460926 | Validation loss 2.7862968146800995



 38%|███████████████████████████▎                                            | 3795/10000 [8:37:13<14:09:39,  8.22s/it]

Epoch: 3794 | Training loss 2.869367502629757 | Validation loss 2.788619965314865



 38%|███████████████████████████▎                                            | 3796/10000 [8:37:21<14:07:51,  8.20s/it]

Epoch: 3795 | Training loss 2.865704730153084 | Validation loss 2.7953785359859467



 38%|███████████████████████████▎                                            | 3797/10000 [8:37:30<14:08:03,  8.20s/it]

Epoch: 3796 | Training loss 2.866622120141983 | Validation loss 2.801534414291382



 38%|███████████████████████████▎                                            | 3798/10000 [8:37:38<14:04:47,  8.17s/it]

Epoch: 3797 | Training loss 2.8637030944228172 | Validation loss 2.7972631454467773



 38%|███████████████████████████▎                                            | 3799/10000 [8:37:46<14:06:30,  8.19s/it]

Epoch: 3798 | Training loss 2.869552046060562 | Validation loss 2.799564242362976



 38%|███████████████████████████▎                                            | 3800/10000 [8:37:54<14:08:02,  8.21s/it]

Epoch: 3799 | Training loss 2.87076336145401 | Validation loss 2.7942540645599365



 38%|███████████████████████████▎                                            | 3801/10000 [8:38:02<14:06:37,  8.19s/it]

Epoch: 3800 | Training loss 2.8625158593058586 | Validation loss 2.786340653896332



 38%|███████████████████████████▎                                            | 3802/10000 [8:38:10<14:05:05,  8.18s/it]

Epoch: 3801 | Training loss 2.8635907396674156 | Validation loss 2.7907005548477173



 38%|███████████████████████████▍                                            | 3803/10000 [8:38:19<14:03:28,  8.17s/it]

Epoch: 3802 | Training loss 2.8636089265346527 | Validation loss 2.7884106636047363



 38%|███████████████████████████▍                                            | 3804/10000 [8:38:27<14:05:04,  8.18s/it]

Epoch: 3803 | Training loss 2.8684315159916878 | Validation loss 2.7898656725883484



 38%|███████████████████████████▍                                            | 3805/10000 [8:38:35<14:04:42,  8.18s/it]

Epoch: 3804 | Training loss 2.861781194806099 | Validation loss 2.786306619644165



 38%|███████████████████████████▍                                            | 3806/10000 [8:38:43<14:06:02,  8.20s/it]

Epoch: 3805 | Training loss 2.8616780415177345 | Validation loss 2.7849071323871613



 38%|███████████████████████████▍                                            | 3807/10000 [8:38:51<14:05:59,  8.20s/it]

Epoch: 3806 | Training loss 2.8707915991544724 | Validation loss 2.788358300924301



 38%|███████████████████████████▍                                            | 3808/10000 [8:38:59<14:03:27,  8.17s/it]

Epoch: 3807 | Training loss 2.864793747663498 | Validation loss 2.7905403673648834



 38%|███████████████████████████▍                                            | 3809/10000 [8:39:08<14:04:59,  8.19s/it]

Epoch: 3808 | Training loss 2.8579985201358795 | Validation loss 2.786606252193451



 38%|███████████████████████████▍                                            | 3810/10000 [8:39:16<14:17:14,  8.31s/it]

Epoch: 3809 | Training loss 2.8668937757611275 | Validation loss 2.800385743379593



 38%|███████████████████████████▍                                            | 3811/10000 [8:39:25<14:15:32,  8.29s/it]

Epoch: 3810 | Training loss 2.864213205873966 | Validation loss 2.790652334690094



 38%|███████████████████████████▍                                            | 3812/10000 [8:39:33<14:06:04,  8.20s/it]

Epoch: 3811 | Training loss 2.8653460294008255 | Validation loss 2.7923369109630585



 38%|███████████████████████████▍                                            | 3813/10000 [8:39:41<14:03:13,  8.18s/it]

Epoch: 3812 | Training loss 2.856393024325371 | Validation loss 2.789434164762497



 38%|███████████████████████████▍                                            | 3814/10000 [8:39:49<14:00:45,  8.15s/it]

Epoch: 3813 | Training loss 2.8624925389885902 | Validation loss 2.7898529767990112



 38%|███████████████████████████▍                                            | 3815/10000 [8:39:57<13:54:13,  8.09s/it]

Epoch: 3814 | Training loss 2.8672953844070435 | Validation loss 2.788195490837097



 38%|███████████████████████████▍                                            | 3816/10000 [8:40:05<13:54:20,  8.10s/it]

Epoch: 3815 | Training loss 2.860556498169899 | Validation loss 2.791159003973007



 38%|███████████████████████████▍                                            | 3817/10000 [8:40:13<13:56:24,  8.12s/it]

Epoch: 3816 | Training loss 2.8677837029099464 | Validation loss 2.7947732508182526



 38%|███████████████████████████▍                                            | 3818/10000 [8:40:21<13:58:15,  8.14s/it]

Epoch: 3817 | Training loss 2.867875561118126 | Validation loss 2.7905681133270264



 38%|███████████████████████████▍                                            | 3819/10000 [8:40:29<13:59:58,  8.15s/it]

Epoch: 3818 | Training loss 2.8683284893631935 | Validation loss 2.7902272641658783



 38%|███████████████████████████▌                                            | 3820/10000 [8:40:38<13:59:11,  8.15s/it]

Epoch: 3819 | Training loss 2.863025076687336 | Validation loss 2.7904119193553925



 38%|███████████████████████████▌                                            | 3821/10000 [8:40:46<14:01:22,  8.17s/it]

Epoch: 3820 | Training loss 2.872185602784157 | Validation loss 2.796758472919464



 38%|███████████████████████████▌                                            | 3822/10000 [8:40:54<14:05:18,  8.21s/it]

Epoch: 3821 | Training loss 2.8660249933600426 | Validation loss 2.7964859306812286



 38%|███████████████████████████▌                                            | 3823/10000 [8:41:02<14:05:11,  8.21s/it]

Epoch: 3822 | Training loss 2.8681288063526154 | Validation loss 2.7865794003009796



 38%|███████████████████████████▌                                            | 3824/10000 [8:41:10<14:04:10,  8.20s/it]

Epoch: 3823 | Training loss 2.8674105927348137 | Validation loss 2.7960047721862793



 38%|███████████████████████████▌                                            | 3825/10000 [8:41:19<14:05:17,  8.21s/it]

Epoch: 3824 | Training loss 2.8683999106287956 | Validation loss 2.7904323637485504



 38%|███████████████████████████▌                                            | 3826/10000 [8:41:27<14:03:45,  8.20s/it]

Epoch: 3825 | Training loss 2.874164901673794 | Validation loss 2.7916479408740997



 38%|███████████████████████████▌                                            | 3827/10000 [8:41:35<14:01:58,  8.18s/it]

Epoch: 3826 | Training loss 2.8671141043305397 | Validation loss 2.794327050447464



 38%|███████████████████████████▌                                            | 3828/10000 [8:41:43<14:04:08,  8.21s/it]

Epoch: 3827 | Training loss 2.867861345410347 | Validation loss 2.787444055080414



 38%|███████████████████████████▌                                            | 3829/10000 [8:41:51<14:05:17,  8.22s/it]

Epoch: 3828 | Training loss 2.8663879930973053 | Validation loss 2.793395459651947



 38%|███████████████████████████▌                                            | 3830/10000 [8:42:00<14:02:57,  8.20s/it]

Epoch: 3829 | Training loss 2.8621121123433113 | Validation loss 2.794362783432007



 38%|███████████████████████████▌                                            | 3831/10000 [8:42:08<14:02:13,  8.19s/it]

Epoch: 3830 | Training loss 2.863192342221737 | Validation loss 2.7880427837371826



 38%|███████████████████████████▌                                            | 3832/10000 [8:42:16<14:10:02,  8.27s/it]

Epoch: 3831 | Training loss 2.8642750158905983 | Validation loss 2.7844994962215424



 38%|███████████████████████████▌                                            | 3833/10000 [8:42:25<14:31:50,  8.48s/it]

Epoch: 3832 | Training loss 2.861188843846321 | Validation loss 2.7871464788913727



 38%|███████████████████████████▌                                            | 3834/10000 [8:42:36<15:28:36,  9.04s/it]

Epoch: 3833 | Training loss 2.865666724741459 | Validation loss 2.7872306406497955



 38%|███████████████████████████▌                                            | 3835/10000 [8:42:47<16:46:30,  9.80s/it]

Epoch: 3834 | Training loss 2.8625611290335655 | Validation loss 2.7860698997974396



 38%|███████████████████████████▌                                            | 3836/10000 [8:42:59<17:44:41, 10.36s/it]

Epoch: 3835 | Training loss 2.8670120388269424 | Validation loss 2.78899484872818



 38%|███████████████████████████▋                                            | 3837/10000 [8:43:09<17:43:57, 10.36s/it]

Epoch: 3836 | Training loss 2.8654014468193054 | Validation loss 2.7921881079673767



 38%|███████████████████████████▋                                            | 3838/10000 [8:43:18<16:57:52,  9.91s/it]

Epoch: 3837 | Training loss 2.8644102215766907 | Validation loss 2.7921695709228516



 38%|███████████████████████████▋                                            | 3839/10000 [8:43:26<16:05:27,  9.40s/it]

Epoch: 3838 | Training loss 2.8619273751974106 | Validation loss 2.7883092761039734



 38%|███████████████████████████▋                                            | 3840/10000 [8:43:34<15:26:17,  9.02s/it]

Epoch: 3839 | Training loss 2.8665266782045364 | Validation loss 2.787909507751465



 38%|███████████████████████████▋                                            | 3841/10000 [8:43:42<14:54:53,  8.72s/it]

Epoch: 3840 | Training loss 2.866534546017647 | Validation loss 2.796551078557968



 38%|███████████████████████████▋                                            | 3842/10000 [8:43:50<14:33:40,  8.51s/it]

Epoch: 3841 | Training loss 2.8718961775302887 | Validation loss 2.793231099843979



 38%|███████████████████████████▋                                            | 3843/10000 [8:43:59<14:20:07,  8.38s/it]

Epoch: 3842 | Training loss 2.862535186111927 | Validation loss 2.8009135723114014



 38%|███████████████████████████▋                                            | 3844/10000 [8:44:07<14:12:46,  8.31s/it]

Epoch: 3843 | Training loss 2.865050882101059 | Validation loss 2.790818750858307



 38%|███████████████████████████▋                                            | 3845/10000 [8:44:15<14:06:41,  8.25s/it]

Epoch: 3844 | Training loss 2.87249393761158 | Validation loss 2.786609709262848



 38%|███████████████████████████▋                                            | 3846/10000 [8:44:23<14:02:22,  8.21s/it]

Epoch: 3845 | Training loss 2.8658167719841003 | Validation loss 2.7893623411655426



 38%|███████████████████████████▋                                            | 3847/10000 [8:44:31<13:59:47,  8.19s/it]

Epoch: 3846 | Training loss 2.8634674325585365 | Validation loss 2.7872446179389954



 38%|███████████████████████████▋                                            | 3848/10000 [8:44:39<13:54:59,  8.14s/it]

Epoch: 3847 | Training loss 2.866276293992996 | Validation loss 2.7856598794460297



 38%|███████████████████████████▋                                            | 3849/10000 [8:44:47<13:51:13,  8.11s/it]

Epoch: 3848 | Training loss 2.8627296090126038 | Validation loss 2.7850774824619293



 38%|███████████████████████████▋                                            | 3850/10000 [8:44:55<13:51:56,  8.12s/it]

Epoch: 3849 | Training loss 2.8669897317886353 | Validation loss 2.7950073182582855



 39%|███████████████████████████▋                                            | 3851/10000 [8:45:03<13:51:19,  8.11s/it]

Epoch: 3850 | Training loss 2.8724522963166237 | Validation loss 2.7907923460006714



 39%|███████████████████████████▋                                            | 3852/10000 [8:45:11<13:49:53,  8.10s/it]

Epoch: 3851 | Training loss 2.868029363453388 | Validation loss 2.7914210855960846



 39%|███████████████████████████▋                                            | 3853/10000 [8:45:20<13:50:23,  8.11s/it]

Epoch: 3852 | Training loss 2.865073226392269 | Validation loss 2.792942136526108



 39%|███████████████████████████▋                                            | 3854/10000 [8:45:28<13:52:58,  8.13s/it]

Epoch: 3853 | Training loss 2.861138917505741 | Validation loss 2.798703581094742



 39%|███████████████████████████▊                                            | 3855/10000 [8:45:36<13:53:21,  8.14s/it]

Epoch: 3854 | Training loss 2.8628983572125435 | Validation loss 2.800474166870117



 39%|███████████████████████████▊                                            | 3856/10000 [8:45:44<13:52:06,  8.13s/it]

Epoch: 3855 | Training loss 2.869488351047039 | Validation loss 2.7883830070495605



 39%|███████████████████████████▊                                            | 3857/10000 [8:45:52<13:48:18,  8.09s/it]

Epoch: 3856 | Training loss 2.868597388267517 | Validation loss 2.79868683218956



 39%|███████████████████████████▊                                            | 3858/10000 [8:46:00<13:48:00,  8.09s/it]

Epoch: 3857 | Training loss 2.872278556227684 | Validation loss 2.8001714646816254



 39%|███████████████████████████▊                                            | 3859/10000 [8:46:08<13:47:14,  8.08s/it]

Epoch: 3858 | Training loss 2.8669526651501656 | Validation loss 2.792171359062195



 39%|███████████████████████████▊                                            | 3860/10000 [8:46:16<13:48:38,  8.10s/it]

Epoch: 3859 | Training loss 2.862365670502186 | Validation loss 2.786113917827606



 39%|███████████████████████████▊                                            | 3861/10000 [8:46:24<13:49:34,  8.11s/it]

Epoch: 3860 | Training loss 2.865677885711193 | Validation loss 2.7927805185317993



 39%|███████████████████████████▊                                            | 3862/10000 [8:46:32<13:49:13,  8.11s/it]

Epoch: 3861 | Training loss 2.8660417571663857 | Validation loss 2.790509432554245



 39%|███████████████████████████▊                                            | 3863/10000 [8:46:41<13:48:58,  8.10s/it]

Epoch: 3862 | Training loss 2.8681216314435005 | Validation loss 2.7970493733882904



 39%|███████████████████████████▊                                            | 3864/10000 [8:46:49<13:47:29,  8.09s/it]

Epoch: 3863 | Training loss 2.8639899119734764 | Validation loss 2.792018234729767



 39%|███████████████████████████▊                                            | 3865/10000 [8:46:57<13:48:41,  8.10s/it]

Epoch: 3864 | Training loss 2.865333452820778 | Validation loss 2.7957260608673096



 39%|███████████████████████████▊                                            | 3866/10000 [8:47:05<13:45:05,  8.07s/it]

Epoch: 3865 | Training loss 2.8676127418875694 | Validation loss 2.7899082005023956



 39%|███████████████████████████▊                                            | 3867/10000 [8:47:13<13:41:28,  8.04s/it]

Epoch: 3866 | Training loss 2.862655021250248 | Validation loss 2.7888598144054413



 39%|███████████████████████████▊                                            | 3868/10000 [8:47:21<13:44:22,  8.07s/it]

Epoch: 3867 | Training loss 2.860028237104416 | Validation loss 2.7866540253162384



 39%|███████████████████████████▊                                            | 3869/10000 [8:47:29<13:45:49,  8.08s/it]

Epoch: 3868 | Training loss 2.860933095216751 | Validation loss 2.790455013513565



 39%|███████████████████████████▊                                            | 3870/10000 [8:47:37<13:43:54,  8.06s/it]

Epoch: 3869 | Training loss 2.8623766899108887 | Validation loss 2.788171410560608



 39%|███████████████████████████▊                                            | 3871/10000 [8:47:45<13:44:53,  8.08s/it]

Epoch: 3870 | Training loss 2.867392271757126 | Validation loss 2.7964862287044525



 39%|███████████████████████████▉                                            | 3872/10000 [8:47:53<13:44:37,  8.07s/it]

Epoch: 3871 | Training loss 2.8683286160230637 | Validation loss 2.80554336309433



 39%|███████████████████████████▉                                            | 3873/10000 [8:48:01<13:41:52,  8.05s/it]

Epoch: 3872 | Training loss 2.858481742441654 | Validation loss 2.7938961684703827



 39%|███████████████████████████▉                                            | 3874/10000 [8:48:09<13:42:52,  8.06s/it]

Epoch: 3873 | Training loss 2.862022891640663 | Validation loss 2.786101311445236



 39%|███████████████████████████▉                                            | 3875/10000 [8:48:17<13:43:28,  8.07s/it]

Epoch: 3874 | Training loss 2.864522434771061 | Validation loss 2.7881161272525787



 39%|███████████████████████████▉                                            | 3876/10000 [8:48:25<13:45:50,  8.09s/it]

Epoch: 3875 | Training loss 2.867296651005745 | Validation loss 2.7949063181877136



 39%|███████████████████████████▉                                            | 3877/10000 [8:48:34<13:50:23,  8.14s/it]

Epoch: 3876 | Training loss 2.8694507032632828 | Validation loss 2.7932651340961456



 39%|███████████████████████████▉                                            | 3878/10000 [8:48:42<13:49:49,  8.13s/it]

Epoch: 3877 | Training loss 2.8688797652721405 | Validation loss 2.789379060268402



 39%|███████████████████████████▉                                            | 3879/10000 [8:48:50<13:48:40,  8.12s/it]

Epoch: 3878 | Training loss 2.865161009132862 | Validation loss 2.794331669807434



 39%|███████████████████████████▉                                            | 3880/10000 [8:48:58<13:51:47,  8.15s/it]

Epoch: 3879 | Training loss 2.862938314676285 | Validation loss 2.793409436941147



 39%|███████████████████████████▉                                            | 3881/10000 [8:49:06<13:48:34,  8.12s/it]

Epoch: 3880 | Training loss 2.8681169748306274 | Validation loss 2.7928272485733032



 39%|███████████████████████████▉                                            | 3882/10000 [8:49:14<13:47:42,  8.12s/it]

Epoch: 3881 | Training loss 2.8590530902147293 | Validation loss 2.789060413837433



 39%|███████████████████████████▉                                            | 3883/10000 [8:49:22<13:45:42,  8.10s/it]

Epoch: 3882 | Training loss 2.8700542375445366 | Validation loss 2.7872095108032227



 39%|███████████████████████████▉                                            | 3884/10000 [8:49:31<13:46:52,  8.11s/it]

Epoch: 3883 | Training loss 2.863179326057434 | Validation loss 2.788927912712097



 39%|███████████████████████████▉                                            | 3885/10000 [8:49:39<13:43:30,  8.08s/it]

Epoch: 3884 | Training loss 2.866597928106785 | Validation loss 2.7914062440395355



 39%|███████████████████████████▉                                            | 3886/10000 [8:49:47<13:44:05,  8.09s/it]

Epoch: 3885 | Training loss 2.8703136891126633 | Validation loss 2.7940917909145355



 39%|███████████████████████████▉                                            | 3887/10000 [8:49:55<13:44:25,  8.09s/it]

Epoch: 3886 | Training loss 2.859091430902481 | Validation loss 2.7875248193740845



 39%|███████████████████████████▉                                            | 3888/10000 [8:50:03<13:42:38,  8.08s/it]

Epoch: 3887 | Training loss 2.866354763507843 | Validation loss 2.7903308868408203



 39%|████████████████████████████                                            | 3889/10000 [8:50:11<13:42:49,  8.08s/it]

Epoch: 3888 | Training loss 2.87042286247015 | Validation loss 2.792383700609207



 39%|████████████████████████████                                            | 3890/10000 [8:50:19<13:44:46,  8.10s/it]

Epoch: 3889 | Training loss 2.861337460577488 | Validation loss 2.7875459492206573



 39%|████████████████████████████                                            | 3891/10000 [8:50:27<13:50:26,  8.16s/it]

Epoch: 3890 | Training loss 2.868531160056591 | Validation loss 2.7964874505996704



 39%|████████████████████████████                                            | 3892/10000 [8:50:35<13:49:14,  8.15s/it]

Epoch: 3891 | Training loss 2.865861013531685 | Validation loss 2.789074420928955



 39%|████████████████████████████                                            | 3893/10000 [8:50:43<13:46:15,  8.12s/it]

Epoch: 3892 | Training loss 2.8700381591916084 | Validation loss 2.7963446974754333



 39%|████████████████████████████                                            | 3894/10000 [8:50:52<13:47:32,  8.13s/it]

Epoch: 3893 | Training loss 2.862144313752651 | Validation loss 2.794128566980362



 39%|████████████████████████████                                            | 3895/10000 [8:51:00<13:45:56,  8.12s/it]

Epoch: 3894 | Training loss 2.862220101058483 | Validation loss 2.789314240217209



 39%|████████████████████████████                                            | 3896/10000 [8:51:08<13:45:48,  8.12s/it]

Epoch: 3895 | Training loss 2.860133722424507 | Validation loss 2.7877933382987976



 39%|████████████████████████████                                            | 3897/10000 [8:51:16<13:44:16,  8.10s/it]

Epoch: 3896 | Training loss 2.8671255111694336 | Validation loss 2.7936915457248688



 39%|████████████████████████████                                            | 3898/10000 [8:51:24<13:44:31,  8.11s/it]

Epoch: 3897 | Training loss 2.870228946208954 | Validation loss 2.7908343970775604



 39%|████████████████████████████                                            | 3899/10000 [8:51:32<13:44:09,  8.11s/it]

Epoch: 3898 | Training loss 2.8646005988121033 | Validation loss 2.7879277169704437



 39%|████████████████████████████                                            | 3900/10000 [8:51:40<13:44:56,  8.11s/it]

Epoch: 3899 | Training loss 2.863221801817417 | Validation loss 2.7895950376987457



 39%|████████████████████████████                                            | 3901/10000 [8:51:48<13:42:25,  8.09s/it]

Epoch: 3900 | Training loss 2.8661853596568108 | Validation loss 2.7938596308231354



 39%|████████████████████████████                                            | 3902/10000 [8:51:56<13:44:06,  8.11s/it]

Epoch: 3901 | Training loss 2.861857071518898 | Validation loss 2.7943421006202698



 39%|████████████████████████████                                            | 3903/10000 [8:52:05<13:44:43,  8.12s/it]

Epoch: 3902 | Training loss 2.8666508868336678 | Validation loss 2.794523537158966



 39%|████████████████████████████                                            | 3904/10000 [8:52:13<13:46:59,  8.14s/it]

Epoch: 3903 | Training loss 2.8693050146102905 | Validation loss 2.7858590185642242



 39%|████████████████████████████                                            | 3905/10000 [8:52:21<13:49:30,  8.17s/it]

Epoch: 3904 | Training loss 2.8685779348015785 | Validation loss 2.792518973350525



 39%|████████████████████████████                                            | 3906/10000 [8:52:29<13:47:50,  8.15s/it]

Epoch: 3905 | Training loss 2.866950824856758 | Validation loss 2.792621374130249



 39%|████████████████████████████▏                                           | 3907/10000 [8:52:37<13:47:10,  8.15s/it]

Epoch: 3906 | Training loss 2.863061472773552 | Validation loss 2.7902014553546906



 39%|████████████████████████████▏                                           | 3908/10000 [8:52:45<13:45:40,  8.13s/it]

Epoch: 3907 | Training loss 2.865387186408043 | Validation loss 2.789528101682663



 39%|████████████████████████████▏                                           | 3909/10000 [8:52:54<13:46:31,  8.14s/it]

Epoch: 3908 | Training loss 2.8680211678147316 | Validation loss 2.7934882938861847



 39%|████████████████████████████▏                                           | 3910/10000 [8:53:02<13:46:15,  8.14s/it]

Epoch: 3909 | Training loss 2.866972416639328 | Validation loss 2.7924814224243164



 39%|████████████████████████████▏                                           | 3911/10000 [8:53:10<13:45:50,  8.14s/it]

Epoch: 3910 | Training loss 2.8632865101099014 | Validation loss 2.7866058945655823



 39%|████████████████████████████▏                                           | 3912/10000 [8:53:18<13:47:33,  8.16s/it]

Epoch: 3911 | Training loss 2.871916189789772 | Validation loss 2.788034111261368



 39%|████████████████████████████▏                                           | 3913/10000 [8:53:26<13:48:09,  8.16s/it]

Epoch: 3912 | Training loss 2.8690486550331116 | Validation loss 2.7938874065876007



 39%|████████████████████████████▏                                           | 3914/10000 [8:53:34<13:50:26,  8.19s/it]

Epoch: 3913 | Training loss 2.8621197193861008 | Validation loss 2.7891729176044464



 39%|████████████████████████████▏                                           | 3915/10000 [8:53:42<13:46:13,  8.15s/it]

Epoch: 3914 | Training loss 2.860388658940792 | Validation loss 2.792022794485092



 39%|████████████████████████████▏                                           | 3916/10000 [8:53:51<13:47:29,  8.16s/it]

Epoch: 3915 | Training loss 2.8591955974698067 | Validation loss 2.787391275167465



 39%|████████████████████████████▏                                           | 3917/10000 [8:53:59<13:44:40,  8.13s/it]

Epoch: 3916 | Training loss 2.8561358377337456 | Validation loss 2.7906047999858856



 39%|████████████████████████████▏                                           | 3918/10000 [8:54:07<13:43:03,  8.12s/it]

Epoch: 3917 | Training loss 2.8678161054849625 | Validation loss 2.7894138395786285



 39%|████████████████████████████▏                                           | 3919/10000 [8:54:15<13:42:23,  8.11s/it]

Epoch: 3918 | Training loss 2.871143028140068 | Validation loss 2.792142689228058



 39%|████████████████████████████▏                                           | 3920/10000 [8:54:23<13:39:29,  8.09s/it]

Epoch: 3919 | Training loss 2.867718704044819 | Validation loss 2.798262804746628



 39%|████████████████████████████▏                                           | 3921/10000 [8:54:31<13:38:17,  8.08s/it]

Epoch: 3920 | Training loss 2.870847910642624 | Validation loss 2.791179656982422



 39%|████████████████████████████▏                                           | 3922/10000 [8:54:39<13:40:03,  8.10s/it]

Epoch: 3921 | Training loss 2.86388136446476 | Validation loss 2.7882344722747803



 39%|████████████████████████████▏                                           | 3923/10000 [8:54:47<13:38:43,  8.08s/it]

Epoch: 3922 | Training loss 2.8654408529400826 | Validation loss 2.7896955609321594



 39%|████████████████████████████▎                                           | 3924/10000 [8:54:55<13:36:22,  8.06s/it]

Epoch: 3923 | Training loss 2.8676692098379135 | Validation loss 2.7986696660518646



 39%|████████████████████████████▎                                           | 3925/10000 [8:55:03<13:37:03,  8.07s/it]

Epoch: 3924 | Training loss 2.8709231540560722 | Validation loss 2.7882590293884277



 39%|████████████████████████████▎                                           | 3926/10000 [8:55:11<13:36:53,  8.07s/it]

Epoch: 3925 | Training loss 2.864879570901394 | Validation loss 2.786815047264099



 39%|████████████████████████████▎                                           | 3927/10000 [8:55:19<13:35:50,  8.06s/it]

Epoch: 3926 | Training loss 2.862416736781597 | Validation loss 2.788840413093567



 39%|████████████████████████████▎                                           | 3928/10000 [8:55:27<13:35:27,  8.06s/it]

Epoch: 3927 | Training loss 2.866488642990589 | Validation loss 2.7906079292297363



 39%|████████████████████████████▎                                           | 3929/10000 [8:55:36<13:36:38,  8.07s/it]

Epoch: 3928 | Training loss 2.8639735728502274 | Validation loss 2.7872127294540405



 39%|████████████████████████████▎                                           | 3930/10000 [8:55:44<13:40:16,  8.11s/it]

Epoch: 3929 | Training loss 2.862536571919918 | Validation loss 2.7864527106285095



 39%|████████████████████████████▎                                           | 3931/10000 [8:55:52<13:40:24,  8.11s/it]

Epoch: 3930 | Training loss 2.8639501184225082 | Validation loss 2.7924802899360657



 39%|████████████████████████████▎                                           | 3932/10000 [8:56:00<13:40:55,  8.12s/it]

Epoch: 3931 | Training loss 2.8650429621338844 | Validation loss 2.7918650209903717



 39%|████████████████████████████▎                                           | 3933/10000 [8:56:08<13:38:26,  8.09s/it]

Epoch: 3932 | Training loss 2.861520901322365 | Validation loss 2.7897914350032806



 39%|████████████████████████████▎                                           | 3934/10000 [8:56:16<13:38:33,  8.10s/it]

Epoch: 3933 | Training loss 2.8650863021612167 | Validation loss 2.788953185081482



 39%|████████████████████████████▎                                           | 3935/10000 [8:56:24<13:38:19,  8.10s/it]

Epoch: 3934 | Training loss 2.8682421892881393 | Validation loss 2.7877660393714905



 39%|████████████████████████████▎                                           | 3936/10000 [8:56:32<13:38:23,  8.10s/it]

Epoch: 3935 | Training loss 2.8609574884176254 | Validation loss 2.790169894695282



 39%|████████████████████████████▎                                           | 3937/10000 [8:56:40<13:37:24,  8.09s/it]

Epoch: 3936 | Training loss 2.8624906465411186 | Validation loss 2.788666605949402



 39%|████████████████████████████▎                                           | 3938/10000 [8:56:49<13:41:35,  8.13s/it]

Epoch: 3937 | Training loss 2.868748314678669 | Validation loss 2.7924624383449554



 39%|████████████████████████████▎                                           | 3939/10000 [8:56:57<13:36:14,  8.08s/it]

Epoch: 3938 | Training loss 2.8693587705492973 | Validation loss 2.798263341188431



 39%|████████████████████████████▎                                           | 3940/10000 [8:57:05<13:40:45,  8.13s/it]

Epoch: 3939 | Training loss 2.868266336619854 | Validation loss 2.793231338262558



 39%|████████████████████████████▍                                           | 3941/10000 [8:57:13<13:39:23,  8.11s/it]

Epoch: 3940 | Training loss 2.863525405526161 | Validation loss 2.785177856683731



 39%|████████████████████████████▍                                           | 3942/10000 [8:57:21<13:41:45,  8.14s/it]

Epoch: 3941 | Training loss 2.8684825226664543 | Validation loss 2.789896845817566



 39%|████████████████████████████▍                                           | 3943/10000 [8:57:29<13:40:32,  8.13s/it]

Epoch: 3942 | Training loss 2.8676457554101944 | Validation loss 2.793342173099518



 39%|████████████████████████████▍                                           | 3944/10000 [8:57:37<13:41:01,  8.13s/it]

Epoch: 3943 | Training loss 2.8665455505251884 | Validation loss 2.794523060321808



 39%|████████████████████████████▍                                           | 3945/10000 [8:57:45<13:38:51,  8.11s/it]

Epoch: 3944 | Training loss 2.870230197906494 | Validation loss 2.8013565242290497



 39%|████████████████████████████▍                                           | 3946/10000 [8:57:54<13:43:23,  8.16s/it]

Epoch: 3945 | Training loss 2.8673544451594353 | Validation loss 2.7907404005527496



 39%|████████████████████████████▍                                           | 3947/10000 [8:58:02<13:46:02,  8.19s/it]

Epoch: 3946 | Training loss 2.8716106712818146 | Validation loss 2.804569274187088



 39%|████████████████████████████▍                                           | 3948/10000 [8:58:10<13:43:44,  8.17s/it]

Epoch: 3947 | Training loss 2.863858789205551 | Validation loss 2.7883607149124146



 39%|████████████████████████████▍                                           | 3949/10000 [8:58:18<13:44:29,  8.18s/it]

The best model was saved!
Epoch: 3948 | Training loss 2.86148864030838 | Validation loss 2.781742513179779



 40%|████████████████████████████▍                                           | 3950/10000 [8:58:26<13:39:48,  8.13s/it]

Epoch: 3949 | Training loss 2.8689440563321114 | Validation loss 2.7910906076431274



 40%|████████████████████████████▍                                           | 3951/10000 [8:58:34<13:36:54,  8.10s/it]

Epoch: 3950 | Training loss 2.8723998963832855 | Validation loss 2.794883906841278



 40%|████████████████████████████▍                                           | 3952/10000 [8:58:42<13:36:15,  8.10s/it]

Epoch: 3951 | Training loss 2.8667394816875458 | Validation loss 2.792984753847122



 40%|████████████████████████████▍                                           | 3953/10000 [8:58:50<13:34:18,  8.08s/it]

Epoch: 3952 | Training loss 2.8719391375780106 | Validation loss 2.799058824777603



 40%|████████████████████████████▍                                           | 3954/10000 [8:58:59<13:35:45,  8.10s/it]

Epoch: 3953 | Training loss 2.8634634912014008 | Validation loss 2.79263174533844



 40%|████████████████████████████▍                                           | 3955/10000 [8:59:07<13:33:24,  8.07s/it]

Epoch: 3954 | Training loss 2.8641525357961655 | Validation loss 2.793909043073654



 40%|████████████████████████████▍                                           | 3956/10000 [8:59:15<13:34:35,  8.09s/it]

Epoch: 3955 | Training loss 2.8643795177340508 | Validation loss 2.794025033712387



 40%|████████████████████████████▍                                           | 3957/10000 [8:59:23<13:33:57,  8.08s/it]

Epoch: 3956 | Training loss 2.865782916545868 | Validation loss 2.791842758655548



 40%|████████████████████████████▍                                           | 3958/10000 [8:59:31<13:31:07,  8.05s/it]

Epoch: 3957 | Training loss 2.8652467280626297 | Validation loss 2.7899836599826813



 40%|████████████████████████████▌                                           | 3959/10000 [8:59:39<13:30:02,  8.05s/it]

Epoch: 3958 | Training loss 2.8663713335990906 | Validation loss 2.795526683330536



 40%|████████████████████████████▌                                           | 3960/10000 [8:59:47<13:30:13,  8.05s/it]

Epoch: 3959 | Training loss 2.8665510565042496 | Validation loss 2.792813301086426



 40%|████████████████████████████▌                                           | 3961/10000 [8:59:55<13:29:46,  8.05s/it]

Epoch: 3960 | Training loss 2.864694058895111 | Validation loss 2.78693163394928



 40%|████████████████████████████▌                                           | 3962/10000 [9:00:03<13:30:21,  8.05s/it]

Epoch: 3961 | Training loss 2.8620526045560837 | Validation loss 2.789877027273178



 40%|████████████████████████████▌                                           | 3963/10000 [9:00:11<13:30:41,  8.06s/it]

Epoch: 3962 | Training loss 2.867537446320057 | Validation loss 2.7898823618888855



 40%|████████████████████████████▌                                           | 3964/10000 [9:00:19<13:31:30,  8.07s/it]

Epoch: 3963 | Training loss 2.8632140904664993 | Validation loss 2.785360038280487



 40%|████████████████████████████▌                                           | 3965/10000 [9:00:27<13:34:22,  8.10s/it]

Epoch: 3964 | Training loss 2.867405019700527 | Validation loss 2.7899243533611298



 40%|████████████████████████████▌                                           | 3966/10000 [9:00:35<13:32:31,  8.08s/it]

Epoch: 3965 | Training loss 2.8678771927952766 | Validation loss 2.789760112762451



 40%|████████████████████████████▌                                           | 3967/10000 [9:00:43<13:31:08,  8.07s/it]

Epoch: 3966 | Training loss 2.867400109767914 | Validation loss 2.785372734069824



 40%|████████████████████████████▌                                           | 3968/10000 [9:00:51<13:28:46,  8.04s/it]

Epoch: 3967 | Training loss 2.871538244187832 | Validation loss 2.7873637676239014



 40%|████████████████████████████▌                                           | 3969/10000 [9:00:59<13:29:22,  8.05s/it]

Epoch: 3968 | Training loss 2.868179626762867 | Validation loss 2.8069489300251007



 40%|████████████████████████████▌                                           | 3970/10000 [9:01:08<13:32:05,  8.08s/it]

Epoch: 3969 | Training loss 2.864234432578087 | Validation loss 2.7900449335575104



 40%|████████████████████████████▌                                           | 3971/10000 [9:01:16<13:33:27,  8.10s/it]

Epoch: 3970 | Training loss 2.8638262674212456 | Validation loss 2.7887555360794067



 40%|████████████████████████████▌                                           | 3972/10000 [9:01:24<13:35:28,  8.12s/it]

Epoch: 3971 | Training loss 2.8685159981250763 | Validation loss 2.78856360912323



 40%|████████████████████████████▌                                           | 3973/10000 [9:01:32<13:30:07,  8.06s/it]

Epoch: 3972 | Training loss 2.87096731364727 | Validation loss 2.789780557155609



 40%|████████████████████████████▌                                           | 3974/10000 [9:01:40<13:28:42,  8.05s/it]

Epoch: 3973 | Training loss 2.8701866790652275 | Validation loss 2.797817051410675



 40%|████████████████████████████▌                                           | 3975/10000 [9:01:48<13:32:25,  8.09s/it]

Epoch: 3974 | Training loss 2.8634127229452133 | Validation loss 2.7999875843524933



 40%|████████████████████████████▋                                           | 3976/10000 [9:01:56<13:32:38,  8.09s/it]

Epoch: 3975 | Training loss 2.8663800954818726 | Validation loss 2.7912429869174957



 40%|████████████████████████████▋                                           | 3977/10000 [9:02:04<13:31:49,  8.09s/it]

Epoch: 3976 | Training loss 2.8658612966537476 | Validation loss 2.7922032475471497



 40%|████████████████████████████▋                                           | 3978/10000 [9:02:12<13:32:10,  8.09s/it]

Epoch: 3977 | Training loss 2.8598051965236664 | Validation loss 2.793756991624832



 40%|████████████████████████████▋                                           | 3979/10000 [9:02:20<13:30:58,  8.08s/it]

Epoch: 3978 | Training loss 2.8716276362538338 | Validation loss 2.7920886278152466



 40%|████████████████████████████▋                                           | 3980/10000 [9:02:28<13:30:06,  8.07s/it]

Epoch: 3979 | Training loss 2.8700197488069534 | Validation loss 2.802557945251465



 40%|████████████████████████████▋                                           | 3981/10000 [9:02:37<13:32:29,  8.10s/it]

Epoch: 3980 | Training loss 2.863347716629505 | Validation loss 2.791915148496628



 40%|████████████████████████████▋                                           | 3982/10000 [9:02:45<13:35:20,  8.13s/it]

Epoch: 3981 | Training loss 2.8705186024308205 | Validation loss 2.7924784719944



 40%|████████████████████████████▋                                           | 3983/10000 [9:02:53<13:34:49,  8.13s/it]

Epoch: 3982 | Training loss 2.865425854921341 | Validation loss 2.789798676967621



 40%|████████████████████████████▋                                           | 3984/10000 [9:03:01<13:32:33,  8.10s/it]

Epoch: 3983 | Training loss 2.862613692879677 | Validation loss 2.793258309364319



 40%|████████████████████████████▋                                           | 3985/10000 [9:03:09<13:31:25,  8.09s/it]

Epoch: 3984 | Training loss 2.866125024855137 | Validation loss 2.791250556707382



 40%|████████████████████████████▋                                           | 3986/10000 [9:03:17<13:28:39,  8.07s/it]

Epoch: 3985 | Training loss 2.8660099655389786 | Validation loss 2.790950834751129



 40%|████████████████████████████▋                                           | 3987/10000 [9:03:25<13:28:27,  8.07s/it]

Epoch: 3986 | Training loss 2.868469938635826 | Validation loss 2.792063057422638



 40%|████████████████████████████▋                                           | 3988/10000 [9:03:33<13:29:22,  8.08s/it]

Epoch: 3987 | Training loss 2.872752957046032 | Validation loss 2.7903829216957092



 40%|████████████████████████████▋                                           | 3989/10000 [9:03:41<13:30:04,  8.09s/it]

Epoch: 3988 | Training loss 2.870296522974968 | Validation loss 2.7918328046798706



 40%|████████████████████████████▋                                           | 3990/10000 [9:03:49<13:27:38,  8.06s/it]

Epoch: 3989 | Training loss 2.864838644862175 | Validation loss 2.792186915874481



 40%|████████████████████████████▋                                           | 3991/10000 [9:03:57<13:27:44,  8.07s/it]

Epoch: 3990 | Training loss 2.8667372837662697 | Validation loss 2.7883108258247375



 40%|████████████████████████████▋                                           | 3992/10000 [9:04:05<13:26:50,  8.06s/it]

Epoch: 3991 | Training loss 2.8609017208218575 | Validation loss 2.789171576499939



 40%|████████████████████████████▋                                           | 3993/10000 [9:04:13<13:27:07,  8.06s/it]

Epoch: 3992 | Training loss 2.865266926586628 | Validation loss 2.793163925409317



 40%|████████████████████████████▊                                           | 3994/10000 [9:04:22<13:27:44,  8.07s/it]

Epoch: 3993 | Training loss 2.866955816745758 | Validation loss 2.7896831333637238



 40%|████████████████████████████▊                                           | 3995/10000 [9:04:30<13:26:40,  8.06s/it]

Epoch: 3994 | Training loss 2.8631303012371063 | Validation loss 2.7877646386623383



 40%|████████████████████████████▊                                           | 3996/10000 [9:04:38<13:25:24,  8.05s/it]

Epoch: 3995 | Training loss 2.865867294371128 | Validation loss 2.7889617681503296



 40%|████████████████████████████▊                                           | 3997/10000 [9:04:46<13:28:16,  8.08s/it]

Epoch: 3996 | Training loss 2.8633473366498947 | Validation loss 2.78600412607193



 40%|████████████████████████████▊                                           | 3998/10000 [9:04:54<13:25:35,  8.05s/it]

Epoch: 3997 | Training loss 2.8644195944070816 | Validation loss 2.794944256544113



 40%|████████████████████████████▊                                           | 3999/10000 [9:05:02<13:24:36,  8.04s/it]

Epoch: 3998 | Training loss 2.8643379658460617 | Validation loss 2.7908952236175537



 40%|████████████████████████████▊                                           | 4000/10000 [9:05:10<13:27:30,  8.08s/it]

Epoch: 3999 | Training loss 2.871231622993946 | Validation loss 2.7883812189102173



 40%|████████████████████████████▊                                           | 4001/10000 [9:05:18<13:29:07,  8.09s/it]

Epoch: 4000 | Training loss 2.873332493007183 | Validation loss 2.7900132834911346



 40%|████████████████████████████▊                                           | 4002/10000 [9:05:26<13:30:40,  8.11s/it]

Epoch: 4001 | Training loss 2.863251857459545 | Validation loss 2.7904271483421326



 40%|████████████████████████████▊                                           | 4003/10000 [9:05:34<13:32:10,  8.13s/it]

Epoch: 4002 | Training loss 2.863518387079239 | Validation loss 2.793482393026352



 40%|████████████████████████████▊                                           | 4004/10000 [9:05:43<13:31:45,  8.12s/it]

Epoch: 4003 | Training loss 2.8650158494710922 | Validation loss 2.788014739751816



 40%|████████████████████████████▊                                           | 4005/10000 [9:05:51<13:31:38,  8.12s/it]

Epoch: 4004 | Training loss 2.861061319708824 | Validation loss 2.788020133972168



 40%|████████████████████████████▊                                           | 4006/10000 [9:05:59<13:31:44,  8.13s/it]

Epoch: 4005 | Training loss 2.8645457103848457 | Validation loss 2.78890123963356



 40%|████████████████████████████▊                                           | 4007/10000 [9:06:07<13:29:53,  8.11s/it]

Epoch: 4006 | Training loss 2.865893229842186 | Validation loss 2.7932270169258118



 40%|████████████████████████████▊                                           | 4008/10000 [9:06:15<13:27:16,  8.08s/it]

Epoch: 4007 | Training loss 2.866847537457943 | Validation loss 2.793777823448181



 40%|████████████████████████████▊                                           | 4009/10000 [9:06:23<13:29:09,  8.10s/it]

Epoch: 4008 | Training loss 2.8654636219143867 | Validation loss 2.7896776497364044



 40%|████████████████████████████▊                                           | 4010/10000 [9:06:31<13:32:33,  8.14s/it]

Epoch: 4009 | Training loss 2.8656324446201324 | Validation loss 2.7885492146015167



 40%|████████████████████████████▉                                           | 4011/10000 [9:06:39<13:31:19,  8.13s/it]

Epoch: 4010 | Training loss 2.866006314754486 | Validation loss 2.7943819165229797



 40%|████████████████████████████▉                                           | 4012/10000 [9:06:48<13:34:41,  8.16s/it]

Epoch: 4011 | Training loss 2.86916583776474 | Validation loss 2.793998569250107



 40%|████████████████████████████▉                                           | 4013/10000 [9:06:56<13:30:49,  8.13s/it]

Epoch: 4012 | Training loss 2.861557297408581 | Validation loss 2.786589026451111



 40%|████████████████████████████▉                                           | 4014/10000 [9:07:04<13:29:02,  8.11s/it]

Epoch: 4013 | Training loss 2.862435005605221 | Validation loss 2.794507086277008



 40%|████████████████████████████▉                                           | 4015/10000 [9:07:12<13:29:33,  8.12s/it]

Epoch: 4014 | Training loss 2.8693197146058083 | Validation loss 2.791686773300171



 40%|████████████████████████████▉                                           | 4016/10000 [9:07:20<13:26:21,  8.09s/it]

Epoch: 4015 | Training loss 2.8669378384947777 | Validation loss 2.7986524999141693



 40%|████████████████████████████▉                                           | 4017/10000 [9:07:28<13:25:42,  8.08s/it]

Epoch: 4016 | Training loss 2.86922537535429 | Validation loss 2.7956466376781464



 40%|████████████████████████████▉                                           | 4018/10000 [9:07:36<13:26:11,  8.09s/it]

Epoch: 4017 | Training loss 2.8640531301498413 | Validation loss 2.7889198064804077



 40%|████████████████████████████▉                                           | 4019/10000 [9:07:44<13:26:36,  8.09s/it]

Epoch: 4018 | Training loss 2.8701560720801353 | Validation loss 2.788430780172348



 40%|████████████████████████████▉                                           | 4020/10000 [9:07:52<13:25:29,  8.08s/it]

Epoch: 4019 | Training loss 2.8606102764606476 | Validation loss 2.7882058322429657



 40%|████████████████████████████▉                                           | 4021/10000 [9:08:00<13:28:21,  8.11s/it]

Epoch: 4020 | Training loss 2.8731096759438515 | Validation loss 2.789536714553833



 40%|████████████████████████████▉                                           | 4022/10000 [9:08:08<13:26:00,  8.09s/it]

Epoch: 4021 | Training loss 2.865810163319111 | Validation loss 2.7959741353988647



 40%|████████████████████████████▉                                           | 4023/10000 [9:08:16<13:23:04,  8.06s/it]

Epoch: 4022 | Training loss 2.8653610423207283 | Validation loss 2.797615259885788



 40%|████████████████████████████▉                                           | 4024/10000 [9:08:25<13:26:02,  8.09s/it]

Epoch: 4023 | Training loss 2.868263393640518 | Validation loss 2.790317863225937



 40%|████████████████████████████▉                                           | 4025/10000 [9:08:33<13:24:49,  8.08s/it]

Epoch: 4024 | Training loss 2.8660368770360947 | Validation loss 2.789650022983551



 40%|████████████████████████████▉                                           | 4026/10000 [9:08:41<13:24:50,  8.08s/it]

Epoch: 4025 | Training loss 2.862974651157856 | Validation loss 2.7987113893032074



 40%|████████████████████████████▉                                           | 4027/10000 [9:08:49<13:26:18,  8.10s/it]

Epoch: 4026 | Training loss 2.862159311771393 | Validation loss 2.7868068516254425



 40%|█████████████████████████████                                           | 4028/10000 [9:08:57<13:28:42,  8.13s/it]

Epoch: 4027 | Training loss 2.8653509840369225 | Validation loss 2.7852587401866913



 40%|█████████████████████████████                                           | 4029/10000 [9:09:05<13:29:20,  8.13s/it]

Epoch: 4028 | Training loss 2.8689083084464073 | Validation loss 2.7909174859523773



 40%|█████████████████████████████                                           | 4030/10000 [9:09:13<13:26:52,  8.11s/it]

Epoch: 4029 | Training loss 2.8665253296494484 | Validation loss 2.7902378737926483



 40%|█████████████████████████████                                           | 4031/10000 [9:09:21<13:26:05,  8.10s/it]

Epoch: 4030 | Training loss 2.868442542850971 | Validation loss 2.7954384088516235



 40%|█████████████████████████████                                           | 4032/10000 [9:09:29<13:22:38,  8.07s/it]

Epoch: 4031 | Training loss 2.8662362918257713 | Validation loss 2.788894236087799



 40%|█████████████████████████████                                           | 4033/10000 [9:09:37<13:21:35,  8.06s/it]

Epoch: 4032 | Training loss 2.8694572374224663 | Validation loss 2.804121285676956



 40%|█████████████████████████████                                           | 4034/10000 [9:09:46<13:27:50,  8.12s/it]

Epoch: 4033 | Training loss 2.867210440337658 | Validation loss 2.7903899252414703



 40%|█████████████████████████████                                           | 4035/10000 [9:09:54<13:23:44,  8.08s/it]

Epoch: 4034 | Training loss 2.8673274740576744 | Validation loss 2.792387843132019



 40%|█████████████████████████████                                           | 4036/10000 [9:10:02<13:20:23,  8.05s/it]

Epoch: 4035 | Training loss 2.865557946264744 | Validation loss 2.7913455069065094



 40%|█████████████████████████████                                           | 4037/10000 [9:10:10<13:17:58,  8.03s/it]

Epoch: 4036 | Training loss 2.868055686354637 | Validation loss 2.7905604541301727



 40%|█████████████████████████████                                           | 4038/10000 [9:10:18<13:16:44,  8.02s/it]

Epoch: 4037 | Training loss 2.85970775783062 | Validation loss 2.7903245389461517



 40%|█████████████████████████████                                           | 4039/10000 [9:10:26<13:19:10,  8.04s/it]

Epoch: 4038 | Training loss 2.8659008592367172 | Validation loss 2.795300453901291



 40%|█████████████████████████████                                           | 4040/10000 [9:10:34<13:20:44,  8.06s/it]

Epoch: 4039 | Training loss 2.8632182478904724 | Validation loss 2.790530502796173



 40%|█████████████████████████████                                           | 4041/10000 [9:10:42<13:18:27,  8.04s/it]

Epoch: 4040 | Training loss 2.8688150197267532 | Validation loss 2.7932258248329163



 40%|█████████████████████████████                                           | 4042/10000 [9:10:50<13:18:45,  8.04s/it]

Epoch: 4041 | Training loss 2.869839422404766 | Validation loss 2.790480613708496



 40%|█████████████████████████████                                           | 4043/10000 [9:10:58<13:19:51,  8.06s/it]

Epoch: 4042 | Training loss 2.861106254160404 | Validation loss 2.793923258781433



 40%|█████████████████████████████                                           | 4044/10000 [9:11:06<13:18:41,  8.05s/it]

Epoch: 4043 | Training loss 2.866912506520748 | Validation loss 2.7897981107234955



 40%|█████████████████████████████                                           | 4045/10000 [9:11:14<13:19:42,  8.06s/it]

Epoch: 4044 | Training loss 2.871416673064232 | Validation loss 2.7923573553562164



 40%|█████████████████████████████▏                                          | 4046/10000 [9:11:22<13:19:55,  8.06s/it]

Epoch: 4045 | Training loss 2.865087479352951 | Validation loss 2.7913874685764313



 40%|█████████████████████████████▏                                          | 4047/10000 [9:11:30<13:20:55,  8.07s/it]

Epoch: 4046 | Training loss 2.869006432592869 | Validation loss 2.7930843830108643



 40%|█████████████████████████████▏                                          | 4048/10000 [9:11:38<13:20:42,  8.07s/it]

Epoch: 4047 | Training loss 2.865304619073868 | Validation loss 2.791109472513199



 40%|█████████████████████████████▏                                          | 4049/10000 [9:11:46<13:25:40,  8.12s/it]

Epoch: 4048 | Training loss 2.869275338947773 | Validation loss 2.7922383248806



 40%|█████████████████████████████▏                                          | 4050/10000 [9:11:55<13:26:15,  8.13s/it]

Epoch: 4049 | Training loss 2.8662182688713074 | Validation loss 2.791002780199051



 41%|█████████████████████████████▏                                          | 4051/10000 [9:12:03<13:23:52,  8.11s/it]

Epoch: 4050 | Training loss 2.867989480495453 | Validation loss 2.789955973625183



 41%|█████████████████████████████▏                                          | 4052/10000 [9:12:11<13:24:27,  8.11s/it]

Epoch: 4051 | Training loss 2.8675397261977196 | Validation loss 2.7916421592235565



 41%|█████████████████████████████▏                                          | 4053/10000 [9:12:19<13:18:46,  8.06s/it]

Epoch: 4052 | Training loss 2.8685660511255264 | Validation loss 2.7970086336135864



 41%|█████████████████████████████▏                                          | 4054/10000 [9:12:27<13:20:52,  8.08s/it]

Epoch: 4053 | Training loss 2.8705309703946114 | Validation loss 2.8002741038799286



 41%|█████████████████████████████▏                                          | 4055/10000 [9:12:35<13:21:20,  8.09s/it]

Epoch: 4054 | Training loss 2.8686457574367523 | Validation loss 2.7886228561401367



 41%|█████████████████████████████▏                                          | 4056/10000 [9:12:43<13:18:44,  8.06s/it]

Epoch: 4055 | Training loss 2.8667818382382393 | Validation loss 2.7924092411994934



 41%|█████████████████████████████▏                                          | 4057/10000 [9:12:51<13:18:49,  8.06s/it]

Epoch: 4056 | Training loss 2.8698536083102226 | Validation loss 2.793348640203476



 41%|█████████████████████████████▏                                          | 4058/10000 [9:12:59<13:21:11,  8.09s/it]

Epoch: 4057 | Training loss 2.8627226799726486 | Validation loss 2.7906464338302612



 41%|█████████████████████████████▏                                          | 4059/10000 [9:13:07<13:21:50,  8.10s/it]

Epoch: 4058 | Training loss 2.8677613139152527 | Validation loss 2.7942822873592377



 41%|█████████████████████████████▏                                          | 4060/10000 [9:13:15<13:20:19,  8.08s/it]

Epoch: 4059 | Training loss 2.8645936772227287 | Validation loss 2.793352395296097



 41%|█████████████████████████████▏                                          | 4061/10000 [9:13:23<13:19:47,  8.08s/it]

Epoch: 4060 | Training loss 2.8692370280623436 | Validation loss 2.7933930456638336



 41%|█████████████████████████████▏                                          | 4062/10000 [9:13:31<13:18:00,  8.06s/it]

Epoch: 4061 | Training loss 2.8633527532219887 | Validation loss 2.7917274236679077



 41%|█████████████████████████████▎                                          | 4063/10000 [9:13:40<13:20:49,  8.09s/it]

Epoch: 4062 | Training loss 2.869053915143013 | Validation loss 2.79297798871994



 41%|█████████████████████████████▎                                          | 4064/10000 [9:13:48<13:19:46,  8.08s/it]

Epoch: 4063 | Training loss 2.867245987057686 | Validation loss 2.794953376054764



 41%|█████████████████████████████▎                                          | 4065/10000 [9:13:56<13:23:27,  8.12s/it]

Epoch: 4064 | Training loss 2.866179496049881 | Validation loss 2.790963649749756



 41%|█████████████████████████████▎                                          | 4066/10000 [9:14:04<13:23:06,  8.12s/it]

Epoch: 4065 | Training loss 2.870635651051998 | Validation loss 2.7951359152793884



 41%|█████████████████████████████▎                                          | 4067/10000 [9:14:12<13:21:31,  8.11s/it]

Epoch: 4066 | Training loss 2.870165601372719 | Validation loss 2.7904053330421448



 41%|█████████████████████████████▎                                          | 4068/10000 [9:14:20<13:19:23,  8.09s/it]

Epoch: 4067 | Training loss 2.8657001703977585 | Validation loss 2.7968945503234863



 41%|█████████████████████████████▎                                          | 4069/10000 [9:14:28<13:18:49,  8.08s/it]

Epoch: 4068 | Training loss 2.865434780716896 | Validation loss 2.797099381685257



 41%|█████████████████████████████▎                                          | 4070/10000 [9:14:36<13:17:28,  8.07s/it]

Epoch: 4069 | Training loss 2.8669413030147552 | Validation loss 2.796882390975952



 41%|█████████████████████████████▎                                          | 4071/10000 [9:14:44<13:17:22,  8.07s/it]

Epoch: 4070 | Training loss 2.8659100756049156 | Validation loss 2.7944579124450684



 41%|█████████████████████████████▎                                          | 4072/10000 [9:14:52<13:17:19,  8.07s/it]

Epoch: 4071 | Training loss 2.86708652228117 | Validation loss 2.798407882452011



 41%|█████████████████████████████▎                                          | 4073/10000 [9:15:00<13:15:52,  8.06s/it]

Epoch: 4072 | Training loss 2.8665217831730843 | Validation loss 2.7949041724205017



 41%|█████████████████████████████▎                                          | 4074/10000 [9:15:08<13:17:04,  8.07s/it]

Epoch: 4073 | Training loss 2.8665396720170975 | Validation loss 2.7895514965057373



 41%|█████████████████████████████▎                                          | 4075/10000 [9:15:17<13:19:12,  8.09s/it]

Epoch: 4074 | Training loss 2.8675192818045616 | Validation loss 2.7932009994983673



 41%|█████████████████████████████▎                                          | 4076/10000 [9:15:25<13:20:22,  8.11s/it]

Epoch: 4075 | Training loss 2.8671595230698586 | Validation loss 2.7894602715969086



 41%|█████████████████████████████▎                                          | 4077/10000 [9:15:33<13:20:03,  8.10s/it]

Epoch: 4076 | Training loss 2.869703859090805 | Validation loss 2.787431001663208



 41%|█████████████████████████████▎                                          | 4078/10000 [9:15:41<13:20:19,  8.11s/it]

Epoch: 4077 | Training loss 2.865194007754326 | Validation loss 2.7965332567691803



 41%|█████████████████████████████▎                                          | 4079/10000 [9:15:49<13:19:13,  8.10s/it]

Epoch: 4078 | Training loss 2.863802134990692 | Validation loss 2.7932902574539185



 41%|█████████████████████████████▍                                          | 4080/10000 [9:15:57<13:19:11,  8.10s/it]

Epoch: 4079 | Training loss 2.867997668683529 | Validation loss 2.7927088141441345



 41%|█████████████████████████████▍                                          | 4081/10000 [9:16:05<13:15:03,  8.06s/it]

Epoch: 4080 | Training loss 2.8692733496427536 | Validation loss 2.800750881433487



 41%|█████████████████████████████▍                                          | 4082/10000 [9:16:13<13:14:17,  8.05s/it]

Epoch: 4081 | Training loss 2.8652631640434265 | Validation loss 2.792497605085373



 41%|█████████████████████████████▍                                          | 4083/10000 [9:16:21<13:14:12,  8.05s/it]

Epoch: 4082 | Training loss 2.8687214106321335 | Validation loss 2.795975297689438



 41%|█████████████████████████████▍                                          | 4084/10000 [9:16:29<13:17:51,  8.09s/it]

Epoch: 4083 | Training loss 2.866501584649086 | Validation loss 2.7938042879104614



 41%|█████████████████████████████▍                                          | 4085/10000 [9:16:38<13:17:49,  8.09s/it]

Epoch: 4084 | Training loss 2.8605401068925858 | Validation loss 2.791730374097824



 41%|█████████████████████████████▍                                          | 4086/10000 [9:16:46<13:16:55,  8.09s/it]

Epoch: 4085 | Training loss 2.8719261959195137 | Validation loss 2.793117105960846



 41%|█████████████████████████████▍                                          | 4087/10000 [9:16:54<13:16:25,  8.08s/it]

Epoch: 4086 | Training loss 2.8663066178560257 | Validation loss 2.7933558523654938



 41%|█████████████████████████████▍                                          | 4088/10000 [9:17:02<13:14:36,  8.06s/it]

Epoch: 4087 | Training loss 2.8691444247961044 | Validation loss 2.794908732175827



 41%|█████████████████████████████▍                                          | 4089/10000 [9:17:10<13:15:13,  8.07s/it]

Epoch: 4088 | Training loss 2.872138924896717 | Validation loss 2.7900583148002625



 41%|█████████████████████████████▍                                          | 4090/10000 [9:17:18<13:17:39,  8.10s/it]

Epoch: 4089 | Training loss 2.863237276673317 | Validation loss 2.802700161933899



 41%|█████████████████████████████▍                                          | 4091/10000 [9:17:26<13:19:58,  8.12s/it]

Epoch: 4090 | Training loss 2.864356741309166 | Validation loss 2.7961405813694



 41%|█████████████████████████████▍                                          | 4092/10000 [9:17:34<13:19:17,  8.12s/it]

Epoch: 4091 | Training loss 2.867413006722927 | Validation loss 2.7920344173908234



 41%|█████████████████████████████▍                                          | 4093/10000 [9:17:42<13:20:58,  8.14s/it]

Epoch: 4092 | Training loss 2.867776393890381 | Validation loss 2.790435642004013



 41%|█████████████████████████████▍                                          | 4094/10000 [9:17:51<13:20:27,  8.13s/it]

Epoch: 4093 | Training loss 2.868065744638443 | Validation loss 2.7961665391921997



 41%|█████████████████████████████▍                                          | 4095/10000 [9:17:59<13:23:09,  8.16s/it]

Epoch: 4094 | Training loss 2.867767781019211 | Validation loss 2.789248138666153



 41%|█████████████████████████████▍                                          | 4096/10000 [9:18:07<13:21:16,  8.14s/it]

Epoch: 4095 | Training loss 2.8630665615200996 | Validation loss 2.792576491832733



 41%|█████████████████████████████▍                                          | 4097/10000 [9:18:15<13:18:36,  8.12s/it]

Epoch: 4096 | Training loss 2.8673383742570877 | Validation loss 2.786751538515091



 41%|█████████████████████████████▌                                          | 4098/10000 [9:18:23<13:19:39,  8.13s/it]

Epoch: 4097 | Training loss 2.864191949367523 | Validation loss 2.783509910106659



 41%|█████████████████████████████▌                                          | 4099/10000 [9:18:31<13:18:45,  8.12s/it]

Epoch: 4098 | Training loss 2.8649867326021194 | Validation loss 2.7928251326084137



 41%|█████████████████████████████▌                                          | 4100/10000 [9:18:40<13:26:48,  8.20s/it]

Epoch: 4099 | Training loss 2.8603359162807465 | Validation loss 2.7847964465618134



 41%|█████████████████████████████▌                                          | 4101/10000 [9:18:48<13:24:16,  8.18s/it]

Epoch: 4100 | Training loss 2.8615979701280594 | Validation loss 2.791040360927582



 41%|█████████████████████████████▌                                          | 4102/10000 [9:18:56<13:19:09,  8.13s/it]

Epoch: 4101 | Training loss 2.8696562349796295 | Validation loss 2.7900520265102386



 41%|█████████████████████████████▌                                          | 4103/10000 [9:19:04<13:17:45,  8.12s/it]

Epoch: 4102 | Training loss 2.862935923039913 | Validation loss 2.8015891313552856



 41%|█████████████████████████████▌                                          | 4104/10000 [9:19:12<13:20:24,  8.15s/it]

Epoch: 4103 | Training loss 2.870675563812256 | Validation loss 2.7940042912960052



 41%|█████████████████████████████▌                                          | 4105/10000 [9:19:20<13:20:51,  8.15s/it]

Epoch: 4104 | Training loss 2.86614777892828 | Validation loss 2.790983110666275



 41%|█████████████████████████████▌                                          | 4106/10000 [9:19:28<13:16:57,  8.11s/it]

Epoch: 4105 | Training loss 2.864050790667534 | Validation loss 2.7952374815940857



 41%|█████████████████████████████▌                                          | 4107/10000 [9:19:36<13:19:24,  8.14s/it]

Epoch: 4106 | Training loss 2.869307480752468 | Validation loss 2.7935360968112946



 41%|█████████████████████████████▌                                          | 4108/10000 [9:19:45<13:20:45,  8.15s/it]

Epoch: 4107 | Training loss 2.8666330873966217 | Validation loss 2.792493164539337



 41%|█████████████████████████████▌                                          | 4109/10000 [9:19:53<13:22:16,  8.17s/it]

Epoch: 4108 | Training loss 2.863634407520294 | Validation loss 2.78878515958786



 41%|█████████████████████████████▌                                          | 4110/10000 [9:20:01<13:18:52,  8.14s/it]

Epoch: 4109 | Training loss 2.8681518882513046 | Validation loss 2.7878557741642



 41%|█████████████████████████████▌                                          | 4111/10000 [9:20:09<13:17:44,  8.13s/it]

Epoch: 4110 | Training loss 2.8718283101916313 | Validation loss 2.792557716369629



 41%|█████████████████████████████▌                                          | 4112/10000 [9:20:17<13:15:53,  8.11s/it]

Epoch: 4111 | Training loss 2.8667707219719887 | Validation loss 2.786988317966461



 41%|█████████████████████████████▌                                          | 4113/10000 [9:20:25<13:16:39,  8.12s/it]

Epoch: 4112 | Training loss 2.8640069365501404 | Validation loss 2.791621893644333



 41%|█████████████████████████████▌                                          | 4114/10000 [9:20:33<13:17:49,  8.13s/it]

Epoch: 4113 | Training loss 2.87195748090744 | Validation loss 2.790862590074539



 41%|█████████████████████████████▋                                          | 4115/10000 [9:20:41<13:13:57,  8.09s/it]

Epoch: 4114 | Training loss 2.869296170771122 | Validation loss 2.7971962094306946



 41%|█████████████████████████████▋                                          | 4116/10000 [9:20:50<13:16:49,  8.13s/it]

Epoch: 4115 | Training loss 2.8622252717614174 | Validation loss 2.789894074201584



 41%|█████████████████████████████▋                                          | 4117/10000 [9:20:58<13:17:24,  8.13s/it]

Epoch: 4116 | Training loss 2.8658231869339943 | Validation loss 2.7862058877944946



 41%|█████████████████████████████▋                                          | 4118/10000 [9:21:06<13:18:44,  8.15s/it]

Epoch: 4117 | Training loss 2.8708954229950905 | Validation loss 2.7868773341178894



 41%|█████████████████████████████▋                                          | 4119/10000 [9:21:14<13:18:10,  8.14s/it]

Epoch: 4118 | Training loss 2.8692794144153595 | Validation loss 2.7901586294174194



 41%|█████████████████████████████▋                                          | 4120/10000 [9:21:22<13:20:08,  8.16s/it]

Epoch: 4119 | Training loss 2.866664841771126 | Validation loss 2.7900348007678986



 41%|█████████████████████████████▋                                          | 4121/10000 [9:21:30<13:19:07,  8.16s/it]

Epoch: 4120 | Training loss 2.8673869520425797 | Validation loss 2.7901689410209656



 41%|█████████████████████████████▋                                          | 4122/10000 [9:21:38<13:18:49,  8.15s/it]

Epoch: 4121 | Training loss 2.865118071436882 | Validation loss 2.7912601828575134



 41%|█████████████████████████████▋                                          | 4123/10000 [9:21:46<13:14:06,  8.11s/it]

Epoch: 4122 | Training loss 2.8697356581687927 | Validation loss 2.7904793322086334



 41%|█████████████████████████████▋                                          | 4124/10000 [9:21:54<13:11:03,  8.08s/it]

Epoch: 4123 | Training loss 2.8660527616739273 | Validation loss 2.7891777753829956



 41%|█████████████████████████████▋                                          | 4125/10000 [9:22:03<13:13:02,  8.10s/it]

Epoch: 4124 | Training loss 2.867859050631523 | Validation loss 2.788900464773178



 41%|█████████████████████████████▋                                          | 4126/10000 [9:22:11<13:13:53,  8.11s/it]

Epoch: 4125 | Training loss 2.867373414337635 | Validation loss 2.7922392189502716



 41%|█████████████████████████████▋                                          | 4127/10000 [9:22:19<13:11:14,  8.08s/it]

Epoch: 4126 | Training loss 2.8650992959737778 | Validation loss 2.790861129760742



 41%|█████████████████████████████▋                                          | 4128/10000 [9:22:27<13:14:52,  8.12s/it]

Epoch: 4127 | Training loss 2.8650494292378426 | Validation loss 2.7977088689804077



 41%|█████████████████████████████▋                                          | 4129/10000 [9:22:35<13:15:58,  8.13s/it]

Epoch: 4128 | Training loss 2.8704345524311066 | Validation loss 2.795837014913559



 41%|█████████████████████████████▋                                          | 4130/10000 [9:22:43<13:18:51,  8.17s/it]

Epoch: 4129 | Training loss 2.867042750120163 | Validation loss 2.7949450612068176



 41%|█████████████████████████████▋                                          | 4131/10000 [9:22:52<13:17:22,  8.15s/it]

Epoch: 4130 | Training loss 2.8662237972021103 | Validation loss 2.7906589210033417



 41%|█████████████████████████████▊                                          | 4132/10000 [9:23:00<13:16:41,  8.15s/it]

Epoch: 4131 | Training loss 2.8726783469319344 | Validation loss 2.7928059697151184



 41%|█████████████████████████████▊                                          | 4133/10000 [9:23:08<13:16:25,  8.14s/it]

Epoch: 4132 | Training loss 2.865984298288822 | Validation loss 2.7868323922157288



 41%|█████████████████████████████▊                                          | 4134/10000 [9:23:16<13:09:30,  8.08s/it]

Epoch: 4133 | Training loss 2.8661555275321007 | Validation loss 2.7925136983394623



 41%|█████████████████████████████▊                                          | 4135/10000 [9:23:24<13:10:21,  8.09s/it]

Epoch: 4134 | Training loss 2.869763620197773 | Validation loss 2.7893344461917877



 41%|█████████████████████████████▊                                          | 4136/10000 [9:23:32<13:13:13,  8.12s/it]

Epoch: 4135 | Training loss 2.8734211400151253 | Validation loss 2.7942513525485992



 41%|█████████████████████████████▊                                          | 4137/10000 [9:23:40<13:13:53,  8.12s/it]

Epoch: 4136 | Training loss 2.8606159910559654 | Validation loss 2.7907969057559967



 41%|█████████████████████████████▊                                          | 4138/10000 [9:23:48<13:14:23,  8.13s/it]

Epoch: 4137 | Training loss 2.8651712983846664 | Validation loss 2.799387603998184



 41%|█████████████████████████████▊                                          | 4139/10000 [9:23:56<13:10:07,  8.09s/it]

Epoch: 4138 | Training loss 2.8637871518731117 | Validation loss 2.8001081347465515



 41%|█████████████████████████████▊                                          | 4140/10000 [9:24:04<13:08:05,  8.07s/it]

Epoch: 4139 | Training loss 2.8698219284415245 | Validation loss 2.793408691883087



 41%|█████████████████████████████▊                                          | 4141/10000 [9:24:12<13:08:55,  8.08s/it]

Epoch: 4140 | Training loss 2.8658354952931404 | Validation loss 2.792712479829788



 41%|█████████████████████████████▊                                          | 4142/10000 [9:24:21<13:10:21,  8.10s/it]

Epoch: 4141 | Training loss 2.8673227354884148 | Validation loss 2.7924781441688538



 41%|█████████████████████████████▊                                          | 4143/10000 [9:24:29<13:13:59,  8.13s/it]

Epoch: 4142 | Training loss 2.8655798882246017 | Validation loss 2.791562110185623



 41%|█████████████████████████████▊                                          | 4144/10000 [9:24:37<13:13:21,  8.13s/it]

Epoch: 4143 | Training loss 2.862071119248867 | Validation loss 2.7905158400535583



 41%|█████████████████████████████▊                                          | 4145/10000 [9:24:45<13:13:36,  8.13s/it]

Epoch: 4144 | Training loss 2.8636534959077835 | Validation loss 2.7966870963573456



 41%|█████████████████████████████▊                                          | 4146/10000 [9:24:53<13:11:39,  8.11s/it]

Epoch: 4145 | Training loss 2.866459973156452 | Validation loss 2.7897664606571198



 41%|█████████████████████████████▊                                          | 4147/10000 [9:25:01<13:11:38,  8.12s/it]

Epoch: 4146 | Training loss 2.865048326551914 | Validation loss 2.7946681678295135



 41%|█████████████████████████████▊                                          | 4148/10000 [9:25:09<13:12:24,  8.12s/it]

Epoch: 4147 | Training loss 2.8657830357551575 | Validation loss 2.7925058901309967



 41%|█████████████████████████████▊                                          | 4149/10000 [9:25:17<13:09:46,  8.10s/it]

Epoch: 4148 | Training loss 2.862198457121849 | Validation loss 2.789569616317749



 42%|█████████████████████████████▉                                          | 4150/10000 [9:25:26<13:11:04,  8.11s/it]

Epoch: 4149 | Training loss 2.867004156112671 | Validation loss 2.7938346564769745



 42%|█████████████████████████████▉                                          | 4151/10000 [9:25:34<13:08:42,  8.09s/it]

Epoch: 4150 | Training loss 2.8646569177508354 | Validation loss 2.792300194501877



 42%|█████████████████████████████▉                                          | 4152/10000 [9:25:42<13:05:42,  8.06s/it]

Epoch: 4151 | Training loss 2.868597999215126 | Validation loss 2.7965228259563446



 42%|█████████████████████████████▉                                          | 4153/10000 [9:25:50<13:05:21,  8.06s/it]

Epoch: 4152 | Training loss 2.8661413714289665 | Validation loss 2.791973501443863



 42%|█████████████████████████████▉                                          | 4154/10000 [9:25:58<13:09:13,  8.10s/it]

Epoch: 4153 | Training loss 2.864903949201107 | Validation loss 2.789018750190735



 42%|█████████████████████████████▉                                          | 4155/10000 [9:26:06<13:07:17,  8.08s/it]

Epoch: 4154 | Training loss 2.8719937801361084 | Validation loss 2.792672485113144



 42%|█████████████████████████████▉                                          | 4156/10000 [9:26:14<13:08:16,  8.09s/it]

Epoch: 4155 | Training loss 2.863723248243332 | Validation loss 2.7956030666828156



 42%|█████████████████████████████▉                                          | 4157/10000 [9:26:22<13:13:26,  8.15s/it]

Epoch: 4156 | Training loss 2.8674789667129517 | Validation loss 2.792339861392975



 42%|█████████████████████████████▉                                          | 4158/10000 [9:26:30<13:09:45,  8.11s/it]

Epoch: 4157 | Training loss 2.8650538995862007 | Validation loss 2.784916490316391



 42%|█████████████████████████████▉                                          | 4159/10000 [9:26:38<13:08:53,  8.10s/it]

Epoch: 4158 | Training loss 2.864390768110752 | Validation loss 2.7941707372665405



 42%|█████████████████████████████▉                                          | 4160/10000 [9:26:47<13:09:36,  8.11s/it]

Epoch: 4159 | Training loss 2.8659273236989975 | Validation loss 2.7952044010162354



 42%|█████████████████████████████▉                                          | 4161/10000 [9:26:55<13:09:06,  8.11s/it]

Epoch: 4160 | Training loss 2.8656235337257385 | Validation loss 2.7957994639873505



 42%|█████████████████████████████▉                                          | 4162/10000 [9:27:03<13:09:33,  8.11s/it]

Epoch: 4161 | Training loss 2.8703811168670654 | Validation loss 2.79401096701622



 42%|█████████████████████████████▉                                          | 4163/10000 [9:27:11<13:07:40,  8.10s/it]

Epoch: 4162 | Training loss 2.8653450086712837 | Validation loss 2.7894359827041626



 42%|█████████████████████████████▉                                          | 4164/10000 [9:27:19<13:04:35,  8.07s/it]

Epoch: 4163 | Training loss 2.8664624616503716 | Validation loss 2.789048910140991



 42%|█████████████████████████████▉                                          | 4165/10000 [9:27:27<13:05:52,  8.08s/it]

Epoch: 4164 | Training loss 2.8640158399939537 | Validation loss 2.794684052467346



 42%|█████████████████████████████▉                                          | 4166/10000 [9:27:35<13:07:14,  8.10s/it]

Epoch: 4165 | Training loss 2.8649186119437218 | Validation loss 2.7918801009655



 42%|██████████████████████████████                                          | 4167/10000 [9:27:43<13:06:50,  8.09s/it]

Epoch: 4166 | Training loss 2.863355852663517 | Validation loss 2.794269770383835



 42%|██████████████████████████████                                          | 4168/10000 [9:27:51<13:08:21,  8.11s/it]

Epoch: 4167 | Training loss 2.866594985127449 | Validation loss 2.7918827533721924



 42%|██████████████████████████████                                          | 4169/10000 [9:27:59<13:06:30,  8.09s/it]

Epoch: 4168 | Training loss 2.8680178597569466 | Validation loss 2.7897017300128937



 42%|██████████████████████████████                                          | 4170/10000 [9:28:07<13:05:46,  8.09s/it]

Epoch: 4169 | Training loss 2.8622759133577347 | Validation loss 2.795311152935028



 42%|██████████████████████████████                                          | 4171/10000 [9:28:15<13:03:48,  8.07s/it]

Epoch: 4170 | Training loss 2.866466946899891 | Validation loss 2.8115506172180176



 42%|██████████████████████████████                                          | 4172/10000 [9:28:24<13:04:44,  8.08s/it]

Epoch: 4171 | Training loss 2.8686649054288864 | Validation loss 2.7928944528102875



 42%|██████████████████████████████                                          | 4173/10000 [9:28:32<13:04:35,  8.08s/it]

Epoch: 4172 | Training loss 2.868390627205372 | Validation loss 2.7895984947681427



 42%|██████████████████████████████                                          | 4174/10000 [9:28:40<13:03:16,  8.07s/it]

Epoch: 4173 | Training loss 2.865044169127941 | Validation loss 2.7921824753284454



 42%|██████████████████████████████                                          | 4175/10000 [9:28:48<13:00:27,  8.04s/it]

Epoch: 4174 | Training loss 2.859500303864479 | Validation loss 2.7914322912693024



 42%|██████████████████████████████                                          | 4176/10000 [9:28:56<13:02:35,  8.06s/it]

Epoch: 4175 | Training loss 2.8638642877340317 | Validation loss 2.792854368686676



 42%|██████████████████████████████                                          | 4177/10000 [9:29:04<13:04:32,  8.08s/it]

Epoch: 4176 | Training loss 2.871777296066284 | Validation loss 2.796774923801422



 42%|██████████████████████████████                                          | 4178/10000 [9:29:12<13:04:57,  8.09s/it]

Epoch: 4177 | Training loss 2.86482372879982 | Validation loss 2.7943656146526337



 42%|██████████████████████████████                                          | 4179/10000 [9:29:20<13:07:01,  8.11s/it]

Epoch: 4178 | Training loss 2.8651755526661873 | Validation loss 2.7893759310245514



 42%|██████████████████████████████                                          | 4180/10000 [9:29:28<13:06:12,  8.11s/it]

Epoch: 4179 | Training loss 2.8689452707767487 | Validation loss 2.8039807975292206



 42%|██████████████████████████████                                          | 4181/10000 [9:29:36<13:05:01,  8.09s/it]

Epoch: 4180 | Training loss 2.8755830377340317 | Validation loss 2.794695734977722



 42%|██████████████████████████████                                          | 4182/10000 [9:29:44<13:05:12,  8.10s/it]

Epoch: 4181 | Training loss 2.864141471683979 | Validation loss 2.796701967716217



 42%|██████████████████████████████                                          | 4183/10000 [9:29:53<13:05:08,  8.10s/it]

Epoch: 4182 | Training loss 2.8686137348413467 | Validation loss 2.8013628125190735



 42%|██████████████████████████████                                          | 4184/10000 [9:30:01<13:03:32,  8.08s/it]

Epoch: 4183 | Training loss 2.8686592802405357 | Validation loss 2.793294608592987



 42%|██████████████████████████████▏                                         | 4185/10000 [9:30:09<13:04:52,  8.10s/it]

Epoch: 4184 | Training loss 2.860703766345978 | Validation loss 2.7916169464588165



 42%|██████████████████████████████▏                                         | 4186/10000 [9:30:17<13:05:15,  8.10s/it]

Epoch: 4185 | Training loss 2.8607615381479263 | Validation loss 2.796023190021515



 42%|██████████████████████████████▏                                         | 4187/10000 [9:30:25<13:05:06,  8.10s/it]

Epoch: 4186 | Training loss 2.8718335330486298 | Validation loss 2.787755787372589



 42%|██████████████████████████████▏                                         | 4188/10000 [9:30:33<13:05:17,  8.11s/it]

Epoch: 4187 | Training loss 2.8644043132662773 | Validation loss 2.792598307132721



 42%|██████████████████████████████▏                                         | 4189/10000 [9:30:41<13:06:22,  8.12s/it]

Epoch: 4188 | Training loss 2.8637584671378136 | Validation loss 2.789718270301819



 42%|██████████████████████████████▏                                         | 4190/10000 [9:30:49<13:06:33,  8.12s/it]

Epoch: 4189 | Training loss 2.86406821757555 | Validation loss 2.7885494232177734



 42%|██████████████████████████████▏                                         | 4191/10000 [9:30:57<13:03:32,  8.09s/it]

Epoch: 4190 | Training loss 2.865444429218769 | Validation loss 2.8000230491161346



 42%|██████████████████████████████▏                                         | 4192/10000 [9:31:05<13:01:48,  8.08s/it]

Epoch: 4191 | Training loss 2.8690946251153946 | Validation loss 2.790640652179718



 42%|██████████████████████████████▏                                         | 4193/10000 [9:31:13<13:00:08,  8.06s/it]

Epoch: 4192 | Training loss 2.8578539937734604 | Validation loss 2.785493940114975



 42%|██████████████████████████████▏                                         | 4194/10000 [9:31:21<12:59:52,  8.06s/it]

Epoch: 4193 | Training loss 2.8596265465021133 | Validation loss 2.7898373901844025



 42%|██████████████████████████████▏                                         | 4195/10000 [9:31:29<12:58:18,  8.04s/it]

Epoch: 4194 | Training loss 2.8731406182050705 | Validation loss 2.7945815920829773



 42%|██████████████████████████████▏                                         | 4196/10000 [9:31:38<13:00:17,  8.07s/it]

Epoch: 4195 | Training loss 2.864265590906143 | Validation loss 2.787365823984146



 42%|██████████████████████████████▏                                         | 4197/10000 [9:31:46<13:01:09,  8.08s/it]

Epoch: 4196 | Training loss 2.8632868230342865 | Validation loss 2.7864855229854584



 42%|██████████████████████████████▏                                         | 4198/10000 [9:31:54<13:01:58,  8.09s/it]

Epoch: 4197 | Training loss 2.8647636771202087 | Validation loss 2.7870765328407288



 42%|██████████████████████████████▏                                         | 4199/10000 [9:32:02<13:05:30,  8.12s/it]

Epoch: 4198 | Training loss 2.8654144778847694 | Validation loss 2.795600652694702



 42%|██████████████████████████████▏                                         | 4200/10000 [9:32:10<13:06:09,  8.13s/it]

Epoch: 4199 | Training loss 2.863934464752674 | Validation loss 2.790845900774002



 42%|██████████████████████████████▏                                         | 4201/10000 [9:32:18<13:07:35,  8.15s/it]

Epoch: 4200 | Training loss 2.8652861788868904 | Validation loss 2.7982905209064484



 42%|██████████████████████████████▎                                         | 4202/10000 [9:32:27<13:14:15,  8.22s/it]

Epoch: 4201 | Training loss 2.8688600063323975 | Validation loss 2.793483078479767



 42%|██████████████████████████████▎                                         | 4203/10000 [9:32:35<13:15:25,  8.23s/it]

Epoch: 4202 | Training loss 2.8650931268930435 | Validation loss 2.7941087186336517



 42%|██████████████████████████████▎                                         | 4204/10000 [9:32:43<13:16:25,  8.24s/it]

Epoch: 4203 | Training loss 2.8632050305604935 | Validation loss 2.7884505093097687



 42%|██████████████████████████████▎                                         | 4205/10000 [9:32:51<13:12:17,  8.20s/it]

Epoch: 4204 | Training loss 2.864156424999237 | Validation loss 2.7896094024181366



 42%|██████████████████████████████▎                                         | 4206/10000 [9:32:59<13:09:10,  8.17s/it]

Epoch: 4205 | Training loss 2.8640380650758743 | Validation loss 2.788738191127777



 42%|██████████████████████████████▎                                         | 4207/10000 [9:33:08<13:05:51,  8.14s/it]

Epoch: 4206 | Training loss 2.8667505234479904 | Validation loss 2.7931635081768036



 42%|██████████████████████████████▎                                         | 4208/10000 [9:33:16<13:05:18,  8.14s/it]

Epoch: 4207 | Training loss 2.8675767704844475 | Validation loss 2.7906923294067383



 42%|██████████████████████████████▎                                         | 4209/10000 [9:33:24<13:04:39,  8.13s/it]

Epoch: 4208 | Training loss 2.862176187336445 | Validation loss 2.787869691848755



 42%|██████████████████████████████▎                                         | 4210/10000 [9:33:32<13:02:49,  8.11s/it]

Epoch: 4209 | Training loss 2.862900212407112 | Validation loss 2.786304086446762



 42%|██████████████████████████████▎                                         | 4211/10000 [9:33:40<13:04:36,  8.13s/it]

Epoch: 4210 | Training loss 2.868274837732315 | Validation loss 2.7904829680919647



 42%|██████████████████████████████▎                                         | 4212/10000 [9:33:48<13:03:08,  8.12s/it]

Epoch: 4211 | Training loss 2.863871581852436 | Validation loss 2.7872880399227142



 42%|██████████████████████████████▎                                         | 4213/10000 [9:33:56<13:01:09,  8.10s/it]

Epoch: 4212 | Training loss 2.863839440047741 | Validation loss 2.793240934610367



 42%|██████████████████████████████▎                                         | 4214/10000 [9:34:04<12:58:20,  8.07s/it]

Epoch: 4213 | Training loss 2.8657113537192345 | Validation loss 2.7903395295143127



 42%|██████████████████████████████▎                                         | 4215/10000 [9:34:12<13:00:03,  8.09s/it]

Epoch: 4214 | Training loss 2.872203513979912 | Validation loss 2.7946047484874725



 42%|██████████████████████████████▎                                         | 4216/10000 [9:34:20<13:02:33,  8.12s/it]

Epoch: 4215 | Training loss 2.8645330742001534 | Validation loss 2.7922352254390717



 42%|██████████████████████████████▎                                         | 4217/10000 [9:34:29<13:04:06,  8.14s/it]

Epoch: 4216 | Training loss 2.8615187034010887 | Validation loss 2.787336528301239



 42%|██████████████████████████████▎                                         | 4218/10000 [9:34:37<13:00:13,  8.10s/it]

Epoch: 4217 | Training loss 2.869032621383667 | Validation loss 2.79098579287529



 42%|██████████████████████████████▍                                         | 4219/10000 [9:34:45<12:59:03,  8.09s/it]

Epoch: 4218 | Training loss 2.8660988807678223 | Validation loss 2.795078843832016



 42%|██████████████████████████████▍                                         | 4220/10000 [9:34:53<12:57:42,  8.07s/it]

Epoch: 4219 | Training loss 2.863021083176136 | Validation loss 2.789284974336624



 42%|██████████████████████████████▍                                         | 4221/10000 [9:35:01<12:58:49,  8.09s/it]

Epoch: 4220 | Training loss 2.867214299738407 | Validation loss 2.7897359132766724



 42%|██████████████████████████████▍                                         | 4222/10000 [9:35:09<12:58:14,  8.08s/it]

Epoch: 4221 | Training loss 2.8653990030288696 | Validation loss 2.797672778367996



 42%|██████████████████████████████▍                                         | 4223/10000 [9:35:17<12:56:56,  8.07s/it]

Epoch: 4222 | Training loss 2.867785818874836 | Validation loss 2.7970563173294067



 42%|██████████████████████████████▍                                         | 4224/10000 [9:35:25<12:55:30,  8.06s/it]

Epoch: 4223 | Training loss 2.8677645549178123 | Validation loss 2.7922493517398834



 42%|██████████████████████████████▍                                         | 4225/10000 [9:35:33<12:56:14,  8.06s/it]

Epoch: 4224 | Training loss 2.8666442558169365 | Validation loss 2.792404532432556



 42%|██████████████████████████████▍                                         | 4226/10000 [9:35:41<12:57:00,  8.07s/it]

Epoch: 4225 | Training loss 2.8694404512643814 | Validation loss 2.7896179854869843



 42%|██████████████████████████████▍                                         | 4227/10000 [9:35:49<12:59:29,  8.10s/it]

Epoch: 4226 | Training loss 2.869011342525482 | Validation loss 2.7898913621902466



 42%|██████████████████████████████▍                                         | 4228/10000 [9:35:58<13:01:07,  8.12s/it]

Epoch: 4227 | Training loss 2.8683274164795876 | Validation loss 2.797451615333557



 42%|██████████████████████████████▍                                         | 4229/10000 [9:36:06<13:02:17,  8.13s/it]

Epoch: 4228 | Training loss 2.8634885251522064 | Validation loss 2.7925764322280884



 42%|██████████████████████████████▍                                         | 4230/10000 [9:36:14<12:57:41,  8.09s/it]

Epoch: 4229 | Training loss 2.8646234795451164 | Validation loss 2.7901837527751923



 42%|██████████████████████████████▍                                         | 4231/10000 [9:36:22<12:55:44,  8.07s/it]

Epoch: 4230 | Training loss 2.8627955839037895 | Validation loss 2.7889157831668854



 42%|██████████████████████████████▍                                         | 4232/10000 [9:36:30<12:57:56,  8.09s/it]

Epoch: 4231 | Training loss 2.8680678084492683 | Validation loss 2.7940749526023865



 42%|██████████████████████████████▍                                         | 4233/10000 [9:36:38<12:59:01,  8.11s/it]

Epoch: 4232 | Training loss 2.8616094812750816 | Validation loss 2.7886904180049896



 42%|██████████████████████████████▍                                         | 4234/10000 [9:36:46<12:59:50,  8.11s/it]

Epoch: 4233 | Training loss 2.8626943081617355 | Validation loss 2.7908836901187897



 42%|██████████████████████████████▍                                         | 4235/10000 [9:36:54<12:58:00,  8.10s/it]

Epoch: 4234 | Training loss 2.8687283396720886 | Validation loss 2.7949342727661133



 42%|██████████████████████████████▍                                         | 4236/10000 [9:37:02<12:57:06,  8.09s/it]

Epoch: 4235 | Training loss 2.864669293165207 | Validation loss 2.791196495294571



 42%|██████████████████████████████▌                                         | 4237/10000 [9:37:10<13:00:00,  8.12s/it]

Epoch: 4236 | Training loss 2.863713227212429 | Validation loss 2.7961139380931854



 42%|██████████████████████████████▌                                         | 4238/10000 [9:37:19<12:58:51,  8.11s/it]

Epoch: 4237 | Training loss 2.8683695569634438 | Validation loss 2.793370336294174



 42%|██████████████████████████████▌                                         | 4239/10000 [9:37:27<12:58:11,  8.10s/it]

Epoch: 4238 | Training loss 2.866851195693016 | Validation loss 2.789313703775406



 42%|██████████████████████████████▌                                         | 4240/10000 [9:37:35<12:56:09,  8.09s/it]

Epoch: 4239 | Training loss 2.8697608560323715 | Validation loss 2.788609653711319



 42%|██████████████████████████████▌                                         | 4241/10000 [9:37:43<12:53:55,  8.06s/it]

Epoch: 4240 | Training loss 2.866688646376133 | Validation loss 2.793489009141922



 42%|██████████████████████████████▌                                         | 4242/10000 [9:37:51<12:58:31,  8.11s/it]

Epoch: 4241 | Training loss 2.866114728152752 | Validation loss 2.789153069257736



 42%|██████████████████████████████▌                                         | 4243/10000 [9:37:59<12:58:04,  8.11s/it]

Epoch: 4242 | Training loss 2.863000810146332 | Validation loss 2.7879447638988495



 42%|██████████████████████████████▌                                         | 4244/10000 [9:38:07<12:57:16,  8.10s/it]

Epoch: 4243 | Training loss 2.864775985479355 | Validation loss 2.7916444838047028



 42%|██████████████████████████████▌                                         | 4245/10000 [9:38:15<12:55:18,  8.08s/it]

Epoch: 4244 | Training loss 2.855812191963196 | Validation loss 2.787747025489807



 42%|██████████████████████████████▌                                         | 4246/10000 [9:38:23<12:52:40,  8.06s/it]

Epoch: 4245 | Training loss 2.8619513884186745 | Validation loss 2.788588911294937



 42%|██████████████████████████████▌                                         | 4247/10000 [9:38:31<12:52:53,  8.06s/it]

Epoch: 4246 | Training loss 2.8642437011003494 | Validation loss 2.7891373336315155



 42%|██████████████████████████████▌                                         | 4248/10000 [9:38:39<12:53:02,  8.06s/it]

Epoch: 4247 | Training loss 2.8647735565900803 | Validation loss 2.791189134120941



 42%|██████████████████████████████▌                                         | 4249/10000 [9:38:47<12:52:38,  8.06s/it]

Epoch: 4248 | Training loss 2.868161104619503 | Validation loss 2.7934147119522095



 42%|██████████████████████████████▌                                         | 4250/10000 [9:38:55<12:53:40,  8.07s/it]

Epoch: 4249 | Training loss 2.871107243001461 | Validation loss 2.789328873157501



 43%|██████████████████████████████▌                                         | 4251/10000 [9:39:03<12:51:17,  8.05s/it]

Epoch: 4250 | Training loss 2.863594599068165 | Validation loss 2.788689464330673



 43%|██████████████████████████████▌                                         | 4252/10000 [9:39:12<12:54:31,  8.08s/it]

Epoch: 4251 | Training loss 2.8708490058779716 | Validation loss 2.800825148820877



 43%|██████████████████████████████▌                                         | 4253/10000 [9:39:20<12:53:30,  8.08s/it]

Epoch: 4252 | Training loss 2.863804407417774 | Validation loss 2.7964332699775696



 43%|██████████████████████████████▋                                         | 4254/10000 [9:39:28<12:53:04,  8.07s/it]

Epoch: 4253 | Training loss 2.867944560945034 | Validation loss 2.798017144203186



 43%|██████████████████████████████▋                                         | 4255/10000 [9:39:36<12:52:54,  8.07s/it]

Epoch: 4254 | Training loss 2.86582912504673 | Validation loss 2.785615473985672



 43%|██████████████████████████████▋                                         | 4256/10000 [9:39:44<12:54:59,  8.10s/it]

Epoch: 4255 | Training loss 2.866682842373848 | Validation loss 2.7874497175216675



 43%|██████████████████████████████▋                                         | 4257/10000 [9:39:52<12:58:09,  8.13s/it]

Epoch: 4256 | Training loss 2.867716535925865 | Validation loss 2.79072442650795



 43%|██████████████████████████████▋                                         | 4258/10000 [9:40:00<12:55:51,  8.11s/it]

Epoch: 4257 | Training loss 2.867750681936741 | Validation loss 2.789896607398987



 43%|██████████████████████████████▋                                         | 4259/10000 [9:40:08<12:54:14,  8.09s/it]

Epoch: 4258 | Training loss 2.857099160552025 | Validation loss 2.7900571823120117



 43%|██████████████████████████████▋                                         | 4260/10000 [9:40:16<12:53:51,  8.09s/it]

Epoch: 4259 | Training loss 2.86479951441288 | Validation loss 2.7907654643058777



 43%|██████████████████████████████▋                                         | 4261/10000 [9:40:24<12:55:25,  8.11s/it]

Epoch: 4260 | Training loss 2.8645179271698 | Validation loss 2.800644189119339



 43%|██████████████████████████████▋                                         | 4262/10000 [9:40:33<12:56:50,  8.12s/it]

Epoch: 4261 | Training loss 2.8624418526887894 | Validation loss 2.787765383720398



 43%|██████████████████████████████▋                                         | 4263/10000 [9:40:41<12:52:04,  8.07s/it]

Epoch: 4262 | Training loss 2.8664056584239006 | Validation loss 2.791225492954254



 43%|██████████████████████████████▋                                         | 4264/10000 [9:40:49<12:52:55,  8.08s/it]

Epoch: 4263 | Training loss 2.868586987257004 | Validation loss 2.7888403236865997



 43%|██████████████████████████████▋                                         | 4265/10000 [9:40:57<12:53:21,  8.09s/it]

Epoch: 4264 | Training loss 2.863336257636547 | Validation loss 2.7940552830696106



 43%|██████████████████████████████▋                                         | 4266/10000 [9:41:05<12:50:54,  8.07s/it]

Epoch: 4265 | Training loss 2.862874411046505 | Validation loss 2.791049510240555



 43%|██████████████████████████████▋                                         | 4267/10000 [9:41:13<12:51:51,  8.08s/it]

Epoch: 4266 | Training loss 2.867771327495575 | Validation loss 2.7955897450447083



 43%|██████████████████████████████▋                                         | 4268/10000 [9:41:21<12:52:25,  8.09s/it]

Epoch: 4267 | Training loss 2.858521342277527 | Validation loss 2.7882949709892273



 43%|██████████████████████████████▋                                         | 4269/10000 [9:41:29<12:51:52,  8.08s/it]

Epoch: 4268 | Training loss 2.8648870810866356 | Validation loss 2.788429915904999



 43%|██████████████████████████████▋                                         | 4270/10000 [9:41:37<12:51:39,  8.08s/it]

Epoch: 4269 | Training loss 2.8656696751713753 | Validation loss 2.784652143716812



 43%|██████████████████████████████▊                                         | 4271/10000 [9:41:45<12:58:27,  8.15s/it]

Epoch: 4270 | Training loss 2.867207407951355 | Validation loss 2.795067310333252



 43%|██████████████████████████████▊                                         | 4272/10000 [9:41:54<12:55:06,  8.12s/it]

Epoch: 4271 | Training loss 2.859959699213505 | Validation loss 2.7930214405059814



 43%|██████████████████████████████▊                                         | 4273/10000 [9:42:02<12:54:30,  8.11s/it]

Epoch: 4272 | Training loss 2.8680860176682472 | Validation loss 2.788792908191681



 43%|██████████████████████████████▊                                         | 4274/10000 [9:42:10<12:56:10,  8.13s/it]

Epoch: 4273 | Training loss 2.8621274903416634 | Validation loss 2.790294498205185



 43%|██████████████████████████████▊                                         | 4275/10000 [9:42:18<12:55:10,  8.12s/it]

Epoch: 4274 | Training loss 2.8644217029213905 | Validation loss 2.790439873933792



 43%|██████████████████████████████▊                                         | 4276/10000 [9:42:26<12:54:25,  8.12s/it]

Epoch: 4275 | Training loss 2.8686994090676308 | Validation loss 2.7988196313381195



 43%|██████████████████████████████▊                                         | 4277/10000 [9:42:34<12:56:11,  8.14s/it]

Epoch: 4276 | Training loss 2.8671927079558372 | Validation loss 2.7941399216651917



 43%|██████████████████████████████▊                                         | 4278/10000 [9:42:42<12:59:37,  8.18s/it]

Epoch: 4277 | Training loss 2.861162446439266 | Validation loss 2.7904467582702637



 43%|██████████████████████████████▊                                         | 4279/10000 [9:42:51<12:59:13,  8.17s/it]

Epoch: 4278 | Training loss 2.864662915468216 | Validation loss 2.789112687110901



 43%|██████████████████████████████▊                                         | 4280/10000 [9:42:59<12:54:01,  8.12s/it]

Epoch: 4279 | Training loss 2.8631347119808197 | Validation loss 2.7874357998371124



 43%|██████████████████████████████▊                                         | 4281/10000 [9:43:07<12:51:14,  8.09s/it]

Epoch: 4280 | Training loss 2.866267278790474 | Validation loss 2.7922481894493103



 43%|██████████████████████████████▊                                         | 4282/10000 [9:43:15<12:51:50,  8.10s/it]

Epoch: 4281 | Training loss 2.8636025115847588 | Validation loss 2.791605532169342



 43%|██████████████████████████████▊                                         | 4283/10000 [9:43:23<12:51:51,  8.10s/it]

Epoch: 4282 | Training loss 2.8727787658572197 | Validation loss 2.791667640209198



 43%|██████████████████████████████▊                                         | 4284/10000 [9:43:31<12:53:10,  8.12s/it]

Epoch: 4283 | Training loss 2.8589681833982468 | Validation loss 2.785693258047104



 43%|██████████████████████████████▊                                         | 4285/10000 [9:43:39<12:50:25,  8.09s/it]

Epoch: 4284 | Training loss 2.8694455698132515 | Validation loss 2.7918494045734406



 43%|██████████████████████████████▊                                         | 4286/10000 [9:43:47<12:52:26,  8.11s/it]

Epoch: 4285 | Training loss 2.86199276894331 | Validation loss 2.786637306213379



 43%|██████████████████████████████▊                                         | 4287/10000 [9:43:55<12:51:39,  8.10s/it]

Epoch: 4286 | Training loss 2.8655204698443413 | Validation loss 2.792685627937317



 43%|██████████████████████████████▊                                         | 4288/10000 [9:44:03<12:51:30,  8.10s/it]

Epoch: 4287 | Training loss 2.8596885353326797 | Validation loss 2.787344992160797



 43%|██████████████████████████████▉                                         | 4289/10000 [9:44:12<12:51:28,  8.11s/it]

Epoch: 4288 | Training loss 2.8599095791578293 | Validation loss 2.785210996866226



 43%|██████████████████████████████▉                                         | 4290/10000 [9:44:20<12:51:38,  8.11s/it]

Epoch: 4289 | Training loss 2.866717219352722 | Validation loss 2.788569688796997



 43%|██████████████████████████████▉                                         | 4291/10000 [9:44:28<12:53:32,  8.13s/it]

Epoch: 4290 | Training loss 2.8701501339673996 | Validation loss 2.7950960993766785



 43%|██████████████████████████████▉                                         | 4292/10000 [9:44:36<12:53:01,  8.13s/it]

Epoch: 4291 | Training loss 2.8675162121653557 | Validation loss 2.790062338113785



 43%|██████████████████████████████▉                                         | 4293/10000 [9:44:44<12:54:53,  8.15s/it]

Epoch: 4292 | Training loss 2.8666614666581154 | Validation loss 2.7954595386981964



 43%|██████████████████████████████▉                                         | 4294/10000 [9:44:52<12:49:04,  8.09s/it]

Epoch: 4293 | Training loss 2.863543339073658 | Validation loss 2.788581520318985



 43%|██████████████████████████████▉                                         | 4295/10000 [9:45:00<12:51:07,  8.11s/it]

Epoch: 4294 | Training loss 2.864900268614292 | Validation loss 2.7900676131248474



 43%|██████████████████████████████▉                                         | 4296/10000 [9:45:08<12:50:14,  8.10s/it]

Epoch: 4295 | Training loss 2.8606594800949097 | Validation loss 2.783866196870804



 43%|██████████████████████████████▉                                         | 4297/10000 [9:45:17<12:53:36,  8.14s/it]

Epoch: 4296 | Training loss 2.8637116998434067 | Validation loss 2.7889596223831177



 43%|██████████████████████████████▉                                         | 4298/10000 [9:45:25<12:51:07,  8.11s/it]

Epoch: 4297 | Training loss 2.8687504455447197 | Validation loss 2.790798932313919



 43%|██████████████████████████████▉                                         | 4299/10000 [9:45:33<12:48:21,  8.09s/it]

Epoch: 4298 | Training loss 2.8683556988835335 | Validation loss 2.79169824719429



 43%|██████████████████████████████▉                                         | 4300/10000 [9:45:41<12:51:23,  8.12s/it]

Epoch: 4299 | Training loss 2.867498941719532 | Validation loss 2.7905898988246918



 43%|██████████████████████████████▉                                         | 4301/10000 [9:45:49<12:48:08,  8.09s/it]

Epoch: 4300 | Training loss 2.8641528114676476 | Validation loss 2.7867570221424103



 43%|██████████████████████████████▉                                         | 4302/10000 [9:45:57<12:44:47,  8.05s/it]

Epoch: 4301 | Training loss 2.864025078713894 | Validation loss 2.7903199195861816



 43%|██████████████████████████████▉                                         | 4303/10000 [9:46:05<12:42:22,  8.03s/it]

Epoch: 4302 | Training loss 2.870961494743824 | Validation loss 2.7963988184928894



 43%|██████████████████████████████▉                                         | 4304/10000 [9:46:13<12:44:49,  8.06s/it]

Epoch: 4303 | Training loss 2.866334095597267 | Validation loss 2.7916642129421234



 43%|██████████████████████████████▉                                         | 4305/10000 [9:46:21<12:41:23,  8.02s/it]

Epoch: 4304 | Training loss 2.863770015537739 | Validation loss 2.789870500564575



 43%|███████████████████████████████                                         | 4306/10000 [9:46:29<12:49:21,  8.11s/it]

Epoch: 4305 | Training loss 2.8716346994042397 | Validation loss 2.792393773794174



 43%|███████████████████████████████                                         | 4307/10000 [9:46:37<12:49:29,  8.11s/it]

Epoch: 4306 | Training loss 2.865547738969326 | Validation loss 2.7899453938007355



 43%|███████████████████████████████                                         | 4308/10000 [9:46:45<12:46:25,  8.08s/it]

Epoch: 4307 | Training loss 2.8634384721517563 | Validation loss 2.791633725166321



 43%|███████████████████████████████                                         | 4309/10000 [9:46:53<12:47:24,  8.09s/it]

Epoch: 4308 | Training loss 2.8645247519016266 | Validation loss 2.7886236011981964



 43%|███████████████████████████████                                         | 4310/10000 [9:47:01<12:46:40,  8.08s/it]

Epoch: 4309 | Training loss 2.864993318915367 | Validation loss 2.7870909571647644



 43%|███████████████████████████████                                         | 4311/10000 [9:47:10<12:46:09,  8.08s/it]

Epoch: 4310 | Training loss 2.871456518769264 | Validation loss 2.7921541035175323



 43%|███████████████████████████████                                         | 4312/10000 [9:47:18<12:43:27,  8.05s/it]

Epoch: 4311 | Training loss 2.8661789000034332 | Validation loss 2.788830488920212



 43%|███████████████████████████████                                         | 4313/10000 [9:47:26<12:44:13,  8.06s/it]

Epoch: 4312 | Training loss 2.871539331972599 | Validation loss 2.788671374320984



 43%|███████████████████████████████                                         | 4314/10000 [9:47:34<12:45:36,  8.08s/it]

Epoch: 4313 | Training loss 2.865881137549877 | Validation loss 2.789331465959549



 43%|███████████████████████████████                                         | 4315/10000 [9:47:42<12:45:37,  8.08s/it]

Epoch: 4314 | Training loss 2.8623168393969536 | Validation loss 2.796264171600342



 43%|███████████████████████████████                                         | 4316/10000 [9:47:50<12:43:24,  8.06s/it]

Epoch: 4315 | Training loss 2.8690171614289284 | Validation loss 2.7930958569049835



 43%|███████████████████████████████                                         | 4317/10000 [9:47:58<12:47:11,  8.10s/it]

Epoch: 4316 | Training loss 2.8721065521240234 | Validation loss 2.799336940050125



 43%|███████████████████████████████                                         | 4318/10000 [9:48:06<12:49:01,  8.12s/it]

Epoch: 4317 | Training loss 2.869288444519043 | Validation loss 2.792422831058502



 43%|███████████████████████████████                                         | 4319/10000 [9:48:14<12:51:42,  8.15s/it]

Epoch: 4318 | Training loss 2.860545225441456 | Validation loss 2.7911792993545532



 43%|███████████████████████████████                                         | 4320/10000 [9:48:22<12:48:54,  8.12s/it]

Epoch: 4319 | Training loss 2.865348197519779 | Validation loss 2.789436787366867



 43%|███████████████████████████████                                         | 4321/10000 [9:48:31<12:49:03,  8.13s/it]

Epoch: 4320 | Training loss 2.8694555833935738 | Validation loss 2.793531209230423



 43%|███████████████████████████████                                         | 4322/10000 [9:48:39<12:46:54,  8.10s/it]

Epoch: 4321 | Training loss 2.872193932533264 | Validation loss 2.789394795894623



 43%|███████████████████████████████▏                                        | 4323/10000 [9:48:47<12:49:22,  8.13s/it]

Epoch: 4322 | Training loss 2.867216430604458 | Validation loss 2.788567751646042



 43%|███████████████████████████████▏                                        | 4324/10000 [9:48:55<12:46:37,  8.10s/it]

Epoch: 4323 | Training loss 2.8656068220734596 | Validation loss 2.7902090549468994



 43%|███████████████████████████████▏                                        | 4325/10000 [9:49:03<12:45:35,  8.09s/it]

Epoch: 4324 | Training loss 2.8638197109103203 | Validation loss 2.7915188670158386



 43%|███████████████████████████████▏                                        | 4326/10000 [9:49:11<12:43:55,  8.08s/it]

Epoch: 4325 | Training loss 2.861878901720047 | Validation loss 2.7948668599128723



 43%|███████████████████████████████▏                                        | 4327/10000 [9:49:19<12:46:36,  8.11s/it]

Epoch: 4326 | Training loss 2.8668590262532234 | Validation loss 2.7896811068058014



 43%|███████████████████████████████▏                                        | 4328/10000 [9:49:27<12:47:59,  8.12s/it]

Epoch: 4327 | Training loss 2.8651277124881744 | Validation loss 2.7916943430900574



 43%|███████████████████████████████▏                                        | 4329/10000 [9:49:35<12:46:19,  8.11s/it]

Epoch: 4328 | Training loss 2.8658900260925293 | Validation loss 2.7924949526786804



 43%|███████████████████████████████▏                                        | 4330/10000 [9:49:44<12:47:22,  8.12s/it]

Epoch: 4329 | Training loss 2.863214649260044 | Validation loss 2.788391202688217



 43%|███████████████████████████████▏                                        | 4331/10000 [9:49:52<12:48:02,  8.13s/it]

Epoch: 4330 | Training loss 2.859602265059948 | Validation loss 2.7892340421676636



 43%|███████████████████████████████▏                                        | 4332/10000 [9:50:00<12:46:14,  8.11s/it]

Epoch: 4331 | Training loss 2.869487799704075 | Validation loss 2.7917320132255554



 43%|███████████████████████████████▏                                        | 4333/10000 [9:50:08<12:46:41,  8.12s/it]

Epoch: 4332 | Training loss 2.8663563206791878 | Validation loss 2.7862931191921234



 43%|███████████████████████████████▏                                        | 4334/10000 [9:50:16<12:46:32,  8.12s/it]

Epoch: 4333 | Training loss 2.872960075736046 | Validation loss 2.7887299060821533



 43%|███████████████████████████████▏                                        | 4335/10000 [9:50:24<12:45:57,  8.11s/it]

Epoch: 4334 | Training loss 2.867167480289936 | Validation loss 2.7889744639396667



 43%|███████████████████████████████▏                                        | 4336/10000 [9:50:32<12:42:53,  8.08s/it]

Epoch: 4335 | Training loss 2.8679747730493546 | Validation loss 2.7982292473316193



 43%|███████████████████████████████▏                                        | 4337/10000 [9:50:40<12:41:36,  8.07s/it]

Epoch: 4336 | Training loss 2.8676932230591774 | Validation loss 2.793471723794937



 43%|███████████████████████████████▏                                        | 4338/10000 [9:50:48<12:38:53,  8.04s/it]

Epoch: 4337 | Training loss 2.8696719333529472 | Validation loss 2.7978015542030334



 43%|███████████████████████████████▏                                        | 4339/10000 [9:50:56<12:41:50,  8.07s/it]

Epoch: 4338 | Training loss 2.864092707633972 | Validation loss 2.788573145866394



 43%|███████████████████████████████▏                                        | 4340/10000 [9:51:04<12:40:18,  8.06s/it]

Epoch: 4339 | Training loss 2.8682283386588097 | Validation loss 2.788336396217346



 43%|███████████████████████████████▎                                        | 4341/10000 [9:51:12<12:40:56,  8.07s/it]

Epoch: 4340 | Training loss 2.8622871935367584 | Validation loss 2.7995280623435974



 43%|███████████████████████████████▎                                        | 4342/10000 [9:51:21<12:42:38,  8.09s/it]

Epoch: 4341 | Training loss 2.863898754119873 | Validation loss 2.790600538253784



 43%|███████████████████████████████▎                                        | 4343/10000 [9:51:29<12:42:25,  8.09s/it]

Epoch: 4342 | Training loss 2.8662728145718575 | Validation loss 2.789581924676895



 43%|███████████████████████████████▎                                        | 4344/10000 [9:51:37<12:45:23,  8.12s/it]

Epoch: 4343 | Training loss 2.8660384491086006 | Validation loss 2.796884059906006



 43%|███████████████████████████████▎                                        | 4345/10000 [9:51:45<12:43:56,  8.11s/it]

Epoch: 4344 | Training loss 2.8673555180430412 | Validation loss 2.7958686649799347



 43%|███████████████████████████████▎                                        | 4346/10000 [9:51:53<12:41:52,  8.08s/it]

Epoch: 4345 | Training loss 2.8630343601107597 | Validation loss 2.790043145418167



 43%|███████████████████████████████▎                                        | 4347/10000 [9:52:01<12:41:23,  8.08s/it]

Epoch: 4346 | Training loss 2.866167090833187 | Validation loss 2.7874141931533813



 43%|███████████████████████████████▎                                        | 4348/10000 [9:52:09<12:42:19,  8.09s/it]

Epoch: 4347 | Training loss 2.864943750202656 | Validation loss 2.787326991558075



 43%|███████████████████████████████▎                                        | 4349/10000 [9:52:17<12:42:07,  8.09s/it]

Epoch: 4348 | Training loss 2.866198807954788 | Validation loss 2.7913343012332916



 44%|███████████████████████████████▎                                        | 4350/10000 [9:52:25<12:45:20,  8.13s/it]

Epoch: 4349 | Training loss 2.867543935775757 | Validation loss 2.789712071418762



 44%|███████████████████████████████▎                                        | 4351/10000 [9:52:33<12:42:16,  8.10s/it]

Epoch: 4350 | Training loss 2.8677370622754097 | Validation loss 2.7891333997249603



 44%|███████████████████████████████▎                                        | 4352/10000 [9:52:42<12:41:26,  8.09s/it]

Epoch: 4351 | Training loss 2.871296651661396 | Validation loss 2.790234237909317



 44%|███████████████████████████████▎                                        | 4353/10000 [9:52:50<12:42:56,  8.11s/it]

Epoch: 4352 | Training loss 2.864329107105732 | Validation loss 2.7919411957263947



 44%|███████████████████████████████▎                                        | 4354/10000 [9:52:58<12:41:19,  8.09s/it]

Epoch: 4353 | Training loss 2.8675059229135513 | Validation loss 2.7916630804538727



 44%|███████████████████████████████▎                                        | 4355/10000 [9:53:06<12:43:45,  8.12s/it]

Epoch: 4354 | Training loss 2.8677299544215202 | Validation loss 2.789805233478546



 44%|███████████████████████████████▎                                        | 4356/10000 [9:53:14<12:43:43,  8.12s/it]

Epoch: 4355 | Training loss 2.8693539798259735 | Validation loss 2.7861981093883514



 44%|███████████████████████████████▎                                        | 4357/10000 [9:53:22<12:43:34,  8.12s/it]

Epoch: 4356 | Training loss 2.8664074018597603 | Validation loss 2.7928650081157684



 44%|███████████████████████████████▍                                        | 4358/10000 [9:53:30<12:43:03,  8.11s/it]

Epoch: 4357 | Training loss 2.8666645362973213 | Validation loss 2.7939247488975525



 44%|███████████████████████████████▍                                        | 4359/10000 [9:53:38<12:41:21,  8.10s/it]

Epoch: 4358 | Training loss 2.8651627898216248 | Validation loss 2.7895428240299225



 44%|███████████████████████████████▍                                        | 4360/10000 [9:53:46<12:41:19,  8.10s/it]

Epoch: 4359 | Training loss 2.8697254061698914 | Validation loss 2.7922975420951843



 44%|███████████████████████████████▍                                        | 4361/10000 [9:53:55<12:41:16,  8.10s/it]

Epoch: 4360 | Training loss 2.8662558123469353 | Validation loss 2.7873774468898773



 44%|███████████████████████████████▍                                        | 4362/10000 [9:54:03<12:41:59,  8.11s/it]

Epoch: 4361 | Training loss 2.8679866641759872 | Validation loss 2.7865719497203827



 44%|███████████████████████████████▍                                        | 4363/10000 [9:54:11<12:44:24,  8.14s/it]

Epoch: 4362 | Training loss 2.8661207109689713 | Validation loss 2.7968796491622925



 44%|███████████████████████████████▍                                        | 4364/10000 [9:54:19<12:44:09,  8.14s/it]

Epoch: 4363 | Training loss 2.860365353524685 | Validation loss 2.7844517827033997



 44%|███████████████████████████████▍                                        | 4365/10000 [9:54:27<12:44:03,  8.14s/it]

Epoch: 4364 | Training loss 2.864580310881138 | Validation loss 2.7950344383716583



 44%|███████████████████████████████▍                                        | 4366/10000 [9:54:35<12:44:20,  8.14s/it]

Epoch: 4365 | Training loss 2.8641325309872627 | Validation loss 2.7943120896816254



 44%|███████████████████████████████▍                                        | 4367/10000 [9:54:43<12:42:13,  8.12s/it]

Epoch: 4366 | Training loss 2.862903445959091 | Validation loss 2.7861721515655518



 44%|███████████████████████████████▍                                        | 4368/10000 [9:54:51<12:41:44,  8.12s/it]

Epoch: 4367 | Training loss 2.8670638352632523 | Validation loss 2.7909596264362335



 44%|███████████████████████████████▍                                        | 4369/10000 [9:55:00<12:40:50,  8.11s/it]

Epoch: 4368 | Training loss 2.8622641637921333 | Validation loss 2.7867905497550964



 44%|███████████████████████████████▍                                        | 4370/10000 [9:55:08<12:40:57,  8.11s/it]

Epoch: 4369 | Training loss 2.868325337767601 | Validation loss 2.7914668321609497



 44%|███████████████████████████████▍                                        | 4371/10000 [9:55:16<12:43:31,  8.14s/it]

Epoch: 4370 | Training loss 2.8682270646095276 | Validation loss 2.788940727710724



 44%|███████████████████████████████▍                                        | 4372/10000 [9:55:24<12:45:26,  8.16s/it]

Epoch: 4371 | Training loss 2.859379157423973 | Validation loss 2.795588582754135



 44%|███████████████████████████████▍                                        | 4373/10000 [9:55:32<12:45:48,  8.17s/it]

Epoch: 4372 | Training loss 2.868801087141037 | Validation loss 2.791768401861191



 44%|███████████████████████████████▍                                        | 4374/10000 [9:55:40<12:46:08,  8.17s/it]

Epoch: 4373 | Training loss 2.8649665117263794 | Validation loss 2.7918678522109985



 44%|███████████████████████████████▌                                        | 4375/10000 [9:55:49<12:45:24,  8.16s/it]

Epoch: 4374 | Training loss 2.8661841675639153 | Validation loss 2.7908114194869995



 44%|███████████████████████████████▌                                        | 4376/10000 [9:55:57<12:42:48,  8.14s/it]

Epoch: 4375 | Training loss 2.867111898958683 | Validation loss 2.7923114895820618



 44%|███████████████████████████████▌                                        | 4377/10000 [9:56:05<12:42:32,  8.14s/it]

Epoch: 4376 | Training loss 2.8613279163837433 | Validation loss 2.7887326180934906



 44%|███████████████████████████████▌                                        | 4378/10000 [9:56:13<12:45:56,  8.17s/it]

Epoch: 4377 | Training loss 2.8733593448996544 | Validation loss 2.795001983642578



 44%|███████████████████████████████▌                                        | 4379/10000 [9:56:21<12:44:48,  8.16s/it]

Epoch: 4378 | Training loss 2.8622803762555122 | Validation loss 2.7956123650074005



 44%|███████████████████████████████▌                                        | 4380/10000 [9:56:29<12:46:30,  8.18s/it]

Epoch: 4379 | Training loss 2.8674900755286217 | Validation loss 2.799386501312256



 44%|███████████████████████████████▌                                        | 4381/10000 [9:56:38<12:46:17,  8.18s/it]

Epoch: 4380 | Training loss 2.8658995181322098 | Validation loss 2.7907101213932037



 44%|███████████████████████████████▌                                        | 4382/10000 [9:56:46<12:44:36,  8.17s/it]

Epoch: 4381 | Training loss 2.8682133331894875 | Validation loss 2.790162891149521



 44%|███████████████████████████████▌                                        | 4383/10000 [9:56:54<12:41:50,  8.14s/it]

Epoch: 4382 | Training loss 2.868350699543953 | Validation loss 2.7922594845294952



 44%|███████████████████████████████▌                                        | 4384/10000 [9:57:02<12:44:28,  8.17s/it]

Epoch: 4383 | Training loss 2.8660311847925186 | Validation loss 2.7944756150245667



 44%|███████████████████████████████▌                                        | 4385/10000 [9:57:10<12:47:44,  8.20s/it]

Epoch: 4384 | Training loss 2.861855275928974 | Validation loss 2.786562919616699



 44%|███████████████████████████████▌                                        | 4386/10000 [9:57:19<12:49:10,  8.22s/it]

Epoch: 4385 | Training loss 2.8609566390514374 | Validation loss 2.7884423434734344



 44%|███████████████████████████████▌                                        | 4387/10000 [9:57:27<12:48:47,  8.22s/it]

Epoch: 4386 | Training loss 2.8651896864175797 | Validation loss 2.7893351316452026



 44%|███████████████████████████████▌                                        | 4388/10000 [9:57:35<12:47:07,  8.20s/it]

Epoch: 4387 | Training loss 2.864246793091297 | Validation loss 2.786046117544174



 44%|███████████████████████████████▌                                        | 4389/10000 [9:57:43<12:44:41,  8.18s/it]

Epoch: 4388 | Training loss 2.8648928105831146 | Validation loss 2.791975051164627



 44%|███████████████████████████████▌                                        | 4390/10000 [9:57:51<12:41:38,  8.15s/it]

Epoch: 4389 | Training loss 2.86564788967371 | Validation loss 2.791046053171158



 44%|███████████████████████████████▌                                        | 4391/10000 [9:57:59<12:45:17,  8.19s/it]

Epoch: 4390 | Training loss 2.864445686340332 | Validation loss 2.791543334722519



 44%|███████████████████████████████▌                                        | 4392/10000 [9:58:08<12:43:48,  8.17s/it]

Epoch: 4391 | Training loss 2.864560142159462 | Validation loss 2.7900440990924835



 44%|███████████████████████████████▋                                        | 4393/10000 [9:58:16<12:42:09,  8.16s/it]

Epoch: 4392 | Training loss 2.868470624089241 | Validation loss 2.7863203585147858



 44%|███████████████████████████████▋                                        | 4394/10000 [9:58:24<12:42:39,  8.16s/it]

Epoch: 4393 | Training loss 2.862341418862343 | Validation loss 2.792071133852005



 44%|███████████████████████████████▋                                        | 4395/10000 [9:58:32<12:41:43,  8.15s/it]

Epoch: 4394 | Training loss 2.868860274553299 | Validation loss 2.7928745448589325



 44%|███████████████████████████████▋                                        | 4396/10000 [9:58:40<12:41:04,  8.15s/it]

Epoch: 4395 | Training loss 2.8614763766527176 | Validation loss 2.7879139184951782



 44%|███████████████████████████████▋                                        | 4397/10000 [9:58:48<12:44:01,  8.18s/it]

Epoch: 4396 | Training loss 2.8680636137723923 | Validation loss 2.7884235978126526



 44%|███████████████████████████████▋                                        | 4398/10000 [9:58:56<12:42:07,  8.16s/it]

Epoch: 4397 | Training loss 2.866915211081505 | Validation loss 2.786310613155365



 44%|███████████████████████████████▋                                        | 4399/10000 [9:59:05<12:43:49,  8.18s/it]

Epoch: 4398 | Training loss 2.8715059757232666 | Validation loss 2.790372312068939



 44%|███████████████████████████████▋                                        | 4400/10000 [9:59:13<12:44:56,  8.20s/it]

Epoch: 4399 | Training loss 2.8681925386190414 | Validation loss 2.787246912717819



 44%|███████████████████████████████▋                                        | 4401/10000 [9:59:21<12:40:25,  8.15s/it]

Epoch: 4400 | Training loss 2.8696160838007927 | Validation loss 2.7898798882961273



 44%|███████████████████████████████▋                                        | 4402/10000 [9:59:29<12:39:53,  8.14s/it]

Epoch: 4401 | Training loss 2.8648571744561195 | Validation loss 2.791459560394287



 44%|███████████████████████████████▋                                        | 4403/10000 [9:59:37<12:40:57,  8.16s/it]

Epoch: 4402 | Training loss 2.866368167102337 | Validation loss 2.7896171510219574



 44%|███████████████████████████████▋                                        | 4404/10000 [9:59:45<12:41:26,  8.16s/it]

Epoch: 4403 | Training loss 2.8584411442279816 | Validation loss 2.784980833530426



 44%|███████████████████████████████▋                                        | 4405/10000 [9:59:54<12:40:37,  8.16s/it]

Epoch: 4404 | Training loss 2.863815724849701 | Validation loss 2.790825694799423



 44%|███████████████████████████████▎                                       | 4406/10000 [10:00:02<12:39:24,  8.15s/it]

Epoch: 4405 | Training loss 2.873668350279331 | Validation loss 2.7911471128463745



 44%|███████████████████████████████▎                                       | 4407/10000 [10:00:10<12:42:50,  8.18s/it]

Epoch: 4406 | Training loss 2.867921084165573 | Validation loss 2.789978712797165



 44%|███████████████████████████████▎                                       | 4408/10000 [10:00:18<12:39:44,  8.15s/it]

Epoch: 4407 | Training loss 2.8660483807325363 | Validation loss 2.7874693274497986



 44%|███████████████████████████████▎                                       | 4409/10000 [10:00:26<12:45:01,  8.21s/it]

Epoch: 4408 | Training loss 2.867883652448654 | Validation loss 2.7938159108161926



 44%|███████████████████████████████▎                                       | 4410/10000 [10:00:34<12:39:56,  8.16s/it]

Epoch: 4409 | Training loss 2.8668733462691307 | Validation loss 2.7906035780906677



 44%|███████████████████████████████▎                                       | 4411/10000 [10:00:43<12:38:43,  8.15s/it]

Epoch: 4410 | Training loss 2.8643285930156708 | Validation loss 2.790490597486496



 44%|███████████████████████████████▎                                       | 4412/10000 [10:00:51<12:36:11,  8.12s/it]

Epoch: 4411 | Training loss 2.867450438439846 | Validation loss 2.787488132715225



 44%|███████████████████████████████▎                                       | 4413/10000 [10:00:59<12:36:01,  8.12s/it]

Epoch: 4412 | Training loss 2.86312048882246 | Validation loss 2.7858591973781586



 44%|███████████████████████████████▎                                       | 4414/10000 [10:01:07<12:34:34,  8.10s/it]

Epoch: 4413 | Training loss 2.871213674545288 | Validation loss 2.799144685268402



 44%|███████████████████████████████▎                                       | 4415/10000 [10:01:15<12:31:41,  8.08s/it]

Epoch: 4414 | Training loss 2.8659680783748627 | Validation loss 2.7950633764266968



 44%|███████████████████████████████▎                                       | 4416/10000 [10:01:23<12:31:03,  8.07s/it]

Epoch: 4415 | Training loss 2.8688515946269035 | Validation loss 2.788942724466324



 44%|███████████████████████████████▎                                       | 4417/10000 [10:01:31<12:30:55,  8.07s/it]

Epoch: 4416 | Training loss 2.8641838803887367 | Validation loss 2.7878675758838654



 44%|███████████████████████████████▎                                       | 4418/10000 [10:01:39<12:28:11,  8.04s/it]

Epoch: 4417 | Training loss 2.8654933869838715 | Validation loss 2.791862577199936



 44%|███████████████████████████████▎                                       | 4419/10000 [10:01:47<12:29:17,  8.06s/it]

Epoch: 4418 | Training loss 2.8647885471582413 | Validation loss 2.7921960055828094



 44%|███████████████████████████████▍                                       | 4420/10000 [10:01:55<12:26:09,  8.02s/it]

Epoch: 4419 | Training loss 2.8700473457574844 | Validation loss 2.792273610830307



 44%|███████████████████████████████▍                                       | 4421/10000 [10:02:03<12:26:28,  8.03s/it]

Epoch: 4420 | Training loss 2.870414115488529 | Validation loss 2.7954900562763214



 44%|███████████████████████████████▍                                       | 4422/10000 [10:02:11<12:27:36,  8.04s/it]

Epoch: 4421 | Training loss 2.864302471280098 | Validation loss 2.793791800737381



 44%|███████████████████████████████▍                                       | 4423/10000 [10:02:19<12:30:51,  8.08s/it]

Epoch: 4422 | Training loss 2.8650784865021706 | Validation loss 2.787429094314575



 44%|███████████████████████████████▍                                       | 4424/10000 [10:02:27<12:31:53,  8.09s/it]

Epoch: 4423 | Training loss 2.8655145689845085 | Validation loss 2.78522652387619



 44%|███████████████████████████████▍                                       | 4425/10000 [10:02:35<12:31:36,  8.09s/it]

Epoch: 4424 | Training loss 2.862249933183193 | Validation loss 2.7921953797340393



 44%|███████████████████████████████▍                                       | 4426/10000 [10:02:44<12:33:59,  8.12s/it]

Epoch: 4425 | Training loss 2.8676405251026154 | Validation loss 2.7989710569381714



 44%|███████████████████████████████▍                                       | 4427/10000 [10:02:52<12:34:50,  8.13s/it]

Epoch: 4426 | Training loss 2.863730676472187 | Validation loss 2.7874952852725983



 44%|███████████████████████████████▍                                       | 4428/10000 [10:03:00<12:32:01,  8.10s/it]

Epoch: 4427 | Training loss 2.864758625626564 | Validation loss 2.791097790002823



 44%|███████████████████████████████▍                                       | 4429/10000 [10:03:08<12:28:58,  8.07s/it]

Epoch: 4428 | Training loss 2.863774724304676 | Validation loss 2.7888453006744385



 44%|███████████████████████████████▍                                       | 4430/10000 [10:03:16<12:28:08,  8.06s/it]

Epoch: 4429 | Training loss 2.8616494461894035 | Validation loss 2.7924638986587524



 44%|███████████████████████████████▍                                       | 4431/10000 [10:03:24<12:30:04,  8.08s/it]

Epoch: 4430 | Training loss 2.866901859641075 | Validation loss 2.79177063703537



 44%|███████████████████████████████▍                                       | 4432/10000 [10:03:32<12:30:29,  8.09s/it]

Epoch: 4431 | Training loss 2.862362213432789 | Validation loss 2.793852388858795



 44%|███████████████████████████████▍                                       | 4433/10000 [10:03:40<12:29:02,  8.07s/it]

Epoch: 4432 | Training loss 2.8665663301944733 | Validation loss 2.7940529882907867



 44%|███████████████████████████████▍                                       | 4434/10000 [10:03:48<12:33:20,  8.12s/it]

Epoch: 4433 | Training loss 2.862096779048443 | Validation loss 2.7920905351638794



 44%|███████████████████████████████▍                                       | 4435/10000 [10:03:56<12:31:25,  8.10s/it]

Epoch: 4434 | Training loss 2.864901527762413 | Validation loss 2.787414461374283



 44%|███████████████████████████████▍                                       | 4436/10000 [10:04:04<12:29:10,  8.08s/it]

Epoch: 4435 | Training loss 2.8654646649956703 | Validation loss 2.7892081439495087



 44%|███████████████████████████████▌                                       | 4437/10000 [10:04:13<12:28:28,  8.07s/it]

Epoch: 4436 | Training loss 2.8668928146362305 | Validation loss 2.7929881513118744



 44%|███████████████████████████████▌                                       | 4438/10000 [10:04:21<12:27:57,  8.07s/it]

Epoch: 4437 | Training loss 2.8671327978372574 | Validation loss 2.788269519805908



 44%|███████████████████████████████▌                                       | 4439/10000 [10:04:29<12:31:47,  8.11s/it]

Epoch: 4438 | Training loss 2.8689496368169785 | Validation loss 2.7920656204223633



 44%|███████████████████████████████▌                                       | 4440/10000 [10:04:37<12:33:37,  8.13s/it]

Epoch: 4439 | Training loss 2.8637454733252525 | Validation loss 2.792576253414154



 44%|███████████████████████████████▌                                       | 4441/10000 [10:04:45<12:32:42,  8.12s/it]

Epoch: 4440 | Training loss 2.8608546927571297 | Validation loss 2.799114465713501



 44%|███████████████████████████████▌                                       | 4442/10000 [10:04:53<12:30:12,  8.10s/it]

Epoch: 4441 | Training loss 2.862130269408226 | Validation loss 2.7888132333755493



 44%|███████████████████████████████▌                                       | 4443/10000 [10:05:01<12:33:39,  8.14s/it]

Epoch: 4442 | Training loss 2.866752967238426 | Validation loss 2.795396000146866



 44%|███████████████████████████████▌                                       | 4444/10000 [10:05:09<12:33:21,  8.14s/it]

Epoch: 4443 | Training loss 2.8610613644123077 | Validation loss 2.7880029678344727



 44%|███████████████████████████████▌                                       | 4445/10000 [10:05:18<12:32:22,  8.13s/it]

Epoch: 4444 | Training loss 2.866351008415222 | Validation loss 2.793986827135086



 44%|███████████████████████████████▌                                       | 4446/10000 [10:05:26<12:32:24,  8.13s/it]

Epoch: 4445 | Training loss 2.8611003682017326 | Validation loss 2.7918064892292023



 44%|███████████████████████████████▌                                       | 4447/10000 [10:05:34<12:30:50,  8.11s/it]

Epoch: 4446 | Training loss 2.86271770298481 | Validation loss 2.7921349108219147



 44%|███████████████████████████████▌                                       | 4448/10000 [10:05:42<12:31:15,  8.12s/it]

Epoch: 4447 | Training loss 2.8673198893666267 | Validation loss 2.7900378704071045



 44%|███████████████████████████████▌                                       | 4449/10000 [10:05:50<12:28:54,  8.09s/it]

Epoch: 4448 | Training loss 2.864414595067501 | Validation loss 2.7896406054496765



 44%|███████████████████████████████▌                                       | 4450/10000 [10:05:58<12:29:58,  8.11s/it]

Epoch: 4449 | Training loss 2.8638620376586914 | Validation loss 2.7940613925457



 45%|███████████████████████████████▌                                       | 4451/10000 [10:06:06<12:31:41,  8.13s/it]

Epoch: 4450 | Training loss 2.8673531487584114 | Validation loss 2.787270665168762



 45%|███████████████████████████████▌                                       | 4452/10000 [10:06:14<12:32:06,  8.13s/it]

Epoch: 4451 | Training loss 2.8759007081389427 | Validation loss 2.7904711067676544



 45%|███████████████████████████████▌                                       | 4453/10000 [10:06:23<12:34:32,  8.16s/it]

Epoch: 4452 | Training loss 2.8644277825951576 | Validation loss 2.791069895029068



 45%|███████████████████████████████▌                                       | 4454/10000 [10:06:31<12:29:20,  8.11s/it]

Epoch: 4453 | Training loss 2.8680689334869385 | Validation loss 2.7883320450782776



 45%|███████████████████████████████▋                                       | 4455/10000 [10:06:39<12:28:12,  8.10s/it]

Epoch: 4454 | Training loss 2.866669125854969 | Validation loss 2.794108122587204



 45%|███████████████████████████████▋                                       | 4456/10000 [10:06:47<12:27:50,  8.09s/it]

Epoch: 4455 | Training loss 2.871003419160843 | Validation loss 2.7920054495334625



 45%|███████████████████████████████▋                                       | 4457/10000 [10:06:55<12:27:04,  8.09s/it]

Epoch: 4456 | Training loss 2.8661356642842293 | Validation loss 2.789246827363968



 45%|███████████████████████████████▋                                       | 4458/10000 [10:07:03<12:27:22,  8.09s/it]

Epoch: 4457 | Training loss 2.872050493955612 | Validation loss 2.7904841005802155



 45%|███████████████████████████████▋                                       | 4459/10000 [10:07:11<12:23:59,  8.06s/it]

Epoch: 4458 | Training loss 2.868276335299015 | Validation loss 2.795418381690979



 45%|███████████████████████████████▋                                       | 4460/10000 [10:07:19<12:21:38,  8.03s/it]

Epoch: 4459 | Training loss 2.866655707359314 | Validation loss 2.7919480204582214



 45%|███████████████████████████████▋                                       | 4461/10000 [10:07:27<12:24:19,  8.06s/it]

Epoch: 4460 | Training loss 2.864902473986149 | Validation loss 2.7878612279891968



 45%|███████████████████████████████▋                                       | 4462/10000 [10:07:35<12:23:58,  8.06s/it]

Epoch: 4461 | Training loss 2.862857922911644 | Validation loss 2.7907288670539856



 45%|███████████████████████████████▋                                       | 4463/10000 [10:07:43<12:22:01,  8.04s/it]

Epoch: 4462 | Training loss 2.8640041202306747 | Validation loss 2.7890594601631165



 45%|███████████████████████████████▋                                       | 4464/10000 [10:07:51<12:20:41,  8.03s/it]

Epoch: 4463 | Training loss 2.8683167546987534 | Validation loss 2.787851005792618



 45%|███████████████████████████████▋                                       | 4465/10000 [10:07:59<12:21:52,  8.04s/it]

Epoch: 4464 | Training loss 2.864958457648754 | Validation loss 2.790361315011978



 45%|███████████████████████████████▋                                       | 4466/10000 [10:08:07<12:23:31,  8.06s/it]

Epoch: 4465 | Training loss 2.865493029356003 | Validation loss 2.7933453619480133



 45%|███████████████████████████████▋                                       | 4467/10000 [10:08:15<12:25:27,  8.08s/it]

Epoch: 4466 | Training loss 2.863691695034504 | Validation loss 2.7894552648067474



 45%|███████████████████████████████▋                                       | 4468/10000 [10:08:24<12:26:43,  8.10s/it]

Epoch: 4467 | Training loss 2.8640451058745384 | Validation loss 2.7922694385051727



 45%|███████████████████████████████▋                                       | 4469/10000 [10:08:32<12:26:46,  8.10s/it]

Epoch: 4468 | Training loss 2.861648954451084 | Validation loss 2.794104278087616



 45%|███████████████████████████████▋                                       | 4470/10000 [10:08:40<12:29:41,  8.13s/it]

Epoch: 4469 | Training loss 2.858800858259201 | Validation loss 2.787501037120819



 45%|███████████████████████████████▋                                       | 4471/10000 [10:08:48<12:28:14,  8.12s/it]

Epoch: 4470 | Training loss 2.867653325200081 | Validation loss 2.7935658991336823



 45%|███████████████████████████████▊                                       | 4472/10000 [10:08:56<12:27:14,  8.11s/it]

Epoch: 4471 | Training loss 2.866516686975956 | Validation loss 2.7923682034015656



 45%|███████████████████████████████▊                                       | 4473/10000 [10:09:04<12:24:45,  8.09s/it]

Epoch: 4472 | Training loss 2.8617059364914894 | Validation loss 2.791175037622452



 45%|███████████████████████████████▊                                       | 4474/10000 [10:09:12<12:24:36,  8.08s/it]

Epoch: 4473 | Training loss 2.867402598261833 | Validation loss 2.795830935239792



 45%|███████████████████████████████▊                                       | 4475/10000 [10:09:20<12:22:45,  8.07s/it]

Epoch: 4474 | Training loss 2.8624683544039726 | Validation loss 2.786760598421097



 45%|███████████████████████████████▊                                       | 4476/10000 [10:09:28<12:22:47,  8.07s/it]

Epoch: 4475 | Training loss 2.857831098139286 | Validation loss 2.7979610562324524



 45%|███████████████████████████████▊                                       | 4477/10000 [10:09:36<12:23:13,  8.07s/it]

Epoch: 4476 | Training loss 2.8700373843312263 | Validation loss 2.7916682362556458



 45%|███████████████████████████████▊                                       | 4478/10000 [10:09:44<12:23:25,  8.08s/it]

Epoch: 4477 | Training loss 2.8676335364580154 | Validation loss 2.786769926548004



 45%|███████████████████████████████▊                                       | 4479/10000 [10:09:53<12:24:01,  8.09s/it]

Epoch: 4478 | Training loss 2.8650770410895348 | Validation loss 2.7882109582424164



 45%|███████████████████████████████▊                                       | 4480/10000 [10:10:01<12:24:25,  8.09s/it]

Epoch: 4479 | Training loss 2.8648081719875336 | Validation loss 2.7879652976989746



 45%|███████████████████████████████▊                                       | 4481/10000 [10:10:09<12:24:34,  8.09s/it]

Epoch: 4480 | Training loss 2.8685837760567665 | Validation loss 2.787969559431076



 45%|███████████████████████████████▊                                       | 4482/10000 [10:10:17<12:24:13,  8.09s/it]

Epoch: 4481 | Training loss 2.868460029363632 | Validation loss 2.792984753847122



 45%|███████████████████████████████▊                                       | 4483/10000 [10:10:25<12:25:42,  8.11s/it]

Epoch: 4482 | Training loss 2.8658222779631615 | Validation loss 2.7888504564762115



 45%|███████████████████████████████▊                                       | 4484/10000 [10:10:33<12:24:58,  8.10s/it]

Epoch: 4483 | Training loss 2.869391232728958 | Validation loss 2.794111043214798



 45%|███████████████████████████████▊                                       | 4485/10000 [10:10:41<12:22:14,  8.08s/it]

Epoch: 4484 | Training loss 2.864912584424019 | Validation loss 2.788139820098877



 45%|███████████████████████████████▊                                       | 4486/10000 [10:10:49<12:21:07,  8.06s/it]

Epoch: 4485 | Training loss 2.869023561477661 | Validation loss 2.794213980436325



 45%|███████████████████████████████▊                                       | 4487/10000 [10:10:57<12:21:07,  8.07s/it]

Epoch: 4486 | Training loss 2.866755597293377 | Validation loss 2.789877563714981



 45%|███████████████████████████████▊                                       | 4488/10000 [10:11:05<12:18:57,  8.04s/it]

Epoch: 4487 | Training loss 2.8628274276852608 | Validation loss 2.786463141441345



 45%|███████████████████████████████▊                                       | 4489/10000 [10:11:13<12:21:35,  8.07s/it]

Epoch: 4488 | Training loss 2.8648219034075737 | Validation loss 2.7907005846500397



 45%|███████████████████████████████▉                                       | 4490/10000 [10:11:22<12:30:33,  8.17s/it]

Epoch: 4489 | Training loss 2.8674895018339157 | Validation loss 2.7907896041870117



 45%|███████████████████████████████▉                                       | 4491/10000 [10:11:30<12:28:03,  8.15s/it]

Epoch: 4490 | Training loss 2.8638738989830017 | Validation loss 2.7908172011375427



 45%|███████████████████████████████▉                                       | 4492/10000 [10:11:38<12:26:15,  8.13s/it]

Epoch: 4491 | Training loss 2.8673866391181946 | Validation loss 2.788895308971405



 45%|███████████████████████████████▉                                       | 4493/10000 [10:11:46<12:25:26,  8.12s/it]

Epoch: 4492 | Training loss 2.8651712387800217 | Validation loss 2.7868640422821045



 45%|███████████████████████████████▉                                       | 4494/10000 [10:11:54<12:26:55,  8.14s/it]

Epoch: 4493 | Training loss 2.866380453109741 | Validation loss 2.7900157272815704



 45%|███████████████████████████████▉                                       | 4495/10000 [10:12:02<12:26:46,  8.14s/it]

Epoch: 4494 | Training loss 2.8664339259266853 | Validation loss 2.7891122102737427



 45%|███████████████████████████████▉                                       | 4496/10000 [10:12:10<12:26:05,  8.13s/it]

Epoch: 4495 | Training loss 2.860945239663124 | Validation loss 2.78701913356781



 45%|███████████████████████████████▉                                       | 4497/10000 [10:12:18<12:22:55,  8.10s/it]

Epoch: 4496 | Training loss 2.8654168397188187 | Validation loss 2.783723473548889



 45%|███████████████████████████████▉                                       | 4498/10000 [10:12:27<12:26:20,  8.14s/it]

Epoch: 4497 | Training loss 2.8720669224858284 | Validation loss 2.792628765106201



 45%|███████████████████████████████▉                                       | 4499/10000 [10:12:35<12:26:24,  8.14s/it]

Epoch: 4498 | Training loss 2.8629648461937904 | Validation loss 2.788735806941986



 45%|███████████████████████████████▉                                       | 4500/10000 [10:12:43<12:25:44,  8.14s/it]

Epoch: 4499 | Training loss 2.863082118332386 | Validation loss 2.78676375746727



 45%|███████████████████████████████▉                                       | 4501/10000 [10:12:51<12:25:06,  8.13s/it]

Epoch: 4500 | Training loss 2.8690469712018967 | Validation loss 2.793067753314972



 45%|███████████████████████████████▉                                       | 4502/10000 [10:12:59<12:22:58,  8.11s/it]

Epoch: 4501 | Training loss 2.8668767660856247 | Validation loss 2.7892415821552277



 45%|███████████████████████████████▉                                       | 4503/10000 [10:13:07<12:25:16,  8.13s/it]

Epoch: 4502 | Training loss 2.865225613117218 | Validation loss 2.795216679573059



 45%|███████████████████████████████▉                                       | 4504/10000 [10:13:15<12:26:10,  8.15s/it]

Epoch: 4503 | Training loss 2.868930406868458 | Validation loss 2.7942788898944855



 45%|███████████████████████████████▉                                       | 4505/10000 [10:13:24<12:27:29,  8.16s/it]

Epoch: 4504 | Training loss 2.867290958762169 | Validation loss 2.7945809960365295



 45%|███████████████████████████████▉                                       | 4506/10000 [10:13:32<12:26:05,  8.15s/it]

Epoch: 4505 | Training loss 2.866552732884884 | Validation loss 2.792748123407364



 45%|███████████████████████████████▉                                       | 4507/10000 [10:13:40<12:24:32,  8.13s/it]

Epoch: 4506 | Training loss 2.863530285656452 | Validation loss 2.7862440645694733



 45%|████████████████████████████████                                       | 4508/10000 [10:13:48<12:25:45,  8.15s/it]

Epoch: 4507 | Training loss 2.8649613931775093 | Validation loss 2.790396124124527



 45%|████████████████████████████████                                       | 4509/10000 [10:13:56<12:22:46,  8.12s/it]

Epoch: 4508 | Training loss 2.8627291694283485 | Validation loss 2.786216080188751



 45%|████████████████████████████████                                       | 4510/10000 [10:14:04<12:23:13,  8.12s/it]

Epoch: 4509 | Training loss 2.869410455226898 | Validation loss 2.788451075553894



 45%|████████████████████████████████                                       | 4511/10000 [10:14:12<12:26:03,  8.16s/it]

Epoch: 4510 | Training loss 2.8612135872244835 | Validation loss 2.792051315307617



 45%|████████████████████████████████                                       | 4512/10000 [10:14:21<12:23:41,  8.13s/it]

Epoch: 4511 | Training loss 2.8714450746774673 | Validation loss 2.7973107993602753



 45%|████████████████████████████████                                       | 4513/10000 [10:14:29<12:22:19,  8.12s/it]

Epoch: 4512 | Training loss 2.8670756220817566 | Validation loss 2.79147070646286



 45%|████████████████████████████████                                       | 4514/10000 [10:14:37<12:23:53,  8.14s/it]

Epoch: 4513 | Training loss 2.868105374276638 | Validation loss 2.791166514158249



 45%|████████████████████████████████                                       | 4515/10000 [10:14:45<12:25:29,  8.15s/it]

Epoch: 4514 | Training loss 2.867657631635666 | Validation loss 2.7918435633182526



 45%|████████████████████████████████                                       | 4516/10000 [10:14:53<12:24:22,  8.14s/it]

Epoch: 4515 | Training loss 2.8701596409082413 | Validation loss 2.7922571003437042



 45%|████████████████████████████████                                       | 4517/10000 [10:15:01<12:24:00,  8.14s/it]

Epoch: 4516 | Training loss 2.8628216609358788 | Validation loss 2.7927726209163666



 45%|████████████████████████████████                                       | 4518/10000 [10:15:09<12:23:13,  8.13s/it]

Epoch: 4517 | Training loss 2.865577645599842 | Validation loss 2.793133854866028



 45%|████████████████████████████████                                       | 4519/10000 [10:15:18<12:22:58,  8.13s/it]

Epoch: 4518 | Training loss 2.862387977540493 | Validation loss 2.7905167043209076



 45%|████████████████████████████████                                       | 4520/10000 [10:15:26<12:23:21,  8.14s/it]

Epoch: 4519 | Training loss 2.866401381790638 | Validation loss 2.7882111370563507



 45%|████████████████████████████████                                       | 4521/10000 [10:15:34<12:23:23,  8.14s/it]

Epoch: 4520 | Training loss 2.863216795027256 | Validation loss 2.7902497947216034



 45%|████████████████████████████████                                       | 4522/10000 [10:15:42<12:24:11,  8.15s/it]

Epoch: 4521 | Training loss 2.8598288893699646 | Validation loss 2.790612757205963



 45%|████████████████████████████████                                       | 4523/10000 [10:15:50<12:25:19,  8.17s/it]

Epoch: 4522 | Training loss 2.8667084872722626 | Validation loss 2.794253647327423



 45%|████████████████████████████████                                       | 4524/10000 [10:15:58<12:26:26,  8.18s/it]

Epoch: 4523 | Training loss 2.8664817959070206 | Validation loss 2.7867352664470673



 45%|████████████████████████████████▏                                      | 4525/10000 [10:16:07<12:25:58,  8.18s/it]

Epoch: 4524 | Training loss 2.8634013012051582 | Validation loss 2.7940231561660767



 45%|████████████████████████████████▏                                      | 4526/10000 [10:16:15<12:24:28,  8.16s/it]

Epoch: 4525 | Training loss 2.8619241788983345 | Validation loss 2.7876402735710144



 45%|████████████████████████████████▏                                      | 4527/10000 [10:16:23<12:26:52,  8.19s/it]

Epoch: 4526 | Training loss 2.863013356924057 | Validation loss 2.788652002811432



 45%|████████████████████████████████▏                                      | 4528/10000 [10:16:31<12:24:35,  8.16s/it]

Epoch: 4527 | Training loss 2.862897500395775 | Validation loss 2.791048288345337



 45%|████████████████████████████████▏                                      | 4529/10000 [10:16:39<12:23:46,  8.16s/it]

Epoch: 4528 | Training loss 2.867889277637005 | Validation loss 2.791408956050873



 45%|████████████████████████████████▏                                      | 4530/10000 [10:16:47<12:25:08,  8.17s/it]

Epoch: 4529 | Training loss 2.870771899819374 | Validation loss 2.789846897125244



 45%|████████████████████████████████▏                                      | 4531/10000 [10:16:56<12:24:47,  8.17s/it]

Epoch: 4530 | Training loss 2.8669501543045044 | Validation loss 2.7855684757232666



 45%|████████████████████████████████▏                                      | 4532/10000 [10:17:04<12:21:28,  8.14s/it]

Epoch: 4531 | Training loss 2.8670038878917694 | Validation loss 2.796284645795822



 45%|████████████████████████████████▏                                      | 4533/10000 [10:17:12<12:19:09,  8.11s/it]

Epoch: 4532 | Training loss 2.8669336289167404 | Validation loss 2.7916323244571686



 45%|████████████████████████████████▏                                      | 4534/10000 [10:17:20<12:18:21,  8.10s/it]

Epoch: 4533 | Training loss 2.867040291428566 | Validation loss 2.786246120929718



 45%|████████████████████████████████▏                                      | 4535/10000 [10:17:28<12:20:20,  8.13s/it]

Epoch: 4534 | Training loss 2.8669430911540985 | Validation loss 2.7896116971969604



 45%|████████████████████████████████▏                                      | 4536/10000 [10:17:36<12:19:57,  8.13s/it]

Epoch: 4535 | Training loss 2.8691155463457108 | Validation loss 2.787748247385025



 45%|████████████████████████████████▏                                      | 4537/10000 [10:17:44<12:22:45,  8.16s/it]

Epoch: 4536 | Training loss 2.8690643534064293 | Validation loss 2.7912385761737823



 45%|████████████████████████████████▏                                      | 4538/10000 [10:17:52<12:19:50,  8.13s/it]

Epoch: 4537 | Training loss 2.866738334298134 | Validation loss 2.7893999218940735



 45%|████████████████████████████████▏                                      | 4539/10000 [10:18:01<12:20:12,  8.13s/it]

Epoch: 4538 | Training loss 2.8715960010886192 | Validation loss 2.800176739692688



 45%|████████████████████████████████▏                                      | 4540/10000 [10:18:09<12:16:54,  8.10s/it]

Epoch: 4539 | Training loss 2.8630581498146057 | Validation loss 2.789972335100174



 45%|████████████████████████████████▏                                      | 4541/10000 [10:18:17<12:15:14,  8.08s/it]

Epoch: 4540 | Training loss 2.8677334636449814 | Validation loss 2.7901003062725067



 45%|████████████████████████████████▏                                      | 4542/10000 [10:18:25<12:19:09,  8.13s/it]

Epoch: 4541 | Training loss 2.8649927228689194 | Validation loss 2.7910684049129486



 45%|████████████████████████████████▎                                      | 4543/10000 [10:18:33<12:17:02,  8.10s/it]

Epoch: 4542 | Training loss 2.8633168637752533 | Validation loss 2.7913558781147003



 45%|████████████████████████████████▎                                      | 4544/10000 [10:18:41<12:19:19,  8.13s/it]

Epoch: 4543 | Training loss 2.8657408580183983 | Validation loss 2.7877167463302612



 45%|████████████████████████████████▎                                      | 4545/10000 [10:18:49<12:19:41,  8.14s/it]

Epoch: 4544 | Training loss 2.8617817610502243 | Validation loss 2.789951890707016



 45%|████████████████████████████████▎                                      | 4546/10000 [10:18:57<12:19:25,  8.13s/it]

Epoch: 4545 | Training loss 2.8647831678390503 | Validation loss 2.7886105477809906



 45%|████████████████████████████████▎                                      | 4547/10000 [10:19:05<12:17:38,  8.12s/it]

Epoch: 4546 | Training loss 2.871583968400955 | Validation loss 2.797825038433075



 45%|████████████████████████████████▎                                      | 4548/10000 [10:19:14<12:17:56,  8.12s/it]

Epoch: 4547 | Training loss 2.8634235337376595 | Validation loss 2.7936251759529114



 45%|████████████████████████████████▎                                      | 4549/10000 [10:19:22<12:18:36,  8.13s/it]

Epoch: 4548 | Training loss 2.872186005115509 | Validation loss 2.7864397764205933



 46%|████████████████████████████████▎                                      | 4550/10000 [10:19:30<12:13:48,  8.08s/it]

Epoch: 4549 | Training loss 2.866133823990822 | Validation loss 2.791656881570816



 46%|████████████████████████████████▎                                      | 4551/10000 [10:19:38<12:09:59,  8.04s/it]

Epoch: 4550 | Training loss 2.8703992143273354 | Validation loss 2.791350543498993



 46%|████████████████████████████████▎                                      | 4552/10000 [10:19:46<12:10:45,  8.05s/it]

Epoch: 4551 | Training loss 2.8671815544366837 | Validation loss 2.7901443541049957



 46%|████████████████████████████████▎                                      | 4553/10000 [10:19:54<12:12:02,  8.06s/it]

Epoch: 4552 | Training loss 2.8656010031700134 | Validation loss 2.798008769750595



 46%|████████████████████████████████▎                                      | 4554/10000 [10:20:02<12:11:41,  8.06s/it]

Epoch: 4553 | Training loss 2.867066204547882 | Validation loss 2.788930267095566



 46%|████████████████████████████████▎                                      | 4555/10000 [10:20:10<12:14:43,  8.10s/it]

Epoch: 4554 | Training loss 2.8648789674043655 | Validation loss 2.792188048362732



 46%|████████████████████████████████▎                                      | 4556/10000 [10:20:18<12:13:54,  8.09s/it]

Epoch: 4555 | Training loss 2.861885145306587 | Validation loss 2.7934695184230804



 46%|████████████████████████████████▎                                      | 4557/10000 [10:20:26<12:14:09,  8.09s/it]

Epoch: 4556 | Training loss 2.864353783428669 | Validation loss 2.791141837835312



 46%|████████████████████████████████▎                                      | 4558/10000 [10:20:34<12:13:49,  8.09s/it]

Epoch: 4557 | Training loss 2.8662354797124863 | Validation loss 2.791256606578827



 46%|████████████████████████████████▎                                      | 4559/10000 [10:20:42<12:13:10,  8.08s/it]

Epoch: 4558 | Training loss 2.865305431187153 | Validation loss 2.792127639055252



 46%|████████████████████████████████▍                                      | 4560/10000 [10:20:50<12:12:40,  8.08s/it]

Epoch: 4559 | Training loss 2.864261955022812 | Validation loss 2.7888350188732147



 46%|████████████████████████████████▍                                      | 4561/10000 [10:20:58<12:10:06,  8.05s/it]

Epoch: 4560 | Training loss 2.8625952154397964 | Validation loss 2.7872218191623688



 46%|████████████████████████████████▍                                      | 4562/10000 [10:21:06<12:11:20,  8.07s/it]

Epoch: 4561 | Training loss 2.8721642419695854 | Validation loss 2.7961672842502594



 46%|████████████████████████████████▍                                      | 4563/10000 [10:21:15<12:10:01,  8.06s/it]

Epoch: 4562 | Training loss 2.86197417229414 | Validation loss 2.7849678099155426



 46%|████████████████████████████████▍                                      | 4564/10000 [10:21:23<12:09:21,  8.05s/it]

Epoch: 4563 | Training loss 2.8712886720895767 | Validation loss 2.791881948709488



 46%|████████████████████████████████▍                                      | 4565/10000 [10:21:31<12:11:30,  8.08s/it]

Epoch: 4564 | Training loss 2.8608100190758705 | Validation loss 2.789518803358078



 46%|████████████████████████████████▍                                      | 4566/10000 [10:21:39<12:14:22,  8.11s/it]

Epoch: 4565 | Training loss 2.8708793967962265 | Validation loss 2.7906805872917175



 46%|████████████████████████████████▍                                      | 4567/10000 [10:21:47<12:11:53,  8.08s/it]

Epoch: 4566 | Training loss 2.8672655895352364 | Validation loss 2.7905478477478027



 46%|████████████████████████████████▍                                      | 4568/10000 [10:21:55<12:11:22,  8.08s/it]

Epoch: 4567 | Training loss 2.860975980758667 | Validation loss 2.785815417766571



 46%|████████████████████████████████▍                                      | 4569/10000 [10:22:03<12:12:20,  8.09s/it]

Epoch: 4568 | Training loss 2.866673454642296 | Validation loss 2.7896652221679688



 46%|████████████████████████████████▍                                      | 4570/10000 [10:22:11<12:12:51,  8.10s/it]

Epoch: 4569 | Training loss 2.865371972322464 | Validation loss 2.7900975346565247



 46%|████████████████████████████████▍                                      | 4571/10000 [10:22:19<12:12:48,  8.10s/it]

Epoch: 4570 | Training loss 2.8668209612369537 | Validation loss 2.7885946929454803



 46%|████████████████████████████████▍                                      | 4572/10000 [10:22:27<12:11:53,  8.09s/it]

Epoch: 4571 | Training loss 2.862230435013771 | Validation loss 2.7884660959243774



 46%|████████████████████████████████▍                                      | 4573/10000 [10:22:36<12:12:57,  8.10s/it]

Epoch: 4572 | Training loss 2.8657779544591904 | Validation loss 2.7894282042980194



 46%|████████████████████████████████▍                                      | 4574/10000 [10:22:44<12:11:57,  8.09s/it]

Epoch: 4573 | Training loss 2.8714209273457527 | Validation loss 2.7940744161605835



 46%|████████████████████████████████▍                                      | 4575/10000 [10:22:52<12:12:53,  8.11s/it]

Epoch: 4574 | Training loss 2.8727749660611153 | Validation loss 2.799539566040039



 46%|████████████████████████████████▍                                      | 4576/10000 [10:23:00<12:12:41,  8.10s/it]

Epoch: 4575 | Training loss 2.8689245730638504 | Validation loss 2.7942483723163605



 46%|████████████████████████████████▍                                      | 4577/10000 [10:23:08<12:13:17,  8.11s/it]

Epoch: 4576 | Training loss 2.8666873052716255 | Validation loss 2.7987568378448486



 46%|████████████████████████████████▌                                      | 4578/10000 [10:23:16<12:11:36,  8.10s/it]

Epoch: 4577 | Training loss 2.8651162683963776 | Validation loss 2.7885127663612366



 46%|████████████████████████████████▌                                      | 4579/10000 [10:23:24<12:09:31,  8.07s/it]

Epoch: 4578 | Training loss 2.8703302070498466 | Validation loss 2.7872642874717712



 46%|████████████████████████████████▌                                      | 4580/10000 [10:23:32<12:10:05,  8.08s/it]

Epoch: 4579 | Training loss 2.8744816705584526 | Validation loss 2.7936477959156036



 46%|████████████████████████████████▌                                      | 4581/10000 [10:23:40<12:09:24,  8.08s/it]

Epoch: 4580 | Training loss 2.8765069767832756 | Validation loss 2.7936489582061768



 46%|████████████████████████████████▌                                      | 4582/10000 [10:23:48<12:13:25,  8.12s/it]

Epoch: 4581 | Training loss 2.868010491132736 | Validation loss 2.801147907972336



 46%|████████████████████████████████▌                                      | 4583/10000 [10:23:56<12:11:58,  8.11s/it]

Epoch: 4582 | Training loss 2.86636733263731 | Validation loss 2.7885498702526093



 46%|████████████████████████████████▌                                      | 4584/10000 [10:24:05<12:12:56,  8.12s/it]

Epoch: 4583 | Training loss 2.8655125945806503 | Validation loss 2.797436475753784



 46%|████████████████████████████████▌                                      | 4585/10000 [10:24:13<12:11:55,  8.11s/it]

Epoch: 4584 | Training loss 2.8693762347102165 | Validation loss 2.79521644115448



 46%|████████████████████████████████▌                                      | 4586/10000 [10:24:21<12:11:08,  8.10s/it]

Epoch: 4585 | Training loss 2.8711406514048576 | Validation loss 2.798200339078903



 46%|████████████████████████████████▌                                      | 4587/10000 [10:24:29<12:12:17,  8.12s/it]

Epoch: 4586 | Training loss 2.87039702385664 | Validation loss 2.7904618084430695



 46%|████████████████████████████████▌                                      | 4588/10000 [10:24:37<12:07:02,  8.06s/it]

Epoch: 4587 | Training loss 2.863951101899147 | Validation loss 2.7893907129764557



 46%|████████████████████████████████▌                                      | 4589/10000 [10:24:45<12:05:29,  8.04s/it]

Epoch: 4588 | Training loss 2.86583399027586 | Validation loss 2.795415550470352



 46%|████████████████████████████████▌                                      | 4590/10000 [10:24:53<12:05:13,  8.04s/it]

Epoch: 4589 | Training loss 2.8708475306630135 | Validation loss 2.7967512011528015



 46%|████████████████████████████████▌                                      | 4591/10000 [10:25:01<12:04:58,  8.04s/it]

Epoch: 4590 | Training loss 2.86159086227417 | Validation loss 2.798277348279953



 46%|████████████████████████████████▌                                      | 4592/10000 [10:25:09<12:05:14,  8.05s/it]

Epoch: 4591 | Training loss 2.8697410598397255 | Validation loss 2.794853389263153



 46%|████████████████████████████████▌                                      | 4593/10000 [10:25:17<12:09:41,  8.10s/it]

Epoch: 4592 | Training loss 2.8640851452946663 | Validation loss 2.7971007227897644



 46%|████████████████████████████████▌                                      | 4594/10000 [10:25:25<12:11:54,  8.12s/it]

Epoch: 4593 | Training loss 2.8682574406266212 | Validation loss 2.79686838388443



 46%|████████████████████████████████▌                                      | 4595/10000 [10:25:34<12:12:02,  8.13s/it]

Epoch: 4594 | Training loss 2.863724648952484 | Validation loss 2.7924106121063232



 46%|████████████████████████████████▋                                      | 4596/10000 [10:25:42<12:08:49,  8.09s/it]

Epoch: 4595 | Training loss 2.873616501688957 | Validation loss 2.7938949167728424



 46%|████████████████████████████████▋                                      | 4597/10000 [10:25:50<12:07:23,  8.08s/it]

Epoch: 4596 | Training loss 2.866545833647251 | Validation loss 2.78762286901474



 46%|████████████████████████████████▋                                      | 4598/10000 [10:25:58<12:06:59,  8.07s/it]

Epoch: 4597 | Training loss 2.864400953054428 | Validation loss 2.798372894525528



 46%|████████████████████████████████▋                                      | 4599/10000 [10:26:06<12:06:22,  8.07s/it]

Epoch: 4598 | Training loss 2.8737529143691063 | Validation loss 2.8019827008247375



 46%|████████████████████████████████▋                                      | 4600/10000 [10:26:14<12:03:39,  8.04s/it]

Epoch: 4599 | Training loss 2.8699246793985367 | Validation loss 2.8036635518074036



 46%|████████████████████████████████▋                                      | 4601/10000 [10:26:22<12:05:32,  8.06s/it]

Epoch: 4600 | Training loss 2.8646708205342293 | Validation loss 2.7968027889728546



 46%|████████████████████████████████▋                                      | 4602/10000 [10:26:30<12:05:35,  8.07s/it]

Epoch: 4601 | Training loss 2.8662762343883514 | Validation loss 2.7897867560386658



 46%|████████████████████████████████▋                                      | 4603/10000 [10:26:38<12:03:24,  8.04s/it]

Epoch: 4602 | Training loss 2.873038776218891 | Validation loss 2.7983605563640594



 46%|████████████████████████████████▋                                      | 4604/10000 [10:26:46<12:01:53,  8.03s/it]

Epoch: 4603 | Training loss 2.8682107105851173 | Validation loss 2.7940147817134857



 46%|████████████████████████████████▋                                      | 4605/10000 [10:26:54<12:01:12,  8.02s/it]

Epoch: 4604 | Training loss 2.858139716088772 | Validation loss 2.788646250963211



 46%|████████████████████████████████▋                                      | 4606/10000 [10:27:02<12:04:54,  8.06s/it]

Epoch: 4605 | Training loss 2.862042896449566 | Validation loss 2.7922609746456146



 46%|████████████████████████████████▋                                      | 4607/10000 [10:27:10<12:04:32,  8.06s/it]

Epoch: 4606 | Training loss 2.8686192706227303 | Validation loss 2.790897011756897



 46%|████████████████████████████████▋                                      | 4608/10000 [10:27:18<12:04:14,  8.06s/it]

Epoch: 4607 | Training loss 2.8656879514455795 | Validation loss 2.794465661048889



 46%|████████████████████████████████▋                                      | 4609/10000 [10:27:26<12:06:05,  8.08s/it]

Epoch: 4608 | Training loss 2.8651582077145576 | Validation loss 2.7939599454402924



 46%|████████████████████████████████▋                                      | 4610/10000 [10:27:34<12:04:01,  8.06s/it]

Epoch: 4609 | Training loss 2.8672834262251854 | Validation loss 2.790560871362686



 46%|████████████████████████████████▋                                      | 4611/10000 [10:27:42<12:02:33,  8.04s/it]

Epoch: 4610 | Training loss 2.862835705280304 | Validation loss 2.800024598836899



 46%|████████████████████████████████▋                                      | 4612/10000 [10:27:50<12:02:17,  8.04s/it]

Epoch: 4611 | Training loss 2.860995590686798 | Validation loss 2.794297307729721



 46%|████████████████████████████████▊                                      | 4613/10000 [10:27:59<12:05:48,  8.08s/it]

Epoch: 4612 | Training loss 2.866872012615204 | Validation loss 2.792298525571823



 46%|████████████████████████████████▊                                      | 4614/10000 [10:28:07<12:02:45,  8.05s/it]

Epoch: 4613 | Training loss 2.8629891723394394 | Validation loss 2.789690613746643



 46%|████████████████████████████████▊                                      | 4615/10000 [10:28:15<12:05:35,  8.08s/it]

Epoch: 4614 | Training loss 2.8666986599564552 | Validation loss 2.7923193871974945



 46%|████████████████████████████████▊                                      | 4616/10000 [10:28:23<12:05:56,  8.09s/it]

Epoch: 4615 | Training loss 2.8661618158221245 | Validation loss 2.7911324501037598



 46%|████████████████████████████████▊                                      | 4617/10000 [10:28:31<12:10:03,  8.14s/it]

Epoch: 4616 | Training loss 2.8638249784708023 | Validation loss 2.7909659147262573



 46%|████████████████████████████████▊                                      | 4618/10000 [10:28:39<12:12:49,  8.17s/it]

Epoch: 4617 | Training loss 2.8625910729169846 | Validation loss 2.791136294603348



 46%|████████████████████████████████▊                                      | 4619/10000 [10:28:47<12:13:28,  8.18s/it]

Epoch: 4618 | Training loss 2.8684957921504974 | Validation loss 2.7914025485515594



 46%|████████████████████████████████▊                                      | 4620/10000 [10:28:56<12:12:18,  8.17s/it]

Epoch: 4619 | Training loss 2.8684931099414825 | Validation loss 2.793430745601654



 46%|████████████████████████████████▊                                      | 4621/10000 [10:29:04<12:10:55,  8.15s/it]

Epoch: 4620 | Training loss 2.868170477449894 | Validation loss 2.7951675355434418



 46%|████████████████████████████████▊                                      | 4622/10000 [10:29:12<12:11:01,  8.16s/it]

Epoch: 4621 | Training loss 2.8675584718585014 | Validation loss 2.7940744161605835



 46%|████████████████████████████████▊                                      | 4623/10000 [10:29:20<12:05:46,  8.10s/it]

Epoch: 4622 | Training loss 2.8663282990455627 | Validation loss 2.7938779294490814



 46%|████████████████████████████████▊                                      | 4624/10000 [10:29:28<12:07:50,  8.12s/it]

Epoch: 4623 | Training loss 2.8679854050278664 | Validation loss 2.7908757627010345



 46%|████████████████████████████████▊                                      | 4625/10000 [10:29:36<12:07:45,  8.12s/it]

Epoch: 4624 | Training loss 2.8674186617136 | Validation loss 2.7937334775924683



 46%|████████████████████████████████▊                                      | 4626/10000 [10:29:44<12:10:29,  8.16s/it]

Epoch: 4625 | Training loss 2.864213965833187 | Validation loss 2.7922026813030243



 46%|████████████████████████████████▊                                      | 4627/10000 [10:29:53<12:11:55,  8.17s/it]

Epoch: 4626 | Training loss 2.8664866611361504 | Validation loss 2.7953963577747345



 46%|████████████████████████████████▊                                      | 4628/10000 [10:30:01<12:13:12,  8.19s/it]

Epoch: 4627 | Training loss 2.869374841451645 | Validation loss 2.7940409779548645



 46%|████████████████████████████████▊                                      | 4629/10000 [10:30:09<12:12:26,  8.18s/it]

Epoch: 4628 | Training loss 2.867426484823227 | Validation loss 2.7935991883277893



 46%|████████████████████████████████▊                                      | 4630/10000 [10:30:17<12:10:39,  8.16s/it]

Epoch: 4629 | Training loss 2.8677556812763214 | Validation loss 2.7998185455799103



 46%|████████████████████████████████▉                                      | 4631/10000 [10:30:25<12:10:09,  8.16s/it]

Epoch: 4630 | Training loss 2.868193246424198 | Validation loss 2.7934269309043884



 46%|████████████████████████████████▉                                      | 4632/10000 [10:30:33<12:08:50,  8.15s/it]

Epoch: 4631 | Training loss 2.869085468351841 | Validation loss 2.7960515320301056



 46%|████████████████████████████████▉                                      | 4633/10000 [10:30:41<12:05:05,  8.11s/it]

Epoch: 4632 | Training loss 2.8687459379434586 | Validation loss 2.7986755073070526



 46%|████████████████████████████████▉                                      | 4634/10000 [10:30:49<12:04:02,  8.10s/it]

Epoch: 4633 | Training loss 2.8645254150032997 | Validation loss 2.7911667823791504



 46%|████████████████████████████████▉                                      | 4635/10000 [10:30:58<12:04:04,  8.10s/it]

Epoch: 4634 | Training loss 2.866653189063072 | Validation loss 2.797953426837921



 46%|████████████████████████████████▉                                      | 4636/10000 [10:31:06<12:04:56,  8.11s/it]

Epoch: 4635 | Training loss 2.8668883815407753 | Validation loss 2.7898453176021576



 46%|████████████████████████████████▉                                      | 4637/10000 [10:31:14<12:04:32,  8.11s/it]

Epoch: 4636 | Training loss 2.86522226780653 | Validation loss 2.789692223072052



 46%|████████████████████████████████▉                                      | 4638/10000 [10:31:22<12:04:40,  8.11s/it]

Epoch: 4637 | Training loss 2.867039665579796 | Validation loss 2.791096031665802



 46%|████████████████████████████████▉                                      | 4639/10000 [10:31:30<12:02:15,  8.08s/it]

Epoch: 4638 | Training loss 2.8635727539658546 | Validation loss 2.7926067411899567



 46%|████████████████████████████████▉                                      | 4640/10000 [10:31:38<12:03:47,  8.10s/it]

Epoch: 4639 | Training loss 2.8674860820174217 | Validation loss 2.796592950820923



 46%|████████████████████████████████▉                                      | 4641/10000 [10:31:46<12:02:44,  8.09s/it]

Epoch: 4640 | Training loss 2.8729071617126465 | Validation loss 2.7924090325832367



 46%|████████████████████████████████▉                                      | 4642/10000 [10:31:54<12:01:37,  8.08s/it]

Epoch: 4641 | Training loss 2.8712480813264847 | Validation loss 2.809324860572815



 46%|████████████████████████████████▉                                      | 4643/10000 [10:32:02<12:02:03,  8.09s/it]

Epoch: 4642 | Training loss 2.8684428185224533 | Validation loss 2.791354328393936



 46%|████████████████████████████████▉                                      | 4644/10000 [10:32:10<12:02:15,  8.09s/it]

Epoch: 4643 | Training loss 2.8673961982131004 | Validation loss 2.7966725528240204



 46%|████████████████████████████████▉                                      | 4645/10000 [10:32:19<12:03:18,  8.10s/it]

Epoch: 4644 | Training loss 2.869284637272358 | Validation loss 2.7949166893959045



 46%|████████████████████████████████▉                                      | 4646/10000 [10:32:27<12:05:11,  8.13s/it]

Epoch: 4645 | Training loss 2.8697620928287506 | Validation loss 2.7971736192703247



 46%|████████████████████████████████▉                                      | 4647/10000 [10:32:35<12:02:41,  8.10s/it]

Epoch: 4646 | Training loss 2.8635482788085938 | Validation loss 2.792642444372177



 46%|█████████████████████████████████                                      | 4648/10000 [10:32:43<12:02:38,  8.10s/it]

Epoch: 4647 | Training loss 2.8678233176469803 | Validation loss 2.798720508813858



 46%|█████████████████████████████████                                      | 4649/10000 [10:32:51<12:01:50,  8.09s/it]

Epoch: 4648 | Training loss 2.866342268884182 | Validation loss 2.786835342645645



 46%|█████████████████████████████████                                      | 4650/10000 [10:32:59<12:01:27,  8.09s/it]

Epoch: 4649 | Training loss 2.870661422610283 | Validation loss 2.792365998029709



 47%|█████████████████████████████████                                      | 4651/10000 [10:33:07<12:00:48,  8.09s/it]

Epoch: 4650 | Training loss 2.8682723715901375 | Validation loss 2.7957044541835785



 47%|█████████████████████████████████                                      | 4652/10000 [10:33:15<11:56:06,  8.03s/it]

Epoch: 4651 | Training loss 2.868721902370453 | Validation loss 2.8009054958820343



 47%|█████████████████████████████████                                      | 4653/10000 [10:33:23<11:59:03,  8.07s/it]

Epoch: 4652 | Training loss 2.866934932768345 | Validation loss 2.795040637254715



 47%|█████████████████████████████████                                      | 4654/10000 [10:33:31<11:57:43,  8.06s/it]

Epoch: 4653 | Training loss 2.865199536085129 | Validation loss 2.7910505831241608



 47%|█████████████████████████████████                                      | 4655/10000 [10:33:39<11:57:11,  8.05s/it]

Epoch: 4654 | Training loss 2.872610352933407 | Validation loss 2.7897036373615265



 47%|█████████████████████████████████                                      | 4656/10000 [10:33:47<12:00:58,  8.09s/it]

Epoch: 4655 | Training loss 2.8620505332946777 | Validation loss 2.789143979549408



 47%|█████████████████████████████████                                      | 4657/10000 [10:33:56<12:01:48,  8.11s/it]

Epoch: 4656 | Training loss 2.866919554769993 | Validation loss 2.79282009601593



 47%|█████████████████████████████████                                      | 4658/10000 [10:34:04<11:59:53,  8.09s/it]

Epoch: 4657 | Training loss 2.866229496896267 | Validation loss 2.789268285036087



 47%|█████████████████████████████████                                      | 4659/10000 [10:34:12<11:58:39,  8.07s/it]

Epoch: 4658 | Training loss 2.8652032613754272 | Validation loss 2.793326497077942



 47%|█████████████████████████████████                                      | 4660/10000 [10:34:20<12:03:03,  8.12s/it]

Epoch: 4659 | Training loss 2.8658515959978104 | Validation loss 2.7924696505069733



 47%|█████████████████████████████████                                      | 4661/10000 [10:34:28<12:03:58,  8.14s/it]

Epoch: 4660 | Training loss 2.864544630050659 | Validation loss 2.793621838092804



 47%|█████████████████████████████████                                      | 4662/10000 [10:34:36<12:03:47,  8.14s/it]

Epoch: 4661 | Training loss 2.8633998259902 | Validation loss 2.792121320962906



 47%|█████████████████████████████████                                      | 4663/10000 [10:34:44<12:00:37,  8.10s/it]

Epoch: 4662 | Training loss 2.8683002069592476 | Validation loss 2.7987403869628906



 47%|█████████████████████████████████                                      | 4664/10000 [10:34:52<12:02:16,  8.12s/it]

Epoch: 4663 | Training loss 2.8637335672974586 | Validation loss 2.798000454902649



 47%|█████████████████████████████████                                      | 4665/10000 [10:35:00<12:01:37,  8.12s/it]

Epoch: 4664 | Training loss 2.8715301528573036 | Validation loss 2.796854317188263



 47%|█████████████████████████████████▏                                     | 4666/10000 [10:35:09<12:01:33,  8.12s/it]

Epoch: 4665 | Training loss 2.8648312762379646 | Validation loss 2.7900327146053314



 47%|█████████████████████████████████▏                                     | 4667/10000 [10:35:17<12:02:39,  8.13s/it]

Epoch: 4666 | Training loss 2.8653300628066063 | Validation loss 2.7903532683849335



 47%|█████████████████████████████████▏                                     | 4668/10000 [10:35:25<12:03:24,  8.14s/it]

Epoch: 4667 | Training loss 2.870047479867935 | Validation loss 2.7943031787872314



 47%|█████████████████████████████████▏                                     | 4669/10000 [10:35:33<12:00:11,  8.11s/it]

Epoch: 4668 | Training loss 2.8665343895554543 | Validation loss 2.790847212076187



 47%|█████████████████████████████████▏                                     | 4670/10000 [10:35:41<11:54:58,  8.05s/it]

Epoch: 4669 | Training loss 2.8719337061047554 | Validation loss 2.7992780208587646



 47%|█████████████████████████████████▏                                     | 4671/10000 [10:35:49<11:56:39,  8.07s/it]

Epoch: 4670 | Training loss 2.867458038032055 | Validation loss 2.791088193655014



 47%|█████████████████████████████████▏                                     | 4672/10000 [10:35:57<11:57:50,  8.08s/it]

Epoch: 4671 | Training loss 2.865133412182331 | Validation loss 2.792546898126602



 47%|█████████████████████████████████▏                                     | 4673/10000 [10:36:05<11:56:08,  8.07s/it]

Epoch: 4672 | Training loss 2.866016037762165 | Validation loss 2.7933194041252136



 47%|█████████████████████████████████▏                                     | 4674/10000 [10:36:13<11:55:17,  8.06s/it]

Epoch: 4673 | Training loss 2.863072469830513 | Validation loss 2.7935832738876343



 47%|█████████████████████████████████▏                                     | 4675/10000 [10:36:21<11:57:33,  8.09s/it]

Epoch: 4674 | Training loss 2.8675773218274117 | Validation loss 2.7899423241615295



 47%|█████████████████████████████████▏                                     | 4676/10000 [10:36:30<12:00:47,  8.12s/it]

Epoch: 4675 | Training loss 2.8651038482785225 | Validation loss 2.7882134318351746



 47%|█████████████████████████████████▏                                     | 4677/10000 [10:36:38<12:01:25,  8.13s/it]

Epoch: 4676 | Training loss 2.867594964802265 | Validation loss 2.795876443386078



 47%|█████████████████████████████████▏                                     | 4678/10000 [10:36:46<12:04:58,  8.17s/it]

Epoch: 4677 | Training loss 2.8699479028582573 | Validation loss 2.7997289896011353



 47%|█████████████████████████████████▏                                     | 4679/10000 [10:36:54<12:02:36,  8.15s/it]

Epoch: 4678 | Training loss 2.8729240149259567 | Validation loss 2.7930794656276703



 47%|█████████████████████████████████▏                                     | 4680/10000 [10:37:02<12:01:36,  8.14s/it]

Epoch: 4679 | Training loss 2.865131914615631 | Validation loss 2.793281853199005



 47%|█████████████████████████████████▏                                     | 4681/10000 [10:37:10<11:59:16,  8.11s/it]

Epoch: 4680 | Training loss 2.862262509763241 | Validation loss 2.7953866124153137



 47%|█████████████████████████████████▏                                     | 4682/10000 [10:37:18<12:00:05,  8.12s/it]

Epoch: 4681 | Training loss 2.8673442527651787 | Validation loss 2.789995014667511



 47%|█████████████████████████████████▏                                     | 4683/10000 [10:37:27<12:00:13,  8.13s/it]

Epoch: 4682 | Training loss 2.868891626596451 | Validation loss 2.796556442975998



 47%|█████████████████████████████████▎                                     | 4684/10000 [10:37:35<12:01:50,  8.15s/it]

Epoch: 4683 | Training loss 2.8686413764953613 | Validation loss 2.7931645810604095



 47%|█████████████████████████████████▎                                     | 4685/10000 [10:37:43<12:03:01,  8.16s/it]

Epoch: 4684 | Training loss 2.8662030026316643 | Validation loss 2.7948973178863525



 47%|█████████████████████████████████▎                                     | 4686/10000 [10:37:51<11:59:11,  8.12s/it]

Epoch: 4685 | Training loss 2.869602784514427 | Validation loss 2.7901026010513306



 47%|█████████████████████████████████▎                                     | 4687/10000 [10:37:59<11:59:22,  8.12s/it]

Epoch: 4686 | Training loss 2.8607083559036255 | Validation loss 2.791474163532257



 47%|█████████████████████████████████▎                                     | 4688/10000 [10:38:07<11:58:16,  8.11s/it]

Epoch: 4687 | Training loss 2.8679092079401016 | Validation loss 2.79554346203804



 47%|█████████████████████████████████▎                                     | 4689/10000 [10:38:15<11:57:47,  8.11s/it]

Epoch: 4688 | Training loss 2.870423622429371 | Validation loss 2.790780693292618



 47%|█████████████████████████████████▎                                     | 4690/10000 [10:38:23<11:59:09,  8.13s/it]

Epoch: 4689 | Training loss 2.8642938062548637 | Validation loss 2.790916830301285



 47%|█████████████████████████████████▎                                     | 4691/10000 [10:38:32<11:59:14,  8.13s/it]

Epoch: 4690 | Training loss 2.8653363958001137 | Validation loss 2.7896368503570557



 47%|█████████████████████████████████▎                                     | 4692/10000 [10:38:40<11:58:01,  8.12s/it]

Epoch: 4691 | Training loss 2.8653726130723953 | Validation loss 2.7884169816970825



 47%|█████████████████████████████████▎                                     | 4693/10000 [10:38:48<11:57:28,  8.11s/it]

Epoch: 4692 | Training loss 2.866096116602421 | Validation loss 2.7925431728363037



 47%|█████████████████████████████████▎                                     | 4694/10000 [10:38:56<11:55:26,  8.09s/it]

Epoch: 4693 | Training loss 2.863496296107769 | Validation loss 2.790733575820923



 47%|█████████████████████████████████▎                                     | 4695/10000 [10:39:04<11:58:33,  8.13s/it]

Epoch: 4694 | Training loss 2.86862463504076 | Validation loss 2.7930178344249725



 47%|█████████████████████████████████▎                                     | 4696/10000 [10:39:12<11:59:31,  8.14s/it]

Epoch: 4695 | Training loss 2.8676652759313583 | Validation loss 2.79515278339386



 47%|█████████████████████████████████▎                                     | 4697/10000 [10:39:20<11:58:20,  8.13s/it]

Epoch: 4696 | Training loss 2.864916242659092 | Validation loss 2.793879061937332



 47%|█████████████████████████████████▎                                     | 4698/10000 [10:39:28<11:56:15,  8.11s/it]

Epoch: 4697 | Training loss 2.8677852898836136 | Validation loss 2.7967062890529633



 47%|█████████████████████████████████▎                                     | 4699/10000 [10:39:36<11:56:00,  8.10s/it]

Epoch: 4698 | Training loss 2.8628236949443817 | Validation loss 2.793306827545166



 47%|█████████████████████████████████▎                                     | 4700/10000 [10:39:44<11:55:14,  8.10s/it]

Epoch: 4699 | Training loss 2.865908667445183 | Validation loss 2.800794243812561



 47%|█████████████████████████████████▍                                     | 4701/10000 [10:39:52<11:52:44,  8.07s/it]

Epoch: 4700 | Training loss 2.868767559528351 | Validation loss 2.8060511648654938



 47%|█████████████████████████████████▍                                     | 4702/10000 [10:40:01<11:53:50,  8.08s/it]

Epoch: 4701 | Training loss 2.8619256392121315 | Validation loss 2.7867332696914673



 47%|█████████████████████████████████▍                                     | 4703/10000 [10:40:09<11:54:33,  8.09s/it]

Epoch: 4702 | Training loss 2.860505275428295 | Validation loss 2.78605654835701



 47%|█████████████████████████████████▍                                     | 4704/10000 [10:40:17<11:54:36,  8.10s/it]

Epoch: 4703 | Training loss 2.869072824716568 | Validation loss 2.7928898632526398



 47%|█████████████████████████████████▍                                     | 4705/10000 [10:40:25<11:55:49,  8.11s/it]

Epoch: 4704 | Training loss 2.8704996407032013 | Validation loss 2.7929665446281433



 47%|█████████████████████████████████▍                                     | 4706/10000 [10:40:33<11:53:51,  8.09s/it]

Epoch: 4705 | Training loss 2.8600480780005455 | Validation loss 2.789622724056244



 47%|█████████████████████████████████▍                                     | 4707/10000 [10:40:41<11:54:04,  8.09s/it]

Epoch: 4706 | Training loss 2.8659958094358444 | Validation loss 2.792220413684845



 47%|█████████████████████████████████▍                                     | 4708/10000 [10:40:49<11:52:00,  8.07s/it]

Epoch: 4707 | Training loss 2.8660698607563972 | Validation loss 2.799826681613922



 47%|█████████████████████████████████▍                                     | 4709/10000 [10:40:57<11:53:30,  8.09s/it]

Epoch: 4708 | Training loss 2.8691930696368217 | Validation loss 2.7935658991336823



 47%|█████████████████████████████████▍                                     | 4710/10000 [10:41:05<11:50:44,  8.06s/it]

Epoch: 4709 | Training loss 2.8602515906095505 | Validation loss 2.793903738260269



 47%|█████████████████████████████████▍                                     | 4711/10000 [10:41:13<11:50:06,  8.06s/it]

Epoch: 4710 | Training loss 2.866469979286194 | Validation loss 2.787460684776306



 47%|█████████████████████████████████▍                                     | 4712/10000 [10:41:21<11:49:56,  8.06s/it]

Epoch: 4711 | Training loss 2.8629505410790443 | Validation loss 2.792448252439499



 47%|█████████████████████████████████▍                                     | 4713/10000 [10:41:29<11:50:11,  8.06s/it]

Epoch: 4712 | Training loss 2.8626295998692513 | Validation loss 2.791578620672226



 47%|█████████████████████████████████▍                                     | 4714/10000 [10:41:37<11:48:20,  8.04s/it]

Epoch: 4713 | Training loss 2.8704569712281227 | Validation loss 2.798705518245697



 47%|█████████████████████████████████▍                                     | 4715/10000 [10:41:46<11:50:13,  8.06s/it]

Epoch: 4714 | Training loss 2.8654869496822357 | Validation loss 2.798989713191986



 47%|█████████████████████████████████▍                                     | 4716/10000 [10:41:54<11:52:38,  8.09s/it]

Epoch: 4715 | Training loss 2.8656835332512856 | Validation loss 2.7911936342716217



 47%|█████████████████████████████████▍                                     | 4717/10000 [10:42:02<11:51:54,  8.09s/it]

Epoch: 4716 | Training loss 2.8694378063082695 | Validation loss 2.7938727140426636



 47%|█████████████████████████████████▍                                     | 4718/10000 [10:42:10<11:53:25,  8.10s/it]

Epoch: 4717 | Training loss 2.865780681371689 | Validation loss 2.7913763225078583



 47%|█████████████████████████████████▌                                     | 4719/10000 [10:42:18<11:54:41,  8.12s/it]

Epoch: 4718 | Training loss 2.87158589810133 | Validation loss 2.7986148595809937



 47%|█████████████████████████████████▌                                     | 4720/10000 [10:42:26<11:55:45,  8.13s/it]

Epoch: 4719 | Training loss 2.8680935353040695 | Validation loss 2.7962708175182343



 47%|█████████████████████████████████▌                                     | 4721/10000 [10:42:34<11:55:55,  8.14s/it]

Epoch: 4720 | Training loss 2.867071971297264 | Validation loss 2.7923513650894165



 47%|█████████████████████████████████▌                                     | 4722/10000 [10:42:43<11:55:19,  8.13s/it]

Epoch: 4721 | Training loss 2.8652210533618927 | Validation loss 2.7886836528778076



 47%|█████████████████████████████████▌                                     | 4723/10000 [10:42:51<11:57:31,  8.16s/it]

Epoch: 4722 | Training loss 2.867140494287014 | Validation loss 2.792565107345581



 47%|█████████████████████████████████▌                                     | 4724/10000 [10:42:59<11:55:31,  8.14s/it]

Epoch: 4723 | Training loss 2.8681154400110245 | Validation loss 2.800178289413452



 47%|█████████████████████████████████▌                                     | 4725/10000 [10:43:07<11:51:41,  8.10s/it]

Epoch: 4724 | Training loss 2.8699761629104614 | Validation loss 2.788886547088623



 47%|█████████████████████████████████▌                                     | 4726/10000 [10:43:15<11:52:11,  8.10s/it]

Epoch: 4725 | Training loss 2.8623049557209015 | Validation loss 2.791696071624756



 47%|█████████████████████████████████▌                                     | 4727/10000 [10:43:23<11:53:42,  8.12s/it]

Epoch: 4726 | Training loss 2.859995201230049 | Validation loss 2.7897831201553345



 47%|█████████████████████████████████▌                                     | 4728/10000 [10:43:31<11:56:21,  8.15s/it]

Epoch: 4727 | Training loss 2.8666686341166496 | Validation loss 2.7934268414974213



 47%|█████████████████████████████████▌                                     | 4729/10000 [10:43:39<11:54:28,  8.13s/it]

Epoch: 4728 | Training loss 2.8680514618754387 | Validation loss 2.78973925113678



 47%|█████████████████████████████████▌                                     | 4730/10000 [10:43:48<11:53:28,  8.12s/it]

Epoch: 4729 | Training loss 2.8637697845697403 | Validation loss 2.794171392917633



 47%|█████████████████████████████████▌                                     | 4731/10000 [10:43:56<11:56:18,  8.16s/it]

Epoch: 4730 | Training loss 2.8705000430345535 | Validation loss 2.7978387475013733



 47%|█████████████████████████████████▌                                     | 4732/10000 [10:44:04<11:55:09,  8.15s/it]

Epoch: 4731 | Training loss 2.8664123117923737 | Validation loss 2.7948399782180786



 47%|█████████████████████████████████▌                                     | 4733/10000 [10:44:12<11:56:22,  8.16s/it]

Epoch: 4732 | Training loss 2.8686970323324203 | Validation loss 2.7982828617095947



 47%|█████████████████████████████████▌                                     | 4734/10000 [10:44:20<11:54:22,  8.14s/it]

Epoch: 4733 | Training loss 2.8617829009890556 | Validation loss 2.7936691343784332



 47%|█████████████████████████████████▌                                     | 4735/10000 [10:44:28<11:57:25,  8.18s/it]

Epoch: 4734 | Training loss 2.872583545744419 | Validation loss 2.7965683341026306



 47%|█████████████████████████████████▋                                     | 4736/10000 [10:44:36<11:54:07,  8.14s/it]

Epoch: 4735 | Training loss 2.8693533912301064 | Validation loss 2.7917077839374542



 47%|█████████████████████████████████▋                                     | 4737/10000 [10:44:45<11:54:58,  8.15s/it]

Epoch: 4736 | Training loss 2.8646320030093193 | Validation loss 2.7968287765979767



 47%|█████████████████████████████████▋                                     | 4738/10000 [10:44:53<11:53:32,  8.14s/it]

Epoch: 4737 | Training loss 2.874572977423668 | Validation loss 2.7981203496456146



 47%|█████████████████████████████████▋                                     | 4739/10000 [10:45:01<11:53:45,  8.14s/it]

Epoch: 4738 | Training loss 2.871162883937359 | Validation loss 2.7925593852996826



 47%|█████████████████████████████████▋                                     | 4740/10000 [10:45:09<11:54:14,  8.15s/it]

Epoch: 4739 | Training loss 2.8686658516526222 | Validation loss 2.7939800322055817



 47%|█████████████████████████████████▋                                     | 4741/10000 [10:45:17<11:53:27,  8.14s/it]

Epoch: 4740 | Training loss 2.8688458800315857 | Validation loss 2.795161873102188



 47%|█████████████████████████████████▋                                     | 4742/10000 [10:45:25<11:56:03,  8.17s/it]

Epoch: 4741 | Training loss 2.859643369913101 | Validation loss 2.7931018471717834



 47%|█████████████████████████████████▋                                     | 4743/10000 [10:45:34<11:56:32,  8.18s/it]

Epoch: 4742 | Training loss 2.870528168976307 | Validation loss 2.7976827025413513



 47%|█████████████████████████████████▋                                     | 4744/10000 [10:45:42<11:56:13,  8.18s/it]

Epoch: 4743 | Training loss 2.8635132163763046 | Validation loss 2.7876373529434204



 47%|█████████████████████████████████▋                                     | 4745/10000 [10:45:50<11:53:49,  8.15s/it]

Epoch: 4744 | Training loss 2.876048617064953 | Validation loss 2.793459564447403



 47%|█████████████████████████████████▋                                     | 4746/10000 [10:45:58<11:49:08,  8.10s/it]

Epoch: 4745 | Training loss 2.865282580256462 | Validation loss 2.80013245344162



 47%|█████████████████████████████████▋                                     | 4747/10000 [10:46:06<11:47:54,  8.09s/it]

Epoch: 4746 | Training loss 2.869970701634884 | Validation loss 2.7935591340065002



 47%|█████████████████████████████████▋                                     | 4748/10000 [10:46:14<11:47:47,  8.09s/it]

Epoch: 4747 | Training loss 2.8680091723799706 | Validation loss 2.7917696237564087



 47%|█████████████████████████████████▋                                     | 4749/10000 [10:46:22<11:49:10,  8.10s/it]

Epoch: 4748 | Training loss 2.8689045533537865 | Validation loss 2.7969477474689484



 48%|█████████████████████████████████▋                                     | 4750/10000 [10:46:30<11:45:53,  8.07s/it]

Epoch: 4749 | Training loss 2.8660033345222473 | Validation loss 2.794386565685272



 48%|█████████████████████████████████▋                                     | 4751/10000 [10:46:38<11:44:38,  8.05s/it]

Epoch: 4750 | Training loss 2.865340270102024 | Validation loss 2.7893953919410706



 48%|█████████████████████████████████▋                                     | 4752/10000 [10:46:46<11:43:20,  8.04s/it]

Epoch: 4751 | Training loss 2.864715486764908 | Validation loss 2.7918813228607178



 48%|█████████████████████████████████▋                                     | 4753/10000 [10:46:54<11:45:40,  8.07s/it]

Epoch: 4752 | Training loss 2.8694712221622467 | Validation loss 2.7908293306827545



 48%|█████████████████████████████████▊                                     | 4754/10000 [10:47:02<11:46:26,  8.08s/it]

Epoch: 4753 | Training loss 2.870416261255741 | Validation loss 2.799201160669327



 48%|█████████████████████████████████▊                                     | 4755/10000 [10:47:10<11:46:30,  8.08s/it]

Epoch: 4754 | Training loss 2.8683503940701485 | Validation loss 2.7929092049598694



 48%|█████████████████████████████████▊                                     | 4756/10000 [10:47:19<11:45:36,  8.07s/it]

Epoch: 4755 | Training loss 2.858257442712784 | Validation loss 2.799083858728409



 48%|█████████████████████████████████▊                                     | 4757/10000 [10:47:27<11:45:22,  8.07s/it]

Epoch: 4756 | Training loss 2.866428181529045 | Validation loss 2.802602857351303



 48%|█████████████████████████████████▊                                     | 4758/10000 [10:47:35<11:45:12,  8.07s/it]

Epoch: 4757 | Training loss 2.8664862364530563 | Validation loss 2.787459582090378



 48%|█████████████████████████████████▊                                     | 4759/10000 [10:47:43<11:42:12,  8.04s/it]

Epoch: 4758 | Training loss 2.868377059698105 | Validation loss 2.792249470949173



 48%|█████████████████████████████████▊                                     | 4760/10000 [10:47:51<11:43:41,  8.06s/it]

Epoch: 4759 | Training loss 2.8610460832715034 | Validation loss 2.787226229906082



 48%|█████████████████████████████████▊                                     | 4761/10000 [10:47:59<11:46:26,  8.09s/it]

Epoch: 4760 | Training loss 2.869736149907112 | Validation loss 2.7914515137672424



 48%|█████████████████████████████████▊                                     | 4762/10000 [10:48:07<11:44:58,  8.08s/it]

Epoch: 4761 | Training loss 2.8640266582369804 | Validation loss 2.7909819185733795



 48%|█████████████████████████████████▊                                     | 4763/10000 [10:48:15<11:44:17,  8.07s/it]

Epoch: 4762 | Training loss 2.866432562470436 | Validation loss 2.790776014328003



 48%|█████████████████████████████████▊                                     | 4764/10000 [10:48:23<11:44:59,  8.08s/it]

Epoch: 4763 | Training loss 2.8653882518410683 | Validation loss 2.7898800671100616



 48%|█████████████████████████████████▊                                     | 4765/10000 [10:48:31<11:45:28,  8.09s/it]

Epoch: 4764 | Training loss 2.862771764397621 | Validation loss 2.7904751002788544



 48%|█████████████████████████████████▊                                     | 4766/10000 [10:48:39<11:46:10,  8.10s/it]

Epoch: 4765 | Training loss 2.8641374185681343 | Validation loss 2.791474759578705



 48%|█████████████████████████████████▊                                     | 4767/10000 [10:48:47<11:47:27,  8.11s/it]

Epoch: 4766 | Training loss 2.867085851728916 | Validation loss 2.7905176877975464



 48%|█████████████████████████████████▊                                     | 4768/10000 [10:48:55<11:44:39,  8.08s/it]

Epoch: 4767 | Training loss 2.86614166200161 | Validation loss 2.790816217660904



 48%|█████████████████████████████████▊                                     | 4769/10000 [10:49:04<11:42:57,  8.06s/it]

Epoch: 4768 | Training loss 2.8651366904377937 | Validation loss 2.789203256368637



 48%|█████████████████████████████████▊                                     | 4770/10000 [10:49:12<11:42:58,  8.06s/it]

Epoch: 4769 | Training loss 2.8646534234285355 | Validation loss 2.7976073920726776



 48%|█████████████████████████████████▊                                     | 4771/10000 [10:49:20<11:45:01,  8.09s/it]

Epoch: 4770 | Training loss 2.8654862120747566 | Validation loss 2.796854317188263



 48%|█████████████████████████████████▉                                     | 4772/10000 [10:49:28<11:45:16,  8.09s/it]

Epoch: 4771 | Training loss 2.8670770823955536 | Validation loss 2.7911345064640045



 48%|█████████████████████████████████▉                                     | 4773/10000 [10:49:36<11:47:50,  8.13s/it]

Epoch: 4772 | Training loss 2.862121783196926 | Validation loss 2.7895600497722626



 48%|█████████████████████████████████▉                                     | 4774/10000 [10:49:44<11:45:27,  8.10s/it]

Epoch: 4773 | Training loss 2.8648014962673187 | Validation loss 2.7924564480781555



 48%|█████████████████████████████████▉                                     | 4775/10000 [10:49:52<11:45:57,  8.11s/it]

Epoch: 4774 | Training loss 2.867805637419224 | Validation loss 2.7887127101421356



 48%|█████████████████████████████████▉                                     | 4776/10000 [10:50:00<11:44:08,  8.09s/it]

Epoch: 4775 | Training loss 2.864997275173664 | Validation loss 2.7916155457496643



 48%|█████████████████████████████████▉                                     | 4777/10000 [10:50:08<11:42:06,  8.07s/it]

Epoch: 4776 | Training loss 2.8658612221479416 | Validation loss 2.7962377667427063



 48%|█████████████████████████████████▉                                     | 4778/10000 [10:50:16<11:45:46,  8.11s/it]

Epoch: 4777 | Training loss 2.8635252937674522 | Validation loss 2.7917234897613525



 48%|█████████████████████████████████▉                                     | 4779/10000 [10:50:25<11:48:01,  8.14s/it]

Epoch: 4778 | Training loss 2.870253160595894 | Validation loss 2.7894985675811768



 48%|█████████████████████████████████▉                                     | 4780/10000 [10:50:33<11:48:00,  8.14s/it]

Epoch: 4779 | Training loss 2.8738325089216232 | Validation loss 2.8017694652080536



 48%|█████████████████████████████████▉                                     | 4781/10000 [10:50:41<11:51:47,  8.18s/it]

Epoch: 4780 | Training loss 2.8697426170110703 | Validation loss 2.7984528839588165



 48%|█████████████████████████████████▉                                     | 4782/10000 [10:50:49<11:50:21,  8.17s/it]

Epoch: 4781 | Training loss 2.870978891849518 | Validation loss 2.7956849336624146



 48%|█████████████████████████████████▉                                     | 4783/10000 [10:50:57<11:51:00,  8.18s/it]

Epoch: 4782 | Training loss 2.862079583108425 | Validation loss 2.789937347173691



 48%|█████████████████████████████████▉                                     | 4784/10000 [10:51:06<11:49:23,  8.16s/it]

Epoch: 4783 | Training loss 2.8696733564138412 | Validation loss 2.796259105205536



 48%|█████████████████████████████████▉                                     | 4785/10000 [10:51:14<11:48:07,  8.15s/it]

Epoch: 4784 | Training loss 2.8652744591236115 | Validation loss 2.7903659343719482



 48%|█████████████████████████████████▉                                     | 4786/10000 [10:51:22<11:46:55,  8.13s/it]

Epoch: 4785 | Training loss 2.8639856353402138 | Validation loss 2.7928139865398407



 48%|█████████████████████████████████▉                                     | 4787/10000 [10:51:30<11:48:23,  8.15s/it]

Epoch: 4786 | Training loss 2.8651829585433006 | Validation loss 2.7870818078517914



 48%|█████████████████████████████████▉                                     | 4788/10000 [10:51:38<11:46:30,  8.13s/it]

Epoch: 4787 | Training loss 2.8643612042069435 | Validation loss 2.7915675044059753



 48%|██████████████████████████████████                                     | 4789/10000 [10:51:46<11:46:51,  8.14s/it]

Epoch: 4788 | Training loss 2.8647175803780556 | Validation loss 2.7871008813381195



 48%|██████████████████████████████████                                     | 4790/10000 [10:51:54<11:44:25,  8.11s/it]

Epoch: 4789 | Training loss 2.86805409938097 | Validation loss 2.7941017746925354



 48%|██████████████████████████████████                                     | 4791/10000 [10:52:02<11:45:14,  8.12s/it]

Epoch: 4790 | Training loss 2.867860808968544 | Validation loss 2.7949711978435516



 48%|██████████████████████████████████                                     | 4792/10000 [10:52:11<11:44:55,  8.12s/it]

Epoch: 4791 | Training loss 2.8719236701726913 | Validation loss 2.797892928123474



 48%|██████████████████████████████████                                     | 4793/10000 [10:52:19<11:43:40,  8.11s/it]

Epoch: 4792 | Training loss 2.8684979751706123 | Validation loss 2.7947872281074524



 48%|██████████████████████████████████                                     | 4794/10000 [10:52:27<11:43:52,  8.11s/it]

Epoch: 4793 | Training loss 2.862223044037819 | Validation loss 2.790476083755493



 48%|██████████████████████████████████                                     | 4795/10000 [10:52:35<11:47:32,  8.16s/it]

Epoch: 4794 | Training loss 2.867911070585251 | Validation loss 2.7894389629364014



 48%|██████████████████████████████████                                     | 4796/10000 [10:52:43<11:46:00,  8.14s/it]

Epoch: 4795 | Training loss 2.8628852888941765 | Validation loss 2.79166316986084



 48%|██████████████████████████████████                                     | 4797/10000 [10:52:51<11:45:35,  8.14s/it]

Epoch: 4796 | Training loss 2.864652507007122 | Validation loss 2.7943018674850464



 48%|██████████████████████████████████                                     | 4798/10000 [10:52:59<11:44:07,  8.12s/it]

Epoch: 4797 | Training loss 2.857545278966427 | Validation loss 2.7840768694877625



 48%|██████████████████████████████████                                     | 4799/10000 [10:53:07<11:43:40,  8.12s/it]

Epoch: 4798 | Training loss 2.864163674414158 | Validation loss 2.789627730846405



 48%|██████████████████████████████████                                     | 4800/10000 [10:53:15<11:41:09,  8.09s/it]

Epoch: 4799 | Training loss 2.866692788898945 | Validation loss 2.789096713066101



 48%|██████████████████████████████████                                     | 4801/10000 [10:53:24<11:44:20,  8.13s/it]

Epoch: 4800 | Training loss 2.8584209010004997 | Validation loss 2.7860105335712433



 48%|██████████████████████████████████                                     | 4802/10000 [10:53:32<11:45:05,  8.14s/it]

Epoch: 4801 | Training loss 2.862492486834526 | Validation loss 2.789294719696045



 48%|██████████████████████████████████                                     | 4803/10000 [10:53:40<11:47:43,  8.17s/it]

Epoch: 4802 | Training loss 2.8687741309404373 | Validation loss 2.7923667430877686



 48%|██████████████████████████████████                                     | 4804/10000 [10:53:48<11:47:01,  8.16s/it]

Epoch: 4803 | Training loss 2.8724803552031517 | Validation loss 2.799009293317795



 48%|██████████████████████████████████                                     | 4805/10000 [10:53:56<11:46:53,  8.16s/it]

Epoch: 4804 | Training loss 2.864509031176567 | Validation loss 2.7905540466308594



 48%|██████████████████████████████████                                     | 4806/10000 [10:54:05<11:51:48,  8.22s/it]

Epoch: 4805 | Training loss 2.8688889294862747 | Validation loss 2.7917501032352448



 48%|██████████████████████████████████▏                                    | 4807/10000 [10:54:13<11:47:20,  8.17s/it]

Epoch: 4806 | Training loss 2.8668143153190613 | Validation loss 2.7909503877162933



 48%|██████████████████████████████████▏                                    | 4808/10000 [10:54:21<11:44:59,  8.15s/it]

Epoch: 4807 | Training loss 2.866075947880745 | Validation loss 2.7926204800605774



 48%|██████████████████████████████████▏                                    | 4809/10000 [10:54:29<11:44:33,  8.14s/it]

Epoch: 4808 | Training loss 2.862281270325184 | Validation loss 2.791817933320999



 48%|██████████████████████████████████▏                                    | 4810/10000 [10:54:37<11:44:10,  8.14s/it]

Epoch: 4809 | Training loss 2.8616850152611732 | Validation loss 2.7887805104255676



 48%|██████████████████████████████████▏                                    | 4811/10000 [10:54:45<11:41:51,  8.12s/it]

Epoch: 4810 | Training loss 2.8716496005654335 | Validation loss 2.794788032770157



 48%|██████████████████████████████████▏                                    | 4812/10000 [10:54:53<11:42:55,  8.13s/it]

Epoch: 4811 | Training loss 2.8636372461915016 | Validation loss 2.7955636978149414



 48%|██████████████████████████████████▏                                    | 4813/10000 [10:55:01<11:39:41,  8.09s/it]

Epoch: 4812 | Training loss 2.8701289892196655 | Validation loss 2.7915635108947754



 48%|██████████████████████████████████▏                                    | 4814/10000 [10:55:09<11:39:44,  8.10s/it]

Epoch: 4813 | Training loss 2.8675225526094437 | Validation loss 2.7923487424850464



 48%|██████████████████████████████████▏                                    | 4815/10000 [10:55:18<11:40:45,  8.11s/it]

Epoch: 4814 | Training loss 2.8693583235144615 | Validation loss 2.790514886379242



 48%|██████████████████████████████████▏                                    | 4816/10000 [10:55:26<11:40:23,  8.11s/it]

Epoch: 4815 | Training loss 2.870850592851639 | Validation loss 2.7922355830669403



 48%|██████████████████████████████████▏                                    | 4817/10000 [10:55:34<11:40:05,  8.10s/it]

Epoch: 4816 | Training loss 2.8654970824718475 | Validation loss 2.7883764803409576



 48%|██████████████████████████████████▏                                    | 4818/10000 [10:55:42<11:36:36,  8.07s/it]

Epoch: 4817 | Training loss 2.8687495067715645 | Validation loss 2.790294647216797



 48%|██████████████████████████████████▏                                    | 4819/10000 [10:55:50<11:37:00,  8.07s/it]

Epoch: 4818 | Training loss 2.8648151382803917 | Validation loss 2.7877713441848755



 48%|██████████████████████████████████▏                                    | 4820/10000 [10:55:58<11:37:37,  8.08s/it]

Epoch: 4819 | Training loss 2.865119941532612 | Validation loss 2.787806957960129



 48%|██████████████████████████████████▏                                    | 4821/10000 [10:56:06<11:40:02,  8.11s/it]

Epoch: 4820 | Training loss 2.87333545088768 | Validation loss 2.7929336726665497



 48%|██████████████████████████████████▏                                    | 4822/10000 [10:56:14<11:39:40,  8.11s/it]

Epoch: 4821 | Training loss 2.8598139956593513 | Validation loss 2.7874253392219543



 48%|██████████████████████████████████▏                                    | 4823/10000 [10:56:23<11:43:59,  8.16s/it]

Epoch: 4822 | Training loss 2.8674700409173965 | Validation loss 2.801113337278366



 48%|██████████████████████████████████▎                                    | 4824/10000 [10:56:31<11:43:40,  8.16s/it]

Epoch: 4823 | Training loss 2.8654408305883408 | Validation loss 2.792259156703949



 48%|██████████████████████████████████▎                                    | 4825/10000 [10:56:39<11:43:46,  8.16s/it]

Epoch: 4824 | Training loss 2.8647714257240295 | Validation loss 2.7900129556655884



 48%|██████████████████████████████████▎                                    | 4826/10000 [10:56:47<11:42:55,  8.15s/it]

Epoch: 4825 | Training loss 2.8668747544288635 | Validation loss 2.7995517551898956



 48%|██████████████████████████████████▎                                    | 4827/10000 [10:56:55<11:42:18,  8.15s/it]

Epoch: 4826 | Training loss 2.8680356815457344 | Validation loss 2.7908187806606293



 48%|██████████████████████████████████▎                                    | 4828/10000 [10:57:03<11:42:12,  8.15s/it]

Epoch: 4827 | Training loss 2.8692255914211273 | Validation loss 2.793247252702713



 48%|██████████████████████████████████▎                                    | 4829/10000 [10:57:11<11:41:40,  8.14s/it]

Epoch: 4828 | Training loss 2.867637127637863 | Validation loss 2.790298044681549



 48%|██████████████████████████████████▎                                    | 4830/10000 [10:57:19<11:40:29,  8.13s/it]

Epoch: 4829 | Training loss 2.8667245656251907 | Validation loss 2.788937956094742



 48%|██████████████████████████████████▎                                    | 4831/10000 [10:57:27<11:36:52,  8.09s/it]

Epoch: 4830 | Training loss 2.8664065822958946 | Validation loss 2.7887770533561707



 48%|██████████████████████████████████▎                                    | 4832/10000 [10:57:36<11:37:57,  8.10s/it]

Epoch: 4831 | Training loss 2.8638283163309097 | Validation loss 2.794941693544388



 48%|██████████████████████████████████▎                                    | 4833/10000 [10:57:44<11:40:44,  8.14s/it]

Epoch: 4832 | Training loss 2.863959953188896 | Validation loss 2.7891087532043457



 48%|██████████████████████████████████▎                                    | 4834/10000 [10:57:52<11:42:06,  8.15s/it]

Epoch: 4833 | Training loss 2.863579012453556 | Validation loss 2.7911438643932343



 48%|██████████████████████████████████▎                                    | 4835/10000 [10:58:00<11:41:49,  8.15s/it]

Epoch: 4834 | Training loss 2.8653702288866043 | Validation loss 2.796169728040695



 48%|██████████████████████████████████▎                                    | 4836/10000 [10:58:08<11:41:09,  8.15s/it]

Epoch: 4835 | Training loss 2.8640993759036064 | Validation loss 2.7925897240638733



 48%|██████████████████████████████████▎                                    | 4837/10000 [10:58:16<11:38:13,  8.11s/it]

Epoch: 4836 | Training loss 2.8616848587989807 | Validation loss 2.7909786701202393



 48%|██████████████████████████████████▎                                    | 4838/10000 [10:58:25<11:41:11,  8.15s/it]

Epoch: 4837 | Training loss 2.862176939845085 | Validation loss 2.7874266505241394



 48%|██████████████████████████████████▎                                    | 4839/10000 [10:58:33<11:41:31,  8.16s/it]

Epoch: 4838 | Training loss 2.868847981095314 | Validation loss 2.792402505874634



 48%|██████████████████████████████████▎                                    | 4840/10000 [10:58:41<11:38:55,  8.13s/it]

Epoch: 4839 | Training loss 2.869372084736824 | Validation loss 2.7870887517929077



 48%|██████████████████████████████████▎                                    | 4841/10000 [10:58:49<11:37:47,  8.12s/it]

Epoch: 4840 | Training loss 2.863822638988495 | Validation loss 2.789302885532379



 48%|██████████████████████████████████▍                                    | 4842/10000 [10:58:57<11:38:08,  8.12s/it]

Epoch: 4841 | Training loss 2.863425984978676 | Validation loss 2.7987641990184784



 48%|██████████████████████████████████▍                                    | 4843/10000 [10:59:05<11:37:56,  8.12s/it]

Epoch: 4842 | Training loss 2.8668560087680817 | Validation loss 2.7890492975711823



 48%|██████████████████████████████████▍                                    | 4844/10000 [10:59:13<11:37:45,  8.12s/it]

Epoch: 4843 | Training loss 2.8649288713932037 | Validation loss 2.7910214960575104



 48%|██████████████████████████████████▍                                    | 4845/10000 [10:59:21<11:39:12,  8.14s/it]

Epoch: 4844 | Training loss 2.8621531650424004 | Validation loss 2.791094869375229



 48%|██████████████████████████████████▍                                    | 4846/10000 [10:59:30<11:40:31,  8.16s/it]

Epoch: 4845 | Training loss 2.8663595467805862 | Validation loss 2.79321950674057



 48%|██████████████████████████████████▍                                    | 4847/10000 [10:59:38<11:42:58,  8.19s/it]

Epoch: 4846 | Training loss 2.8692542910575867 | Validation loss 2.793324798345566



 48%|██████████████████████████████████▍                                    | 4848/10000 [10:59:46<11:43:34,  8.19s/it]

Epoch: 4847 | Training loss 2.862070545554161 | Validation loss 2.791778028011322



 48%|██████████████████████████████████▍                                    | 4849/10000 [10:59:54<11:37:45,  8.13s/it]

Epoch: 4848 | Training loss 2.863522283732891 | Validation loss 2.789303094148636



 48%|██████████████████████████████████▍                                    | 4850/10000 [11:00:02<11:36:52,  8.12s/it]

Epoch: 4849 | Training loss 2.866605408489704 | Validation loss 2.7924516797065735



 49%|██████████████████████████████████▍                                    | 4851/10000 [11:00:10<11:36:22,  8.11s/it]

Epoch: 4850 | Training loss 2.8662894815206528 | Validation loss 2.790506601333618



 49%|██████████████████████████████████▍                                    | 4852/10000 [11:00:18<11:33:53,  8.09s/it]

Epoch: 4851 | Training loss 2.862053468823433 | Validation loss 2.7860527634620667



 49%|██████████████████████████████████▍                                    | 4853/10000 [11:00:26<11:35:44,  8.11s/it]

Epoch: 4852 | Training loss 2.872962102293968 | Validation loss 2.792114794254303



 49%|██████████████████████████████████▍                                    | 4854/10000 [11:00:35<11:34:49,  8.10s/it]

Epoch: 4853 | Training loss 2.864329144358635 | Validation loss 2.7927721738815308



 49%|██████████████████████████████████▍                                    | 4855/10000 [11:00:43<11:35:36,  8.11s/it]

Epoch: 4854 | Training loss 2.8647119104862213 | Validation loss 2.7885514199733734



 49%|██████████████████████████████████▍                                    | 4856/10000 [11:00:51<11:35:40,  8.11s/it]

Epoch: 4855 | Training loss 2.86506337672472 | Validation loss 2.7892889380455017



 49%|██████████████████████████████████▍                                    | 4857/10000 [11:00:59<11:35:33,  8.11s/it]

Epoch: 4856 | Training loss 2.863062433898449 | Validation loss 2.796273946762085



 49%|██████████████████████████████████▍                                    | 4858/10000 [11:01:07<11:34:14,  8.10s/it]

Epoch: 4857 | Training loss 2.8631544187664986 | Validation loss 2.7864967584609985



 49%|██████████████████████████████████▍                                    | 4859/10000 [11:01:15<11:34:09,  8.10s/it]

Epoch: 4858 | Training loss 2.8654228672385216 | Validation loss 2.793760508298874



 49%|██████████████████████████████████▌                                    | 4860/10000 [11:01:23<11:35:34,  8.12s/it]

Epoch: 4859 | Training loss 2.8678745552897453 | Validation loss 2.7902412116527557



 49%|██████████████████████████████████▌                                    | 4861/10000 [11:01:31<11:35:55,  8.13s/it]

Epoch: 4860 | Training loss 2.872405394911766 | Validation loss 2.789664328098297



 49%|██████████████████████████████████▌                                    | 4862/10000 [11:01:40<11:36:03,  8.13s/it]

Epoch: 4861 | Training loss 2.870882786810398 | Validation loss 2.7916844487190247



 49%|██████████████████████████████████▌                                    | 4863/10000 [11:01:48<11:31:36,  8.08s/it]

Epoch: 4862 | Training loss 2.863118290901184 | Validation loss 2.7900432646274567



 49%|██████████████████████████████████▌                                    | 4864/10000 [11:01:56<11:30:03,  8.06s/it]

Epoch: 4863 | Training loss 2.8638110384345055 | Validation loss 2.792364776134491



 49%|██████████████████████████████████▌                                    | 4865/10000 [11:02:04<11:30:57,  8.07s/it]

Epoch: 4864 | Training loss 2.868775501847267 | Validation loss 2.789082020521164



 49%|██████████████████████████████████▌                                    | 4866/10000 [11:02:12<11:32:18,  8.09s/it]

Epoch: 4865 | Training loss 2.867214947938919 | Validation loss 2.7923813462257385



 49%|██████████████████████████████████▌                                    | 4867/10000 [11:02:20<11:31:10,  8.08s/it]

Epoch: 4866 | Training loss 2.860712416470051 | Validation loss 2.791999340057373



 49%|██████████████████████████████████▌                                    | 4868/10000 [11:02:28<11:32:20,  8.09s/it]

Epoch: 4867 | Training loss 2.8694705218076706 | Validation loss 2.7944407165050507



 49%|██████████████████████████████████▌                                    | 4869/10000 [11:02:36<11:33:09,  8.11s/it]

Epoch: 4868 | Training loss 2.8663994148373604 | Validation loss 2.7889354825019836



 49%|██████████████████████████████████▌                                    | 4870/10000 [11:02:44<11:32:53,  8.10s/it]

Epoch: 4869 | Training loss 2.8653538525104523 | Validation loss 2.790917992591858



 49%|██████████████████████████████████▌                                    | 4871/10000 [11:02:52<11:32:18,  8.10s/it]

Epoch: 4870 | Training loss 2.8638506531715393 | Validation loss 2.786855399608612



 49%|██████████████████████████████████▌                                    | 4872/10000 [11:03:00<11:31:51,  8.10s/it]

Epoch: 4871 | Training loss 2.8646915033459663 | Validation loss 2.790261596441269



 49%|██████████████████████████████████▌                                    | 4873/10000 [11:03:08<11:30:16,  8.08s/it]

Epoch: 4872 | Training loss 2.8708038553595543 | Validation loss 2.7882589399814606



 49%|██████████████████████████████████▌                                    | 4874/10000 [11:03:16<11:30:40,  8.08s/it]

Epoch: 4873 | Training loss 2.871632233262062 | Validation loss 2.7946211397647858



 49%|██████████████████████████████████▌                                    | 4875/10000 [11:03:25<11:34:39,  8.13s/it]

Epoch: 4874 | Training loss 2.8641103953123093 | Validation loss 2.791234940290451



 49%|██████████████████████████████████▌                                    | 4876/10000 [11:03:33<11:35:44,  8.15s/it]

Epoch: 4875 | Training loss 2.8652939051389694 | Validation loss 2.79040664434433



 49%|██████████████████████████████████▋                                    | 4877/10000 [11:03:41<11:32:29,  8.11s/it]

Epoch: 4876 | Training loss 2.860284425318241 | Validation loss 2.7872088849544525



 49%|██████████████████████████████████▋                                    | 4878/10000 [11:03:49<11:34:08,  8.13s/it]

Epoch: 4877 | Training loss 2.8655722960829735 | Validation loss 2.7871575951576233



 49%|██████████████████████████████████▋                                    | 4879/10000 [11:03:57<11:32:28,  8.11s/it]

Epoch: 4878 | Training loss 2.865913949906826 | Validation loss 2.7932104766368866



 49%|██████████████████████████████████▋                                    | 4880/10000 [11:04:05<11:31:43,  8.11s/it]

Epoch: 4879 | Training loss 2.8608309254050255 | Validation loss 2.791399270296097



 49%|██████████████████████████████████▋                                    | 4881/10000 [11:04:13<11:31:30,  8.11s/it]

Epoch: 4880 | Training loss 2.8679667562246323 | Validation loss 2.790093630552292



 49%|██████████████████████████████████▋                                    | 4882/10000 [11:04:22<11:32:07,  8.11s/it]

Epoch: 4881 | Training loss 2.8675996363162994 | Validation loss 2.791584759950638



 49%|██████████████████████████████████▋                                    | 4883/10000 [11:04:30<11:30:56,  8.10s/it]

Epoch: 4882 | Training loss 2.8610228076577187 | Validation loss 2.79404354095459



 49%|██████████████████████████████████▋                                    | 4884/10000 [11:04:38<11:30:48,  8.10s/it]

Epoch: 4883 | Training loss 2.8652016445994377 | Validation loss 2.7924049496650696



 49%|██████████████████████████████████▋                                    | 4885/10000 [11:04:46<11:28:40,  8.08s/it]

Epoch: 4884 | Training loss 2.865961514413357 | Validation loss 2.801801711320877



 49%|██████████████████████████████████▋                                    | 4886/10000 [11:04:54<11:28:17,  8.08s/it]

Epoch: 4885 | Training loss 2.868934854865074 | Validation loss 2.789256066083908



 49%|██████████████████████████████████▋                                    | 4887/10000 [11:05:02<11:30:03,  8.10s/it]

Epoch: 4886 | Training loss 2.8688458651304245 | Validation loss 2.7942081689834595



 49%|██████████████████████████████████▋                                    | 4888/10000 [11:05:10<11:31:43,  8.12s/it]

Epoch: 4887 | Training loss 2.8647794350981712 | Validation loss 2.7870515882968903



 49%|██████████████████████████████████▋                                    | 4889/10000 [11:05:18<11:32:34,  8.13s/it]

Epoch: 4888 | Training loss 2.8648007065057755 | Validation loss 2.792332887649536



 49%|██████████████████████████████████▋                                    | 4890/10000 [11:05:26<11:35:18,  8.16s/it]

Epoch: 4889 | Training loss 2.865638844668865 | Validation loss 2.7872640192508698



 49%|██████████████████████████████████▋                                    | 4891/10000 [11:05:35<11:35:59,  8.17s/it]

Epoch: 4890 | Training loss 2.869493179023266 | Validation loss 2.792402058839798



 49%|██████████████████████████████████▋                                    | 4892/10000 [11:05:43<11:33:36,  8.15s/it]

Epoch: 4891 | Training loss 2.867817647755146 | Validation loss 2.7924366891384125



 49%|██████████████████████████████████▋                                    | 4893/10000 [11:05:51<11:29:59,  8.11s/it]

Epoch: 4892 | Training loss 2.868359297513962 | Validation loss 2.798483520746231



 49%|██████████████████████████████████▋                                    | 4894/10000 [11:05:59<11:27:08,  8.07s/it]

Epoch: 4893 | Training loss 2.8624031245708466 | Validation loss 2.7896443605422974



 49%|██████████████████████████████████▊                                    | 4895/10000 [11:06:07<11:28:52,  8.10s/it]

Epoch: 4894 | Training loss 2.863607734441757 | Validation loss 2.785599023103714



 49%|██████████████████████████████████▊                                    | 4896/10000 [11:06:15<11:27:39,  8.08s/it]

Epoch: 4895 | Training loss 2.8668881580233574 | Validation loss 2.789039820432663



 49%|██████████████████████████████████▊                                    | 4897/10000 [11:06:23<11:29:15,  8.10s/it]

Epoch: 4896 | Training loss 2.865811303257942 | Validation loss 2.7905881106853485



 49%|██████████████████████████████████▊                                    | 4898/10000 [11:06:31<11:26:15,  8.07s/it]

Epoch: 4897 | Training loss 2.8623790740966797 | Validation loss 2.7868669033050537



 49%|██████████████████████████████████▊                                    | 4899/10000 [11:06:39<11:26:07,  8.07s/it]

Epoch: 4898 | Training loss 2.8681835532188416 | Validation loss 2.7919280230998993



 49%|██████████████████████████████████▊                                    | 4900/10000 [11:06:47<11:26:13,  8.07s/it]

Epoch: 4899 | Training loss 2.864313967525959 | Validation loss 2.7895842492580414



 49%|██████████████████████████████████▊                                    | 4901/10000 [11:06:55<11:27:27,  8.09s/it]

Epoch: 4900 | Training loss 2.8648571223020554 | Validation loss 2.786555230617523



 49%|██████████████████████████████████▊                                    | 4902/10000 [11:07:04<11:27:36,  8.09s/it]

Epoch: 4901 | Training loss 2.8675021901726723 | Validation loss 2.7901511490345



 49%|██████████████████████████████████▊                                    | 4903/10000 [11:07:11<11:24:11,  8.05s/it]

Epoch: 4902 | Training loss 2.8673229664564133 | Validation loss 2.793858826160431



 49%|██████████████████████████████████▊                                    | 4904/10000 [11:07:20<11:24:52,  8.06s/it]

Epoch: 4903 | Training loss 2.864559181034565 | Validation loss 2.7869328260421753



 49%|██████████████████████████████████▊                                    | 4905/10000 [11:07:28<11:27:17,  8.09s/it]

Epoch: 4904 | Training loss 2.8646111115813255 | Validation loss 2.7923565208911896



 49%|██████████████████████████████████▊                                    | 4906/10000 [11:07:36<11:28:59,  8.12s/it]

Epoch: 4905 | Training loss 2.8679630011320114 | Validation loss 2.792538493871689



 49%|██████████████████████████████████▊                                    | 4907/10000 [11:07:44<11:26:51,  8.09s/it]

Epoch: 4906 | Training loss 2.865799658000469 | Validation loss 2.79095458984375



 49%|██████████████████████████████████▊                                    | 4908/10000 [11:07:52<11:27:45,  8.10s/it]

Epoch: 4907 | Training loss 2.86418429762125 | Validation loss 2.788939207792282



 49%|██████████████████████████████████▊                                    | 4909/10000 [11:08:00<11:25:32,  8.08s/it]

Epoch: 4908 | Training loss 2.869437597692013 | Validation loss 2.790259689092636



 49%|██████████████████████████████████▊                                    | 4910/10000 [11:08:08<11:24:44,  8.07s/it]

Epoch: 4909 | Training loss 2.8653097599744797 | Validation loss 2.7971842885017395



 49%|██████████████████████████████████▊                                    | 4911/10000 [11:08:16<11:26:35,  8.09s/it]

Epoch: 4910 | Training loss 2.8669217005372047 | Validation loss 2.7919299006462097



 49%|██████████████████████████████████▉                                    | 4912/10000 [11:08:24<11:27:00,  8.10s/it]

Epoch: 4911 | Training loss 2.8635961040854454 | Validation loss 2.789313018321991



 49%|██████████████████████████████████▉                                    | 4913/10000 [11:08:33<11:29:15,  8.13s/it]

Epoch: 4912 | Training loss 2.8685755655169487 | Validation loss 2.7927554547786713



 49%|██████████████████████████████████▉                                    | 4914/10000 [11:08:41<11:28:55,  8.13s/it]

Epoch: 4913 | Training loss 2.8653392791748047 | Validation loss 2.7890341579914093



 49%|██████████████████████████████████▉                                    | 4915/10000 [11:08:49<11:29:42,  8.14s/it]

Epoch: 4914 | Training loss 2.8701147362589836 | Validation loss 2.7877287566661835



 49%|██████████████████████████████████▉                                    | 4916/10000 [11:08:57<11:27:06,  8.11s/it]

Epoch: 4915 | Training loss 2.868953913450241 | Validation loss 2.7915563583374023



 49%|██████████████████████████████████▉                                    | 4917/10000 [11:09:05<11:29:57,  8.14s/it]

Epoch: 4916 | Training loss 2.8660453632473946 | Validation loss 2.794342130422592



 49%|██████████████████████████████████▉                                    | 4918/10000 [11:09:13<11:30:20,  8.15s/it]

Epoch: 4917 | Training loss 2.8667932003736496 | Validation loss 2.789986699819565



 49%|██████████████████████████████████▉                                    | 4919/10000 [11:09:21<11:29:45,  8.15s/it]

Epoch: 4918 | Training loss 2.8662711828947067 | Validation loss 2.792624831199646



 49%|██████████████████████████████████▉                                    | 4920/10000 [11:09:30<11:28:10,  8.13s/it]

Epoch: 4919 | Training loss 2.863362140953541 | Validation loss 2.7948209047317505



 49%|██████████████████████████████████▉                                    | 4921/10000 [11:09:38<11:27:24,  8.12s/it]

Epoch: 4920 | Training loss 2.864728882908821 | Validation loss 2.7926405668258667



 49%|██████████████████████████████████▉                                    | 4922/10000 [11:09:46<11:26:00,  8.11s/it]

Epoch: 4921 | Training loss 2.868766836822033 | Validation loss 2.7954919040203094



 49%|██████████████████████████████████▉                                    | 4923/10000 [11:09:54<11:24:58,  8.10s/it]

Epoch: 4922 | Training loss 2.8677384331822395 | Validation loss 2.7933928966522217



 49%|██████████████████████████████████▉                                    | 4924/10000 [11:10:02<11:26:40,  8.12s/it]

Epoch: 4923 | Training loss 2.8641238063573837 | Validation loss 2.79782697558403



 49%|██████████████████████████████████▉                                    | 4925/10000 [11:10:10<11:21:43,  8.06s/it]

Epoch: 4924 | Training loss 2.8683691695332527 | Validation loss 2.787602126598358



 49%|██████████████████████████████████▉                                    | 4926/10000 [11:10:18<11:24:22,  8.09s/it]

Epoch: 4925 | Training loss 2.87058312445879 | Validation loss 2.790326774120331



 49%|██████████████████████████████████▉                                    | 4927/10000 [11:10:26<11:25:41,  8.11s/it]

Epoch: 4926 | Training loss 2.870612971484661 | Validation loss 2.7897432446479797



 49%|██████████████████████████████████▉                                    | 4928/10000 [11:10:34<11:25:23,  8.11s/it]

Epoch: 4927 | Training loss 2.8693751469254494 | Validation loss 2.7907979786396027



 49%|██████████████████████████████████▉                                    | 4929/10000 [11:10:42<11:25:31,  8.11s/it]

Epoch: 4928 | Training loss 2.862314984202385 | Validation loss 2.791614532470703



 49%|███████████████████████████████████                                    | 4930/10000 [11:10:51<11:24:46,  8.10s/it]

Epoch: 4929 | Training loss 2.8679075613617897 | Validation loss 2.790687084197998



 49%|███████████████████████████████████                                    | 4931/10000 [11:10:59<11:22:09,  8.07s/it]

Epoch: 4930 | Training loss 2.860024742782116 | Validation loss 2.7901795506477356



 49%|███████████████████████████████████                                    | 4932/10000 [11:11:07<11:22:23,  8.08s/it]

Epoch: 4931 | Training loss 2.8699211850762367 | Validation loss 2.796372652053833



 49%|███████████████████████████████████                                    | 4933/10000 [11:11:15<11:25:13,  8.11s/it]

Epoch: 4932 | Training loss 2.8635424822568893 | Validation loss 2.795568972826004



 49%|███████████████████████████████████                                    | 4934/10000 [11:11:23<11:24:02,  8.10s/it]

Epoch: 4933 | Training loss 2.8633254915475845 | Validation loss 2.7896803617477417



 49%|███████████████████████████████████                                    | 4935/10000 [11:11:31<11:24:17,  8.11s/it]

Epoch: 4934 | Training loss 2.8645349517464638 | Validation loss 2.788392096757889



 49%|███████████████████████████████████                                    | 4936/10000 [11:11:39<11:24:48,  8.11s/it]

Epoch: 4935 | Training loss 2.8647995442152023 | Validation loss 2.789562165737152



 49%|███████████████████████████████████                                    | 4937/10000 [11:11:47<11:23:34,  8.10s/it]

Epoch: 4936 | Training loss 2.861699067056179 | Validation loss 2.7900776267051697



 49%|███████████████████████████████████                                    | 4938/10000 [11:11:55<11:26:20,  8.14s/it]

Epoch: 4937 | Training loss 2.8666957169771194 | Validation loss 2.796142429113388



 49%|███████████████████████████████████                                    | 4939/10000 [11:12:03<11:22:58,  8.10s/it]

Epoch: 4938 | Training loss 2.8680252879858017 | Validation loss 2.7915873527526855



 49%|███████████████████████████████████                                    | 4940/10000 [11:12:12<11:25:17,  8.13s/it]

Epoch: 4939 | Training loss 2.861849784851074 | Validation loss 2.793481409549713



 49%|███████████████████████████████████                                    | 4941/10000 [11:12:20<11:27:17,  8.15s/it]

Epoch: 4940 | Training loss 2.8617755621671677 | Validation loss 2.7890924215316772



 49%|███████████████████████████████████                                    | 4942/10000 [11:12:28<11:26:45,  8.15s/it]

Epoch: 4941 | Training loss 2.8606923148036003 | Validation loss 2.792135715484619



 49%|███████████████████████████████████                                    | 4943/10000 [11:12:36<11:25:35,  8.13s/it]

Epoch: 4942 | Training loss 2.8625533133745193 | Validation loss 2.791606992483139



 49%|███████████████████████████████████                                    | 4944/10000 [11:12:44<11:26:47,  8.15s/it]

Epoch: 4943 | Training loss 2.8627321645617485 | Validation loss 2.7889446914196014



 49%|███████████████████████████████████                                    | 4945/10000 [11:12:52<11:25:58,  8.14s/it]

Epoch: 4944 | Training loss 2.862191751599312 | Validation loss 2.7886647284030914



 49%|███████████████████████████████████                                    | 4946/10000 [11:13:00<11:24:01,  8.12s/it]

Epoch: 4945 | Training loss 2.8731445595622063 | Validation loss 2.789770245552063



 49%|███████████████████████████████████                                    | 4947/10000 [11:13:09<11:26:26,  8.15s/it]

Epoch: 4946 | Training loss 2.862423524260521 | Validation loss 2.7884376645088196



 49%|███████████████████████████████████▏                                   | 4948/10000 [11:13:17<11:24:30,  8.13s/it]

Epoch: 4947 | Training loss 2.863132007420063 | Validation loss 2.7838176488876343



 49%|███████████████████████████████████▏                                   | 4949/10000 [11:13:25<11:25:17,  8.14s/it]

Epoch: 4948 | Training loss 2.86712234467268 | Validation loss 2.791093558073044



 50%|███████████████████████████████████▏                                   | 4950/10000 [11:13:33<11:24:31,  8.13s/it]

Epoch: 4949 | Training loss 2.8651421293616295 | Validation loss 2.7900839149951935



 50%|███████████████████████████████████▏                                   | 4951/10000 [11:13:41<11:21:58,  8.10s/it]

Epoch: 4950 | Training loss 2.8644014820456505 | Validation loss 2.7879849076271057



 50%|███████████████████████████████████▏                                   | 4952/10000 [11:13:49<11:18:14,  8.06s/it]

Epoch: 4951 | Training loss 2.8676650300621986 | Validation loss 2.7902587354183197



 50%|███████████████████████████████████▏                                   | 4953/10000 [11:13:57<11:17:06,  8.05s/it]

Epoch: 4952 | Training loss 2.871913768351078 | Validation loss 2.7954379618167877



 50%|███████████████████████████████████▏                                   | 4954/10000 [11:14:05<11:16:18,  8.04s/it]

Epoch: 4953 | Training loss 2.862771689891815 | Validation loss 2.789922833442688



 50%|███████████████████████████████████▏                                   | 4955/10000 [11:14:13<11:18:54,  8.07s/it]

Epoch: 4954 | Training loss 2.8656491935253143 | Validation loss 2.789034903049469



 50%|███████████████████████████████████▏                                   | 4956/10000 [11:14:21<11:23:23,  8.13s/it]

Epoch: 4955 | Training loss 2.8665717616677284 | Validation loss 2.7916029691696167



 50%|███████████████████████████████████▏                                   | 4957/10000 [11:14:30<11:22:31,  8.12s/it]

Epoch: 4956 | Training loss 2.8619692623615265 | Validation loss 2.788137286901474



 50%|███████████████████████████████████▏                                   | 4958/10000 [11:14:38<11:21:11,  8.11s/it]

Epoch: 4957 | Training loss 2.8624095022678375 | Validation loss 2.7895200550556183



 50%|███████████████████████████████████▏                                   | 4959/10000 [11:14:46<11:19:23,  8.09s/it]

Epoch: 4958 | Training loss 2.866721987724304 | Validation loss 2.7871479094028473



 50%|███████████████████████████████████▏                                   | 4960/10000 [11:14:54<11:19:38,  8.09s/it]

Epoch: 4959 | Training loss 2.861574999988079 | Validation loss 2.795526534318924



 50%|███████████████████████████████████▏                                   | 4961/10000 [11:15:02<11:16:38,  8.06s/it]

Epoch: 4960 | Training loss 2.868837021291256 | Validation loss 2.7867520451545715



 50%|███████████████████████████████████▏                                   | 4962/10000 [11:15:10<11:17:13,  8.07s/it]

Epoch: 4961 | Training loss 2.8684957697987556 | Validation loss 2.7855629324913025



 50%|███████████████████████████████████▏                                   | 4963/10000 [11:15:18<11:17:13,  8.07s/it]

Epoch: 4962 | Training loss 2.8665291741490364 | Validation loss 2.7961601614952087



 50%|███████████████████████████████████▏                                   | 4964/10000 [11:15:26<11:17:59,  8.08s/it]

Epoch: 4963 | Training loss 2.864685580134392 | Validation loss 2.793787896633148



 50%|███████████████████████████████████▎                                   | 4965/10000 [11:15:34<11:15:37,  8.05s/it]

Epoch: 4964 | Training loss 2.8607733696699142 | Validation loss 2.792839229106903



 50%|███████████████████████████████████▎                                   | 4966/10000 [11:15:42<11:19:08,  8.09s/it]

Epoch: 4965 | Training loss 2.864116370677948 | Validation loss 2.78917396068573



 50%|███████████████████████████████████▎                                   | 4967/10000 [11:15:50<11:17:40,  8.08s/it]

Epoch: 4966 | Training loss 2.862708255648613 | Validation loss 2.7916559875011444



 50%|███████████████████████████████████▎                                   | 4968/10000 [11:15:59<11:35:37,  8.29s/it]

Epoch: 4967 | Training loss 2.8702483773231506 | Validation loss 2.7911561727523804


 50%|███████████████████████████████████▎                                   | 4968/10000 [11:16:04<11:24:46,  8.17s/it]


KeyboardInterrupt: 

In [None]:
from torchsummary import summary

In [None]:
# Print the layer-by-layer report produced by torchsummary for `model`.
# NOTE(review): (9, 50, 224) is presumably the per-sample input shape
# (channels, height, width) expected by the network -- confirm against the
# dataset transform before relying on the reported sizes.
summary(
    model,
    (9, 50, 224),
    batch_size=64,
)

In [7]:
# Disabled export of the recorded loss curves. When uncommented, this builds a
# DataFrame with an `epoch` index column (0 .. len(val_loss) - 1) alongside the
# `train_loss` and `val_loss` lists and writes it to "Attentionlosses.csv"
# without the pandas row index.
# NOTE(review): all three columns must have the same length, otherwise the
# DataFrame constructor raises ValueError -- verify len(train_loss) ==
# len(val_loss) after an interrupted training run before re-enabling.
# pd.DataFrame({
#     "epoch": range(0, len(val_loss)),
#     "train_loss": train_loss[:],
#     "val_loss": val_loss,
# }).to_csv("Attentionlosses.csv", index=False)