<h1>NN Model</h1>

In [1]:
from pycocotools.coco import COCO
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as functions
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
#import torchvision
from torchvision import transforms
import re

In [2]:
# Report which device type PyTorch can use in this kernel.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
print(device_name)

cuda


In [3]:
# Root directory of the dataset and the runs whose logs are parsed.
DATADIR = "cocodoom/"
USED_RUNS = ["run1", "run2", "run3"]

# Training split: annotation-file stem and the run paired with it.
dataSplit = "run-full-train"
TRAIN_RUN = "run1"

# Path of the annotation JSON for the split selected above.
annFile = f"{DATADIR}{dataSplit}.json"

In [4]:
# Index the annotations for whichever split `annFile` currently points at.
# `annFile` is reassigned by several cells, so run the matching config cell first.
coco_train = COCO(annotation_file=annFile)

loading annotations into memory...


Done (t=22.57s)
creating index...


index created!


In [5]:
# Validation split: annotation-file stem and the run paired with it.
dataSplit = "run-full-val"
VAL_RUN = "run2"

# Path of the annotation JSON for the split selected above (overwrites `annFile`).
annFile = f"{DATADIR}{dataSplit}.json"

In [6]:
# Index the annotations for whichever split `annFile` currently points at.
# `annFile` is reassigned by several cells, so run the matching config cell first.
coco_val = COCO(annotation_file=annFile)

loading annotations into memory...


Done (t=18.33s)
creating index...


index created!


In [7]:
# Test split: annotation-file stem and the run paired with it.
dataSplit = "run-full-test"
TEST_RUN = "run3"

# Path of the annotation JSON for the split selected above (overwrites `annFile`).
annFile = f"{DATADIR}{dataSplit}.json"

In [8]:
# Index the annotations for whichever split `annFile` currently points at.
# `annFile` is reassigned by several cells, so run the matching config cell first.
coco_test = COCO(annotation_file=annFile)

loading annotations into memory...


Done (t=11.54s)
creating index...


index created!


In [9]:
def _relative_motion(prev_pose, pose):
    """Express the displacement between two logged poses in the previous pose's frame.

    Args:
        prev_pose: ``(x, y, z, angle)`` of the earlier log entry.
        pose: ``(x, y, z, angle)`` of the later log entry.

    Returns:
        ``(forward, lateral, dz, dangle)`` where ``forward``/``lateral`` are the
        world-space ``(dx, dy)`` rotated by ``-prev_angle`` and ``dangle`` is the
        unsigned shortest angular distance between the two headings.

    NOTE(review): angles are treated as radians (the code mixes them with
    ``np.pi``) — confirm against the log format.
    """
    prev_x, prev_y, prev_z, prev_angle = prev_pose
    cur_x, cur_y, cur_z, cur_angle = pose

    dx = cur_x - prev_x
    dy = cur_y - prev_y
    dz = cur_z - prev_z

    # Unsigned shortest distance on the circle; the turning direction is not kept.
    dangle = np.pi - abs(abs(cur_angle - prev_angle) - np.pi)

    # Proper rotation of (dx, dy) by -prev_angle. The previous lateral term used
    # sin(angle - pi/2) == -cos(angle), which made the transform non-orthogonal
    # (it did not preserve the length of the displacement).
    cos_a = np.cos(prev_angle)
    sin_a = np.sin(prev_angle)
    forward = dx * cos_a + dy * sin_a
    lateral = -dx * sin_a + dy * cos_a

    return (forward, lateral, dz, dangle)


# Per-run list of (x, y, z, angle) poses and of the motion between consecutive poses.
player_positions = {run: [] for run in USED_RUNS}
motion_vectors = {run: [] for run in USED_RUNS}

for run in USED_RUNS:
    with open(DATADIR+run+"/log.txt", 'r') as log_file:
        for line in log_file:
            # Only lines carrying the exact "player:" token can be split below.
            if "player:" not in line:
                continue
            line = line.strip()
            tic, stats = line.split("player:", 1)
            x, y, z, angle = stats.split(",")

            # Store position in the dictionary
            player_positions[run].append((float(x), float(y), float(z), float(angle)))
            if len(player_positions[run]) >= 2:
                motion_vector = _relative_motion(player_positions[run][-2], player_positions[run][-1])
                motion_vectors[run].append(motion_vector)

In [10]:
class DoomMotionDataset(Dataset):
    """Dataset yielding past motion/segmentation/depth windows and future motion.

    Each item is built around one image of ``coco``: the ``input_window`` tics
    before it provide motion vectors, one-hot segmentation masks and depth
    masks; the ``prediction_window`` tics after it provide target motion
    vectors.

    Relies on the module-level ``DATADIR`` (dataset root) and ``motion_vectors``
    (per-run list of motion tuples) defined elsewhere in the notebook.

    Args:
        coco: COCO index whose image ids define the items.
        run: key into ``motion_vectors`` for this split.
        input_window: number of past tics returned per item.
        prediction_window: number of future tics returned per item.
        transform: stored but not applied by this class.
    """

    # Shapes of the all-zero placeholders substituted when a mask is missing on disk.
    SEG_PLACEHOLDER_SHAPE = (4, 200, 320)
    DEP_PLACEHOLDER_SHAPE = (1, 200, 320)

    def __init__(self, coco, run, input_window, prediction_window, transform=None):
        self.coco = coco
        self.run = run
        self.img_ids = self.coco.getImgIds()
        self.transform = transform
        self.input_window = input_window
        self.prediction_window = prediction_window

    def __len__(self):
        """Number of images in the COCO index."""
        return len(self.img_ids)

    def fullSegmentationFormat(self, rgb_filename):
        """Return the (4, H, W) one-hot segmentation for a frame, or None if its mask file is missing."""
        seg_image = self.load_image(self.getSegmentationMask(DATADIR + rgb_filename))
        if seg_image is None:
            return None
        seg_class_map = self.color_to_index(seg_image)
        seg_class_one_hot = functions.one_hot(seg_class_map, num_classes=4).to(dtype=torch.float).permute(2, 0, 1)
        return seg_class_one_hot

    def fullDepthFormat(self, rgb_filename):
        """Return the depth mask of a frame as float32, or None if its file is missing."""
        depth_mask = self.load_image(self.getDepthMask(DATADIR + rgb_filename))
        if depth_mask is None:
            return None
        # `.to` converts without the copy-construct UserWarning raised by torch.tensor(tensor).
        return depth_mask.to(dtype=torch.float32)

    def getSegmentationMask(self, rgb_filename):
        """Map an RGB frame path to the path of its object-id image."""
        return rgb_filename.replace("rgb", "objects")

    def getDepthMask(self, rgb_filename):
        """Map an RGB frame path to the path of its depth image."""
        return rgb_filename.replace("rgb", "depth")

    def color_to_index(self, segmentation_image):
        """Convert a (3+, H, W) object-id image into a (H, W) long class map.

        Classes: 0 = sky, 1 = horizontal, 2 = vertical, 3 = everything else.
        Floating-point input is assumed to be scaled to [0, 1] (as produced by
        ``transforms.ToTensor``) and is mapped back to 0..255 first; integer
        input is used as-is.
        """
        channels = segmentation_image
        if channels.is_floating_point():
            channels = torch.round(channels * 255)
        channels = channels.to(torch.long)

        r, g, b = channels[0], channels[1], channels[2]
        pixel_values = r + (g * 2**8) + (b * 2**16)  # From cocodoom documentation, converts to an object id

        class_map = torch.full_like(pixel_values, 3, dtype=torch.long)

        sky = (1 << 23) + 0
        horizontal = (1 << 23) + 1
        vertical = (1 << 23) + 2

        # Compare the decoded ids (previously an unrelated global `x` was compared).
        class_map[pixel_values == sky] = 0
        class_map[pixel_values == horizontal] = 1
        class_map[pixel_values == vertical] = 2
        return class_map

    def load_image(self, path):
        """Load an image file as a tensor via ``transforms.ToTensor``; None if the file does not exist."""
        if not os.path.exists(path):
            return None
        # Context manager closes the underlying file once the pixels are converted.
        with Image.open(path) as img:
            return transforms.ToTensor()(img)

    def _load_masks(self, rgb_filename):
        """Return (segmentation, depth) for a frame, using zero placeholders for missing masks."""
        seg = self.fullSegmentationFormat(rgb_filename)
        dep = self.fullDepthFormat(rgb_filename)
        if seg is None:
            seg = torch.zeros(self.SEG_PLACEHOLDER_SHAPE)
        if dep is None:
            dep = torch.zeros(self.DEP_PLACEHOLDER_SHAPE)
        return seg, dep

    def __getitem__(self, idx):
        """Build one sample.

        Returns:
            dict with keys ``prev_motion`` (input_window, 4), ``next_motion``
            (prediction_window, 4), ``previous_seg`` and ``previous_dep``
            (stacked masks for the input window).
        """
        # The tic number is the numeric stem of the RGB file name.
        rgb_filename = self.coco.loadImgs(self.img_ids[idx])[0]['file_name']
        tic = int(rgb_filename.replace(".png", "").split("/")[-1])
        run_motion = motion_vectors[self.run]

        prev_motion_vectors = []
        next_motion_vectors = []
        prev_seg = []
        prev_dep = []

        for t in range(self.input_window, 0, -1):
            past_tic = tic - t
            if past_tic < 0:
                # Before the start of the log: clamp to the first motion vector / first image.
                prev_motion_vectors.append(run_motion[0])
                prev_filename = self.coco.loadImgs(self.img_ids[0])[0]['file_name']
                seg, dep = self._load_masks(prev_filename)
            elif past_tic >= len(run_motion):
                # Past the end of the log: clamp to the last motion vector / last image.
                prev_motion_vectors.append(run_motion[-1])
                prev_filename = self.coco.loadImgs(self.img_ids[-1])[0]['file_name']
                seg, dep = self._load_masks(prev_filename)
            else:
                prev_motion_vectors.append(run_motion[past_tic])
                # Swap the 6-digit tic in the file name, e.g. run1/map01/rgb/000002.png
                prev_filename = rgb_filename[:-10] + str(max(past_tic, 2)).rjust(6, "0") + ".png"
                if os.path.exists(DATADIR + prev_filename):
                    seg, dep = self._load_masks(prev_filename)
                else:
                    seg = torch.zeros(self.SEG_PLACEHOLDER_SHAPE)
                    dep = torch.zeros(self.DEP_PLACEHOLDER_SHAPE)
            prev_seg.append(seg)
            prev_dep.append(dep)

        for t in range(1, self.prediction_window + 1):
            # Clamp to the last motion vector when the window runs past the log.
            future_tic = min(tic + t, len(run_motion) - 1)
            next_motion_vectors.append(run_motion[future_tic])

        prev_motion_vectors = torch.tensor(prev_motion_vectors, dtype=torch.float32)
        next_motion_vectors = torch.tensor(next_motion_vectors, dtype=torch.float32)
        prev_seg = torch.stack(prev_seg)
        prev_dep = torch.stack(prev_dep)

        return {"prev_motion" : prev_motion_vectors, "next_motion" : next_motion_vectors, "previous_seg" : prev_seg, "previous_dep" : prev_dep}


In [11]:
class NeuralNetwork(nn.Module):
    """Encoder/decoder LSTM model predicting future motion from masks and past motion.

    Encoder: per input step, segmentation and depth masks each pass through a
    stride-2 convolution, are concatenated and flattened, and feed a visual
    LSTM; the motion vector passes through a linear layer into an inertial
    LSTM; both outputs are concatenated into a fusion LSTM.
    Decoder: starting from the last input motion vector, each step feeds the
    previous prediction back through the decoder LSTMs (the visual decoder
    receives zeros) and a linear head producing a 4-vector.

    Args:
        batch_size: stored on the instance; not otherwise used by the model.
        input_length: number of encoder steps read from the inputs.
        sequence_length: number of decoder steps (predicted motion vectors).
        activation_function: accepted for compatibility; currently unused.
        device: device each sub-module is moved to on construction.
        image_size: ``(height, width)`` of the input masks; determines the
            visual LSTM input size. The default gives 32000 features.
    """

    def __init__(self, batch_size, input_length, sequence_length, activation_function=functions.relu,
                 device=torch.device("cpu"), image_size=(200, 320)):
        super(NeuralNetwork, self).__init__()
        self.batch_size = batch_size
        self.input_length = input_length
        self.sequence_length = sequence_length

        # A 3x3, stride-2, padding-1 convolution maps a side of length n to (n - 1) // 2 + 1.
        height, width = image_size
        conv_h = (height - 1) // 2 + 1
        conv_w = (width - 1) // 2 + 1
        vis_features = 2 * conv_h * conv_w  # one output channel from each of the two convolutions

        # Encoder
        # Conv layers
        self.conv_seg = nn.Conv2d(4, 1, kernel_size=3, stride=2, padding=1, bias=False).to(device)
        self.conv_dep = nn.Conv2d(1, 1, kernel_size=3, stride=2, padding=1, bias=False).to(device)

        self.motion_fc = nn.Linear(4, 32).to(device)

        # Pre-fusion LSTMs
        self.vis_LSTM = nn.LSTM(input_size=vis_features, hidden_size=256, batch_first=True).to(device)
        self.inertia_LSTM = nn.LSTM(input_size=32, hidden_size=256, batch_first=True).to(device)

        # Fusion LSTM
        self.fusion_LSTM = nn.LSTM(input_size=512, hidden_size=256, batch_first=True).to(device)

        # Decoder
        self.de_motion_fc = nn.Linear(4, 32).to(device)
        # The visual decoder has no real input; it is driven with zeros of width 32.
        self.de_vis_LSTM = nn.LSTM(input_size=32, hidden_size=256, batch_first=True).to(device)
        self.de_inertia_LSTM = nn.LSTM(input_size=32, hidden_size=256, batch_first=True).to(device)
        self.de_fusion_LSTM = nn.LSTM(input_size=512, hidden_size=256, batch_first=True).to(device)
        self.output_fc = nn.Linear(256, 4).to(device)

    def forward(self, segmentation, depth, prev_motion):
        """Predict ``sequence_length`` motion vectors.

        Args:
            segmentation: indexed as ``segmentation[:, t]`` and fed to a 4-channel conv.
            depth: indexed as ``depth[:, t]`` and fed to a 1-channel conv.
            prev_motion: indexed as ``prev_motion[:, t]`` and fed to ``Linear(4, 32)``.

        Returns:
            Tensor of shape ``(sequence_length, batch, 4)``.
        """
        # None is a valid initial state for nn.LSTM (zeros are used).
        hidden_vis = None
        hidden_inert = None
        hidden_fus = None

        for t in range(self.input_length):
            seg = self.conv_seg(segmentation[:, t])
            dep = self.conv_dep(depth[:, t])
            mot = self.motion_fc(prev_motion[:, t])
            vis = torch.flatten(torch.cat((seg, dep), dim=1), start_dim=1)

            # nn.LSTM treats 2-D input as ONE unbatched sequence, which would turn the
            # batch dimension into time. Add an explicit length-1 sequence axis so every
            # sample keeps its own recurrent state: (batch, 1, features).
            output_vis, hidden_vis = self.vis_LSTM(vis.unsqueeze(1), hidden_vis)
            output_inert, hidden_inert = self.inertia_LSTM(mot.unsqueeze(1), hidden_inert)
            combined = torch.cat((output_vis, output_inert), dim=2)
            _, hidden_fus = self.fusion_LSTM(combined, hidden_fus)

        batch = segmentation.size(0)
        de_mot = prev_motion[:, -1]
        output_tensor = torch.zeros(self.sequence_length, batch, 4).to(segmentation.device)
        # Constant zero input for the visual decoder, hoisted out of the loop.
        blank_vis = torch.zeros(batch, 1, 32).to(segmentation.device)
        for t in range(self.sequence_length):
            de_in = self.de_motion_fc(de_mot).unsqueeze(1)
            de_output_inert, hidden_inert = self.de_inertia_LSTM(de_in, hidden_inert)
            de_output_vis, hidden_vis = self.de_vis_LSTM(blank_vis, hidden_vis)
            combined = torch.cat((de_output_vis, de_output_inert), dim=2)
            de_output_fus, hidden_fus = self.de_fusion_LSTM(combined, hidden_fus)
            output_t = self.output_fc(de_output_fus).squeeze(1)
            # Feed the prediction back as the next decoder input.
            de_mot = output_t
            output_tensor[t] = output_t

        return output_tensor

In [12]:
def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over ``loader`` and return the mean loss of the processed batches.

    Args:
        model: called as ``model(previous_seg, previous_dep, prev_motion)``.
        loader: iterable of dicts with keys ``prev_motion``, ``next_motion``,
            ``previous_seg`` and ``previous_dep``.
        criterion: loss applied to ``(outputs, next_motion)``.
        device: device the batch tensors are moved to.
        desc: label shown on the progress bar.
        optimizer: when given, the pass trains the model; when ``None`` the
            model is put in eval mode and gradients are disabled.

    Returns:
        Sum of batch losses divided by the number of batches that were actually
        evaluated (``nan`` if every batch was skipped).
    """
    is_training = optimizer is not None
    model.train(is_training)

    total_loss = 0.0
    processed = 0
    progress_bar = tqdm(loader, desc=desc, unit="batch")

    with torch.set_grad_enabled(is_training):
        for batch_idx, batch in enumerate(progress_bar):
            prev_motion = batch["prev_motion"].to(device)
            next_motion = batch["next_motion"].to(device)
            previous_seg = batch["previous_seg"].to(device)
            previous_dep = batch["previous_dep"].to(device)

            # Skip the batch if ANY of the four tensors disagrees on batch size.
            # (A chained `a != b != c != d` only fires when every adjacent pair differs.)
            batch_sizes = {prev_motion.size(0), next_motion.size(0), previous_seg.size(0), previous_dep.size(0)}
            if len(batch_sizes) != 1:
                continue

            if is_training:
                optimizer.zero_grad()

            # Model returns (sequence, batch, 4); targets are (batch, sequence, 4).
            outputs = model(previous_seg, previous_dep, prev_motion)
            outputs = outputs.permute(1, 0, 2)

            if outputs.size(0) != next_motion.size(0):
                continue

            loss = criterion(outputs, next_motion)
            if is_training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            processed += 1

            progress_bar.set_postfix({
                "batch_loss": loss.item(),
                "batch_index": batch_idx + 1,
                "batch_size": prev_motion.size(0)
            })

    # Average over evaluated batches only, so skipped batches do not deflate the mean.
    return total_loss / processed if processed else float("nan")


torch.cuda.empty_cache()
batch_size = 256
learning_rate = 1e-3
num_epochs = 10
input_window = 5
prediction_window = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("cuda" if torch.cuda.is_available() else "cpu")
model = NeuralNetwork(batch_size, input_window, prediction_window, device=device).to(device)
# Resume from the previous checkpoint (the file must exist). map_location lets a
# checkpoint saved on another device type load on the current one.
model.load_state_dict(torch.load("multimodal_seq2seq.pth", weights_only=True, map_location=device))

train_dataset = DoomMotionDataset(coco_train, TRAIN_RUN, input_window, prediction_window)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

val_dataset = DoomMotionDataset(coco_val, VAL_RUN, input_window, prediction_window)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    epoch_loss = _run_epoch(model, train_loader, criterion, device,
                            desc=f"Epoch {epoch+1}/{num_epochs}", optimizer=optimizer)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

    val_loss = _run_epoch(model, val_loader, criterion, device, desc="Validation")
    print(f"Val Loss: {val_loss:.4f}")

# Save the trained model
torch.save(model.state_dict(), "multimodal_seq2seq_20epochs.pth")

cuda


Epoch 1/10:   0%|                                                                            | 0/991 [00:00<?, ?batch/s]

  depth_mask = torch.tensor(depth_mask, dtype=torch.float32)


Epoch 1/10:   0%|                            | 0/991 [00:24<?, ?batch/s, batch_loss=16.9, batch_index=1, batch_size=256]

Epoch 1/10:   0%|                  | 1/991 [00:24<6:52:24, 24.99s/batch, batch_loss=16.9, batch_index=1, batch_size=256]

Epoch 1/10:   0%|                  | 1/991 [00:40<6:52:24, 24.99s/batch, batch_loss=17.2, batch_index=2, batch_size=256]

Epoch 1/10:   0%|                  | 2/991 [00:40<5:17:54, 19.29s/batch, batch_loss=17.2, batch_index=2, batch_size=256]

Epoch 1/10:   0%|                  | 2/991 [00:56<5:17:54, 19.29s/batch, batch_loss=11.4, batch_index=3, batch_size=256]

Epoch 1/10:   0%|                  | 3/991 [00:56<4:51:54, 17.73s/batch, batch_loss=11.4, batch_index=3, batch_size=256]

Epoch 1/10:   0%|                  | 3/991 [01:14<4:51:54, 17.73s/batch, batch_loss=6.41, batch_index=4, batch_size=256]

Epoch 1/10:   0%|                  | 4/991 [01:14<4:52:34, 17.79s/batch, batch_loss=6.41, batch_index=4, batch_size=256]

Epoch 1/10:   0%|                  | 4/991 [01:29<4:52:34, 17.79s/batch, batch_loss=19.1, batch_index=5, batch_size=256]

Epoch 1/10:   1%|                  | 5/991 [01:29<4:37:19, 16.88s/batch, batch_loss=19.1, batch_index=5, batch_size=256]

Epoch 1/10:   1%|                  | 5/991 [01:46<4:37:19, 16.88s/batch, batch_loss=24.7, batch_index=6, batch_size=256]

Epoch 1/10:   1%|                  | 6/991 [01:46<4:36:27, 16.84s/batch, batch_loss=24.7, batch_index=6, batch_size=256]

Epoch 1/10:   1%|                  | 6/991 [02:02<4:36:27, 16.84s/batch, batch_loss=16.5, batch_index=7, batch_size=256]

Epoch 1/10:   1%|▏                 | 7/991 [02:02<4:31:45, 16.57s/batch, batch_loss=16.5, batch_index=7, batch_size=256]

Epoch 1/10:   1%|▏                  | 7/991 [02:18<4:31:45, 16.57s/batch, batch_loss=602, batch_index=8, batch_size=256]

Epoch 1/10:   1%|▏                  | 8/991 [02:18<4:29:56, 16.48s/batch, batch_loss=602, batch_index=8, batch_size=256]

Epoch 1/10:   1%|▏                 | 8/991 [02:34<4:29:56, 16.48s/batch, batch_loss=15.3, batch_index=9, batch_size=256]

Epoch 1/10:   1%|▏                 | 9/991 [02:34<4:29:04, 16.44s/batch, batch_loss=15.3, batch_index=9, batch_size=256]

Epoch 1/10:   1%|▏                | 9/991 [02:55<4:29:04, 16.44s/batch, batch_loss=16.1, batch_index=10, batch_size=256]

Epoch 1/10:   1%|▏               | 10/991 [02:55<4:49:06, 17.68s/batch, batch_loss=16.1, batch_index=10, batch_size=256]

Epoch 1/10:   1%|▏               | 10/991 [03:12<4:49:06, 17.68s/batch, batch_loss=11.7, batch_index=11, batch_size=256]

Epoch 1/10:   1%|▏               | 11/991 [03:12<4:46:30, 17.54s/batch, batch_loss=11.7, batch_index=11, batch_size=256]

Epoch 1/10:   1%|▏            | 11/991 [03:28<4:46:30, 17.54s/batch, batch_loss=1.99e+3, batch_index=12, batch_size=256]

Epoch 1/10:   1%|▏            | 12/991 [03:28<4:40:09, 17.17s/batch, batch_loss=1.99e+3, batch_index=12, batch_size=256]

Epoch 1/10:   1%|▏               | 12/991 [03:45<4:40:09, 17.17s/batch, batch_loss=17.4, batch_index=13, batch_size=256]

Epoch 1/10:   1%|▏               | 13/991 [03:45<4:37:05, 17.00s/batch, batch_loss=17.4, batch_index=13, batch_size=256]

Epoch 1/10:   1%|▏               | 13/991 [04:01<4:37:05, 17.00s/batch, batch_loss=12.3, batch_index=14, batch_size=256]

Epoch 1/10:   1%|▏               | 14/991 [04:01<4:32:37, 16.74s/batch, batch_loss=12.3, batch_index=14, batch_size=256]

Epoch 1/10:   1%|▏               | 14/991 [04:18<4:32:37, 16.74s/batch, batch_loss=10.4, batch_index=15, batch_size=256]

Epoch 1/10:   2%|▏               | 15/991 [04:18<4:32:56, 16.78s/batch, batch_loss=10.4, batch_index=15, batch_size=256]

Epoch 1/10:   2%|▏               | 15/991 [04:34<4:32:56, 16.78s/batch, batch_loss=13.2, batch_index=16, batch_size=256]

Epoch 1/10:   2%|▎               | 16/991 [04:34<4:29:19, 16.57s/batch, batch_loss=13.2, batch_index=16, batch_size=256]

Epoch 1/10:   2%|▎                 | 16/991 [04:50<4:29:19, 16.57s/batch, batch_loss=14, batch_index=17, batch_size=256]

Epoch 1/10:   2%|▎                 | 17/991 [04:50<4:27:37, 16.49s/batch, batch_loss=14, batch_index=17, batch_size=256]

Epoch 1/10:   2%|▎               | 17/991 [05:07<4:27:37, 16.49s/batch, batch_loss=10.2, batch_index=18, batch_size=256]

Epoch 1/10:   2%|▎               | 18/991 [05:07<4:27:31, 16.50s/batch, batch_loss=10.2, batch_index=18, batch_size=256]

Epoch 1/10:   2%|▏            | 18/991 [05:23<4:27:31, 16.50s/batch, batch_loss=8.96e+3, batch_index=19, batch_size=256]

Epoch 1/10:   2%|▏            | 19/991 [05:23<4:25:23, 16.38s/batch, batch_loss=8.96e+3, batch_index=19, batch_size=256]

Epoch 1/10:   2%|▎               | 19/991 [05:40<4:25:23, 16.38s/batch, batch_loss=11.1, batch_index=20, batch_size=256]

Epoch 1/10:   2%|▎               | 20/991 [05:40<4:27:24, 16.52s/batch, batch_loss=11.1, batch_index=20, batch_size=256]

Epoch 1/10:   2%|▎               | 20/991 [05:55<4:27:24, 16.52s/batch, batch_loss=16.9, batch_index=21, batch_size=256]

Epoch 1/10:   2%|▎               | 21/991 [05:55<4:21:30, 16.18s/batch, batch_loss=16.9, batch_index=21, batch_size=256]

Epoch 1/10:   2%|▎            | 21/991 [06:11<4:21:30, 16.18s/batch, batch_loss=1.08e+3, batch_index=22, batch_size=256]

Epoch 1/10:   2%|▎            | 22/991 [06:11<4:19:39, 16.08s/batch, batch_loss=1.08e+3, batch_index=22, batch_size=256]

Epoch 1/10:   2%|▎               | 22/991 [06:27<4:19:39, 16.08s/batch, batch_loss=9.04, batch_index=23, batch_size=256]

Epoch 1/10:   2%|▎               | 23/991 [06:27<4:19:31, 16.09s/batch, batch_loss=9.04, batch_index=23, batch_size=256]

Epoch 1/10:   2%|▎               | 23/991 [06:42<4:19:31, 16.09s/batch, batch_loss=12.9, batch_index=24, batch_size=256]

Epoch 1/10:   2%|▍               | 24/991 [06:42<4:15:40, 15.86s/batch, batch_loss=12.9, batch_index=24, batch_size=256]

Epoch 1/10:   2%|▍               | 24/991 [06:58<4:15:40, 15.86s/batch, batch_loss=13.3, batch_index=25, batch_size=256]

Epoch 1/10:   3%|▍               | 25/991 [06:58<4:13:38, 15.75s/batch, batch_loss=13.3, batch_index=25, batch_size=256]

Epoch 1/10:   3%|▍               | 25/991 [07:15<4:13:38, 15.75s/batch, batch_loss=16.6, batch_index=26, batch_size=256]

Epoch 1/10:   3%|▍               | 26/991 [07:15<4:21:36, 16.27s/batch, batch_loss=16.6, batch_index=26, batch_size=256]

Epoch 1/10:   3%|▍               | 26/991 [07:31<4:21:36, 16.27s/batch, batch_loss=15.6, batch_index=27, batch_size=256]

Epoch 1/10:   3%|▍               | 27/991 [07:31<4:20:28, 16.21s/batch, batch_loss=15.6, batch_index=27, batch_size=256]

Epoch 1/10:   3%|▎            | 27/991 [07:46<4:20:28, 16.21s/batch, batch_loss=1.14e+3, batch_index=28, batch_size=256]

Epoch 1/10:   3%|▎            | 28/991 [07:46<4:13:59, 15.83s/batch, batch_loss=1.14e+3, batch_index=28, batch_size=256]

Epoch 1/10:   3%|▍               | 28/991 [08:02<4:13:59, 15.83s/batch, batch_loss=12.1, batch_index=29, batch_size=256]

Epoch 1/10:   3%|▍               | 29/991 [08:02<4:15:11, 15.92s/batch, batch_loss=12.1, batch_index=29, batch_size=256]

Epoch 1/10:   3%|▍               | 29/991 [08:18<4:15:11, 15.92s/batch, batch_loss=11.5, batch_index=30, batch_size=256]

Epoch 1/10:   3%|▍               | 30/991 [08:18<4:15:24, 15.95s/batch, batch_loss=11.5, batch_index=30, batch_size=256]

Epoch 1/10:   3%|▍               | 30/991 [08:34<4:15:24, 15.95s/batch, batch_loss=10.2, batch_index=31, batch_size=256]

Epoch 1/10:   3%|▌               | 31/991 [08:34<4:13:32, 15.85s/batch, batch_loss=10.2, batch_index=31, batch_size=256]

Epoch 1/10:   3%|▍             | 31/991 [08:50<4:13:32, 15.85s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 1/10:   3%|▍             | 32/991 [08:50<4:15:03, 15.96s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 1/10:   3%|▌               | 32/991 [09:06<4:15:03, 15.96s/batch, batch_loss=13.9, batch_index=33, batch_size=256]

Epoch 1/10:   3%|▌               | 33/991 [09:06<4:15:06, 15.98s/batch, batch_loss=13.9, batch_index=33, batch_size=256]

Epoch 1/10:   3%|▌               | 33/991 [09:22<4:15:06, 15.98s/batch, batch_loss=9.94, batch_index=34, batch_size=256]

Epoch 1/10:   3%|▌               | 34/991 [09:22<4:15:06, 15.99s/batch, batch_loss=9.94, batch_index=34, batch_size=256]

Epoch 1/10:   3%|▌               | 34/991 [09:38<4:15:06, 15.99s/batch, batch_loss=12.9, batch_index=35, batch_size=256]

Epoch 1/10:   4%|▌               | 35/991 [09:38<4:11:56, 15.81s/batch, batch_loss=12.9, batch_index=35, batch_size=256]

Epoch 1/10:   4%|▌               | 35/991 [09:54<4:11:56, 15.81s/batch, batch_loss=11.8, batch_index=36, batch_size=256]

Epoch 1/10:   4%|▌               | 36/991 [09:54<4:12:41, 15.88s/batch, batch_loss=11.8, batch_index=36, batch_size=256]

Epoch 1/10:   4%|▌               | 36/991 [10:09<4:12:41, 15.88s/batch, batch_loss=12.1, batch_index=37, batch_size=256]

Epoch 1/10:   4%|▌               | 37/991 [10:09<4:09:42, 15.70s/batch, batch_loss=12.1, batch_index=37, batch_size=256]

Epoch 1/10:   4%|▌               | 37/991 [10:24<4:09:42, 15.70s/batch, batch_loss=7.89, batch_index=38, batch_size=256]

Epoch 1/10:   4%|▌               | 38/991 [10:24<4:07:44, 15.60s/batch, batch_loss=7.89, batch_index=38, batch_size=256]

Epoch 1/10:   4%|▍            | 38/991 [10:40<4:07:44, 15.60s/batch, batch_loss=1.73e+3, batch_index=39, batch_size=256]

Epoch 1/10:   4%|▌            | 39/991 [10:40<4:06:02, 15.51s/batch, batch_loss=1.73e+3, batch_index=39, batch_size=256]

Epoch 1/10:   4%|▋               | 39/991 [10:56<4:06:02, 15.51s/batch, batch_loss=14.9, batch_index=40, batch_size=256]

Epoch 1/10:   4%|▋               | 40/991 [10:56<4:10:02, 15.78s/batch, batch_loss=14.9, batch_index=40, batch_size=256]

Epoch 1/10:   4%|▌            | 40/991 [11:13<4:10:02, 15.78s/batch, batch_loss=6.21e+3, batch_index=41, batch_size=256]

Epoch 1/10:   4%|▌            | 41/991 [11:13<4:17:03, 16.23s/batch, batch_loss=6.21e+3, batch_index=41, batch_size=256]

Epoch 1/10:   4%|▋               | 41/991 [11:30<4:17:03, 16.23s/batch, batch_loss=16.4, batch_index=42, batch_size=256]

Epoch 1/10:   4%|▋               | 42/991 [11:30<4:18:03, 16.32s/batch, batch_loss=16.4, batch_index=42, batch_size=256]

Epoch 1/10:   4%|▋               | 42/991 [11:46<4:18:03, 16.32s/batch, batch_loss=10.4, batch_index=43, batch_size=256]

Epoch 1/10:   4%|▋               | 43/991 [11:46<4:17:27, 16.30s/batch, batch_loss=10.4, batch_index=43, batch_size=256]

Epoch 1/10:   4%|▊                 | 43/991 [12:02<4:17:27, 16.30s/batch, batch_loss=16, batch_index=44, batch_size=256]

Epoch 1/10:   4%|▊                 | 44/991 [12:02<4:13:47, 16.08s/batch, batch_loss=16, batch_index=44, batch_size=256]

Epoch 1/10:   4%|▋               | 44/991 [12:17<4:13:47, 16.08s/batch, batch_loss=17.8, batch_index=45, batch_size=256]

Epoch 1/10:   5%|▋               | 45/991 [12:17<4:09:01, 15.79s/batch, batch_loss=17.8, batch_index=45, batch_size=256]

Epoch 1/10:   5%|▋               | 45/991 [12:32<4:09:01, 15.79s/batch, batch_loss=14.9, batch_index=46, batch_size=256]

Epoch 1/10:   5%|▋               | 46/991 [12:32<4:07:04, 15.69s/batch, batch_loss=14.9, batch_index=46, batch_size=256]

Epoch 1/10:   5%|▊                | 46/991 [12:52<4:07:04, 15.69s/batch, batch_loss=6.5, batch_index=47, batch_size=256]

Epoch 1/10:   5%|▊                | 47/991 [12:52<4:25:11, 16.86s/batch, batch_loss=6.5, batch_index=47, batch_size=256]

Epoch 1/10:   5%|▊               | 47/991 [13:08<4:25:11, 16.86s/batch, batch_loss=15.6, batch_index=48, batch_size=256]

Epoch 1/10:   5%|▊               | 48/991 [13:08<4:22:56, 16.73s/batch, batch_loss=15.6, batch_index=48, batch_size=256]

Epoch 1/10:   5%|▊               | 48/991 [13:25<4:22:56, 16.73s/batch, batch_loss=12.1, batch_index=49, batch_size=256]

Epoch 1/10:   5%|▊               | 49/991 [13:25<4:23:19, 16.77s/batch, batch_loss=12.1, batch_index=49, batch_size=256]

Epoch 1/10:   5%|▊               | 49/991 [13:41<4:23:19, 16.77s/batch, batch_loss=15.6, batch_index=50, batch_size=256]

Epoch 1/10:   5%|▊               | 50/991 [13:41<4:20:38, 16.62s/batch, batch_loss=15.6, batch_index=50, batch_size=256]

Epoch 1/10:   5%|▊               | 50/991 [13:57<4:20:38, 16.62s/batch, batch_loss=9.69, batch_index=51, batch_size=256]

Epoch 1/10:   5%|▊               | 51/991 [13:57<4:16:34, 16.38s/batch, batch_loss=9.69, batch_index=51, batch_size=256]

Epoch 1/10:   5%|▊               | 51/991 [14:13<4:16:34, 16.38s/batch, batch_loss=13.8, batch_index=52, batch_size=256]

Epoch 1/10:   5%|▊               | 52/991 [14:13<4:12:40, 16.15s/batch, batch_loss=13.8, batch_index=52, batch_size=256]

Epoch 1/10:   5%|▊               | 52/991 [14:30<4:12:40, 16.15s/batch, batch_loss=14.3, batch_index=53, batch_size=256]

Epoch 1/10:   5%|▊               | 53/991 [14:30<4:16:03, 16.38s/batch, batch_loss=14.3, batch_index=53, batch_size=256]

Epoch 1/10:   5%|▊               | 53/991 [14:47<4:16:03, 16.38s/batch, batch_loss=9.52, batch_index=54, batch_size=256]

Epoch 1/10:   5%|▊               | 54/991 [14:47<4:21:22, 16.74s/batch, batch_loss=9.52, batch_index=54, batch_size=256]

Epoch 1/10:   5%|▊               | 54/991 [15:04<4:21:22, 16.74s/batch, batch_loss=9.74, batch_index=55, batch_size=256]

Epoch 1/10:   6%|▉               | 55/991 [15:04<4:20:27, 16.70s/batch, batch_loss=9.74, batch_index=55, batch_size=256]

Epoch 1/10:   6%|▉               | 55/991 [15:24<4:20:27, 16.70s/batch, batch_loss=12.5, batch_index=56, batch_size=256]

Epoch 1/10:   6%|▉               | 56/991 [15:24<4:35:44, 17.69s/batch, batch_loss=12.5, batch_index=56, batch_size=256]

Epoch 1/10:   6%|▉               | 56/991 [15:41<4:35:44, 17.69s/batch, batch_loss=7.24, batch_index=57, batch_size=256]

Epoch 1/10:   6%|▉               | 57/991 [15:41<4:32:11, 17.49s/batch, batch_loss=7.24, batch_index=57, batch_size=256]

Epoch 1/10:   6%|▉               | 57/991 [15:57<4:32:11, 17.49s/batch, batch_loss=14.6, batch_index=58, batch_size=256]

Epoch 1/10:   6%|▉               | 58/991 [15:57<4:25:17, 17.06s/batch, batch_loss=14.6, batch_index=58, batch_size=256]

Epoch 1/10:   6%|▉               | 58/991 [16:13<4:25:17, 17.06s/batch, batch_loss=10.4, batch_index=59, batch_size=256]

Epoch 1/10:   6%|▉               | 59/991 [16:13<4:20:51, 16.79s/batch, batch_loss=10.4, batch_index=59, batch_size=256]

Epoch 1/10:   6%|▉               | 59/991 [16:30<4:20:51, 16.79s/batch, batch_loss=15.5, batch_index=60, batch_size=256]

Epoch 1/10:   6%|▉               | 60/991 [16:30<4:20:58, 16.82s/batch, batch_loss=15.5, batch_index=60, batch_size=256]

Epoch 1/10:   6%|▉               | 60/991 [16:47<4:20:58, 16.82s/batch, batch_loss=10.1, batch_index=61, batch_size=256]

Epoch 1/10:   6%|▉               | 61/991 [16:47<4:22:07, 16.91s/batch, batch_loss=10.1, batch_index=61, batch_size=256]

Epoch 1/10:   6%|▉               | 61/991 [17:03<4:22:07, 16.91s/batch, batch_loss=11.8, batch_index=62, batch_size=256]

Epoch 1/10:   6%|█               | 62/991 [17:03<4:17:59, 16.66s/batch, batch_loss=11.8, batch_index=62, batch_size=256]

Epoch 1/10:   6%|█                | 62/991 [17:20<4:17:59, 16.66s/batch, batch_loss=419, batch_index=63, batch_size=256]

Epoch 1/10:   6%|█                | 63/991 [17:20<4:19:40, 16.79s/batch, batch_loss=419, batch_index=63, batch_size=256]

Epoch 1/10:   6%|█                | 63/991 [17:38<4:19:40, 16.79s/batch, batch_loss=798, batch_index=64, batch_size=256]

Epoch 1/10:   6%|█                | 64/991 [17:38<4:24:44, 17.14s/batch, batch_loss=798, batch_index=64, batch_size=256]

Epoch 1/10:   6%|▊            | 64/991 [17:55<4:24:44, 17.14s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 1/10:   7%|▊            | 65/991 [17:55<4:22:08, 16.99s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 1/10:   7%|█               | 65/991 [18:11<4:22:08, 16.99s/batch, batch_loss=4.34, batch_index=66, batch_size=256]

Epoch 1/10:   7%|█               | 66/991 [18:11<4:17:53, 16.73s/batch, batch_loss=4.34, batch_index=66, batch_size=256]

Epoch 1/10:   7%|█               | 66/991 [18:28<4:17:53, 16.73s/batch, batch_loss=10.2, batch_index=67, batch_size=256]

Epoch 1/10:   7%|█               | 67/991 [18:28<4:18:30, 16.79s/batch, batch_loss=10.2, batch_index=67, batch_size=256]

Epoch 1/10:   7%|█               | 67/991 [18:44<4:18:30, 16.79s/batch, batch_loss=8.06, batch_index=68, batch_size=256]

Epoch 1/10:   7%|█               | 68/991 [18:44<4:13:51, 16.50s/batch, batch_loss=8.06, batch_index=68, batch_size=256]

Epoch 1/10:   7%|█               | 68/991 [19:00<4:13:51, 16.50s/batch, batch_loss=21.2, batch_index=69, batch_size=256]

Epoch 1/10:   7%|█               | 69/991 [19:00<4:12:59, 16.46s/batch, batch_loss=21.2, batch_index=69, batch_size=256]

Epoch 1/10:   7%|█               | 69/991 [19:17<4:12:59, 16.46s/batch, batch_loss=7.64, batch_index=70, batch_size=256]

Epoch 1/10:   7%|█▏              | 70/991 [19:17<4:12:04, 16.42s/batch, batch_loss=7.64, batch_index=70, batch_size=256]

Epoch 1/10:   7%|█▏              | 70/991 [19:33<4:12:04, 16.42s/batch, batch_loss=11.8, batch_index=71, batch_size=256]

Epoch 1/10:   7%|█▏              | 71/991 [19:33<4:12:41, 16.48s/batch, batch_loss=11.8, batch_index=71, batch_size=256]

Epoch 1/10:   7%|█▏              | 71/991 [19:49<4:12:41, 16.48s/batch, batch_loss=13.9, batch_index=72, batch_size=256]

Epoch 1/10:   7%|█▏              | 72/991 [19:49<4:11:03, 16.39s/batch, batch_loss=13.9, batch_index=72, batch_size=256]

Epoch 1/10:   7%|█▎                | 72/991 [20:06<4:11:03, 16.39s/batch, batch_loss=25, batch_index=73, batch_size=256]

Epoch 1/10:   7%|█▎                | 73/991 [20:06<4:11:10, 16.42s/batch, batch_loss=25, batch_index=73, batch_size=256]

Epoch 1/10:   7%|▉            | 73/991 [20:22<4:11:10, 16.42s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 1/10:   7%|▉            | 74/991 [20:22<4:10:28, 16.39s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 1/10:   7%|█▏              | 74/991 [20:38<4:10:28, 16.39s/batch, batch_loss=15.9, batch_index=75, batch_size=256]

Epoch 1/10:   8%|█▏              | 75/991 [20:38<4:07:26, 16.21s/batch, batch_loss=15.9, batch_index=75, batch_size=256]

Epoch 1/10:   8%|█▏              | 75/991 [20:53<4:07:26, 16.21s/batch, batch_loss=11.1, batch_index=76, batch_size=256]

Epoch 1/10:   8%|█▏              | 76/991 [20:53<4:03:01, 15.94s/batch, batch_loss=11.1, batch_index=76, batch_size=256]

Epoch 1/10:   8%|█▍                | 76/991 [21:09<4:03:01, 15.94s/batch, batch_loss=10, batch_index=77, batch_size=256]

Epoch 1/10:   8%|█▍                | 77/991 [21:09<4:03:27, 15.98s/batch, batch_loss=10, batch_index=77, batch_size=256]

Epoch 1/10:   8%|█▏              | 77/991 [21:26<4:03:27, 15.98s/batch, batch_loss=12.8, batch_index=78, batch_size=256]

Epoch 1/10:   8%|█▎              | 78/991 [21:26<4:04:01, 16.04s/batch, batch_loss=12.8, batch_index=78, batch_size=256]

Epoch 1/10:   8%|█▎               | 78/991 [21:42<4:04:01, 16.04s/batch, batch_loss=8.1, batch_index=79, batch_size=256]

Epoch 1/10:   8%|█▎               | 79/991 [21:42<4:04:43, 16.10s/batch, batch_loss=8.1, batch_index=79, batch_size=256]

Epoch 1/10:   8%|█▎              | 79/991 [21:58<4:04:43, 16.10s/batch, batch_loss=9.26, batch_index=80, batch_size=256]

Epoch 1/10:   8%|█▎              | 80/991 [21:58<4:05:09, 16.15s/batch, batch_loss=9.26, batch_index=80, batch_size=256]

Epoch 1/10:   8%|█▎              | 80/991 [22:15<4:05:09, 16.15s/batch, batch_loss=12.8, batch_index=81, batch_size=256]

Epoch 1/10:   8%|█▎              | 81/991 [22:15<4:08:49, 16.41s/batch, batch_loss=12.8, batch_index=81, batch_size=256]

Epoch 1/10:   8%|█▎              | 81/991 [22:30<4:08:49, 16.41s/batch, batch_loss=12.8, batch_index=82, batch_size=256]

Epoch 1/10:   8%|█▎              | 82/991 [22:30<4:02:46, 16.02s/batch, batch_loss=12.8, batch_index=82, batch_size=256]

Epoch 1/10:   8%|█▎              | 82/991 [22:46<4:02:46, 16.02s/batch, batch_loss=6.85, batch_index=83, batch_size=256]

Epoch 1/10:   8%|█▎              | 83/991 [22:46<4:01:29, 15.96s/batch, batch_loss=6.85, batch_index=83, batch_size=256]

Epoch 1/10:   8%|█▎              | 83/991 [23:02<4:01:29, 15.96s/batch, batch_loss=11.2, batch_index=84, batch_size=256]

Epoch 1/10:   8%|█▎              | 84/991 [23:02<4:00:48, 15.93s/batch, batch_loss=11.2, batch_index=84, batch_size=256]

Epoch 1/10:   8%|█▎              | 84/991 [23:18<4:00:48, 15.93s/batch, batch_loss=7.58, batch_index=85, batch_size=256]

Epoch 1/10:   9%|█▎              | 85/991 [23:18<4:00:35, 15.93s/batch, batch_loss=7.58, batch_index=85, batch_size=256]

Epoch 1/10:   9%|█▎              | 85/991 [23:33<4:00:35, 15.93s/batch, batch_loss=10.6, batch_index=86, batch_size=256]

Epoch 1/10:   9%|█▍              | 86/991 [23:33<3:58:19, 15.80s/batch, batch_loss=10.6, batch_index=86, batch_size=256]

Epoch 1/10:   9%|█▍              | 86/991 [23:49<3:58:19, 15.80s/batch, batch_loss=10.7, batch_index=87, batch_size=256]

Epoch 1/10:   9%|█▍              | 87/991 [23:49<3:59:51, 15.92s/batch, batch_loss=10.7, batch_index=87, batch_size=256]

Epoch 1/10:   9%|█▍              | 87/991 [24:05<3:59:51, 15.92s/batch, batch_loss=9.13, batch_index=88, batch_size=256]

Epoch 1/10:   9%|█▍              | 88/991 [24:05<3:58:33, 15.85s/batch, batch_loss=9.13, batch_index=88, batch_size=256]

Epoch 1/10:   9%|█▍              | 88/991 [24:20<3:58:33, 15.85s/batch, batch_loss=5.62, batch_index=89, batch_size=256]

Epoch 1/10:   9%|█▍              | 89/991 [24:20<3:51:42, 15.41s/batch, batch_loss=5.62, batch_index=89, batch_size=256]

Epoch 1/10:   9%|█▌               | 89/991 [24:36<3:51:42, 15.41s/batch, batch_loss=247, batch_index=90, batch_size=256]

Epoch 1/10:   9%|█▌               | 90/991 [24:36<3:54:07, 15.59s/batch, batch_loss=247, batch_index=90, batch_size=256]

Epoch 1/10:   9%|█▏           | 90/991 [24:54<3:54:07, 15.59s/batch, batch_loss=1.53e+3, batch_index=91, batch_size=256]

Epoch 1/10:   9%|█▏           | 91/991 [24:54<4:07:59, 16.53s/batch, batch_loss=1.53e+3, batch_index=91, batch_size=256]

Epoch 1/10:   9%|█▍              | 91/991 [25:11<4:07:59, 16.53s/batch, batch_loss=17.7, batch_index=92, batch_size=256]

Epoch 1/10:   9%|█▍              | 92/991 [25:11<4:07:14, 16.50s/batch, batch_loss=17.7, batch_index=92, batch_size=256]

Epoch 1/10:   9%|█▍              | 92/991 [25:27<4:07:14, 16.50s/batch, batch_loss=19.1, batch_index=93, batch_size=256]

Epoch 1/10:   9%|█▌              | 93/991 [25:27<4:06:28, 16.47s/batch, batch_loss=19.1, batch_index=93, batch_size=256]

Epoch 1/10:   9%|█▌              | 93/991 [25:45<4:06:28, 16.47s/batch, batch_loss=17.6, batch_index=94, batch_size=256]

Epoch 1/10:   9%|█▌              | 94/991 [25:45<4:14:16, 17.01s/batch, batch_loss=17.6, batch_index=94, batch_size=256]

Epoch 1/10:   9%|█▌              | 94/991 [26:02<4:14:16, 17.01s/batch, batch_loss=16.5, batch_index=95, batch_size=256]

Epoch 1/10:  10%|█▌              | 95/991 [26:02<4:10:03, 16.74s/batch, batch_loss=16.5, batch_index=95, batch_size=256]

Epoch 1/10:  10%|█▌              | 95/991 [26:16<4:10:03, 16.74s/batch, batch_loss=18.2, batch_index=96, batch_size=256]

Epoch 1/10:  10%|█▌              | 96/991 [26:16<3:58:07, 15.96s/batch, batch_loss=18.2, batch_index=96, batch_size=256]

Epoch 1/10:  10%|█▌              | 96/991 [26:31<3:58:07, 15.96s/batch, batch_loss=17.6, batch_index=97, batch_size=256]

Epoch 1/10:  10%|█▌              | 97/991 [26:31<3:56:58, 15.90s/batch, batch_loss=17.6, batch_index=97, batch_size=256]

Epoch 1/10:  10%|█▌              | 97/991 [26:52<3:56:58, 15.90s/batch, batch_loss=16.1, batch_index=98, batch_size=256]

Epoch 1/10:  10%|█▌              | 98/991 [26:52<4:15:29, 17.17s/batch, batch_loss=16.1, batch_index=98, batch_size=256]

Epoch 1/10:  10%|█▌              | 98/991 [27:07<4:15:29, 17.17s/batch, batch_loss=13.7, batch_index=99, batch_size=256]

Epoch 1/10:  10%|█▌              | 99/991 [27:07<4:07:48, 16.67s/batch, batch_loss=13.7, batch_index=99, batch_size=256]

Epoch 1/10:  10%|█▍             | 99/991 [27:24<4:07:48, 16.67s/batch, batch_loss=14.7, batch_index=100, batch_size=256]

Epoch 1/10:  10%|█▍            | 100/991 [27:24<4:10:53, 16.89s/batch, batch_loss=14.7, batch_index=100, batch_size=256]

Epoch 1/10:  10%|█▍            | 100/991 [27:42<4:10:53, 16.89s/batch, batch_loss=11.4, batch_index=101, batch_size=256]

Epoch 1/10:  10%|█▍            | 101/991 [27:42<4:12:32, 17.03s/batch, batch_loss=11.4, batch_index=101, batch_size=256]

Epoch 1/10:  10%|█▍            | 101/991 [27:59<4:12:32, 17.03s/batch, batch_loss=22.2, batch_index=102, batch_size=256]

Epoch 1/10:  10%|█▍            | 102/991 [27:59<4:12:44, 17.06s/batch, batch_loss=22.2, batch_index=102, batch_size=256]

Epoch 1/10:  10%|█▌             | 102/991 [28:15<4:12:44, 17.06s/batch, batch_loss=911, batch_index=103, batch_size=256]

Epoch 1/10:  10%|█▌             | 103/991 [28:15<4:06:17, 16.64s/batch, batch_loss=911, batch_index=103, batch_size=256]

Epoch 1/10:  10%|█▍            | 103/991 [28:31<4:06:17, 16.64s/batch, batch_loss=13.4, batch_index=104, batch_size=256]

Epoch 1/10:  10%|█▍            | 104/991 [28:31<4:03:33, 16.48s/batch, batch_loss=13.4, batch_index=104, batch_size=256]

Epoch 1/10:  10%|█▍            | 104/991 [28:49<4:03:33, 16.48s/batch, batch_loss=10.7, batch_index=105, batch_size=256]

Epoch 1/10:  11%|█▍            | 105/991 [28:49<4:11:06, 17.01s/batch, batch_loss=10.7, batch_index=105, batch_size=256]

Epoch 1/10:  11%|█▍            | 105/991 [29:05<4:11:06, 17.01s/batch, batch_loss=10.9, batch_index=106, batch_size=256]

Epoch 1/10:  11%|█▍            | 106/991 [29:05<4:05:06, 16.62s/batch, batch_loss=10.9, batch_index=106, batch_size=256]

Epoch 1/10:  11%|█▍            | 106/991 [29:20<4:05:06, 16.62s/batch, batch_loss=17.2, batch_index=107, batch_size=256]

Epoch 1/10:  11%|█▌            | 107/991 [29:20<4:01:05, 16.36s/batch, batch_loss=17.2, batch_index=107, batch_size=256]

Epoch 1/10:  11%|█▌            | 107/991 [29:36<4:01:05, 16.36s/batch, batch_loss=24.2, batch_index=108, batch_size=256]

Epoch 1/10:  11%|█▌            | 108/991 [29:36<3:55:40, 16.01s/batch, batch_loss=24.2, batch_index=108, batch_size=256]

Epoch 1/10:  11%|█▌            | 108/991 [29:52<3:55:40, 16.01s/batch, batch_loss=13.9, batch_index=109, batch_size=256]

Epoch 1/10:  11%|█▌            | 109/991 [29:52<3:55:35, 16.03s/batch, batch_loss=13.9, batch_index=109, batch_size=256]

Epoch 1/10:  11%|█▌            | 109/991 [30:06<3:55:35, 16.03s/batch, batch_loss=14.8, batch_index=110, batch_size=256]

Epoch 1/10:  11%|█▌            | 110/991 [30:06<3:48:10, 15.54s/batch, batch_loss=14.8, batch_index=110, batch_size=256]

Epoch 1/10:  11%|█▌            | 110/991 [30:21<3:48:10, 15.54s/batch, batch_loss=18.4, batch_index=111, batch_size=256]

Epoch 1/10:  11%|█▌            | 111/991 [30:21<3:45:35, 15.38s/batch, batch_loss=18.4, batch_index=111, batch_size=256]

Epoch 1/10:  11%|█▊              | 111/991 [30:38<3:45:35, 15.38s/batch, batch_loss=16, batch_index=112, batch_size=256]

Epoch 1/10:  11%|█▊              | 112/991 [30:38<3:51:56, 15.83s/batch, batch_loss=16, batch_index=112, batch_size=256]

Epoch 1/10:  11%|█▌            | 112/991 [30:54<3:51:56, 15.83s/batch, batch_loss=11.1, batch_index=113, batch_size=256]

Epoch 1/10:  11%|█▌            | 113/991 [30:54<3:52:18, 15.88s/batch, batch_loss=11.1, batch_index=113, batch_size=256]

Epoch 1/10:  11%|█▌            | 113/991 [31:13<3:52:18, 15.88s/batch, batch_loss=14.9, batch_index=114, batch_size=256]

Epoch 1/10:  12%|█▌            | 114/991 [31:13<4:05:41, 16.81s/batch, batch_loss=14.9, batch_index=114, batch_size=256]

Epoch 1/10:  12%|█▌            | 114/991 [31:29<4:05:41, 16.81s/batch, batch_loss=23.1, batch_index=115, batch_size=256]

Epoch 1/10:  12%|█▌            | 115/991 [31:29<4:03:39, 16.69s/batch, batch_loss=23.1, batch_index=115, batch_size=256]

Epoch 1/10:  12%|█▌            | 115/991 [31:46<4:03:39, 16.69s/batch, batch_loss=10.1, batch_index=116, batch_size=256]

Epoch 1/10:  12%|█▋            | 116/991 [31:46<4:03:05, 16.67s/batch, batch_loss=10.1, batch_index=116, batch_size=256]

Epoch 1/10:  12%|█▋            | 116/991 [32:02<4:03:05, 16.67s/batch, batch_loss=18.5, batch_index=117, batch_size=256]

Epoch 1/10:  12%|█▋            | 117/991 [32:02<4:00:18, 16.50s/batch, batch_loss=18.5, batch_index=117, batch_size=256]

Epoch 1/10:  12%|█▋            | 117/991 [32:20<4:00:18, 16.50s/batch, batch_loss=14.5, batch_index=118, batch_size=256]

Epoch 1/10:  12%|█▋            | 118/991 [32:20<4:04:07, 16.78s/batch, batch_loss=14.5, batch_index=118, batch_size=256]

Epoch 1/10:  12%|█▋            | 118/991 [32:37<4:04:07, 16.78s/batch, batch_loss=27.4, batch_index=119, batch_size=256]

Epoch 1/10:  12%|█▋            | 119/991 [32:37<4:05:53, 16.92s/batch, batch_loss=27.4, batch_index=119, batch_size=256]

Epoch 1/10:  12%|█▋            | 119/991 [32:53<4:05:53, 16.92s/batch, batch_loss=17.3, batch_index=120, batch_size=256]

Epoch 1/10:  12%|█▋            | 120/991 [32:53<4:03:45, 16.79s/batch, batch_loss=17.3, batch_index=120, batch_size=256]

Epoch 1/10:  12%|█▋            | 120/991 [33:08<4:03:45, 16.79s/batch, batch_loss=24.5, batch_index=121, batch_size=256]

Epoch 1/10:  12%|█▋            | 121/991 [33:08<3:55:22, 16.23s/batch, batch_loss=24.5, batch_index=121, batch_size=256]

Epoch 1/10:  12%|█▋            | 121/991 [33:25<3:55:22, 16.23s/batch, batch_loss=10.2, batch_index=122, batch_size=256]

Epoch 1/10:  12%|█▋            | 122/991 [33:25<3:56:01, 16.30s/batch, batch_loss=10.2, batch_index=122, batch_size=256]

Epoch 1/10:  12%|█▋            | 122/991 [33:41<3:56:01, 16.30s/batch, batch_loss=15.9, batch_index=123, batch_size=256]

Epoch 1/10:  12%|█▋            | 123/991 [33:41<3:55:07, 16.25s/batch, batch_loss=15.9, batch_index=123, batch_size=256]

Epoch 1/10:  12%|█▎         | 123/991 [33:56<3:55:07, 16.25s/batch, batch_loss=3.47e+3, batch_index=124, batch_size=256]

Epoch 1/10:  13%|█▍         | 124/991 [33:56<3:51:49, 16.04s/batch, batch_loss=3.47e+3, batch_index=124, batch_size=256]

Epoch 1/10:  13%|█▊            | 124/991 [34:13<3:51:49, 16.04s/batch, batch_loss=9.46, batch_index=125, batch_size=256]

Epoch 1/10:  13%|█▊            | 125/991 [34:13<3:53:01, 16.14s/batch, batch_loss=9.46, batch_index=125, batch_size=256]

Epoch 1/10:  13%|█▊            | 125/991 [34:29<3:53:01, 16.14s/batch, batch_loss=10.9, batch_index=126, batch_size=256]

Epoch 1/10:  13%|█▊            | 126/991 [34:29<3:53:27, 16.19s/batch, batch_loss=10.9, batch_index=126, batch_size=256]

Epoch 1/10:  13%|█▍         | 126/991 [34:45<3:53:27, 16.19s/batch, batch_loss=1.89e+3, batch_index=127, batch_size=256]

Epoch 1/10:  13%|█▍         | 127/991 [34:45<3:50:52, 16.03s/batch, batch_loss=1.89e+3, batch_index=127, batch_size=256]

Epoch 1/10:  13%|█▍         | 127/991 [35:00<3:50:52, 16.03s/batch, batch_loss=1.57e+3, batch_index=128, batch_size=256]

Epoch 1/10:  13%|█▍         | 128/991 [35:00<3:47:49, 15.84s/batch, batch_loss=1.57e+3, batch_index=128, batch_size=256]

Epoch 1/10:  13%|█▉             | 128/991 [35:17<3:47:49, 15.84s/batch, batch_loss=223, batch_index=129, batch_size=256]

Epoch 1/10:  13%|█▉             | 129/991 [35:17<3:53:19, 16.24s/batch, batch_loss=223, batch_index=129, batch_size=256]

Epoch 1/10:  13%|█▉             | 129/991 [35:33<3:53:19, 16.24s/batch, batch_loss=992, batch_index=130, batch_size=256]

Epoch 1/10:  13%|█▉             | 130/991 [35:33<3:51:06, 16.10s/batch, batch_loss=992, batch_index=130, batch_size=256]

Epoch 1/10:  13%|█▍         | 130/991 [35:48<3:51:06, 16.10s/batch, batch_loss=8.34e+3, batch_index=131, batch_size=256]

Epoch 1/10:  13%|█▍         | 131/991 [35:48<3:47:57, 15.90s/batch, batch_loss=8.34e+3, batch_index=131, batch_size=256]

Epoch 1/10:  13%|█▊            | 131/991 [36:04<3:47:57, 15.90s/batch, batch_loss=17.7, batch_index=132, batch_size=256]

Epoch 1/10:  13%|█▊            | 132/991 [36:04<3:44:01, 15.65s/batch, batch_loss=17.7, batch_index=132, batch_size=256]

Epoch 1/10:  13%|█▊            | 132/991 [36:19<3:44:01, 15.65s/batch, batch_loss=9.29, batch_index=133, batch_size=256]

Epoch 1/10:  13%|█▉            | 133/991 [36:19<3:43:31, 15.63s/batch, batch_loss=9.29, batch_index=133, batch_size=256]

Epoch 1/10:  13%|█▉            | 133/991 [36:34<3:43:31, 15.63s/batch, batch_loss=11.9, batch_index=134, batch_size=256]

Epoch 1/10:  14%|█▉            | 134/991 [36:34<3:40:42, 15.45s/batch, batch_loss=11.9, batch_index=134, batch_size=256]

Epoch 1/10:  14%|█▉            | 134/991 [36:50<3:40:42, 15.45s/batch, batch_loss=19.5, batch_index=135, batch_size=256]

Epoch 1/10:  14%|█▉            | 135/991 [36:50<3:42:35, 15.60s/batch, batch_loss=19.5, batch_index=135, batch_size=256]

Epoch 1/10:  14%|█▉            | 135/991 [37:06<3:42:35, 15.60s/batch, batch_loss=9.44, batch_index=136, batch_size=256]

Epoch 1/10:  14%|█▉            | 136/991 [37:06<3:41:42, 15.56s/batch, batch_loss=9.44, batch_index=136, batch_size=256]

Epoch 1/10:  14%|█▉            | 136/991 [37:21<3:41:42, 15.56s/batch, batch_loss=13.4, batch_index=137, batch_size=256]

Epoch 1/10:  14%|█▉            | 137/991 [37:21<3:41:12, 15.54s/batch, batch_loss=13.4, batch_index=137, batch_size=256]

Epoch 1/10:  14%|██▏             | 137/991 [37:36<3:41:12, 15.54s/batch, batch_loss=17, batch_index=138, batch_size=256]

Epoch 1/10:  14%|██▏             | 138/991 [37:36<3:40:25, 15.50s/batch, batch_loss=17, batch_index=138, batch_size=256]

Epoch 1/10:  14%|█▉            | 138/991 [37:53<3:40:25, 15.50s/batch, batch_loss=7.53, batch_index=139, batch_size=256]

Epoch 1/10:  14%|█▉            | 139/991 [37:53<3:43:36, 15.75s/batch, batch_loss=7.53, batch_index=139, batch_size=256]

Epoch 1/10:  14%|██▏             | 139/991 [38:09<3:43:36, 15.75s/batch, batch_loss=12, batch_index=140, batch_size=256]

Epoch 1/10:  14%|██▎             | 140/991 [38:09<3:45:21, 15.89s/batch, batch_loss=12, batch_index=140, batch_size=256]

Epoch 1/10:  14%|█▉            | 140/991 [38:25<3:45:21, 15.89s/batch, batch_loss=7.23, batch_index=141, batch_size=256]

Epoch 1/10:  14%|█▉            | 141/991 [38:25<3:44:30, 15.85s/batch, batch_loss=7.23, batch_index=141, batch_size=256]

Epoch 1/10:  14%|█▉            | 141/991 [38:41<3:44:30, 15.85s/batch, batch_loss=6.66, batch_index=142, batch_size=256]

Epoch 1/10:  14%|██            | 142/991 [38:41<3:46:02, 15.97s/batch, batch_loss=6.66, batch_index=142, batch_size=256]

Epoch 1/10:  14%|██            | 142/991 [38:58<3:46:02, 15.97s/batch, batch_loss=13.4, batch_index=143, batch_size=256]

Epoch 1/10:  14%|██            | 143/991 [38:58<3:51:50, 16.40s/batch, batch_loss=13.4, batch_index=143, batch_size=256]

Epoch 1/10:  14%|██▎             | 143/991 [39:14<3:51:50, 16.40s/batch, batch_loss=15, batch_index=144, batch_size=256]

Epoch 1/10:  15%|██▎             | 144/991 [39:14<3:47:31, 16.12s/batch, batch_loss=15, batch_index=144, batch_size=256]

Epoch 1/10:  15%|██            | 144/991 [39:32<3:47:31, 16.12s/batch, batch_loss=15.5, batch_index=145, batch_size=256]

Epoch 1/10:  15%|██            | 145/991 [39:32<3:57:10, 16.82s/batch, batch_loss=15.5, batch_index=145, batch_size=256]

Epoch 1/10:  15%|██            | 145/991 [39:49<3:57:10, 16.82s/batch, batch_loss=12.9, batch_index=146, batch_size=256]

Epoch 1/10:  15%|██            | 146/991 [39:49<3:55:30, 16.72s/batch, batch_loss=12.9, batch_index=146, batch_size=256]

Epoch 1/10:  15%|██            | 146/991 [40:06<3:55:30, 16.72s/batch, batch_loss=7.04, batch_index=147, batch_size=256]

Epoch 1/10:  15%|██            | 147/991 [40:06<3:57:14, 16.87s/batch, batch_loss=7.04, batch_index=147, batch_size=256]

Epoch 1/10:  15%|██            | 147/991 [40:23<3:57:14, 16.87s/batch, batch_loss=20.2, batch_index=148, batch_size=256]

Epoch 1/10:  15%|██            | 148/991 [40:23<3:56:31, 16.83s/batch, batch_loss=20.2, batch_index=148, batch_size=256]

Epoch 1/10:  15%|██            | 148/991 [40:39<3:56:31, 16.83s/batch, batch_loss=11.4, batch_index=149, batch_size=256]

Epoch 1/10:  15%|██            | 149/991 [40:39<3:52:48, 16.59s/batch, batch_loss=11.4, batch_index=149, batch_size=256]

Epoch 1/10:  15%|██            | 149/991 [40:55<3:52:48, 16.59s/batch, batch_loss=11.3, batch_index=150, batch_size=256]

Epoch 1/10:  15%|██            | 150/991 [40:55<3:52:09, 16.56s/batch, batch_loss=11.3, batch_index=150, batch_size=256]

Epoch 1/10:  15%|██            | 150/991 [41:12<3:52:09, 16.56s/batch, batch_loss=17.5, batch_index=151, batch_size=256]

Epoch 1/10:  15%|██▏           | 151/991 [41:12<3:53:01, 16.64s/batch, batch_loss=17.5, batch_index=151, batch_size=256]

Epoch 1/10:  15%|██▏           | 151/991 [41:28<3:53:01, 16.64s/batch, batch_loss=15.4, batch_index=152, batch_size=256]

Epoch 1/10:  15%|██▏           | 152/991 [41:28<3:51:27, 16.55s/batch, batch_loss=15.4, batch_index=152, batch_size=256]

Epoch 1/10:  15%|██▏           | 152/991 [41:47<3:51:27, 16.55s/batch, batch_loss=18.2, batch_index=153, batch_size=256]

Epoch 1/10:  15%|██▏           | 153/991 [41:47<4:01:20, 17.28s/batch, batch_loss=18.2, batch_index=153, batch_size=256]

Epoch 1/10:  15%|██▏           | 153/991 [42:04<4:01:20, 17.28s/batch, batch_loss=20.4, batch_index=154, batch_size=256]

Epoch 1/10:  16%|██▏           | 154/991 [42:04<3:57:02, 16.99s/batch, batch_loss=20.4, batch_index=154, batch_size=256]

Epoch 1/10:  16%|██▏           | 154/991 [42:19<3:57:02, 16.99s/batch, batch_loss=20.5, batch_index=155, batch_size=256]

Epoch 1/10:  16%|██▏           | 155/991 [42:19<3:50:09, 16.52s/batch, batch_loss=20.5, batch_index=155, batch_size=256]

Epoch 1/10:  16%|██▏           | 155/991 [42:34<3:50:09, 16.52s/batch, batch_loss=7.32, batch_index=156, batch_size=256]

Epoch 1/10:  16%|██▏           | 156/991 [42:34<3:44:06, 16.10s/batch, batch_loss=7.32, batch_index=156, batch_size=256]

Epoch 1/10:  16%|██▏           | 156/991 [42:50<3:44:06, 16.10s/batch, batch_loss=23.3, batch_index=157, batch_size=256]

Epoch 1/10:  16%|██▏           | 157/991 [42:50<3:42:23, 16.00s/batch, batch_loss=23.3, batch_index=157, batch_size=256]

Epoch 1/10:  16%|██▏           | 157/991 [43:05<3:42:23, 16.00s/batch, batch_loss=8.02, batch_index=158, batch_size=256]

Epoch 1/10:  16%|██▏           | 158/991 [43:05<3:37:46, 15.69s/batch, batch_loss=8.02, batch_index=158, batch_size=256]

Epoch 1/10:  16%|██▏           | 158/991 [43:21<3:37:46, 15.69s/batch, batch_loss=5.93, batch_index=159, batch_size=256]

Epoch 1/10:  16%|██▏           | 159/991 [43:21<3:37:35, 15.69s/batch, batch_loss=5.93, batch_index=159, batch_size=256]

Epoch 1/10:  16%|██▏           | 159/991 [43:36<3:37:35, 15.69s/batch, batch_loss=12.1, batch_index=160, batch_size=256]

Epoch 1/10:  16%|██▎           | 160/991 [43:36<3:34:54, 15.52s/batch, batch_loss=12.1, batch_index=160, batch_size=256]

Epoch 1/10:  16%|██▍            | 160/991 [43:51<3:34:54, 15.52s/batch, batch_loss=457, batch_index=161, batch_size=256]

Epoch 1/10:  16%|██▍            | 161/991 [43:51<3:34:07, 15.48s/batch, batch_loss=457, batch_index=161, batch_size=256]

Epoch 1/10:  16%|██▎           | 161/991 [44:07<3:34:07, 15.48s/batch, batch_loss=14.5, batch_index=162, batch_size=256]

Epoch 1/10:  16%|██▎           | 162/991 [44:07<3:34:05, 15.49s/batch, batch_loss=14.5, batch_index=162, batch_size=256]

Epoch 1/10:  16%|██▎           | 162/991 [44:22<3:34:05, 15.49s/batch, batch_loss=8.77, batch_index=163, batch_size=256]

Epoch 1/10:  16%|██▎           | 163/991 [44:22<3:34:20, 15.53s/batch, batch_loss=8.77, batch_index=163, batch_size=256]

Epoch 1/10:  16%|██▎           | 163/991 [44:38<3:34:20, 15.53s/batch, batch_loss=11.9, batch_index=164, batch_size=256]

Epoch 1/10:  17%|██▎           | 164/991 [44:38<3:34:35, 15.57s/batch, batch_loss=11.9, batch_index=164, batch_size=256]

Epoch 1/10:  17%|██▎           | 164/991 [44:53<3:34:35, 15.57s/batch, batch_loss=11.1, batch_index=165, batch_size=256]

Epoch 1/10:  17%|██▎           | 165/991 [44:53<3:30:53, 15.32s/batch, batch_loss=11.1, batch_index=165, batch_size=256]

Epoch 1/10:  17%|██▎           | 165/991 [45:08<3:30:53, 15.32s/batch, batch_loss=11.4, batch_index=166, batch_size=256]

Epoch 1/10:  17%|██▎           | 166/991 [45:08<3:29:41, 15.25s/batch, batch_loss=11.4, batch_index=166, batch_size=256]

Epoch 1/10:  17%|██▎           | 166/991 [45:24<3:29:41, 15.25s/batch, batch_loss=19.1, batch_index=167, batch_size=256]

Epoch 1/10:  17%|██▎           | 167/991 [45:24<3:33:14, 15.53s/batch, batch_loss=19.1, batch_index=167, batch_size=256]

Epoch 1/10:  17%|██▎           | 167/991 [45:40<3:33:14, 15.53s/batch, batch_loss=11.4, batch_index=168, batch_size=256]

Epoch 1/10:  17%|██▎           | 168/991 [45:40<3:33:43, 15.58s/batch, batch_loss=11.4, batch_index=168, batch_size=256]

Epoch 1/10:  17%|██▎           | 168/991 [45:55<3:33:43, 15.58s/batch, batch_loss=14.5, batch_index=169, batch_size=256]

Epoch 1/10:  17%|██▍           | 169/991 [45:55<3:33:01, 15.55s/batch, batch_loss=14.5, batch_index=169, batch_size=256]

Epoch 1/10:  17%|██▍           | 169/991 [46:11<3:33:01, 15.55s/batch, batch_loss=9.49, batch_index=170, batch_size=256]

Epoch 1/10:  17%|██▍           | 170/991 [46:11<3:32:47, 15.55s/batch, batch_loss=9.49, batch_index=170, batch_size=256]

Epoch 1/10:  17%|██▍           | 170/991 [46:27<3:32:47, 15.55s/batch, batch_loss=5.75, batch_index=171, batch_size=256]

Epoch 1/10:  17%|██▍           | 171/991 [46:27<3:35:35, 15.78s/batch, batch_loss=5.75, batch_index=171, batch_size=256]

Epoch 1/10:  17%|██▍           | 171/991 [46:43<3:35:35, 15.78s/batch, batch_loss=8.93, batch_index=172, batch_size=256]

Epoch 1/10:  17%|██▍           | 172/991 [46:43<3:37:52, 15.96s/batch, batch_loss=8.93, batch_index=172, batch_size=256]

Epoch 1/10:  17%|██▍           | 172/991 [47:00<3:37:52, 15.96s/batch, batch_loss=6.58, batch_index=173, batch_size=256]

Epoch 1/10:  17%|██▍           | 173/991 [47:00<3:39:28, 16.10s/batch, batch_loss=6.58, batch_index=173, batch_size=256]

Epoch 1/10:  17%|█▉         | 173/991 [47:16<3:39:28, 16.10s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 1/10:  18%|█▉         | 174/991 [47:16<3:40:14, 16.17s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 1/10:  18%|██▊             | 174/991 [47:35<3:40:14, 16.17s/batch, batch_loss=19, batch_index=175, batch_size=256]

Epoch 1/10:  18%|██▊             | 175/991 [47:35<3:49:53, 16.90s/batch, batch_loss=19, batch_index=175, batch_size=256]

Epoch 1/10:  18%|██▍           | 175/991 [47:52<3:49:53, 16.90s/batch, batch_loss=25.5, batch_index=176, batch_size=256]

Epoch 1/10:  18%|██▍           | 176/991 [47:52<3:49:25, 16.89s/batch, batch_loss=25.5, batch_index=176, batch_size=256]

Epoch 1/10:  18%|██▍           | 176/991 [48:08<3:49:25, 16.89s/batch, batch_loss=25.7, batch_index=177, batch_size=256]

Epoch 1/10:  18%|██▌           | 177/991 [48:08<3:44:49, 16.57s/batch, batch_loss=25.7, batch_index=177, batch_size=256]

Epoch 1/10:  18%|██▌           | 177/991 [48:24<3:44:49, 16.57s/batch, batch_loss=23.3, batch_index=178, batch_size=256]

Epoch 1/10:  18%|██▌           | 178/991 [48:24<3:44:00, 16.53s/batch, batch_loss=23.3, batch_index=178, batch_size=256]

Epoch 1/10:  18%|██▌           | 178/991 [48:40<3:44:00, 16.53s/batch, batch_loss=12.7, batch_index=179, batch_size=256]

Epoch 1/10:  18%|██▌           | 179/991 [48:40<3:40:39, 16.30s/batch, batch_loss=12.7, batch_index=179, batch_size=256]

Epoch 1/10:  18%|██▌           | 179/991 [48:56<3:40:39, 16.30s/batch, batch_loss=7.33, batch_index=180, batch_size=256]

Epoch 1/10:  18%|██▌           | 180/991 [48:56<3:39:00, 16.20s/batch, batch_loss=7.33, batch_index=180, batch_size=256]

Epoch 1/10:  18%|█▉         | 180/991 [49:15<3:39:00, 16.20s/batch, batch_loss=2.52e+4, batch_index=181, batch_size=256]

Epoch 1/10:  18%|██         | 181/991 [49:15<3:50:55, 17.11s/batch, batch_loss=2.52e+4, batch_index=181, batch_size=256]

Epoch 1/10:  18%|██▌           | 181/991 [49:31<3:50:55, 17.11s/batch, batch_loss=11.6, batch_index=182, batch_size=256]

Epoch 1/10:  18%|██▌           | 182/991 [49:31<3:46:00, 16.76s/batch, batch_loss=11.6, batch_index=182, batch_size=256]

Epoch 1/10:  18%|██▌           | 182/991 [49:46<3:46:00, 16.76s/batch, batch_loss=18.8, batch_index=183, batch_size=256]

Epoch 1/10:  18%|██▌           | 183/991 [49:46<3:40:30, 16.37s/batch, batch_loss=18.8, batch_index=183, batch_size=256]

Epoch 1/10:  18%|██▌           | 183/991 [50:02<3:40:30, 16.37s/batch, batch_loss=17.2, batch_index=184, batch_size=256]

Epoch 1/10:  19%|██▌           | 184/991 [50:02<3:38:21, 16.24s/batch, batch_loss=17.2, batch_index=184, batch_size=256]

Epoch 1/10:  19%|██▌           | 184/991 [50:18<3:38:21, 16.24s/batch, batch_loss=13.9, batch_index=185, batch_size=256]

Epoch 1/10:  19%|██▌           | 185/991 [50:18<3:35:39, 16.05s/batch, batch_loss=13.9, batch_index=185, batch_size=256]

Epoch 1/10:  19%|██▌           | 185/991 [50:34<3:35:39, 16.05s/batch, batch_loss=17.6, batch_index=186, batch_size=256]

Epoch 1/10:  19%|██▋           | 186/991 [50:34<3:34:43, 16.00s/batch, batch_loss=17.6, batch_index=186, batch_size=256]

Epoch 1/10:  19%|██▋           | 186/991 [50:50<3:34:43, 16.00s/batch, batch_loss=14.8, batch_index=187, batch_size=256]

Epoch 1/10:  19%|██▋           | 187/991 [50:50<3:34:15, 15.99s/batch, batch_loss=14.8, batch_index=187, batch_size=256]

Epoch 1/10:  19%|██▋           | 187/991 [51:06<3:34:15, 15.99s/batch, batch_loss=16.2, batch_index=188, batch_size=256]

Epoch 1/10:  19%|██▋           | 188/991 [51:06<3:33:25, 15.95s/batch, batch_loss=16.2, batch_index=188, batch_size=256]

Epoch 1/10:  19%|███             | 188/991 [51:24<3:33:25, 15.95s/batch, batch_loss=18, batch_index=189, batch_size=256]

Epoch 1/10:  19%|███             | 189/991 [51:24<3:41:19, 16.56s/batch, batch_loss=18, batch_index=189, batch_size=256]

Epoch 1/10:  19%|██▋           | 189/991 [51:39<3:41:19, 16.56s/batch, batch_loss=20.6, batch_index=190, batch_size=256]

Epoch 1/10:  19%|██▋           | 190/991 [51:39<3:34:48, 16.09s/batch, batch_loss=20.6, batch_index=190, batch_size=256]

Epoch 1/10:  19%|██▋           | 190/991 [51:54<3:34:48, 16.09s/batch, batch_loss=17.3, batch_index=191, batch_size=256]

Epoch 1/10:  19%|██▋           | 191/991 [51:54<3:29:58, 15.75s/batch, batch_loss=17.3, batch_index=191, batch_size=256]

Epoch 1/10:  19%|██▋           | 191/991 [52:08<3:29:58, 15.75s/batch, batch_loss=11.4, batch_index=192, batch_size=256]

Epoch 1/10:  19%|██▋           | 192/991 [52:08<3:23:53, 15.31s/batch, batch_loss=11.4, batch_index=192, batch_size=256]

Epoch 1/10:  19%|██▋           | 192/991 [52:23<3:23:53, 15.31s/batch, batch_loss=18.9, batch_index=193, batch_size=256]

Epoch 1/10:  19%|██▋           | 193/991 [52:23<3:23:09, 15.28s/batch, batch_loss=18.9, batch_index=193, batch_size=256]

Epoch 1/10:  19%|██▋           | 193/991 [52:38<3:23:09, 15.28s/batch, batch_loss=8.24, batch_index=194, batch_size=256]

Epoch 1/10:  20%|██▋           | 194/991 [52:38<3:21:10, 15.15s/batch, batch_loss=8.24, batch_index=194, batch_size=256]

Epoch 1/10:  20%|██▋           | 194/991 [52:53<3:21:10, 15.15s/batch, batch_loss=3.84, batch_index=195, batch_size=256]

Epoch 1/10:  20%|██▊           | 195/991 [52:53<3:22:09, 15.24s/batch, batch_loss=3.84, batch_index=195, batch_size=256]

Epoch 1/10:  20%|██▊           | 195/991 [53:08<3:22:09, 15.24s/batch, batch_loss=5.05, batch_index=196, batch_size=256]

Epoch 1/10:  20%|██▊           | 196/991 [53:08<3:18:03, 14.95s/batch, batch_loss=5.05, batch_index=196, batch_size=256]

Epoch 1/10:  20%|██▊           | 196/991 [53:24<3:18:03, 14.95s/batch, batch_loss=11.9, batch_index=197, batch_size=256]

Epoch 1/10:  20%|██▊           | 197/991 [53:24<3:22:08, 15.28s/batch, batch_loss=11.9, batch_index=197, batch_size=256]

Epoch 1/10:  20%|██▊           | 197/991 [53:39<3:22:08, 15.28s/batch, batch_loss=8.61, batch_index=198, batch_size=256]

Epoch 1/10:  20%|██▊           | 198/991 [53:39<3:21:55, 15.28s/batch, batch_loss=8.61, batch_index=198, batch_size=256]

Epoch 1/10:  20%|██▊           | 198/991 [53:54<3:21:55, 15.28s/batch, batch_loss=15.3, batch_index=199, batch_size=256]

Epoch 1/10:  20%|██▊           | 199/991 [53:54<3:21:04, 15.23s/batch, batch_loss=15.3, batch_index=199, batch_size=256]

Epoch 1/10:  20%|██▊           | 199/991 [54:10<3:21:04, 15.23s/batch, batch_loss=8.84, batch_index=200, batch_size=256]

Epoch 1/10:  20%|██▊           | 200/991 [54:10<3:25:00, 15.55s/batch, batch_loss=8.84, batch_index=200, batch_size=256]

Epoch 1/10:  20%|██▊           | 200/991 [54:26<3:25:00, 15.55s/batch, batch_loss=16.8, batch_index=201, batch_size=256]

Epoch 1/10:  20%|██▊           | 201/991 [54:26<3:26:34, 15.69s/batch, batch_loss=16.8, batch_index=201, batch_size=256]

Epoch 1/10:  20%|██▊           | 201/991 [54:42<3:26:34, 15.69s/batch, batch_loss=14.4, batch_index=202, batch_size=256]

Epoch 1/10:  20%|██▊           | 202/991 [54:42<3:26:07, 15.67s/batch, batch_loss=14.4, batch_index=202, batch_size=256]

Epoch 1/10:  20%|██▊           | 202/991 [55:00<3:26:07, 15.67s/batch, batch_loss=15.6, batch_index=203, batch_size=256]

Epoch 1/10:  20%|██▊           | 203/991 [55:00<3:33:15, 16.24s/batch, batch_loss=15.6, batch_index=203, batch_size=256]

Epoch 1/10:  20%|███▎            | 203/991 [55:16<3:33:15, 16.24s/batch, batch_loss=44, batch_index=204, batch_size=256]

Epoch 1/10:  21%|███▎            | 204/991 [55:16<3:32:06, 16.17s/batch, batch_loss=44, batch_index=204, batch_size=256]

Epoch 1/10:  21%|██▉           | 204/991 [55:31<3:32:06, 16.17s/batch, batch_loss=36.7, batch_index=205, batch_size=256]

Epoch 1/10:  21%|██▉           | 205/991 [55:31<3:29:07, 15.96s/batch, batch_loss=36.7, batch_index=205, batch_size=256]

Epoch 1/10:  21%|██▉           | 205/991 [55:46<3:29:07, 15.96s/batch, batch_loss=8.97, batch_index=206, batch_size=256]

Epoch 1/10:  21%|██▉           | 206/991 [55:46<3:23:59, 15.59s/batch, batch_loss=8.97, batch_index=206, batch_size=256]

Epoch 1/10:  21%|██▉           | 206/991 [56:00<3:23:59, 15.59s/batch, batch_loss=10.5, batch_index=207, batch_size=256]

Epoch 1/10:  21%|██▉           | 207/991 [56:00<3:17:08, 15.09s/batch, batch_loss=10.5, batch_index=207, batch_size=256]

Epoch 1/10:  21%|██▉           | 207/991 [56:13<3:17:08, 15.09s/batch, batch_loss=12.7, batch_index=208, batch_size=256]

Epoch 1/10:  21%|██▉           | 208/991 [56:13<3:10:43, 14.61s/batch, batch_loss=12.7, batch_index=208, batch_size=256]

Epoch 1/10:  21%|██▉           | 208/991 [56:29<3:10:43, 14.61s/batch, batch_loss=9.67, batch_index=209, batch_size=256]

Epoch 1/10:  21%|██▉           | 209/991 [56:29<3:15:46, 15.02s/batch, batch_loss=9.67, batch_index=209, batch_size=256]

Epoch 1/10:  21%|██▉           | 209/991 [56:45<3:15:46, 15.02s/batch, batch_loss=26.9, batch_index=210, batch_size=256]

Epoch 1/10:  21%|██▉           | 210/991 [56:45<3:18:32, 15.25s/batch, batch_loss=26.9, batch_index=210, batch_size=256]

Epoch 1/10:  21%|██▉           | 210/991 [57:01<3:18:32, 15.25s/batch, batch_loss=13.9, batch_index=211, batch_size=256]

Epoch 1/10:  21%|██▉           | 211/991 [57:01<3:20:54, 15.45s/batch, batch_loss=13.9, batch_index=211, batch_size=256]

Epoch 1/10:  21%|██▉           | 211/991 [57:17<3:20:54, 15.45s/batch, batch_loss=28.7, batch_index=212, batch_size=256]

Epoch 1/10:  21%|██▉           | 212/991 [57:17<3:24:47, 15.77s/batch, batch_loss=28.7, batch_index=212, batch_size=256]

Epoch 1/10:  21%|██▉           | 212/991 [57:33<3:24:47, 15.77s/batch, batch_loss=3.17, batch_index=213, batch_size=256]

Epoch 1/10:  21%|███           | 213/991 [57:33<3:24:20, 15.76s/batch, batch_loss=3.17, batch_index=213, batch_size=256]

Epoch 1/10:  21%|███           | 213/991 [57:50<3:24:20, 15.76s/batch, batch_loss=14.1, batch_index=214, batch_size=256]

Epoch 1/10:  22%|███           | 214/991 [57:50<3:28:30, 16.10s/batch, batch_loss=14.1, batch_index=214, batch_size=256]

Epoch 1/10:  22%|███           | 214/991 [58:07<3:28:30, 16.10s/batch, batch_loss=21.5, batch_index=215, batch_size=256]

Epoch 1/10:  22%|███           | 215/991 [58:07<3:30:01, 16.24s/batch, batch_loss=21.5, batch_index=215, batch_size=256]

Epoch 1/10:  22%|███           | 215/991 [58:22<3:30:01, 16.24s/batch, batch_loss=10.1, batch_index=216, batch_size=256]

Epoch 1/10:  22%|███           | 216/991 [58:22<3:27:54, 16.10s/batch, batch_loss=10.1, batch_index=216, batch_size=256]

Epoch 1/10:  22%|███           | 216/991 [58:39<3:27:54, 16.10s/batch, batch_loss=19.9, batch_index=217, batch_size=256]

Epoch 1/10:  22%|███           | 217/991 [58:39<3:29:18, 16.22s/batch, batch_loss=19.9, batch_index=217, batch_size=256]

Epoch 1/10:  22%|███           | 217/991 [58:55<3:29:18, 16.22s/batch, batch_loss=20.2, batch_index=218, batch_size=256]

Epoch 1/10:  22%|███           | 218/991 [58:55<3:29:27, 16.26s/batch, batch_loss=20.2, batch_index=218, batch_size=256]

Epoch 1/10:  22%|███           | 218/991 [59:12<3:29:27, 16.26s/batch, batch_loss=26.1, batch_index=219, batch_size=256]

Epoch 1/10:  22%|███           | 219/991 [59:12<3:29:47, 16.31s/batch, batch_loss=26.1, batch_index=219, batch_size=256]

Epoch 1/10:  22%|███           | 219/991 [59:28<3:29:47, 16.31s/batch, batch_loss=38.7, batch_index=220, batch_size=256]

Epoch 1/10:  22%|███           | 220/991 [59:28<3:30:17, 16.36s/batch, batch_loss=38.7, batch_index=220, batch_size=256]

Epoch 1/10:  22%|███           | 220/991 [59:45<3:30:17, 16.36s/batch, batch_loss=21.7, batch_index=221, batch_size=256]

Epoch 1/10:  22%|███           | 221/991 [59:45<3:30:17, 16.39s/batch, batch_loss=21.7, batch_index=221, batch_size=256]

Epoch 1/10:  22%|██▋         | 221/991 [1:00:02<3:30:17, 16.39s/batch, batch_loss=16.8, batch_index=222, batch_size=256]

Epoch 1/10:  22%|██▋         | 222/991 [1:00:02<3:33:21, 16.65s/batch, batch_loss=16.8, batch_index=222, batch_size=256]

Epoch 1/10:  22%|██▋         | 222/991 [1:00:18<3:33:21, 16.65s/batch, batch_loss=31.6, batch_index=223, batch_size=256]

Epoch 1/10:  23%|██▋         | 223/991 [1:00:18<3:32:00, 16.56s/batch, batch_loss=31.6, batch_index=223, batch_size=256]

Epoch 1/10:  23%|███▏          | 223/991 [1:00:34<3:32:00, 16.56s/batch, batch_loss=17, batch_index=224, batch_size=256]

Epoch 1/10:  23%|███▏          | 224/991 [1:00:34<3:28:52, 16.34s/batch, batch_loss=17, batch_index=224, batch_size=256]

Epoch 1/10:  23%|██▋         | 224/991 [1:00:50<3:28:52, 16.34s/batch, batch_loss=12.4, batch_index=225, batch_size=256]

Epoch 1/10:  23%|██▋         | 225/991 [1:00:50<3:25:57, 16.13s/batch, batch_loss=12.4, batch_index=225, batch_size=256]

Epoch 1/10:  23%|██▋         | 225/991 [1:01:06<3:25:57, 16.13s/batch, batch_loss=24.3, batch_index=226, batch_size=256]

Epoch 1/10:  23%|██▋         | 226/991 [1:01:06<3:26:52, 16.23s/batch, batch_loss=24.3, batch_index=226, batch_size=256]

Epoch 1/10:  23%|██▎       | 226/991 [1:01:27<3:26:52, 16.23s/batch, batch_loss=2.4e+3, batch_index=227, batch_size=256]

Epoch 1/10:  23%|██▎       | 227/991 [1:01:27<3:44:30, 17.63s/batch, batch_loss=2.4e+3, batch_index=227, batch_size=256]

Epoch 1/10:  23%|██       | 227/991 [1:01:43<3:44:30, 17.63s/batch, batch_loss=3.59e+3, batch_index=228, batch_size=256]

Epoch 1/10:  23%|██       | 228/991 [1:01:43<3:39:32, 17.26s/batch, batch_loss=3.59e+3, batch_index=228, batch_size=256]

Epoch 1/10:  23%|██▊         | 228/991 [1:02:00<3:39:32, 17.26s/batch, batch_loss=13.3, batch_index=229, batch_size=256]

Epoch 1/10:  23%|██▊         | 229/991 [1:02:00<3:37:32, 17.13s/batch, batch_loss=13.3, batch_index=229, batch_size=256]

Epoch 1/10:  23%|██▊         | 229/991 [1:02:16<3:37:32, 17.13s/batch, batch_loss=11.3, batch_index=230, batch_size=256]

Epoch 1/10:  23%|██▊         | 230/991 [1:02:16<3:31:35, 16.68s/batch, batch_loss=11.3, batch_index=230, batch_size=256]

Epoch 1/10:  23%|███▏          | 230/991 [1:02:33<3:31:35, 16.68s/batch, batch_loss=14, batch_index=231, batch_size=256]

Epoch 1/10:  23%|███▎          | 231/991 [1:02:33<3:32:45, 16.80s/batch, batch_loss=14, batch_index=231, batch_size=256]

Epoch 1/10:  23%|███          | 231/991 [1:02:51<3:32:45, 16.80s/batch, batch_loss=8.5, batch_index=232, batch_size=256]

Epoch 1/10:  23%|███          | 232/991 [1:02:51<3:39:02, 17.31s/batch, batch_loss=8.5, batch_index=232, batch_size=256]

Epoch 1/10:  23%|██▊         | 232/991 [1:03:06<3:39:02, 17.31s/batch, batch_loss=9.96, batch_index=233, batch_size=256]

Epoch 1/10:  24%|██▊         | 233/991 [1:03:06<3:28:55, 16.54s/batch, batch_loss=9.96, batch_index=233, batch_size=256]

Epoch 1/10:  24%|██▊         | 233/991 [1:03:22<3:28:55, 16.54s/batch, batch_loss=12.7, batch_index=234, batch_size=256]

Epoch 1/10:  24%|██▊         | 234/991 [1:03:22<3:24:33, 16.21s/batch, batch_loss=12.7, batch_index=234, batch_size=256]

Epoch 1/10:  24%|██▊         | 234/991 [1:03:38<3:24:33, 16.21s/batch, batch_loss=14.3, batch_index=235, batch_size=256]

Epoch 1/10:  24%|██▊         | 235/991 [1:03:38<3:23:52, 16.18s/batch, batch_loss=14.3, batch_index=235, batch_size=256]

Epoch 1/10:  24%|██▊         | 235/991 [1:03:53<3:23:52, 16.18s/batch, batch_loss=26.2, batch_index=236, batch_size=256]

Epoch 1/10:  24%|██▊         | 236/991 [1:03:53<3:19:10, 15.83s/batch, batch_loss=26.2, batch_index=236, batch_size=256]

Epoch 1/10:  24%|██▊         | 236/991 [1:04:08<3:19:10, 15.83s/batch, batch_loss=27.1, batch_index=237, batch_size=256]

Epoch 1/10:  24%|██▊         | 237/991 [1:04:08<3:18:40, 15.81s/batch, batch_loss=27.1, batch_index=237, batch_size=256]

Epoch 1/10:  24%|██▊         | 237/991 [1:04:23<3:18:40, 15.81s/batch, batch_loss=19.6, batch_index=238, batch_size=256]

Epoch 1/10:  24%|██▉         | 238/991 [1:04:23<3:15:13, 15.56s/batch, batch_loss=19.6, batch_index=238, batch_size=256]

Epoch 1/10:  24%|██▉         | 238/991 [1:04:39<3:15:13, 15.56s/batch, batch_loss=4.94, batch_index=239, batch_size=256]

Epoch 1/10:  24%|██▉         | 239/991 [1:04:39<3:16:23, 15.67s/batch, batch_loss=4.94, batch_index=239, batch_size=256]

Epoch 1/10:  24%|██▉         | 239/991 [1:04:56<3:16:23, 15.67s/batch, batch_loss=7.33, batch_index=240, batch_size=256]

Epoch 1/10:  24%|██▉         | 240/991 [1:04:56<3:18:36, 15.87s/batch, batch_loss=7.33, batch_index=240, batch_size=256]

Epoch 1/10:  24%|██▉         | 240/991 [1:05:13<3:18:36, 15.87s/batch, batch_loss=9.31, batch_index=241, batch_size=256]

Epoch 1/10:  24%|██▉         | 241/991 [1:05:13<3:23:15, 16.26s/batch, batch_loss=9.31, batch_index=241, batch_size=256]

Epoch 1/10:  24%|██▉         | 241/991 [1:05:30<3:23:15, 16.26s/batch, batch_loss=23.7, batch_index=242, batch_size=256]

Epoch 1/10:  24%|██▉         | 242/991 [1:05:30<3:25:24, 16.45s/batch, batch_loss=23.7, batch_index=242, batch_size=256]

Epoch 1/10:  24%|███▏         | 242/991 [1:05:46<3:25:24, 16.45s/batch, batch_loss=268, batch_index=243, batch_size=256]

Epoch 1/10:  25%|███▏         | 243/991 [1:05:46<3:24:56, 16.44s/batch, batch_loss=268, batch_index=243, batch_size=256]

Epoch 1/10:  25%|██▉         | 243/991 [1:06:03<3:24:56, 16.44s/batch, batch_loss=16.1, batch_index=244, batch_size=256]

Epoch 1/10:  25%|██▉         | 244/991 [1:06:03<3:25:25, 16.50s/batch, batch_loss=16.1, batch_index=244, batch_size=256]

Epoch 1/10:  25%|██▉         | 244/991 [1:06:20<3:25:25, 16.50s/batch, batch_loss=9.67, batch_index=245, batch_size=256]

Epoch 1/10:  25%|██▉         | 245/991 [1:06:20<3:25:59, 16.57s/batch, batch_loss=9.67, batch_index=245, batch_size=256]

Epoch 1/10:  25%|██▉         | 245/991 [1:06:35<3:25:59, 16.57s/batch, batch_loss=6.49, batch_index=246, batch_size=256]

Epoch 1/10:  25%|██▉         | 246/991 [1:06:35<3:23:02, 16.35s/batch, batch_loss=6.49, batch_index=246, batch_size=256]

Epoch 1/10:  25%|██▉         | 246/991 [1:06:50<3:23:02, 16.35s/batch, batch_loss=16.6, batch_index=247, batch_size=256]

Epoch 1/10:  25%|██▉         | 247/991 [1:06:50<3:17:02, 15.89s/batch, batch_loss=16.6, batch_index=247, batch_size=256]

Epoch 1/10:  25%|██▉         | 247/991 [1:07:09<3:17:02, 15.89s/batch, batch_loss=5.06, batch_index=248, batch_size=256]

Epoch 1/10:  25%|███         | 248/991 [1:07:09<3:25:54, 16.63s/batch, batch_loss=5.06, batch_index=248, batch_size=256]

Epoch 1/10:  25%|███         | 248/991 [1:07:24<3:25:54, 16.63s/batch, batch_loss=14.1, batch_index=249, batch_size=256]

Epoch 1/10:  25%|███         | 249/991 [1:07:24<3:22:10, 16.35s/batch, batch_loss=14.1, batch_index=249, batch_size=256]

Epoch 1/10:  25%|███         | 249/991 [1:07:40<3:22:10, 16.35s/batch, batch_loss=10.3, batch_index=250, batch_size=256]

Epoch 1/10:  25%|███         | 250/991 [1:07:40<3:18:02, 16.04s/batch, batch_loss=10.3, batch_index=250, batch_size=256]

Epoch 1/10:  25%|███         | 250/991 [1:07:55<3:18:02, 16.04s/batch, batch_loss=6.82, batch_index=251, batch_size=256]

Epoch 1/10:  25%|███         | 251/991 [1:07:55<3:14:01, 15.73s/batch, batch_loss=6.82, batch_index=251, batch_size=256]

Epoch 1/10:  25%|███         | 251/991 [1:08:09<3:14:01, 15.73s/batch, batch_loss=13.2, batch_index=252, batch_size=256]

Epoch 1/10:  25%|███         | 252/991 [1:08:09<3:10:22, 15.46s/batch, batch_loss=13.2, batch_index=252, batch_size=256]

Epoch 1/10:  25%|███         | 252/991 [1:08:25<3:10:22, 15.46s/batch, batch_loss=8.34, batch_index=253, batch_size=256]

Epoch 1/10:  26%|███         | 253/991 [1:08:25<3:11:25, 15.56s/batch, batch_loss=8.34, batch_index=253, batch_size=256]

Epoch 1/10:  26%|███         | 253/991 [1:08:40<3:11:25, 15.56s/batch, batch_loss=19.1, batch_index=254, batch_size=256]

Epoch 1/10:  26%|███         | 254/991 [1:08:40<3:08:38, 15.36s/batch, batch_loss=19.1, batch_index=254, batch_size=256]

Epoch 1/10:  26%|███         | 254/991 [1:08:56<3:08:38, 15.36s/batch, batch_loss=15.6, batch_index=255, batch_size=256]

Epoch 1/10:  26%|███         | 255/991 [1:08:56<3:09:33, 15.45s/batch, batch_loss=15.6, batch_index=255, batch_size=256]

Epoch 1/10:  26%|███▎         | 255/991 [1:09:10<3:09:33, 15.45s/batch, batch_loss=697, batch_index=256, batch_size=256]

Epoch 1/10:  26%|███▎         | 256/991 [1:09:10<3:03:10, 14.95s/batch, batch_loss=697, batch_index=256, batch_size=256]

Epoch 1/10:  26%|███         | 256/991 [1:09:25<3:03:10, 14.95s/batch, batch_loss=19.7, batch_index=257, batch_size=256]

Epoch 1/10:  26%|███         | 257/991 [1:09:25<3:03:15, 14.98s/batch, batch_loss=19.7, batch_index=257, batch_size=256]

Epoch 1/10:  26%|███▎         | 257/991 [1:09:39<3:03:15, 14.98s/batch, batch_loss=204, batch_index=258, batch_size=256]

Epoch 1/10:  26%|███▍         | 258/991 [1:09:39<2:59:29, 14.69s/batch, batch_loss=204, batch_index=258, batch_size=256]

Epoch 1/10:  26%|███         | 258/991 [1:09:54<2:59:29, 14.69s/batch, batch_loss=14.5, batch_index=259, batch_size=256]

Epoch 1/10:  26%|███▏        | 259/991 [1:09:54<3:00:31, 14.80s/batch, batch_loss=14.5, batch_index=259, batch_size=256]

Epoch 1/10:  26%|███▏        | 259/991 [1:10:09<3:00:31, 14.80s/batch, batch_loss=22.2, batch_index=260, batch_size=256]

Epoch 1/10:  26%|███▏        | 260/991 [1:10:09<3:01:37, 14.91s/batch, batch_loss=22.2, batch_index=260, batch_size=256]

Epoch 1/10:  26%|███▋          | 260/991 [1:10:24<3:01:37, 14.91s/batch, batch_loss=14, batch_index=261, batch_size=256]

Epoch 1/10:  26%|███▋          | 261/991 [1:10:24<3:02:48, 15.02s/batch, batch_loss=14, batch_index=261, batch_size=256]

Epoch 1/10:  26%|███▏        | 261/991 [1:10:40<3:02:48, 15.02s/batch, batch_loss=11.3, batch_index=262, batch_size=256]

Epoch 1/10:  26%|███▏        | 262/991 [1:10:40<3:05:50, 15.30s/batch, batch_loss=11.3, batch_index=262, batch_size=256]

Epoch 1/10:  26%|███▏        | 262/991 [1:10:55<3:05:50, 15.30s/batch, batch_loss=11.6, batch_index=263, batch_size=256]

Epoch 1/10:  27%|███▏        | 263/991 [1:10:55<3:04:06, 15.17s/batch, batch_loss=11.6, batch_index=263, batch_size=256]

Epoch 1/10:  27%|███▋          | 263/991 [1:11:13<3:04:06, 15.17s/batch, batch_loss=14, batch_index=264, batch_size=256]

Epoch 1/10:  27%|███▋          | 264/991 [1:11:13<3:14:50, 16.08s/batch, batch_loss=14, batch_index=264, batch_size=256]

Epoch 1/10:  27%|███▏        | 264/991 [1:11:28<3:14:50, 16.08s/batch, batch_loss=16.6, batch_index=265, batch_size=256]

Epoch 1/10:  27%|███▏        | 265/991 [1:11:28<3:09:48, 15.69s/batch, batch_loss=16.6, batch_index=265, batch_size=256]

Epoch 1/10:  27%|███▏        | 265/991 [1:11:44<3:09:48, 15.69s/batch, batch_loss=15.1, batch_index=266, batch_size=256]

Epoch 1/10:  27%|███▏        | 266/991 [1:11:44<3:10:52, 15.80s/batch, batch_loss=15.1, batch_index=266, batch_size=256]

Epoch 1/10:  27%|███▏        | 266/991 [1:11:59<3:10:52, 15.80s/batch, batch_loss=10.9, batch_index=267, batch_size=256]

Epoch 1/10:  27%|███▏        | 267/991 [1:11:59<3:07:23, 15.53s/batch, batch_loss=10.9, batch_index=267, batch_size=256]

Epoch 1/10:  27%|███▏        | 267/991 [1:12:15<3:07:23, 15.53s/batch, batch_loss=7.32, batch_index=268, batch_size=256]

Epoch 1/10:  27%|███▏        | 268/991 [1:12:15<3:07:29, 15.56s/batch, batch_loss=7.32, batch_index=268, batch_size=256]

Epoch 1/10:  27%|███▏        | 268/991 [1:12:31<3:07:29, 15.56s/batch, batch_loss=12.7, batch_index=269, batch_size=256]

Epoch 1/10:  27%|███▎        | 269/991 [1:12:31<3:12:01, 15.96s/batch, batch_loss=12.7, batch_index=269, batch_size=256]

Epoch 1/10:  27%|███▎        | 269/991 [1:12:51<3:12:01, 15.96s/batch, batch_loss=1.34, batch_index=270, batch_size=256]

Epoch 1/10:  27%|███▎        | 270/991 [1:12:51<3:24:59, 17.06s/batch, batch_loss=1.34, batch_index=270, batch_size=256]

Epoch 1/10:  27%|███▎        | 270/991 [1:13:08<3:24:59, 17.06s/batch, batch_loss=10.4, batch_index=271, batch_size=256]

Epoch 1/10:  27%|███▎        | 271/991 [1:13:08<3:24:09, 17.01s/batch, batch_loss=10.4, batch_index=271, batch_size=256]

Epoch 1/10:  27%|███▌         | 271/991 [1:13:24<3:24:09, 17.01s/batch, batch_loss=9.7, batch_index=272, batch_size=256]

Epoch 1/10:  27%|███▌         | 272/991 [1:13:24<3:20:33, 16.74s/batch, batch_loss=9.7, batch_index=272, batch_size=256]

Epoch 1/10:  27%|███▎        | 272/991 [1:13:39<3:20:33, 16.74s/batch, batch_loss=17.2, batch_index=273, batch_size=256]

Epoch 1/10:  28%|███▎        | 273/991 [1:13:39<3:15:34, 16.34s/batch, batch_loss=17.2, batch_index=273, batch_size=256]

Epoch 1/10:  28%|███▎        | 273/991 [1:13:56<3:15:34, 16.34s/batch, batch_loss=11.2, batch_index=274, batch_size=256]

Epoch 1/10:  28%|███▎        | 274/991 [1:13:56<3:14:39, 16.29s/batch, batch_loss=11.2, batch_index=274, batch_size=256]

Epoch 1/10:  28%|██▍      | 274/991 [1:14:11<3:14:39, 16.29s/batch, batch_loss=3.31e+3, batch_index=275, batch_size=256]

Epoch 1/10:  28%|██▍      | 275/991 [1:14:11<3:12:08, 16.10s/batch, batch_loss=3.31e+3, batch_index=275, batch_size=256]

Epoch 1/10:  28%|███▎        | 275/991 [1:14:29<3:12:08, 16.10s/batch, batch_loss=14.1, batch_index=276, batch_size=256]

Epoch 1/10:  28%|███▎        | 276/991 [1:14:29<3:17:52, 16.61s/batch, batch_loss=14.1, batch_index=276, batch_size=256]

Epoch 1/10:  28%|██▌      | 276/991 [1:14:44<3:17:52, 16.61s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 1/10:  28%|██▌      | 277/991 [1:14:44<3:12:47, 16.20s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 1/10:  28%|███▎        | 277/991 [1:14:59<3:12:47, 16.20s/batch, batch_loss=11.4, batch_index=278, batch_size=256]

Epoch 1/10:  28%|███▎        | 278/991 [1:14:59<3:07:04, 15.74s/batch, batch_loss=11.4, batch_index=278, batch_size=256]

Epoch 1/10:  28%|███▎        | 278/991 [1:15:14<3:07:04, 15.74s/batch, batch_loss=16.9, batch_index=279, batch_size=256]

Epoch 1/10:  28%|███▍        | 279/991 [1:15:14<3:05:19, 15.62s/batch, batch_loss=16.9, batch_index=279, batch_size=256]

Epoch 1/10:  28%|███▍        | 279/991 [1:15:30<3:05:19, 15.62s/batch, batch_loss=13.4, batch_index=280, batch_size=256]

Epoch 1/10:  28%|███▍        | 280/991 [1:15:30<3:06:19, 15.72s/batch, batch_loss=13.4, batch_index=280, batch_size=256]

Epoch 1/10:  28%|███▍        | 280/991 [1:15:46<3:06:19, 15.72s/batch, batch_loss=12.2, batch_index=281, batch_size=256]

Epoch 1/10:  28%|███▍        | 281/991 [1:15:46<3:05:14, 15.65s/batch, batch_loss=12.2, batch_index=281, batch_size=256]

Epoch 1/10:  28%|███▍        | 281/991 [1:16:01<3:05:14, 15.65s/batch, batch_loss=6.16, batch_index=282, batch_size=256]

Epoch 1/10:  28%|███▍        | 282/991 [1:16:01<3:03:51, 15.56s/batch, batch_loss=6.16, batch_index=282, batch_size=256]

Epoch 1/10:  28%|███▍        | 282/991 [1:16:16<3:03:51, 15.56s/batch, batch_loss=14.3, batch_index=283, batch_size=256]

Epoch 1/10:  29%|███▍        | 283/991 [1:16:16<3:02:49, 15.49s/batch, batch_loss=14.3, batch_index=283, batch_size=256]

Epoch 1/10:  29%|███▍        | 283/991 [1:16:33<3:02:49, 15.49s/batch, batch_loss=18.1, batch_index=284, batch_size=256]

Epoch 1/10:  29%|███▍        | 284/991 [1:16:33<3:05:20, 15.73s/batch, batch_loss=18.1, batch_index=284, batch_size=256]

Epoch 1/10:  29%|███▍        | 284/991 [1:16:49<3:05:20, 15.73s/batch, batch_loss=11.8, batch_index=285, batch_size=256]

Epoch 1/10:  29%|███▍        | 285/991 [1:16:49<3:07:01, 15.89s/batch, batch_loss=11.8, batch_index=285, batch_size=256]

Epoch 1/10:  29%|███▍        | 285/991 [1:17:04<3:07:01, 15.89s/batch, batch_loss=7.78, batch_index=286, batch_size=256]

Epoch 1/10:  29%|███▍        | 286/991 [1:17:04<3:02:32, 15.54s/batch, batch_loss=7.78, batch_index=286, batch_size=256]

Epoch 1/10:  29%|███▍        | 286/991 [1:17:19<3:02:32, 15.54s/batch, batch_loss=7.26, batch_index=287, batch_size=256]

Epoch 1/10:  29%|███▍        | 287/991 [1:17:19<3:01:21, 15.46s/batch, batch_loss=7.26, batch_index=287, batch_size=256]

Epoch 1/10:  29%|██▌      | 287/991 [1:17:33<3:01:21, 15.46s/batch, batch_loss=2.57e+3, batch_index=288, batch_size=256]

Epoch 1/10:  29%|██▌      | 288/991 [1:17:33<2:57:07, 15.12s/batch, batch_loss=2.57e+3, batch_index=288, batch_size=256]

Epoch 1/10:  29%|██▌      | 288/991 [1:17:49<2:57:07, 15.12s/batch, batch_loss=1.25e+3, batch_index=289, batch_size=256]

Epoch 1/10:  29%|██▌      | 289/991 [1:17:49<2:58:28, 15.25s/batch, batch_loss=1.25e+3, batch_index=289, batch_size=256]

Epoch 1/10:  29%|███▍        | 289/991 [1:18:04<2:58:28, 15.25s/batch, batch_loss=12.7, batch_index=290, batch_size=256]

Epoch 1/10:  29%|███▌        | 290/991 [1:18:04<2:58:45, 15.30s/batch, batch_loss=12.7, batch_index=290, batch_size=256]

Epoch 1/10:  29%|███▌        | 290/991 [1:18:20<2:58:45, 15.30s/batch, batch_loss=5.01, batch_index=291, batch_size=256]

Epoch 1/10:  29%|███▌        | 291/991 [1:18:20<2:59:46, 15.41s/batch, batch_loss=5.01, batch_index=291, batch_size=256]

Epoch 1/10:  29%|███▌        | 291/991 [1:18:35<2:59:46, 15.41s/batch, batch_loss=10.6, batch_index=292, batch_size=256]

Epoch 1/10:  29%|███▌        | 292/991 [1:18:35<2:59:25, 15.40s/batch, batch_loss=10.6, batch_index=292, batch_size=256]

Epoch 1/10:  29%|███▌        | 292/991 [1:18:51<2:59:25, 15.40s/batch, batch_loss=15.3, batch_index=293, batch_size=256]

Epoch 1/10:  30%|███▌        | 293/991 [1:18:51<2:59:08, 15.40s/batch, batch_loss=15.3, batch_index=293, batch_size=256]

Epoch 1/10:  30%|███▌        | 293/991 [1:19:06<2:59:08, 15.40s/batch, batch_loss=12.8, batch_index=294, batch_size=256]

Epoch 1/10:  30%|███▌        | 294/991 [1:19:06<2:58:39, 15.38s/batch, batch_loss=12.8, batch_index=294, batch_size=256]

Epoch 1/10:  30%|████▏         | 294/991 [1:19:22<2:58:39, 15.38s/batch, batch_loss=10, batch_index=295, batch_size=256]

Epoch 1/10:  30%|████▏         | 295/991 [1:19:22<2:59:25, 15.47s/batch, batch_loss=10, batch_index=295, batch_size=256]

Epoch 1/10:  30%|███▌        | 295/991 [1:19:37<2:59:25, 15.47s/batch, batch_loss=16.5, batch_index=296, batch_size=256]

Epoch 1/10:  30%|███▌        | 296/991 [1:19:37<2:58:06, 15.38s/batch, batch_loss=16.5, batch_index=296, batch_size=256]

Epoch 1/10:  30%|███▌        | 296/991 [1:19:52<2:58:06, 15.38s/batch, batch_loss=13.3, batch_index=297, batch_size=256]

Epoch 1/10:  30%|███▌        | 297/991 [1:19:52<2:58:03, 15.39s/batch, batch_loss=13.3, batch_index=297, batch_size=256]

Epoch 1/10:  30%|██▋      | 297/991 [1:20:08<2:58:03, 15.39s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 1/10:  30%|██▋      | 298/991 [1:20:08<2:58:49, 15.48s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 1/10:  30%|███▌        | 298/991 [1:20:24<2:58:49, 15.48s/batch, batch_loss=16.4, batch_index=299, batch_size=256]

Epoch 1/10:  30%|███▌        | 299/991 [1:20:24<3:00:10, 15.62s/batch, batch_loss=16.4, batch_index=299, batch_size=256]

Epoch 1/10:  30%|███▌        | 299/991 [1:20:40<3:00:10, 15.62s/batch, batch_loss=7.21, batch_index=300, batch_size=256]

Epoch 1/10:  30%|███▋        | 300/991 [1:20:40<3:00:01, 15.63s/batch, batch_loss=7.21, batch_index=300, batch_size=256]

Epoch 1/10:  30%|███▋        | 300/991 [1:20:55<3:00:01, 15.63s/batch, batch_loss=8.79, batch_index=301, batch_size=256]

Epoch 1/10:  30%|███▋        | 301/991 [1:20:55<3:00:00, 15.65s/batch, batch_loss=8.79, batch_index=301, batch_size=256]

Epoch 1/10:  30%|███▋        | 301/991 [1:21:10<3:00:00, 15.65s/batch, batch_loss=11.2, batch_index=302, batch_size=256]

Epoch 1/10:  30%|███▋        | 302/991 [1:21:10<2:57:14, 15.43s/batch, batch_loss=11.2, batch_index=302, batch_size=256]

Epoch 1/10:  30%|███▋        | 302/991 [1:21:26<2:57:14, 15.43s/batch, batch_loss=9.66, batch_index=303, batch_size=256]

Epoch 1/10:  31%|███▋        | 303/991 [1:21:26<2:59:37, 15.66s/batch, batch_loss=9.66, batch_index=303, batch_size=256]

Epoch 1/10:  31%|███▋        | 303/991 [1:21:42<2:59:37, 15.66s/batch, batch_loss=3.05, batch_index=304, batch_size=256]

Epoch 1/10:  31%|███▋        | 304/991 [1:21:42<2:58:47, 15.62s/batch, batch_loss=3.05, batch_index=304, batch_size=256]

Epoch 1/10:  31%|████▎         | 304/991 [1:21:59<2:58:47, 15.62s/batch, batch_loss=15, batch_index=305, batch_size=256]

Epoch 1/10:  31%|████▎         | 305/991 [1:21:59<3:02:13, 15.94s/batch, batch_loss=15, batch_index=305, batch_size=256]

Epoch 1/10:  31%|███▋        | 305/991 [1:22:14<3:02:13, 15.94s/batch, batch_loss=9.44, batch_index=306, batch_size=256]

Epoch 1/10:  31%|███▋        | 306/991 [1:22:14<2:58:30, 15.64s/batch, batch_loss=9.44, batch_index=306, batch_size=256]

Epoch 1/10:  31%|██▊      | 306/991 [1:22:29<2:58:30, 15.64s/batch, batch_loss=6.28e+3, batch_index=307, batch_size=256]

Epoch 1/10:  31%|██▊      | 307/991 [1:22:29<2:56:24, 15.48s/batch, batch_loss=6.28e+3, batch_index=307, batch_size=256]

Epoch 1/10:  31%|███▋        | 307/991 [1:22:43<2:56:24, 15.48s/batch, batch_loss=10.7, batch_index=308, batch_size=256]

Epoch 1/10:  31%|███▋        | 308/991 [1:22:43<2:53:15, 15.22s/batch, batch_loss=10.7, batch_index=308, batch_size=256]

Epoch 1/10:  31%|███▋        | 308/991 [1:22:59<2:53:15, 15.22s/batch, batch_loss=20.2, batch_index=309, batch_size=256]

Epoch 1/10:  31%|███▋        | 309/991 [1:22:59<2:52:57, 15.22s/batch, batch_loss=20.2, batch_index=309, batch_size=256]

Epoch 1/10:  31%|████▎         | 309/991 [1:23:14<2:52:57, 15.22s/batch, batch_loss=14, batch_index=310, batch_size=256]

Epoch 1/10:  31%|████▍         | 310/991 [1:23:14<2:53:00, 15.24s/batch, batch_loss=14, batch_index=310, batch_size=256]

Epoch 1/10:  31%|███▊        | 310/991 [1:23:29<2:53:00, 15.24s/batch, batch_loss=12.4, batch_index=311, batch_size=256]

Epoch 1/10:  31%|███▊        | 311/991 [1:23:29<2:52:29, 15.22s/batch, batch_loss=12.4, batch_index=311, batch_size=256]

Epoch 1/10:  31%|███▊        | 311/991 [1:23:44<2:52:29, 15.22s/batch, batch_loss=12.1, batch_index=312, batch_size=256]

Epoch 1/10:  31%|███▊        | 312/991 [1:23:44<2:52:38, 15.26s/batch, batch_loss=12.1, batch_index=312, batch_size=256]

Epoch 1/10:  31%|██▊      | 312/991 [1:23:59<2:52:38, 15.26s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 1/10:  32%|██▊      | 313/991 [1:23:59<2:51:31, 15.18s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 1/10:  32%|███▊        | 313/991 [1:24:15<2:51:31, 15.18s/batch, batch_loss=10.2, batch_index=314, batch_size=256]

Epoch 1/10:  32%|███▊        | 314/991 [1:24:15<2:52:16, 15.27s/batch, batch_loss=10.2, batch_index=314, batch_size=256]

Epoch 1/10:  32%|████▍         | 314/991 [1:24:30<2:52:16, 15.27s/batch, batch_loss=15, batch_index=315, batch_size=256]

Epoch 1/10:  32%|████▍         | 315/991 [1:24:30<2:53:22, 15.39s/batch, batch_loss=15, batch_index=315, batch_size=256]

Epoch 1/10:  32%|███▊        | 315/991 [1:24:49<2:53:22, 15.39s/batch, batch_loss=19.6, batch_index=316, batch_size=256]

Epoch 1/10:  32%|███▊        | 316/991 [1:24:49<3:04:00, 16.36s/batch, batch_loss=19.6, batch_index=316, batch_size=256]

Epoch 1/10:  32%|███▊        | 316/991 [1:25:04<3:04:00, 16.36s/batch, batch_loss=20.4, batch_index=317, batch_size=256]

Epoch 1/10:  32%|███▊        | 317/991 [1:25:04<2:58:49, 15.92s/batch, batch_loss=20.4, batch_index=317, batch_size=256]

Epoch 1/10:  32%|███▊        | 317/991 [1:25:20<2:58:49, 15.92s/batch, batch_loss=18.8, batch_index=318, batch_size=256]

Epoch 1/10:  32%|███▊        | 318/991 [1:25:20<2:57:24, 15.82s/batch, batch_loss=18.8, batch_index=318, batch_size=256]

Epoch 1/10:  32%|███▊        | 318/991 [1:25:34<2:57:24, 15.82s/batch, batch_loss=14.7, batch_index=319, batch_size=256]

Epoch 1/10:  32%|███▊        | 319/991 [1:25:34<2:53:25, 15.48s/batch, batch_loss=14.7, batch_index=319, batch_size=256]

Epoch 1/10:  32%|███▊        | 319/991 [1:25:50<2:53:25, 15.48s/batch, batch_loss=15.5, batch_index=320, batch_size=256]

Epoch 1/10:  32%|███▊        | 320/991 [1:25:50<2:54:24, 15.59s/batch, batch_loss=15.5, batch_index=320, batch_size=256]

Epoch 1/10:  32%|███▊        | 320/991 [1:26:05<2:54:24, 15.59s/batch, batch_loss=22.7, batch_index=321, batch_size=256]

Epoch 1/10:  32%|███▉        | 321/991 [1:26:05<2:50:29, 15.27s/batch, batch_loss=22.7, batch_index=321, batch_size=256]

Epoch 1/10:  32%|███▉        | 321/991 [1:26:19<2:50:29, 15.27s/batch, batch_loss=7.38, batch_index=322, batch_size=256]

Epoch 1/10:  32%|███▉        | 322/991 [1:26:19<2:47:06, 14.99s/batch, batch_loss=7.38, batch_index=322, batch_size=256]

Epoch 1/10:  32%|███▉        | 322/991 [1:26:36<2:47:06, 14.99s/batch, batch_loss=8.78, batch_index=323, batch_size=256]

Epoch 1/10:  33%|███▉        | 323/991 [1:26:36<2:53:53, 15.62s/batch, batch_loss=8.78, batch_index=323, batch_size=256]

Epoch 1/10:  33%|███▉        | 323/991 [1:26:56<2:53:53, 15.62s/batch, batch_loss=22.6, batch_index=324, batch_size=256]

Epoch 1/10:  33%|███▉        | 324/991 [1:26:56<3:08:40, 16.97s/batch, batch_loss=22.6, batch_index=324, batch_size=256]

Epoch 1/10:  33%|████▌         | 324/991 [1:27:12<3:08:40, 16.97s/batch, batch_loss=12, batch_index=325, batch_size=256]

Epoch 1/10:  33%|████▌         | 325/991 [1:27:12<3:05:11, 16.68s/batch, batch_loss=12, batch_index=325, batch_size=256]

Epoch 1/10:  33%|███▉        | 325/991 [1:27:29<3:05:11, 16.68s/batch, batch_loss=23.9, batch_index=326, batch_size=256]

Epoch 1/10:  33%|███▉        | 326/991 [1:27:29<3:06:37, 16.84s/batch, batch_loss=23.9, batch_index=326, batch_size=256]

Epoch 1/10:  33%|██▉      | 326/991 [1:27:45<3:06:37, 16.84s/batch, batch_loss=3.04e+3, batch_index=327, batch_size=256]

Epoch 1/10:  33%|██▉      | 327/991 [1:27:45<3:03:54, 16.62s/batch, batch_loss=3.04e+3, batch_index=327, batch_size=256]

Epoch 1/10:  33%|███▉        | 327/991 [1:28:02<3:03:54, 16.62s/batch, batch_loss=8.59, batch_index=328, batch_size=256]

Epoch 1/10:  33%|███▉        | 328/991 [1:28:02<3:02:36, 16.53s/batch, batch_loss=8.59, batch_index=328, batch_size=256]

Epoch 1/10:  33%|███▉        | 328/991 [1:28:18<3:02:36, 16.53s/batch, batch_loss=18.6, batch_index=329, batch_size=256]

Epoch 1/10:  33%|███▉        | 329/991 [1:28:18<3:02:14, 16.52s/batch, batch_loss=18.6, batch_index=329, batch_size=256]

Epoch 1/10:  33%|███▉        | 329/991 [1:28:34<3:02:14, 16.52s/batch, batch_loss=13.4, batch_index=330, batch_size=256]

Epoch 1/10:  33%|███▉        | 330/991 [1:28:34<2:57:43, 16.13s/batch, batch_loss=13.4, batch_index=330, batch_size=256]

Epoch 1/10:  33%|███▉        | 330/991 [1:28:52<2:57:43, 16.13s/batch, batch_loss=13.7, batch_index=331, batch_size=256]

Epoch 1/10:  33%|████        | 331/991 [1:28:52<3:06:06, 16.92s/batch, batch_loss=13.7, batch_index=331, batch_size=256]

Epoch 1/10:  33%|████        | 331/991 [1:29:08<3:06:06, 16.92s/batch, batch_loss=13.5, batch_index=332, batch_size=256]

Epoch 1/10:  34%|████        | 332/991 [1:29:08<3:00:25, 16.43s/batch, batch_loss=13.5, batch_index=332, batch_size=256]

Epoch 1/10:  34%|████        | 332/991 [1:29:23<3:00:25, 16.43s/batch, batch_loss=14.8, batch_index=333, batch_size=256]

Epoch 1/10:  34%|████        | 333/991 [1:29:23<2:55:55, 16.04s/batch, batch_loss=14.8, batch_index=333, batch_size=256]

Epoch 1/10:  34%|████        | 333/991 [1:29:39<2:55:55, 16.04s/batch, batch_loss=13.4, batch_index=334, batch_size=256]

Epoch 1/10:  34%|████        | 334/991 [1:29:39<2:57:29, 16.21s/batch, batch_loss=13.4, batch_index=334, batch_size=256]

Epoch 1/10:  34%|████        | 334/991 [1:29:55<2:57:29, 16.21s/batch, batch_loss=4.83, batch_index=335, batch_size=256]

Epoch 1/10:  34%|████        | 335/991 [1:29:55<2:56:45, 16.17s/batch, batch_loss=4.83, batch_index=335, batch_size=256]

Epoch 1/10:  34%|███      | 335/991 [1:30:12<2:56:45, 16.17s/batch, batch_loss=8.47e+3, batch_index=336, batch_size=256]

Epoch 1/10:  34%|███      | 336/991 [1:30:12<2:58:19, 16.34s/batch, batch_loss=8.47e+3, batch_index=336, batch_size=256]

Epoch 1/10:  34%|███      | 336/991 [1:30:28<2:58:19, 16.34s/batch, batch_loss=2.35e+3, batch_index=337, batch_size=256]

Epoch 1/10:  34%|███      | 337/991 [1:30:28<2:55:41, 16.12s/batch, batch_loss=2.35e+3, batch_index=337, batch_size=256]

Epoch 1/10:  34%|████        | 337/991 [1:30:43<2:55:41, 16.12s/batch, batch_loss=8.32, batch_index=338, batch_size=256]

Epoch 1/10:  34%|████        | 338/991 [1:30:43<2:51:58, 15.80s/batch, batch_loss=8.32, batch_index=338, batch_size=256]

Epoch 1/10:  34%|████        | 338/991 [1:31:00<2:51:58, 15.80s/batch, batch_loss=21.2, batch_index=339, batch_size=256]

Epoch 1/10:  34%|████        | 339/991 [1:31:00<2:57:53, 16.37s/batch, batch_loss=21.2, batch_index=339, batch_size=256]

Epoch 1/10:  34%|████        | 339/991 [1:31:16<2:57:53, 16.37s/batch, batch_loss=12.6, batch_index=340, batch_size=256]

Epoch 1/10:  34%|████        | 340/991 [1:31:16<2:53:48, 16.02s/batch, batch_loss=12.6, batch_index=340, batch_size=256]

Epoch 1/10:  34%|████        | 340/991 [1:31:31<2:53:48, 16.02s/batch, batch_loss=10.3, batch_index=341, batch_size=256]

Epoch 1/10:  34%|████▏       | 341/991 [1:31:31<2:50:59, 15.78s/batch, batch_loss=10.3, batch_index=341, batch_size=256]

Epoch 1/10:  34%|████▏       | 341/991 [1:31:46<2:50:59, 15.78s/batch, batch_loss=1.47, batch_index=342, batch_size=256]

Epoch 1/10:  35%|████▏       | 342/991 [1:31:46<2:49:09, 15.64s/batch, batch_loss=1.47, batch_index=342, batch_size=256]

Epoch 1/10:  35%|████▏       | 342/991 [1:32:02<2:49:09, 15.64s/batch, batch_loss=7.72, batch_index=343, batch_size=256]

Epoch 1/10:  35%|████▏       | 343/991 [1:32:02<2:48:09, 15.57s/batch, batch_loss=7.72, batch_index=343, batch_size=256]

Epoch 1/10:  35%|████▏       | 343/991 [1:32:17<2:48:09, 15.57s/batch, batch_loss=15.6, batch_index=344, batch_size=256]

Epoch 1/10:  35%|████▏       | 344/991 [1:32:17<2:45:55, 15.39s/batch, batch_loss=15.6, batch_index=344, batch_size=256]

Epoch 1/10:  35%|████▌        | 344/991 [1:32:32<2:45:55, 15.39s/batch, batch_loss=116, batch_index=345, batch_size=256]

Epoch 1/10:  35%|████▌        | 345/991 [1:32:32<2:47:15, 15.53s/batch, batch_loss=116, batch_index=345, batch_size=256]

Epoch 1/10:  35%|████▊         | 345/991 [1:32:48<2:47:15, 15.53s/batch, batch_loss=20, batch_index=346, batch_size=256]

Epoch 1/10:  35%|████▉         | 346/991 [1:32:48<2:48:09, 15.64s/batch, batch_loss=20, batch_index=346, batch_size=256]

Epoch 1/10:  35%|████▏       | 346/991 [1:33:06<2:48:09, 15.64s/batch, batch_loss=12.9, batch_index=347, batch_size=256]

Epoch 1/10:  35%|████▏       | 347/991 [1:33:06<2:53:10, 16.13s/batch, batch_loss=12.9, batch_index=347, batch_size=256]

Epoch 1/10:  35%|████▏       | 347/991 [1:33:21<2:53:10, 16.13s/batch, batch_loss=17.1, batch_index=348, batch_size=256]

Epoch 1/10:  35%|████▏       | 348/991 [1:33:21<2:49:16, 15.80s/batch, batch_loss=17.1, batch_index=348, batch_size=256]

Epoch 1/10:  35%|████▏       | 348/991 [1:33:37<2:49:16, 15.80s/batch, batch_loss=8.79, batch_index=349, batch_size=256]

Epoch 1/10:  35%|████▏       | 349/991 [1:33:37<2:50:34, 15.94s/batch, batch_loss=8.79, batch_index=349, batch_size=256]

Epoch 1/10:  35%|████▏       | 349/991 [1:33:53<2:50:34, 15.94s/batch, batch_loss=13.3, batch_index=350, batch_size=256]

Epoch 1/10:  35%|████▏       | 350/991 [1:33:53<2:50:34, 15.97s/batch, batch_loss=13.3, batch_index=350, batch_size=256]

Epoch 1/10:  35%|████▏       | 350/991 [1:34:08<2:50:34, 15.97s/batch, batch_loss=8.69, batch_index=351, batch_size=256]

Epoch 1/10:  35%|████▎       | 351/991 [1:34:08<2:48:05, 15.76s/batch, batch_loss=8.69, batch_index=351, batch_size=256]

Epoch 1/10:  35%|████▎       | 351/991 [1:34:25<2:48:05, 15.76s/batch, batch_loss=15.5, batch_index=352, batch_size=256]

Epoch 1/10:  36%|████▎       | 352/991 [1:34:25<2:52:26, 16.19s/batch, batch_loss=15.5, batch_index=352, batch_size=256]

Epoch 1/10:  36%|████▎       | 352/991 [1:34:42<2:52:26, 16.19s/batch, batch_loss=23.2, batch_index=353, batch_size=256]

Epoch 1/10:  36%|████▎       | 353/991 [1:34:42<2:53:19, 16.30s/batch, batch_loss=23.2, batch_index=353, batch_size=256]

Epoch 1/10:  36%|████▎       | 353/991 [1:35:01<2:53:19, 16.30s/batch, batch_loss=22.4, batch_index=354, batch_size=256]

Epoch 1/10:  36%|████▎       | 354/991 [1:35:01<3:01:25, 17.09s/batch, batch_loss=22.4, batch_index=354, batch_size=256]

Epoch 1/10:  36%|████▎       | 354/991 [1:35:16<3:01:25, 17.09s/batch, batch_loss=10.1, batch_index=355, batch_size=256]

Epoch 1/10:  36%|████▎       | 355/991 [1:35:16<2:56:07, 16.61s/batch, batch_loss=10.1, batch_index=355, batch_size=256]

Epoch 1/10:  36%|████▎       | 355/991 [1:35:33<2:56:07, 16.61s/batch, batch_loss=17.2, batch_index=356, batch_size=256]

Epoch 1/10:  36%|████▎       | 356/991 [1:35:33<2:56:50, 16.71s/batch, batch_loss=17.2, batch_index=356, batch_size=256]

Epoch 1/10:  36%|████▎       | 356/991 [1:35:49<2:56:50, 16.71s/batch, batch_loss=20.2, batch_index=357, batch_size=256]

Epoch 1/10:  36%|████▎       | 357/991 [1:35:49<2:51:47, 16.26s/batch, batch_loss=20.2, batch_index=357, batch_size=256]

Epoch 1/10:  36%|████▎       | 357/991 [1:36:04<2:51:47, 16.26s/batch, batch_loss=16.1, batch_index=358, batch_size=256]

Epoch 1/10:  36%|████▎       | 358/991 [1:36:04<2:50:11, 16.13s/batch, batch_loss=16.1, batch_index=358, batch_size=256]

Epoch 1/10:  36%|████▎       | 358/991 [1:36:20<2:50:11, 16.13s/batch, batch_loss=5.08, batch_index=359, batch_size=256]

Epoch 1/10:  36%|████▎       | 359/991 [1:36:20<2:47:36, 15.91s/batch, batch_loss=5.08, batch_index=359, batch_size=256]

Epoch 1/10:  36%|█████▍         | 359/991 [1:36:35<2:47:36, 15.91s/batch, batch_loss=9, batch_index=360, batch_size=256]

Epoch 1/10:  36%|█████▍         | 360/991 [1:36:35<2:44:44, 15.66s/batch, batch_loss=9, batch_index=360, batch_size=256]

Epoch 1/10:  36%|████▎       | 360/991 [1:36:51<2:44:44, 15.66s/batch, batch_loss=26.9, batch_index=361, batch_size=256]

Epoch 1/10:  36%|████▎       | 361/991 [1:36:51<2:46:15, 15.83s/batch, batch_loss=26.9, batch_index=361, batch_size=256]

Epoch 1/10:  36%|████▎       | 361/991 [1:37:07<2:46:15, 15.83s/batch, batch_loss=20.8, batch_index=362, batch_size=256]

Epoch 1/10:  37%|████▍       | 362/991 [1:37:07<2:47:02, 15.93s/batch, batch_loss=20.8, batch_index=362, batch_size=256]

Epoch 1/10:  37%|████▍       | 362/991 [1:37:24<2:47:02, 15.93s/batch, batch_loss=13.2, batch_index=363, batch_size=256]

Epoch 1/10:  37%|████▍       | 363/991 [1:37:24<2:48:43, 16.12s/batch, batch_loss=13.2, batch_index=363, batch_size=256]

Epoch 1/10:  37%|████▍       | 363/991 [1:37:40<2:48:43, 16.12s/batch, batch_loss=13.7, batch_index=364, batch_size=256]

Epoch 1/10:  37%|████▍       | 364/991 [1:37:40<2:48:28, 16.12s/batch, batch_loss=13.7, batch_index=364, batch_size=256]

Epoch 1/10:  37%|████▍       | 364/991 [1:37:57<2:48:28, 16.12s/batch, batch_loss=9.69, batch_index=365, batch_size=256]

Epoch 1/10:  37%|████▍       | 365/991 [1:37:57<2:49:46, 16.27s/batch, batch_loss=9.69, batch_index=365, batch_size=256]

Epoch 1/10:  37%|████▍       | 365/991 [1:38:12<2:49:46, 16.27s/batch, batch_loss=14.7, batch_index=366, batch_size=256]

Epoch 1/10:  37%|████▍       | 366/991 [1:38:12<2:48:05, 16.14s/batch, batch_loss=14.7, batch_index=366, batch_size=256]

Epoch 1/10:  37%|████▍       | 366/991 [1:38:28<2:48:05, 16.14s/batch, batch_loss=15.2, batch_index=367, batch_size=256]

Epoch 1/10:  37%|████▍       | 367/991 [1:38:28<2:47:14, 16.08s/batch, batch_loss=15.2, batch_index=367, batch_size=256]

Epoch 1/10:  37%|████▍       | 367/991 [1:38:44<2:47:14, 16.08s/batch, batch_loss=18.5, batch_index=368, batch_size=256]

Epoch 1/10:  37%|████▍       | 368/991 [1:38:44<2:45:04, 15.90s/batch, batch_loss=18.5, batch_index=368, batch_size=256]

Epoch 1/10:  37%|████▍       | 368/991 [1:38:59<2:45:04, 15.90s/batch, batch_loss=11.9, batch_index=369, batch_size=256]

Epoch 1/10:  37%|████▍       | 369/991 [1:38:59<2:43:12, 15.74s/batch, batch_loss=11.9, batch_index=369, batch_size=256]

Epoch 1/10:  37%|███▎     | 369/991 [1:39:14<2:43:12, 15.74s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 1/10:  37%|███▎     | 370/991 [1:39:14<2:40:18, 15.49s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 1/10:  37%|████▍       | 370/991 [1:39:29<2:40:18, 15.49s/batch, batch_loss=19.4, batch_index=371, batch_size=256]

Epoch 1/10:  37%|████▍       | 371/991 [1:39:29<2:39:08, 15.40s/batch, batch_loss=19.4, batch_index=371, batch_size=256]

Epoch 1/10:  37%|████▍       | 371/991 [1:39:45<2:39:08, 15.40s/batch, batch_loss=16.9, batch_index=372, batch_size=256]

Epoch 1/10:  38%|████▌       | 372/991 [1:39:45<2:40:03, 15.51s/batch, batch_loss=16.9, batch_index=372, batch_size=256]

Epoch 1/10:  38%|████▌       | 372/991 [1:40:01<2:40:03, 15.51s/batch, batch_loss=26.7, batch_index=373, batch_size=256]

Epoch 1/10:  38%|████▌       | 373/991 [1:40:01<2:41:04, 15.64s/batch, batch_loss=26.7, batch_index=373, batch_size=256]

Epoch 1/10:  38%|████▉        | 373/991 [1:40:17<2:41:04, 15.64s/batch, batch_loss=470, batch_index=374, batch_size=256]

Epoch 1/10:  38%|████▉        | 374/991 [1:40:17<2:42:51, 15.84s/batch, batch_loss=470, batch_index=374, batch_size=256]

Epoch 1/10:  38%|███▍     | 374/991 [1:40:32<2:42:51, 15.84s/batch, batch_loss=1.42e+3, batch_index=375, batch_size=256]

Epoch 1/10:  38%|███▍     | 375/991 [1:40:32<2:40:08, 15.60s/batch, batch_loss=1.42e+3, batch_index=375, batch_size=256]

Epoch 1/10:  38%|███▍     | 375/991 [1:40:47<2:40:08, 15.60s/batch, batch_loss=1.22e+3, batch_index=376, batch_size=256]

Epoch 1/10:  38%|███▍     | 376/991 [1:40:47<2:37:51, 15.40s/batch, batch_loss=1.22e+3, batch_index=376, batch_size=256]

Epoch 1/10:  38%|████▌       | 376/991 [1:41:02<2:37:51, 15.40s/batch, batch_loss=19.2, batch_index=377, batch_size=256]

Epoch 1/10:  38%|████▌       | 377/991 [1:41:02<2:36:42, 15.31s/batch, batch_loss=19.2, batch_index=377, batch_size=256]

Epoch 1/10:  38%|███▍     | 377/991 [1:41:20<2:36:42, 15.31s/batch, batch_loss=1.18e+3, batch_index=378, batch_size=256]

Epoch 1/10:  38%|███▍     | 378/991 [1:41:20<2:44:15, 16.08s/batch, batch_loss=1.18e+3, batch_index=378, batch_size=256]

Epoch 1/10:  38%|████▌       | 378/991 [1:41:35<2:44:15, 16.08s/batch, batch_loss=10.4, batch_index=379, batch_size=256]

Epoch 1/10:  38%|████▌       | 379/991 [1:41:35<2:40:42, 15.76s/batch, batch_loss=10.4, batch_index=379, batch_size=256]

Epoch 1/10:  38%|████▌       | 379/991 [1:41:51<2:40:42, 15.76s/batch, batch_loss=14.2, batch_index=380, batch_size=256]

Epoch 1/10:  38%|████▌       | 380/991 [1:41:51<2:39:46, 15.69s/batch, batch_loss=14.2, batch_index=380, batch_size=256]

Epoch 1/10:  38%|████▌       | 380/991 [1:42:06<2:39:46, 15.69s/batch, batch_loss=17.9, batch_index=381, batch_size=256]

Epoch 1/10:  38%|████▌       | 381/991 [1:42:06<2:37:20, 15.48s/batch, batch_loss=17.9, batch_index=381, batch_size=256]

Epoch 1/10:  38%|████▌       | 381/991 [1:42:22<2:37:20, 15.48s/batch, batch_loss=11.7, batch_index=382, batch_size=256]

Epoch 1/10:  39%|████▋       | 382/991 [1:42:22<2:37:51, 15.55s/batch, batch_loss=11.7, batch_index=382, batch_size=256]

Epoch 1/10:  39%|████▋       | 382/991 [1:42:37<2:37:51, 15.55s/batch, batch_loss=9.42, batch_index=383, batch_size=256]

Epoch 1/10:  39%|████▋       | 383/991 [1:42:37<2:37:17, 15.52s/batch, batch_loss=9.42, batch_index=383, batch_size=256]

Epoch 1/10:  39%|████▋       | 383/991 [1:42:53<2:37:17, 15.52s/batch, batch_loss=23.8, batch_index=384, batch_size=256]

Epoch 1/10:  39%|████▋       | 384/991 [1:42:53<2:38:07, 15.63s/batch, batch_loss=23.8, batch_index=384, batch_size=256]

Epoch 1/10:  39%|█████        | 384/991 [1:43:09<2:38:07, 15.63s/batch, batch_loss=8.1, batch_index=385, batch_size=256]

Epoch 1/10:  39%|█████        | 385/991 [1:43:09<2:38:22, 15.68s/batch, batch_loss=8.1, batch_index=385, batch_size=256]

Epoch 1/10:  39%|████▋       | 385/991 [1:43:24<2:38:22, 15.68s/batch, batch_loss=17.5, batch_index=386, batch_size=256]

Epoch 1/10:  39%|████▋       | 386/991 [1:43:24<2:37:00, 15.57s/batch, batch_loss=17.5, batch_index=386, batch_size=256]

Epoch 1/10:  39%|████▋       | 386/991 [1:43:39<2:37:00, 15.57s/batch, batch_loss=26.1, batch_index=387, batch_size=256]

Epoch 1/10:  39%|████▋       | 387/991 [1:43:39<2:35:06, 15.41s/batch, batch_loss=26.1, batch_index=387, batch_size=256]

Epoch 1/10:  39%|█████        | 387/991 [1:43:54<2:35:06, 15.41s/batch, batch_loss=805, batch_index=388, batch_size=256]

Epoch 1/10:  39%|█████        | 388/991 [1:43:54<2:33:24, 15.26s/batch, batch_loss=805, batch_index=388, batch_size=256]

Epoch 1/10:  39%|████▋       | 388/991 [1:44:10<2:33:24, 15.26s/batch, batch_loss=12.4, batch_index=389, batch_size=256]

Epoch 1/10:  39%|████▋       | 389/991 [1:44:10<2:34:49, 15.43s/batch, batch_loss=12.4, batch_index=389, batch_size=256]

Epoch 1/10:  39%|█████        | 389/991 [1:44:26<2:34:49, 15.43s/batch, batch_loss=878, batch_index=390, batch_size=256]

Epoch 1/10:  39%|█████        | 390/991 [1:44:26<2:35:43, 15.55s/batch, batch_loss=878, batch_index=390, batch_size=256]

Epoch 1/10:  39%|████▋       | 390/991 [1:44:41<2:35:43, 15.55s/batch, batch_loss=17.7, batch_index=391, batch_size=256]

Epoch 1/10:  39%|████▋       | 391/991 [1:44:41<2:34:59, 15.50s/batch, batch_loss=17.7, batch_index=391, batch_size=256]

Epoch 1/10:  39%|████▋       | 391/991 [1:44:57<2:34:59, 15.50s/batch, batch_loss=15.5, batch_index=392, batch_size=256]

Epoch 1/10:  40%|████▋       | 392/991 [1:44:57<2:36:08, 15.64s/batch, batch_loss=15.5, batch_index=392, batch_size=256]

Epoch 1/10:  40%|████▋       | 392/991 [1:45:12<2:36:08, 15.64s/batch, batch_loss=16.5, batch_index=393, batch_size=256]

Epoch 1/10:  40%|████▊       | 393/991 [1:45:12<2:34:32, 15.51s/batch, batch_loss=16.5, batch_index=393, batch_size=256]

Epoch 1/10:  40%|█████▏       | 393/991 [1:45:27<2:34:32, 15.51s/batch, batch_loss=612, batch_index=394, batch_size=256]

Epoch 1/10:  40%|█████▏       | 394/991 [1:45:27<2:33:48, 15.46s/batch, batch_loss=612, batch_index=394, batch_size=256]

Epoch 1/10:  40%|████▊       | 394/991 [1:45:43<2:33:48, 15.46s/batch, batch_loss=18.3, batch_index=395, batch_size=256]

Epoch 1/10:  40%|████▊       | 395/991 [1:45:43<2:32:38, 15.37s/batch, batch_loss=18.3, batch_index=395, batch_size=256]

Epoch 1/10:  40%|████▊       | 395/991 [1:45:58<2:32:38, 15.37s/batch, batch_loss=12.7, batch_index=396, batch_size=256]

Epoch 1/10:  40%|████▊       | 396/991 [1:45:58<2:32:45, 15.40s/batch, batch_loss=12.7, batch_index=396, batch_size=256]

Epoch 1/10:  40%|████▊       | 396/991 [1:46:13<2:32:45, 15.40s/batch, batch_loss=14.1, batch_index=397, batch_size=256]

Epoch 1/10:  40%|████▊       | 397/991 [1:46:13<2:32:20, 15.39s/batch, batch_loss=14.1, batch_index=397, batch_size=256]

Epoch 1/10:  40%|████▊       | 397/991 [1:46:30<2:32:20, 15.39s/batch, batch_loss=13.8, batch_index=398, batch_size=256]

Epoch 1/10:  40%|████▊       | 398/991 [1:46:30<2:35:14, 15.71s/batch, batch_loss=13.8, batch_index=398, batch_size=256]

Epoch 1/10:  40%|████▊       | 398/991 [1:46:46<2:35:14, 15.71s/batch, batch_loss=19.2, batch_index=399, batch_size=256]

Epoch 1/10:  40%|████▊       | 399/991 [1:46:46<2:36:13, 15.83s/batch, batch_loss=19.2, batch_index=399, batch_size=256]

Epoch 1/10:  40%|████▊       | 399/991 [1:47:04<2:36:13, 15.83s/batch, batch_loss=10.5, batch_index=400, batch_size=256]

Epoch 1/10:  40%|████▊       | 400/991 [1:47:04<2:42:48, 16.53s/batch, batch_loss=10.5, batch_index=400, batch_size=256]

Epoch 1/10:  40%|████▊       | 400/991 [1:47:20<2:42:48, 16.53s/batch, batch_loss=10.6, batch_index=401, batch_size=256]

Epoch 1/10:  40%|████▊       | 401/991 [1:47:20<2:40:28, 16.32s/batch, batch_loss=10.6, batch_index=401, batch_size=256]

Epoch 1/10:  40%|████▊       | 401/991 [1:47:36<2:40:28, 16.32s/batch, batch_loss=14.9, batch_index=402, batch_size=256]

Epoch 1/10:  41%|████▊       | 402/991 [1:47:36<2:38:31, 16.15s/batch, batch_loss=14.9, batch_index=402, batch_size=256]

Epoch 1/10:  41%|████▊       | 402/991 [1:47:51<2:38:31, 16.15s/batch, batch_loss=14.4, batch_index=403, batch_size=256]

Epoch 1/10:  41%|████▉       | 403/991 [1:47:51<2:36:33, 15.98s/batch, batch_loss=14.4, batch_index=403, batch_size=256]

Epoch 1/10:  41%|████▉       | 403/991 [1:48:07<2:36:33, 15.98s/batch, batch_loss=10.8, batch_index=404, batch_size=256]

Epoch 1/10:  41%|████▉       | 404/991 [1:48:07<2:34:17, 15.77s/batch, batch_loss=10.8, batch_index=404, batch_size=256]

Epoch 1/10:  41%|████▉       | 404/991 [1:48:22<2:34:17, 15.77s/batch, batch_loss=13.8, batch_index=405, batch_size=256]

Epoch 1/10:  41%|████▉       | 405/991 [1:48:22<2:33:37, 15.73s/batch, batch_loss=13.8, batch_index=405, batch_size=256]

Epoch 1/10:  41%|████▉       | 405/991 [1:48:38<2:33:37, 15.73s/batch, batch_loss=5.49, batch_index=406, batch_size=256]

Epoch 1/10:  41%|████▉       | 406/991 [1:48:38<2:34:27, 15.84s/batch, batch_loss=5.49, batch_index=406, batch_size=256]

Epoch 1/10:  41%|████▉       | 406/991 [1:48:55<2:34:27, 15.84s/batch, batch_loss=21.3, batch_index=407, batch_size=256]

Epoch 1/10:  41%|████▉       | 407/991 [1:48:55<2:35:19, 15.96s/batch, batch_loss=21.3, batch_index=407, batch_size=256]

Epoch 1/10:  41%|████▉       | 407/991 [1:49:14<2:35:19, 15.96s/batch, batch_loss=6.79, batch_index=408, batch_size=256]

Epoch 1/10:  41%|████▉       | 408/991 [1:49:14<2:44:25, 16.92s/batch, batch_loss=6.79, batch_index=408, batch_size=256]

Epoch 1/10:  41%|████▉       | 408/991 [1:49:29<2:44:25, 16.92s/batch, batch_loss=21.2, batch_index=409, batch_size=256]

Epoch 1/10:  41%|████▉       | 409/991 [1:49:29<2:40:39, 16.56s/batch, batch_loss=21.2, batch_index=409, batch_size=256]

Epoch 1/10:  41%|████▉       | 409/991 [1:49:45<2:40:39, 16.56s/batch, batch_loss=20.8, batch_index=410, batch_size=256]

Epoch 1/10:  41%|████▉       | 410/991 [1:49:45<2:38:31, 16.37s/batch, batch_loss=20.8, batch_index=410, batch_size=256]

Epoch 1/10:  41%|████▉       | 410/991 [1:50:01<2:38:31, 16.37s/batch, batch_loss=11.6, batch_index=411, batch_size=256]

Epoch 1/10:  41%|████▉       | 411/991 [1:50:01<2:37:03, 16.25s/batch, batch_loss=11.6, batch_index=411, batch_size=256]

Epoch 1/10:  41%|████▉       | 411/991 [1:50:17<2:37:03, 16.25s/batch, batch_loss=12.1, batch_index=412, batch_size=256]

Epoch 1/10:  42%|████▉       | 412/991 [1:50:17<2:36:14, 16.19s/batch, batch_loss=12.1, batch_index=412, batch_size=256]

Epoch 1/10:  42%|████▉       | 412/991 [1:50:32<2:36:14, 16.19s/batch, batch_loss=17.2, batch_index=413, batch_size=256]

Epoch 1/10:  42%|█████       | 413/991 [1:50:32<2:32:40, 15.85s/batch, batch_loss=17.2, batch_index=413, batch_size=256]

Epoch 1/10:  42%|█████       | 413/991 [1:50:48<2:32:40, 15.85s/batch, batch_loss=12.8, batch_index=414, batch_size=256]

Epoch 1/10:  42%|█████       | 414/991 [1:50:48<2:31:47, 15.78s/batch, batch_loss=12.8, batch_index=414, batch_size=256]

Epoch 1/10:  42%|█████       | 414/991 [1:51:06<2:31:47, 15.78s/batch, batch_loss=7.93, batch_index=415, batch_size=256]

Epoch 1/10:  42%|█████       | 415/991 [1:51:06<2:38:34, 16.52s/batch, batch_loss=7.93, batch_index=415, batch_size=256]

Epoch 1/10:  42%|█████       | 415/991 [1:51:21<2:38:34, 16.52s/batch, batch_loss=8.89, batch_index=416, batch_size=256]

Epoch 1/10:  42%|█████       | 416/991 [1:51:21<2:32:54, 15.96s/batch, batch_loss=8.89, batch_index=416, batch_size=256]

Epoch 1/10:  42%|█████       | 416/991 [1:51:37<2:32:54, 15.96s/batch, batch_loss=8.77, batch_index=417, batch_size=256]

Epoch 1/10:  42%|█████       | 417/991 [1:51:37<2:32:30, 15.94s/batch, batch_loss=8.77, batch_index=417, batch_size=256]

Epoch 1/10:  42%|█████       | 417/991 [1:51:52<2:32:30, 15.94s/batch, batch_loss=12.1, batch_index=418, batch_size=256]

Epoch 1/10:  42%|█████       | 418/991 [1:51:52<2:30:08, 15.72s/batch, batch_loss=12.1, batch_index=418, batch_size=256]

Epoch 1/10:  42%|████▏     | 418/991 [1:52:08<2:30:08, 15.72s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 1/10:  42%|████▏     | 419/991 [1:52:08<2:29:29, 15.68s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 1/10:  42%|█████       | 419/991 [1:52:23<2:29:29, 15.68s/batch, batch_loss=14.5, batch_index=420, batch_size=256]

Epoch 1/10:  42%|█████       | 420/991 [1:52:23<2:28:20, 15.59s/batch, batch_loss=14.5, batch_index=420, batch_size=256]

Epoch 1/10:  42%|█████       | 420/991 [1:52:39<2:28:20, 15.59s/batch, batch_loss=12.4, batch_index=421, batch_size=256]

Epoch 1/10:  42%|█████       | 421/991 [1:52:39<2:29:03, 15.69s/batch, batch_loss=12.4, batch_index=421, batch_size=256]

Epoch 1/10:  42%|█████       | 421/991 [1:52:54<2:29:03, 15.69s/batch, batch_loss=7.94, batch_index=422, batch_size=256]

Epoch 1/10:  43%|█████       | 422/991 [1:52:54<2:25:45, 15.37s/batch, batch_loss=7.94, batch_index=422, batch_size=256]

Epoch 1/10:  43%|█████       | 422/991 [1:53:09<2:25:45, 15.37s/batch, batch_loss=10.5, batch_index=423, batch_size=256]

Epoch 1/10:  43%|█████       | 423/991 [1:53:09<2:25:38, 15.38s/batch, batch_loss=10.5, batch_index=423, batch_size=256]

Epoch 1/10:  43%|█████       | 423/991 [1:53:25<2:25:38, 15.38s/batch, batch_loss=10.3, batch_index=424, batch_size=256]

Epoch 1/10:  43%|█████▏      | 424/991 [1:53:25<2:26:24, 15.49s/batch, batch_loss=10.3, batch_index=424, batch_size=256]

Epoch 1/10:  43%|█████▏      | 424/991 [1:53:42<2:26:24, 15.49s/batch, batch_loss=7.18, batch_index=425, batch_size=256]

Epoch 1/10:  43%|█████▏      | 425/991 [1:53:42<2:30:10, 15.92s/batch, batch_loss=7.18, batch_index=425, batch_size=256]

Epoch 1/10:  43%|█████▏      | 425/991 [1:53:58<2:30:10, 15.92s/batch, batch_loss=2.44, batch_index=426, batch_size=256]

Epoch 1/10:  43%|█████▏      | 426/991 [1:53:58<2:30:14, 15.96s/batch, batch_loss=2.44, batch_index=426, batch_size=256]

Epoch 1/10:  43%|█████▏      | 426/991 [1:54:14<2:30:14, 15.96s/batch, batch_loss=9.09, batch_index=427, batch_size=256]

Epoch 1/10:  43%|█████▏      | 427/991 [1:54:14<2:30:13, 15.98s/batch, batch_loss=9.09, batch_index=427, batch_size=256]

Epoch 1/10:  43%|█████▏      | 427/991 [1:54:30<2:30:13, 15.98s/batch, batch_loss=15.6, batch_index=428, batch_size=256]

Epoch 1/10:  43%|█████▏      | 428/991 [1:54:30<2:30:07, 16.00s/batch, batch_loss=15.6, batch_index=428, batch_size=256]

Epoch 1/10:  43%|█████▏      | 428/991 [1:54:49<2:30:07, 16.00s/batch, batch_loss=20.1, batch_index=429, batch_size=256]

Epoch 1/10:  43%|█████▏      | 429/991 [1:54:49<2:38:08, 16.88s/batch, batch_loss=20.1, batch_index=429, batch_size=256]

Epoch 1/10:  43%|███▉     | 429/991 [1:55:04<2:38:08, 16.88s/batch, batch_loss=9.29e+3, batch_index=430, batch_size=256]

Epoch 1/10:  43%|███▉     | 430/991 [1:55:04<2:32:20, 16.29s/batch, batch_loss=9.29e+3, batch_index=430, batch_size=256]

Epoch 1/10:  43%|█████▏      | 430/991 [1:55:18<2:32:20, 16.29s/batch, batch_loss=21.4, batch_index=431, batch_size=256]

Epoch 1/10:  43%|█████▏      | 431/991 [1:55:18<2:27:23, 15.79s/batch, batch_loss=21.4, batch_index=431, batch_size=256]

Epoch 1/10:  43%|█████▏      | 431/991 [1:55:35<2:27:23, 15.79s/batch, batch_loss=19.1, batch_index=432, batch_size=256]

Epoch 1/10:  44%|█████▏      | 432/991 [1:55:35<2:29:21, 16.03s/batch, batch_loss=19.1, batch_index=432, batch_size=256]

Epoch 1/10:  44%|█████▏      | 432/991 [1:55:48<2:29:21, 16.03s/batch, batch_loss=10.5, batch_index=433, batch_size=256]

Epoch 1/10:  44%|█████▏      | 433/991 [1:55:48<2:21:19, 15.20s/batch, batch_loss=10.5, batch_index=433, batch_size=256]

Epoch 1/10:  44%|█████▏      | 433/991 [1:56:02<2:21:19, 15.20s/batch, batch_loss=16.2, batch_index=434, batch_size=256]

Epoch 1/10:  44%|█████▎      | 434/991 [1:56:02<2:17:43, 14.84s/batch, batch_loss=16.2, batch_index=434, batch_size=256]

Epoch 1/10:  44%|█████▎      | 434/991 [1:56:16<2:17:43, 14.84s/batch, batch_loss=12.2, batch_index=435, batch_size=256]

Epoch 1/10:  44%|█████▎      | 435/991 [1:56:16<2:14:33, 14.52s/batch, batch_loss=12.2, batch_index=435, batch_size=256]

Epoch 1/10:  44%|█████▎      | 435/991 [1:56:30<2:14:33, 14.52s/batch, batch_loss=13.8, batch_index=436, batch_size=256]

Epoch 1/10:  44%|█████▎      | 436/991 [1:56:30<2:14:02, 14.49s/batch, batch_loss=13.8, batch_index=436, batch_size=256]

Epoch 1/10:  44%|█████▎      | 436/991 [1:56:47<2:14:02, 14.49s/batch, batch_loss=16.1, batch_index=437, batch_size=256]

Epoch 1/10:  44%|█████▎      | 437/991 [1:56:47<2:18:41, 15.02s/batch, batch_loss=16.1, batch_index=437, batch_size=256]

Epoch 1/10:  44%|█████▎      | 437/991 [1:57:03<2:18:41, 15.02s/batch, batch_loss=21.4, batch_index=438, batch_size=256]

Epoch 1/10:  44%|█████▎      | 438/991 [1:57:03<2:22:31, 15.46s/batch, batch_loss=21.4, batch_index=438, batch_size=256]

Epoch 1/10:  44%|█████▎      | 438/991 [1:57:19<2:22:31, 15.46s/batch, batch_loss=13.6, batch_index=439, batch_size=256]

Epoch 1/10:  44%|█████▎      | 439/991 [1:57:19<2:22:45, 15.52s/batch, batch_loss=13.6, batch_index=439, batch_size=256]

Epoch 1/10:  44%|█████▎      | 439/991 [1:57:34<2:22:45, 15.52s/batch, batch_loss=22.8, batch_index=440, batch_size=256]

Epoch 1/10:  44%|█████▎      | 440/991 [1:57:34<2:21:12, 15.38s/batch, batch_loss=22.8, batch_index=440, batch_size=256]

Epoch 1/10:  44%|█████▎      | 440/991 [1:57:49<2:21:12, 15.38s/batch, batch_loss=21.5, batch_index=441, batch_size=256]

Epoch 1/10:  45%|█████▎      | 441/991 [1:57:49<2:21:22, 15.42s/batch, batch_loss=21.5, batch_index=441, batch_size=256]

Epoch 1/10:  45%|█████▎      | 441/991 [1:58:05<2:21:22, 15.42s/batch, batch_loss=14.7, batch_index=442, batch_size=256]

Epoch 1/10:  45%|█████▎      | 442/991 [1:58:05<2:22:07, 15.53s/batch, batch_loss=14.7, batch_index=442, batch_size=256]

Epoch 1/10:  45%|█████▎      | 442/991 [1:58:21<2:22:07, 15.53s/batch, batch_loss=19.2, batch_index=443, batch_size=256]

Epoch 1/10:  45%|█████▎      | 443/991 [1:58:21<2:22:07, 15.56s/batch, batch_loss=19.2, batch_index=443, batch_size=256]

Epoch 1/10:  45%|█████▎      | 443/991 [1:58:36<2:22:07, 15.56s/batch, batch_loss=14.1, batch_index=444, batch_size=256]

Epoch 1/10:  45%|█████▍      | 444/991 [1:58:36<2:19:58, 15.35s/batch, batch_loss=14.1, batch_index=444, batch_size=256]

Epoch 1/10:  45%|█████▍      | 444/991 [1:58:51<2:19:58, 15.35s/batch, batch_loss=19.7, batch_index=445, batch_size=256]

Epoch 1/10:  45%|█████▍      | 445/991 [1:58:51<2:20:11, 15.41s/batch, batch_loss=19.7, batch_index=445, batch_size=256]

Epoch 1/10:  45%|█████▍      | 445/991 [1:59:10<2:20:11, 15.41s/batch, batch_loss=26.2, batch_index=446, batch_size=256]

Epoch 1/10:  45%|█████▍      | 446/991 [1:59:10<2:30:19, 16.55s/batch, batch_loss=26.2, batch_index=446, batch_size=256]

Epoch 1/10:  45%|██████▎       | 446/991 [1:59:26<2:30:19, 16.55s/batch, batch_loss=12, batch_index=447, batch_size=256]

Epoch 1/10:  45%|██████▎       | 447/991 [1:59:26<2:26:54, 16.20s/batch, batch_loss=12, batch_index=447, batch_size=256]

Epoch 1/10:  45%|█████▍      | 447/991 [1:59:42<2:26:54, 16.20s/batch, batch_loss=18.5, batch_index=448, batch_size=256]

Epoch 1/10:  45%|█████▍      | 448/991 [1:59:42<2:27:49, 16.34s/batch, batch_loss=18.5, batch_index=448, batch_size=256]

Epoch 1/10:  45%|█████▍      | 448/991 [1:59:58<2:27:49, 16.34s/batch, batch_loss=16.4, batch_index=449, batch_size=256]

Epoch 1/10:  45%|█████▍      | 449/991 [1:59:58<2:26:20, 16.20s/batch, batch_loss=16.4, batch_index=449, batch_size=256]

Epoch 1/10:  45%|██████▎       | 449/991 [2:00:14<2:26:20, 16.20s/batch, batch_loss=23, batch_index=450, batch_size=256]

Epoch 1/10:  45%|██████▎       | 450/991 [2:00:14<2:24:06, 15.98s/batch, batch_loss=23, batch_index=450, batch_size=256]

Epoch 1/10:  45%|█████▍      | 450/991 [2:00:30<2:24:06, 15.98s/batch, batch_loss=18.3, batch_index=451, batch_size=256]

Epoch 1/10:  46%|█████▍      | 451/991 [2:00:30<2:23:26, 15.94s/batch, batch_loss=18.3, batch_index=451, batch_size=256]

Epoch 1/10:  46%|█████▍      | 451/991 [2:00:45<2:23:26, 15.94s/batch, batch_loss=16.9, batch_index=452, batch_size=256]

Epoch 1/10:  46%|█████▍      | 452/991 [2:00:45<2:21:41, 15.77s/batch, batch_loss=16.9, batch_index=452, batch_size=256]

Epoch 1/10:  46%|█████▍      | 452/991 [2:01:03<2:21:41, 15.77s/batch, batch_loss=18.6, batch_index=453, batch_size=256]

Epoch 1/10:  46%|█████▍      | 453/991 [2:01:03<2:27:20, 16.43s/batch, batch_loss=18.6, batch_index=453, batch_size=256]

Epoch 1/10:  46%|████     | 453/991 [2:01:18<2:27:20, 16.43s/batch, batch_loss=7.23e+3, batch_index=454, batch_size=256]

Epoch 1/10:  46%|████     | 454/991 [2:01:18<2:22:52, 15.96s/batch, batch_loss=7.23e+3, batch_index=454, batch_size=256]

Epoch 1/10:  46%|██████▍       | 454/991 [2:01:33<2:22:52, 15.96s/batch, batch_loss=26, batch_index=455, batch_size=256]

Epoch 1/10:  46%|██████▍       | 455/991 [2:01:33<2:20:17, 15.70s/batch, batch_loss=26, batch_index=455, batch_size=256]

Epoch 1/10:  46%|█████▌      | 455/991 [2:01:50<2:20:17, 15.70s/batch, batch_loss=23.9, batch_index=456, batch_size=256]

Epoch 1/10:  46%|█████▌      | 456/991 [2:01:50<2:23:24, 16.08s/batch, batch_loss=23.9, batch_index=456, batch_size=256]

Epoch 1/10:  46%|█████▌      | 456/991 [2:02:06<2:23:24, 16.08s/batch, batch_loss=13.3, batch_index=457, batch_size=256]

Epoch 1/10:  46%|█████▌      | 457/991 [2:02:06<2:22:02, 15.96s/batch, batch_loss=13.3, batch_index=457, batch_size=256]

Epoch 1/10:  46%|█████▌      | 457/991 [2:02:20<2:22:02, 15.96s/batch, batch_loss=14.1, batch_index=458, batch_size=256]

Epoch 1/10:  46%|█████▌      | 458/991 [2:02:20<2:19:04, 15.65s/batch, batch_loss=14.1, batch_index=458, batch_size=256]

Epoch 1/10:  46%|█████▌      | 458/991 [2:02:36<2:19:04, 15.65s/batch, batch_loss=22.9, batch_index=459, batch_size=256]

Epoch 1/10:  46%|█████▌      | 459/991 [2:02:36<2:17:21, 15.49s/batch, batch_loss=22.9, batch_index=459, batch_size=256]

Epoch 1/10:  46%|█████▌      | 459/991 [2:02:54<2:17:21, 15.49s/batch, batch_loss=19.7, batch_index=460, batch_size=256]

Epoch 1/10:  46%|█████▌      | 460/991 [2:02:54<2:25:58, 16.49s/batch, batch_loss=19.7, batch_index=460, batch_size=256]

Epoch 1/10:  46%|█████▌      | 460/991 [2:03:11<2:25:58, 16.49s/batch, batch_loss=52.9, batch_index=461, batch_size=256]

Epoch 1/10:  47%|█████▌      | 461/991 [2:03:11<2:25:31, 16.47s/batch, batch_loss=52.9, batch_index=461, batch_size=256]

Epoch 1/10:  47%|█████▌      | 461/991 [2:03:27<2:25:31, 16.47s/batch, batch_loss=14.7, batch_index=462, batch_size=256]

Epoch 1/10:  47%|█████▌      | 462/991 [2:03:27<2:24:36, 16.40s/batch, batch_loss=14.7, batch_index=462, batch_size=256]

Epoch 1/10:  47%|████▏    | 462/991 [2:03:43<2:24:36, 16.40s/batch, batch_loss=6.21e+4, batch_index=463, batch_size=256]

Epoch 1/10:  47%|████▏    | 463/991 [2:03:43<2:23:17, 16.28s/batch, batch_loss=6.21e+4, batch_index=463, batch_size=256]

Epoch 1/10:  47%|█████▌      | 463/991 [2:03:59<2:23:17, 16.28s/batch, batch_loss=14.1, batch_index=464, batch_size=256]

Epoch 1/10:  47%|█████▌      | 464/991 [2:03:59<2:22:35, 16.23s/batch, batch_loss=14.1, batch_index=464, batch_size=256]

Epoch 1/10:  47%|█████▌      | 464/991 [2:04:15<2:22:35, 16.23s/batch, batch_loss=13.9, batch_index=465, batch_size=256]

Epoch 1/10:  47%|█████▋      | 465/991 [2:04:15<2:20:47, 16.06s/batch, batch_loss=13.9, batch_index=465, batch_size=256]

Epoch 1/10:  47%|█████▋      | 465/991 [2:04:31<2:20:47, 16.06s/batch, batch_loss=16.6, batch_index=466, batch_size=256]

Epoch 1/10:  47%|█████▋      | 466/991 [2:04:31<2:21:23, 16.16s/batch, batch_loss=16.6, batch_index=466, batch_size=256]

Epoch 1/10:  47%|█████▋      | 466/991 [2:04:47<2:21:23, 16.16s/batch, batch_loss=15.8, batch_index=467, batch_size=256]

Epoch 1/10:  47%|█████▋      | 467/991 [2:04:47<2:20:07, 16.05s/batch, batch_loss=15.8, batch_index=467, batch_size=256]

Epoch 1/10:  47%|█████▋      | 467/991 [2:05:03<2:20:07, 16.05s/batch, batch_loss=17.4, batch_index=468, batch_size=256]

Epoch 1/10:  47%|█████▋      | 468/991 [2:05:03<2:18:29, 15.89s/batch, batch_loss=17.4, batch_index=468, batch_size=256]

Epoch 1/10:  47%|█████▋      | 468/991 [2:05:18<2:18:29, 15.89s/batch, batch_loss=15.6, batch_index=469, batch_size=256]

Epoch 1/10:  47%|█████▋      | 469/991 [2:05:18<2:17:21, 15.79s/batch, batch_loss=15.6, batch_index=469, batch_size=256]

Epoch 1/10:  47%|█████▋      | 469/991 [2:05:35<2:17:21, 15.79s/batch, batch_loss=12.7, batch_index=470, batch_size=256]

Epoch 1/10:  47%|█████▋      | 470/991 [2:05:35<2:18:52, 15.99s/batch, batch_loss=12.7, batch_index=470, batch_size=256]

Epoch 1/10:  47%|██████▋       | 470/991 [2:05:51<2:18:52, 15.99s/batch, batch_loss=19, batch_index=471, batch_size=256]

Epoch 1/10:  48%|██████▋       | 471/991 [2:05:51<2:18:57, 16.03s/batch, batch_loss=19, batch_index=471, batch_size=256]

Epoch 1/10:  48%|██████▋       | 471/991 [2:06:05<2:18:57, 16.03s/batch, batch_loss=17, batch_index=472, batch_size=256]

Epoch 1/10:  48%|██████▋       | 472/991 [2:06:05<2:14:43, 15.58s/batch, batch_loss=17, batch_index=472, batch_size=256]

Epoch 1/10:  48%|█████▋      | 472/991 [2:06:22<2:14:43, 15.58s/batch, batch_loss=18.6, batch_index=473, batch_size=256]

Epoch 1/10:  48%|█████▋      | 473/991 [2:06:22<2:16:51, 15.85s/batch, batch_loss=18.6, batch_index=473, batch_size=256]

Epoch 1/10:  48%|█████▋      | 473/991 [2:06:37<2:16:51, 15.85s/batch, batch_loss=16.7, batch_index=474, batch_size=256]

Epoch 1/10:  48%|█████▋      | 474/991 [2:06:37<2:16:00, 15.78s/batch, batch_loss=16.7, batch_index=474, batch_size=256]

Epoch 1/10:  48%|████▎    | 474/991 [2:06:54<2:16:00, 15.78s/batch, batch_loss=2.41e+3, batch_index=475, batch_size=256]

Epoch 1/10:  48%|████▎    | 475/991 [2:06:54<2:17:46, 16.02s/batch, batch_loss=2.41e+3, batch_index=475, batch_size=256]

Epoch 1/10:  48%|█████▊      | 475/991 [2:07:12<2:17:46, 16.02s/batch, batch_loss=16.9, batch_index=476, batch_size=256]

Epoch 1/10:  48%|█████▊      | 476/991 [2:07:12<2:22:45, 16.63s/batch, batch_loss=16.9, batch_index=476, batch_size=256]

Epoch 1/10:  48%|██████▋       | 476/991 [2:07:28<2:22:45, 16.63s/batch, batch_loss=15, batch_index=477, batch_size=256]

Epoch 1/10:  48%|██████▋       | 477/991 [2:07:28<2:20:19, 16.38s/batch, batch_loss=15, batch_index=477, batch_size=256]

Epoch 1/10:  48%|█████▊      | 477/991 [2:07:43<2:20:19, 16.38s/batch, batch_loss=17.7, batch_index=478, batch_size=256]

Epoch 1/10:  48%|█████▊      | 478/991 [2:07:43<2:16:48, 16.00s/batch, batch_loss=17.7, batch_index=478, batch_size=256]

Epoch 1/10:  48%|█████▊      | 478/991 [2:07:58<2:16:48, 16.00s/batch, batch_loss=19.6, batch_index=479, batch_size=256]

Epoch 1/10:  48%|█████▊      | 479/991 [2:07:58<2:15:03, 15.83s/batch, batch_loss=19.6, batch_index=479, batch_size=256]

Epoch 1/10:  48%|██████▊       | 479/991 [2:08:15<2:15:03, 15.83s/batch, batch_loss=18, batch_index=480, batch_size=256]

Epoch 1/10:  48%|██████▊       | 480/991 [2:08:15<2:16:23, 16.02s/batch, batch_loss=18, batch_index=480, batch_size=256]

Epoch 1/10:  48%|█████▊      | 480/991 [2:08:30<2:16:23, 16.02s/batch, batch_loss=25.9, batch_index=481, batch_size=256]

Epoch 1/10:  49%|█████▊      | 481/991 [2:08:30<2:15:02, 15.89s/batch, batch_loss=25.9, batch_index=481, batch_size=256]

Epoch 1/10:  49%|█████▊      | 481/991 [2:08:47<2:15:02, 15.89s/batch, batch_loss=18.3, batch_index=482, batch_size=256]

Epoch 1/10:  49%|█████▊      | 482/991 [2:08:47<2:15:35, 15.98s/batch, batch_loss=18.3, batch_index=482, batch_size=256]

Epoch 1/10:  49%|█████▊      | 482/991 [2:09:02<2:15:35, 15.98s/batch, batch_loss=13.6, batch_index=483, batch_size=256]

Epoch 1/10:  49%|█████▊      | 483/991 [2:09:02<2:13:59, 15.83s/batch, batch_loss=13.6, batch_index=483, batch_size=256]

Epoch 1/10:  49%|█████▊      | 483/991 [2:09:18<2:13:59, 15.83s/batch, batch_loss=20.7, batch_index=484, batch_size=256]

Epoch 1/10:  49%|█████▊      | 484/991 [2:09:18<2:14:43, 15.94s/batch, batch_loss=20.7, batch_index=484, batch_size=256]

Epoch 1/10:  49%|█████▊      | 484/991 [2:09:34<2:14:43, 15.94s/batch, batch_loss=10.5, batch_index=485, batch_size=256]

Epoch 1/10:  49%|█████▊      | 485/991 [2:09:34<2:12:55, 15.76s/batch, batch_loss=10.5, batch_index=485, batch_size=256]

Epoch 1/10:  49%|█████▊      | 485/991 [2:09:49<2:12:55, 15.76s/batch, batch_loss=23.3, batch_index=486, batch_size=256]

Epoch 1/10:  49%|█████▉      | 486/991 [2:09:49<2:12:31, 15.75s/batch, batch_loss=23.3, batch_index=486, batch_size=256]

Epoch 1/10:  49%|█████▉      | 486/991 [2:10:05<2:12:31, 15.75s/batch, batch_loss=13.3, batch_index=487, batch_size=256]

Epoch 1/10:  49%|█████▉      | 487/991 [2:10:05<2:12:13, 15.74s/batch, batch_loss=13.3, batch_index=487, batch_size=256]

Epoch 1/10:  49%|█████▉      | 487/991 [2:10:20<2:12:13, 15.74s/batch, batch_loss=9.19, batch_index=488, batch_size=256]

Epoch 1/10:  49%|█████▉      | 488/991 [2:10:20<2:10:57, 15.62s/batch, batch_loss=9.19, batch_index=488, batch_size=256]

Epoch 1/10:  49%|█████▉      | 488/991 [2:10:36<2:10:57, 15.62s/batch, batch_loss=10.4, batch_index=489, batch_size=256]

Epoch 1/10:  49%|█████▉      | 489/991 [2:10:36<2:11:00, 15.66s/batch, batch_loss=10.4, batch_index=489, batch_size=256]

Epoch 1/10:  49%|█████▉      | 489/991 [2:10:51<2:11:00, 15.66s/batch, batch_loss=7.52, batch_index=490, batch_size=256]

Epoch 1/10:  49%|█████▉      | 490/991 [2:10:51<2:08:05, 15.34s/batch, batch_loss=7.52, batch_index=490, batch_size=256]

Epoch 1/10:  49%|█████▉      | 490/991 [2:11:09<2:08:05, 15.34s/batch, batch_loss=22.7, batch_index=491, batch_size=256]

Epoch 1/10:  50%|█████▉      | 491/991 [2:11:09<2:14:23, 16.13s/batch, batch_loss=22.7, batch_index=491, batch_size=256]

Epoch 1/10:  50%|█████▉      | 491/991 [2:11:23<2:14:23, 16.13s/batch, batch_loss=19.2, batch_index=492, batch_size=256]

Epoch 1/10:  50%|█████▉      | 492/991 [2:11:23<2:10:56, 15.74s/batch, batch_loss=19.2, batch_index=492, batch_size=256]

Epoch 1/10:  50%|█████▉      | 492/991 [2:11:39<2:10:56, 15.74s/batch, batch_loss=19.6, batch_index=493, batch_size=256]

Epoch 1/10:  50%|█████▉      | 493/991 [2:11:39<2:08:56, 15.54s/batch, batch_loss=19.6, batch_index=493, batch_size=256]

Epoch 1/10:  50%|█████▉      | 493/991 [2:11:54<2:08:56, 15.54s/batch, batch_loss=7.86, batch_index=494, batch_size=256]

Epoch 1/10:  50%|█████▉      | 494/991 [2:11:54<2:07:39, 15.41s/batch, batch_loss=7.86, batch_index=494, batch_size=256]

Epoch 1/10:  50%|████▍    | 494/991 [2:12:08<2:07:39, 15.41s/batch, batch_loss=8.54e+4, batch_index=495, batch_size=256]

Epoch 1/10:  50%|████▍    | 495/991 [2:12:08<2:03:35, 14.95s/batch, batch_loss=8.54e+4, batch_index=495, batch_size=256]

Epoch 1/10:  50%|█████▉      | 495/991 [2:12:23<2:03:35, 14.95s/batch, batch_loss=11.7, batch_index=496, batch_size=256]

Epoch 1/10:  50%|██████      | 496/991 [2:12:23<2:04:19, 15.07s/batch, batch_loss=11.7, batch_index=496, batch_size=256]

Epoch 1/10:  50%|██████▌      | 496/991 [2:12:38<2:04:19, 15.07s/batch, batch_loss=176, batch_index=497, batch_size=256]

Epoch 1/10:  50%|██████▌      | 497/991 [2:12:38<2:04:58, 15.18s/batch, batch_loss=176, batch_index=497, batch_size=256]

Epoch 1/10:  50%|██████      | 497/991 [2:12:56<2:04:58, 15.18s/batch, batch_loss=13.3, batch_index=498, batch_size=256]

Epoch 1/10:  50%|██████      | 498/991 [2:12:56<2:11:07, 15.96s/batch, batch_loss=13.3, batch_index=498, batch_size=256]

Epoch 1/10:  50%|██████▌      | 498/991 [2:13:11<2:11:07, 15.96s/batch, batch_loss=403, batch_index=499, batch_size=256]

Epoch 1/10:  50%|██████▌      | 499/991 [2:13:11<2:08:49, 15.71s/batch, batch_loss=403, batch_index=499, batch_size=256]

Epoch 1/10:  50%|██████      | 499/991 [2:13:26<2:08:49, 15.71s/batch, batch_loss=17.6, batch_index=500, batch_size=256]

Epoch 1/10:  50%|██████      | 500/991 [2:13:26<2:07:04, 15.53s/batch, batch_loss=17.6, batch_index=500, batch_size=256]

Epoch 1/10:  50%|██████      | 500/991 [2:13:42<2:07:04, 15.53s/batch, batch_loss=8.11, batch_index=501, batch_size=256]

Epoch 1/10:  51%|██████      | 501/991 [2:13:42<2:07:58, 15.67s/batch, batch_loss=8.11, batch_index=501, batch_size=256]

Epoch 1/10:  51%|██████      | 501/991 [2:13:58<2:07:58, 15.67s/batch, batch_loss=10.1, batch_index=502, batch_size=256]

Epoch 1/10:  51%|██████      | 502/991 [2:13:58<2:07:16, 15.62s/batch, batch_loss=10.1, batch_index=502, batch_size=256]

Epoch 1/10:  51%|██████      | 502/991 [2:14:12<2:07:16, 15.62s/batch, batch_loss=18.8, batch_index=503, batch_size=256]

Epoch 1/10:  51%|██████      | 503/991 [2:14:12<2:04:33, 15.32s/batch, batch_loss=18.8, batch_index=503, batch_size=256]

Epoch 1/10:  51%|██████      | 503/991 [2:14:28<2:04:33, 15.32s/batch, batch_loss=11.4, batch_index=504, batch_size=256]

Epoch 1/10:  51%|██████      | 504/991 [2:14:28<2:05:05, 15.41s/batch, batch_loss=11.4, batch_index=504, batch_size=256]

Epoch 1/10:  51%|██████      | 504/991 [2:14:45<2:05:05, 15.41s/batch, batch_loss=7.12, batch_index=505, batch_size=256]

Epoch 1/10:  51%|██████      | 505/991 [2:14:45<2:07:37, 15.76s/batch, batch_loss=7.12, batch_index=505, batch_size=256]

Epoch 1/10:  51%|██████      | 505/991 [2:15:01<2:07:37, 15.76s/batch, batch_loss=12.4, batch_index=506, batch_size=256]

Epoch 1/10:  51%|██████▏     | 506/991 [2:15:01<2:08:56, 15.95s/batch, batch_loss=12.4, batch_index=506, batch_size=256]

Epoch 1/10:  51%|██████▏     | 506/991 [2:15:18<2:08:56, 15.95s/batch, batch_loss=9.77, batch_index=507, batch_size=256]

Epoch 1/10:  51%|██████▏     | 507/991 [2:15:18<2:10:41, 16.20s/batch, batch_loss=9.77, batch_index=507, batch_size=256]

Epoch 1/10:  51%|██████▏     | 507/991 [2:15:34<2:10:41, 16.20s/batch, batch_loss=14.5, batch_index=508, batch_size=256]

Epoch 1/10:  51%|██████▏     | 508/991 [2:15:34<2:11:27, 16.33s/batch, batch_loss=14.5, batch_index=508, batch_size=256]

Epoch 1/10:  51%|██████▏     | 508/991 [2:15:54<2:11:27, 16.33s/batch, batch_loss=15.7, batch_index=509, batch_size=256]

Epoch 1/10:  51%|██████▏     | 509/991 [2:15:54<2:19:08, 17.32s/batch, batch_loss=15.7, batch_index=509, batch_size=256]

Epoch 1/10:  51%|██████▏     | 509/991 [2:16:11<2:19:08, 17.32s/batch, batch_loss=11.7, batch_index=510, batch_size=256]

Epoch 1/10:  51%|██████▏     | 510/991 [2:16:11<2:17:29, 17.15s/batch, batch_loss=11.7, batch_index=510, batch_size=256]

Epoch 1/10:  51%|██████▏     | 510/991 [2:16:28<2:17:29, 17.15s/batch, batch_loss=13.5, batch_index=511, batch_size=256]

Epoch 1/10:  52%|██████▏     | 511/991 [2:16:28<2:16:18, 17.04s/batch, batch_loss=13.5, batch_index=511, batch_size=256]

Epoch 1/10:  52%|██████▏     | 511/991 [2:16:44<2:16:18, 17.04s/batch, batch_loss=9.52, batch_index=512, batch_size=256]

Epoch 1/10:  52%|██████▏     | 512/991 [2:16:44<2:13:52, 16.77s/batch, batch_loss=9.52, batch_index=512, batch_size=256]

Epoch 1/10:  52%|██████▏     | 512/991 [2:16:59<2:13:52, 16.77s/batch, batch_loss=8.43, batch_index=513, batch_size=256]

Epoch 1/10:  52%|██████▏     | 513/991 [2:16:59<2:09:10, 16.21s/batch, batch_loss=8.43, batch_index=513, batch_size=256]

Epoch 1/10:  52%|██████▏     | 513/991 [2:17:15<2:09:10, 16.21s/batch, batch_loss=12.2, batch_index=514, batch_size=256]

Epoch 1/10:  52%|██████▏     | 514/991 [2:17:15<2:08:57, 16.22s/batch, batch_loss=12.2, batch_index=514, batch_size=256]

Epoch 1/10:  52%|██████▏     | 514/991 [2:17:31<2:08:57, 16.22s/batch, batch_loss=13.2, batch_index=515, batch_size=256]

Epoch 1/10:  52%|██████▏     | 515/991 [2:17:31<2:08:17, 16.17s/batch, batch_loss=13.2, batch_index=515, batch_size=256]

Epoch 1/10:  52%|██████▏     | 515/991 [2:17:47<2:08:17, 16.17s/batch, batch_loss=17.6, batch_index=516, batch_size=256]

Epoch 1/10:  52%|██████▏     | 516/991 [2:17:47<2:07:57, 16.16s/batch, batch_loss=17.6, batch_index=516, batch_size=256]

Epoch 1/10:  52%|██████▏     | 516/991 [2:18:02<2:07:57, 16.16s/batch, batch_loss=12.6, batch_index=517, batch_size=256]

Epoch 1/10:  52%|██████▎     | 517/991 [2:18:02<2:04:22, 15.74s/batch, batch_loss=12.6, batch_index=517, batch_size=256]

Epoch 1/10:  52%|██████▎     | 517/991 [2:18:17<2:04:22, 15.74s/batch, batch_loss=19.6, batch_index=518, batch_size=256]

Epoch 1/10:  52%|██████▎     | 518/991 [2:18:17<2:01:53, 15.46s/batch, batch_loss=19.6, batch_index=518, batch_size=256]

Epoch 1/10:  52%|██████▎     | 518/991 [2:18:33<2:01:53, 15.46s/batch, batch_loss=13.4, batch_index=519, batch_size=256]

Epoch 1/10:  52%|██████▎     | 519/991 [2:18:33<2:02:49, 15.61s/batch, batch_loss=13.4, batch_index=519, batch_size=256]

Epoch 1/10:  52%|██████▎     | 519/991 [2:18:49<2:02:49, 15.61s/batch, batch_loss=10.4, batch_index=520, batch_size=256]

Epoch 1/10:  52%|██████▎     | 520/991 [2:18:49<2:04:21, 15.84s/batch, batch_loss=10.4, batch_index=520, batch_size=256]

Epoch 1/10:  52%|██████▎     | 520/991 [2:19:05<2:04:21, 15.84s/batch, batch_loss=7.83, batch_index=521, batch_size=256]

Epoch 1/10:  53%|██████▎     | 521/991 [2:19:05<2:03:43, 15.79s/batch, batch_loss=7.83, batch_index=521, batch_size=256]

Epoch 1/10:  53%|███████▎      | 521/991 [2:19:20<2:03:43, 15.79s/batch, batch_loss=10, batch_index=522, batch_size=256]

Epoch 1/10:  53%|███████▎      | 522/991 [2:19:20<2:01:44, 15.57s/batch, batch_loss=10, batch_index=522, batch_size=256]

Epoch 1/10:  53%|██████▎     | 522/991 [2:19:34<2:01:44, 15.57s/batch, batch_loss=3.13, batch_index=523, batch_size=256]

Epoch 1/10:  53%|██████▎     | 523/991 [2:19:34<1:59:09, 15.28s/batch, batch_loss=3.13, batch_index=523, batch_size=256]

Epoch 1/10:  53%|██████▎     | 523/991 [2:19:49<1:59:09, 15.28s/batch, batch_loss=8.49, batch_index=524, batch_size=256]

Epoch 1/10:  53%|██████▎     | 524/991 [2:19:49<1:58:04, 15.17s/batch, batch_loss=8.49, batch_index=524, batch_size=256]

Epoch 1/10:  53%|██████▎     | 524/991 [2:20:04<1:58:04, 15.17s/batch, batch_loss=6.05, batch_index=525, batch_size=256]

Epoch 1/10:  53%|██████▎     | 525/991 [2:20:04<1:56:53, 15.05s/batch, batch_loss=6.05, batch_index=525, batch_size=256]

Epoch 1/10:  53%|██████▎     | 525/991 [2:20:19<1:56:53, 15.05s/batch, batch_loss=7.95, batch_index=526, batch_size=256]

Epoch 1/10:  53%|██████▎     | 526/991 [2:20:19<1:57:25, 15.15s/batch, batch_loss=7.95, batch_index=526, batch_size=256]

Epoch 1/10:  53%|██████▎     | 526/991 [2:20:35<1:57:25, 15.15s/batch, batch_loss=13.9, batch_index=527, batch_size=256]

Epoch 1/10:  53%|██████▍     | 527/991 [2:20:35<1:59:14, 15.42s/batch, batch_loss=13.9, batch_index=527, batch_size=256]

Epoch 1/10:  53%|██████▍     | 527/991 [2:20:51<1:59:14, 15.42s/batch, batch_loss=13.8, batch_index=528, batch_size=256]

Epoch 1/10:  53%|██████▍     | 528/991 [2:20:51<1:58:55, 15.41s/batch, batch_loss=13.8, batch_index=528, batch_size=256]

Epoch 1/10:  53%|██████▍     | 528/991 [2:21:06<1:58:55, 15.41s/batch, batch_loss=9.39, batch_index=529, batch_size=256]

Epoch 1/10:  53%|██████▍     | 529/991 [2:21:06<1:58:04, 15.33s/batch, batch_loss=9.39, batch_index=529, batch_size=256]

Epoch 1/10:  53%|██████▍     | 529/991 [2:21:21<1:58:04, 15.33s/batch, batch_loss=15.4, batch_index=530, batch_size=256]

Epoch 1/10:  53%|██████▍     | 530/991 [2:21:21<1:56:52, 15.21s/batch, batch_loss=15.4, batch_index=530, batch_size=256]

Epoch 1/10:  53%|██████▍     | 530/991 [2:21:36<1:56:52, 15.21s/batch, batch_loss=12.8, batch_index=531, batch_size=256]

Epoch 1/10:  54%|██████▍     | 531/991 [2:21:36<1:55:20, 15.04s/batch, batch_loss=12.8, batch_index=531, batch_size=256]

Epoch 1/10:  54%|██████▍     | 531/991 [2:21:52<1:55:20, 15.04s/batch, batch_loss=13.3, batch_index=532, batch_size=256]

Epoch 1/10:  54%|██████▍     | 532/991 [2:21:52<1:57:04, 15.30s/batch, batch_loss=13.3, batch_index=532, batch_size=256]

Epoch 1/10:  54%|██████▍     | 532/991 [2:22:07<1:57:04, 15.30s/batch, batch_loss=12.9, batch_index=533, batch_size=256]

Epoch 1/10:  54%|██████▍     | 533/991 [2:22:07<1:56:24, 15.25s/batch, batch_loss=12.9, batch_index=533, batch_size=256]

Epoch 1/10:  54%|██████▍     | 533/991 [2:22:22<1:56:24, 15.25s/batch, batch_loss=12.2, batch_index=534, batch_size=256]

Epoch 1/10:  54%|██████▍     | 534/991 [2:22:22<1:57:14, 15.39s/batch, batch_loss=12.2, batch_index=534, batch_size=256]

Epoch 1/10:  54%|██████▍     | 534/991 [2:22:38<1:57:14, 15.39s/batch, batch_loss=17.7, batch_index=535, batch_size=256]

Epoch 1/10:  54%|██████▍     | 535/991 [2:22:38<1:58:25, 15.58s/batch, batch_loss=17.7, batch_index=535, batch_size=256]

Epoch 1/10:  54%|██████▍     | 535/991 [2:22:54<1:58:25, 15.58s/batch, batch_loss=13.9, batch_index=536, batch_size=256]

Epoch 1/10:  54%|██████▍     | 536/991 [2:22:54<1:57:08, 15.45s/batch, batch_loss=13.9, batch_index=536, batch_size=256]

Epoch 1/10:  54%|██████▍     | 536/991 [2:23:08<1:57:08, 15.45s/batch, batch_loss=9.56, batch_index=537, batch_size=256]

Epoch 1/10:  54%|██████▌     | 537/991 [2:23:08<1:55:29, 15.26s/batch, batch_loss=9.56, batch_index=537, batch_size=256]

Epoch 1/10:  54%|████▉    | 537/991 [2:23:25<1:55:29, 15.26s/batch, batch_loss=1.78e+3, batch_index=538, batch_size=256]

Epoch 1/10:  54%|████▉    | 538/991 [2:23:25<1:57:26, 15.56s/batch, batch_loss=1.78e+3, batch_index=538, batch_size=256]

Epoch 1/10:  54%|██████▌     | 538/991 [2:23:40<1:57:26, 15.56s/batch, batch_loss=24.8, batch_index=539, batch_size=256]

Epoch 1/10:  54%|██████▌     | 539/991 [2:23:40<1:56:34, 15.48s/batch, batch_loss=24.8, batch_index=539, batch_size=256]

Epoch 1/10:  54%|██████▌     | 539/991 [2:23:57<1:56:34, 15.48s/batch, batch_loss=22.3, batch_index=540, batch_size=256]

Epoch 1/10:  54%|██████▌     | 540/991 [2:23:57<1:59:10, 15.85s/batch, batch_loss=22.3, batch_index=540, batch_size=256]

Epoch 1/10:  54%|█████▍    | 540/991 [2:24:12<1:59:10, 15.85s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 1/10:  55%|█████▍    | 541/991 [2:24:12<1:57:47, 15.71s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 1/10:  55%|████▉    | 541/991 [2:24:28<1:57:47, 15.71s/batch, batch_loss=2.84e+3, batch_index=542, batch_size=256]

Epoch 1/10:  55%|████▉    | 542/991 [2:24:28<1:57:51, 15.75s/batch, batch_loss=2.84e+3, batch_index=542, batch_size=256]

Epoch 1/10:  55%|██████▌     | 542/991 [2:24:47<1:57:51, 15.75s/batch, batch_loss=35.3, batch_index=543, batch_size=256]

Epoch 1/10:  55%|██████▌     | 543/991 [2:24:47<2:05:29, 16.81s/batch, batch_loss=35.3, batch_index=543, batch_size=256]

Epoch 1/10:  55%|██████▌     | 543/991 [2:25:03<2:05:29, 16.81s/batch, batch_loss=23.1, batch_index=544, batch_size=256]

Epoch 1/10:  55%|██████▌     | 544/991 [2:25:03<2:04:11, 16.67s/batch, batch_loss=23.1, batch_index=544, batch_size=256]

Epoch 1/10:  55%|██████▌     | 544/991 [2:25:21<2:04:11, 16.67s/batch, batch_loss=13.8, batch_index=545, batch_size=256]

Epoch 1/10:  55%|██████▌     | 545/991 [2:25:21<2:05:19, 16.86s/batch, batch_loss=13.8, batch_index=545, batch_size=256]

Epoch 1/10:  55%|███████▏     | 545/991 [2:25:37<2:05:19, 16.86s/batch, batch_loss=300, batch_index=546, batch_size=256]

Epoch 1/10:  55%|███████▏     | 546/991 [2:25:37<2:03:35, 16.66s/batch, batch_loss=300, batch_index=546, batch_size=256]

Epoch 1/10:  55%|██████▌     | 546/991 [2:25:54<2:03:35, 16.66s/batch, batch_loss=16.5, batch_index=547, batch_size=256]

Epoch 1/10:  55%|██████▌     | 547/991 [2:25:54<2:04:34, 16.83s/batch, batch_loss=16.5, batch_index=547, batch_size=256]

Epoch 1/10:  55%|███████▋      | 547/991 [2:26:09<2:04:34, 16.83s/batch, batch_loss=13, batch_index=548, batch_size=256]

Epoch 1/10:  55%|███████▋      | 548/991 [2:26:09<1:59:05, 16.13s/batch, batch_loss=13, batch_index=548, batch_size=256]

Epoch 1/10:  55%|███████▋      | 548/991 [2:26:24<1:59:05, 16.13s/batch, batch_loss=12, batch_index=549, batch_size=256]

Epoch 1/10:  55%|███████▊      | 549/991 [2:26:24<1:57:31, 15.95s/batch, batch_loss=12, batch_index=549, batch_size=256]

Epoch 1/10:  55%|██████▋     | 549/991 [2:26:40<1:57:31, 15.95s/batch, batch_loss=19.2, batch_index=550, batch_size=256]

Epoch 1/10:  55%|██████▋     | 550/991 [2:26:40<1:57:28, 15.98s/batch, batch_loss=19.2, batch_index=550, batch_size=256]

Epoch 1/10:  55%|███████▊      | 550/991 [2:26:56<1:57:28, 15.98s/batch, batch_loss=15, batch_index=551, batch_size=256]

Epoch 1/10:  56%|███████▊      | 551/991 [2:26:56<1:55:37, 15.77s/batch, batch_loss=15, batch_index=551, batch_size=256]

Epoch 1/10:  56%|██████▋     | 551/991 [2:27:11<1:55:37, 15.77s/batch, batch_loss=14.3, batch_index=552, batch_size=256]

Epoch 1/10:  56%|██████▋     | 552/991 [2:27:11<1:55:13, 15.75s/batch, batch_loss=14.3, batch_index=552, batch_size=256]

Epoch 1/10:  56%|██████▋     | 552/991 [2:27:27<1:55:13, 15.75s/batch, batch_loss=17.7, batch_index=553, batch_size=256]

Epoch 1/10:  56%|██████▋     | 553/991 [2:27:28<1:56:03, 15.90s/batch, batch_loss=17.7, batch_index=553, batch_size=256]

Epoch 1/10:  56%|█████    | 553/991 [2:27:46<1:56:03, 15.90s/batch, batch_loss=5.74e+3, batch_index=554, batch_size=256]

Epoch 1/10:  56%|█████    | 554/991 [2:27:46<2:02:26, 16.81s/batch, batch_loss=5.74e+3, batch_index=554, batch_size=256]

Epoch 1/10:  56%|█████    | 554/991 [2:28:01<2:02:26, 16.81s/batch, batch_loss=2.57e+3, batch_index=555, batch_size=256]

Epoch 1/10:  56%|█████    | 555/991 [2:28:01<1:57:37, 16.19s/batch, batch_loss=2.57e+3, batch_index=555, batch_size=256]

Epoch 1/10:  56%|███████▊      | 555/991 [2:28:16<1:57:37, 16.19s/batch, batch_loss=16, batch_index=556, batch_size=256]

Epoch 1/10:  56%|███████▊      | 556/991 [2:28:16<1:55:19, 15.91s/batch, batch_loss=16, batch_index=556, batch_size=256]

Epoch 1/10:  56%|█████    | 556/991 [2:28:32<1:55:19, 15.91s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 1/10:  56%|█████    | 557/991 [2:28:32<1:53:33, 15.70s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 1/10:  56%|██████▋     | 557/991 [2:28:49<1:53:33, 15.70s/batch, batch_loss=10.2, batch_index=558, batch_size=256]

Epoch 1/10:  56%|██████▊     | 558/991 [2:28:49<1:56:36, 16.16s/batch, batch_loss=10.2, batch_index=558, batch_size=256]

Epoch 1/10:  56%|██████▊     | 558/991 [2:29:04<1:56:36, 16.16s/batch, batch_loss=18.1, batch_index=559, batch_size=256]

Epoch 1/10:  56%|██████▊     | 559/991 [2:29:04<1:55:02, 15.98s/batch, batch_loss=18.1, batch_index=559, batch_size=256]

Epoch 1/10:  56%|███████▉      | 559/991 [2:29:20<1:55:02, 15.98s/batch, batch_loss=10, batch_index=560, batch_size=256]

Epoch 1/10:  57%|███████▉      | 560/991 [2:29:20<1:53:34, 15.81s/batch, batch_loss=10, batch_index=560, batch_size=256]

Epoch 1/10:  57%|███████▎     | 560/991 [2:29:35<1:53:34, 15.81s/batch, batch_loss=8.1, batch_index=561, batch_size=256]

Epoch 1/10:  57%|███████▎     | 561/991 [2:29:35<1:52:19, 15.67s/batch, batch_loss=8.1, batch_index=561, batch_size=256]

Epoch 1/10:  57%|██████▊     | 561/991 [2:29:51<1:52:19, 15.67s/batch, batch_loss=17.1, batch_index=562, batch_size=256]

Epoch 1/10:  57%|██████▊     | 562/991 [2:29:51<1:51:55, 15.65s/batch, batch_loss=17.1, batch_index=562, batch_size=256]

Epoch 1/10:  57%|██████▊     | 562/991 [2:30:06<1:51:55, 15.65s/batch, batch_loss=6.81, batch_index=563, batch_size=256]

Epoch 1/10:  57%|██████▊     | 563/991 [2:30:06<1:50:20, 15.47s/batch, batch_loss=6.81, batch_index=563, batch_size=256]

Epoch 1/10:  57%|██████▊     | 563/991 [2:30:21<1:50:20, 15.47s/batch, batch_loss=12.1, batch_index=564, batch_size=256]

Epoch 1/10:  57%|██████▊     | 564/991 [2:30:21<1:49:25, 15.38s/batch, batch_loss=12.1, batch_index=564, batch_size=256]

Epoch 1/10:  57%|███████▍     | 564/991 [2:30:36<1:49:25, 15.38s/batch, batch_loss=500, batch_index=565, batch_size=256]

Epoch 1/10:  57%|███████▍     | 565/991 [2:30:36<1:48:52, 15.33s/batch, batch_loss=500, batch_index=565, batch_size=256]

Epoch 1/10:  57%|██████▊     | 565/991 [2:30:55<1:48:52, 15.33s/batch, batch_loss=10.5, batch_index=566, batch_size=256]

Epoch 1/10:  57%|██████▊     | 566/991 [2:30:55<1:55:08, 16.25s/batch, batch_loss=10.5, batch_index=566, batch_size=256]

Epoch 1/10:  57%|██████▊     | 566/991 [2:31:11<1:55:08, 16.25s/batch, batch_loss=19.6, batch_index=567, batch_size=256]

Epoch 1/10:  57%|██████▊     | 567/991 [2:31:11<1:55:27, 16.34s/batch, batch_loss=19.6, batch_index=567, batch_size=256]

Epoch 1/10:  57%|███████▍     | 567/991 [2:31:27<1:55:27, 16.34s/batch, batch_loss=299, batch_index=568, batch_size=256]

Epoch 1/10:  57%|███████▍     | 568/991 [2:31:27<1:54:16, 16.21s/batch, batch_loss=299, batch_index=568, batch_size=256]

Epoch 1/10:  57%|██████▉     | 568/991 [2:31:42<1:54:16, 16.21s/batch, batch_loss=25.4, batch_index=569, batch_size=256]

Epoch 1/10:  57%|██████▉     | 569/991 [2:31:42<1:51:54, 15.91s/batch, batch_loss=25.4, batch_index=569, batch_size=256]

Epoch 1/10:  57%|█████▏   | 569/991 [2:31:57<1:51:54, 15.91s/batch, batch_loss=8.46e+3, batch_index=570, batch_size=256]

Epoch 1/10:  58%|█████▏   | 570/991 [2:31:57<1:49:27, 15.60s/batch, batch_loss=8.46e+3, batch_index=570, batch_size=256]

Epoch 1/10:  58%|███████▍     | 570/991 [2:32:12<1:49:27, 15.60s/batch, batch_loss=8.8, batch_index=571, batch_size=256]

Epoch 1/10:  58%|███████▍     | 571/991 [2:32:12<1:47:58, 15.42s/batch, batch_loss=8.8, batch_index=571, batch_size=256]

Epoch 1/10:  58%|██████▉     | 571/991 [2:32:28<1:47:58, 15.42s/batch, batch_loss=12.4, batch_index=572, batch_size=256]

Epoch 1/10:  58%|██████▉     | 572/991 [2:32:28<1:48:09, 15.49s/batch, batch_loss=12.4, batch_index=572, batch_size=256]

Epoch 1/10:  58%|██████▉     | 572/991 [2:32:43<1:48:09, 15.49s/batch, batch_loss=6.94, batch_index=573, batch_size=256]

Epoch 1/10:  58%|██████▉     | 573/991 [2:32:43<1:48:01, 15.51s/batch, batch_loss=6.94, batch_index=573, batch_size=256]

Epoch 1/10:  58%|██████▉     | 573/991 [2:33:00<1:48:01, 15.51s/batch, batch_loss=11.9, batch_index=574, batch_size=256]

Epoch 1/10:  58%|██████▉     | 574/991 [2:33:00<1:49:41, 15.78s/batch, batch_loss=11.9, batch_index=574, batch_size=256]

Epoch 1/10:  58%|██████▉     | 574/991 [2:33:15<1:49:41, 15.78s/batch, batch_loss=16.1, batch_index=575, batch_size=256]

Epoch 1/10:  58%|██████▉     | 575/991 [2:33:15<1:48:45, 15.69s/batch, batch_loss=16.1, batch_index=575, batch_size=256]

Epoch 1/10:  58%|██████▉     | 575/991 [2:33:32<1:48:45, 15.69s/batch, batch_loss=23.7, batch_index=576, batch_size=256]

Epoch 1/10:  58%|██████▉     | 576/991 [2:33:32<1:51:18, 16.09s/batch, batch_loss=23.7, batch_index=576, batch_size=256]

Epoch 1/10:  58%|██████▉     | 576/991 [2:33:48<1:51:18, 16.09s/batch, batch_loss=10.7, batch_index=577, batch_size=256]

Epoch 1/10:  58%|██████▉     | 577/991 [2:33:48<1:49:25, 15.86s/batch, batch_loss=10.7, batch_index=577, batch_size=256]

Epoch 1/10:  58%|██████▉     | 577/991 [2:34:03<1:49:25, 15.86s/batch, batch_loss=10.1, batch_index=578, batch_size=256]

Epoch 1/10:  58%|██████▉     | 578/991 [2:34:03<1:48:40, 15.79s/batch, batch_loss=10.1, batch_index=578, batch_size=256]

Epoch 1/10:  58%|██████▉     | 578/991 [2:34:19<1:48:40, 15.79s/batch, batch_loss=11.1, batch_index=579, batch_size=256]

Epoch 1/10:  58%|███████     | 579/991 [2:34:19<1:47:26, 15.65s/batch, batch_loss=11.1, batch_index=579, batch_size=256]

Epoch 1/10:  58%|███████     | 579/991 [2:34:35<1:47:26, 15.65s/batch, batch_loss=18.4, batch_index=580, batch_size=256]

Epoch 1/10:  59%|███████     | 580/991 [2:34:35<1:47:50, 15.74s/batch, batch_loss=18.4, batch_index=580, batch_size=256]

Epoch 1/10:  59%|███████     | 580/991 [2:34:52<1:47:50, 15.74s/batch, batch_loss=6.32, batch_index=581, batch_size=256]

Epoch 1/10:  59%|███████     | 581/991 [2:34:52<1:51:49, 16.36s/batch, batch_loss=6.32, batch_index=581, batch_size=256]

Epoch 1/10:  59%|██████▍    | 581/991 [2:35:08<1:51:49, 16.36s/batch, batch_loss=0.635, batch_index=582, batch_size=256]

Epoch 1/10:  59%|██████▍    | 582/991 [2:35:08<1:50:40, 16.24s/batch, batch_loss=0.635, batch_index=582, batch_size=256]

Epoch 1/10:  59%|█████▎   | 582/991 [2:35:23<1:50:40, 16.24s/batch, batch_loss=6.61e+3, batch_index=583, batch_size=256]

Epoch 1/10:  59%|█████▎   | 583/991 [2:35:23<1:47:30, 15.81s/batch, batch_loss=6.61e+3, batch_index=583, batch_size=256]

Epoch 1/10:  59%|███████     | 583/991 [2:35:38<1:47:30, 15.81s/batch, batch_loss=12.6, batch_index=584, batch_size=256]

Epoch 1/10:  59%|███████     | 584/991 [2:35:38<1:45:51, 15.61s/batch, batch_loss=12.6, batch_index=584, batch_size=256]

Epoch 1/10:  59%|███████     | 584/991 [2:35:54<1:45:51, 15.61s/batch, batch_loss=8.79, batch_index=585, batch_size=256]

Epoch 1/10:  59%|███████     | 585/991 [2:35:54<1:45:34, 15.60s/batch, batch_loss=8.79, batch_index=585, batch_size=256]

Epoch 1/10:  59%|███████     | 585/991 [2:36:08<1:45:34, 15.60s/batch, batch_loss=35.4, batch_index=586, batch_size=256]

Epoch 1/10:  59%|███████     | 586/991 [2:36:08<1:42:07, 15.13s/batch, batch_loss=35.4, batch_index=586, batch_size=256]

Epoch 1/10:  59%|███████     | 586/991 [2:36:23<1:42:07, 15.13s/batch, batch_loss=27.5, batch_index=587, batch_size=256]

Epoch 1/10:  59%|███████     | 587/991 [2:36:23<1:42:11, 15.18s/batch, batch_loss=27.5, batch_index=587, batch_size=256]

Epoch 1/10:  59%|███████     | 587/991 [2:36:39<1:42:11, 15.18s/batch, batch_loss=15.1, batch_index=588, batch_size=256]

Epoch 1/10:  59%|███████     | 588/991 [2:36:39<1:42:45, 15.30s/batch, batch_loss=15.1, batch_index=588, batch_size=256]

Epoch 1/10:  59%|███████     | 588/991 [2:36:54<1:42:45, 15.30s/batch, batch_loss=8.62, batch_index=589, batch_size=256]

Epoch 1/10:  59%|███████▏    | 589/991 [2:36:54<1:41:37, 15.17s/batch, batch_loss=8.62, batch_index=589, batch_size=256]

Epoch 1/10:  59%|███████▏    | 589/991 [2:37:08<1:41:37, 15.17s/batch, batch_loss=17.8, batch_index=590, batch_size=256]

Epoch 1/10:  60%|███████▏    | 590/991 [2:37:08<1:40:25, 15.03s/batch, batch_loss=17.8, batch_index=590, batch_size=256]

Epoch 1/10:  60%|███████▏    | 590/991 [2:37:23<1:40:25, 15.03s/batch, batch_loss=15.2, batch_index=591, batch_size=256]

Epoch 1/10:  60%|███████▏    | 591/991 [2:37:23<1:40:22, 15.06s/batch, batch_loss=15.2, batch_index=591, batch_size=256]

Epoch 1/10:  60%|███████▏    | 591/991 [2:37:38<1:40:22, 15.06s/batch, batch_loss=6.21, batch_index=592, batch_size=256]

Epoch 1/10:  60%|███████▏    | 592/991 [2:37:38<1:40:03, 15.05s/batch, batch_loss=6.21, batch_index=592, batch_size=256]

Epoch 1/10:  60%|████████▎     | 592/991 [2:37:53<1:40:03, 15.05s/batch, batch_loss=12, batch_index=593, batch_size=256]

Epoch 1/10:  60%|████████▍     | 593/991 [2:37:53<1:39:27, 14.99s/batch, batch_loss=12, batch_index=593, batch_size=256]

Epoch 1/10:  60%|███████▏    | 593/991 [2:38:09<1:39:27, 14.99s/batch, batch_loss=11.6, batch_index=594, batch_size=256]

Epoch 1/10:  60%|███████▏    | 594/991 [2:38:09<1:40:22, 15.17s/batch, batch_loss=11.6, batch_index=594, batch_size=256]

Epoch 1/10:  60%|███████▊     | 594/991 [2:38:25<1:40:22, 15.17s/batch, batch_loss=6.4, batch_index=595, batch_size=256]

Epoch 1/10:  60%|███████▊     | 595/991 [2:38:25<1:42:04, 15.47s/batch, batch_loss=6.4, batch_index=595, batch_size=256]

Epoch 1/10:  60%|███████▏    | 595/991 [2:38:40<1:42:04, 15.47s/batch, batch_loss=6.73, batch_index=596, batch_size=256]

Epoch 1/10:  60%|███████▏    | 596/991 [2:38:40<1:40:06, 15.21s/batch, batch_loss=6.73, batch_index=596, batch_size=256]

Epoch 1/10:  60%|███████▏    | 596/991 [2:38:57<1:40:06, 15.21s/batch, batch_loss=20.5, batch_index=597, batch_size=256]

Epoch 1/10:  60%|███████▏    | 597/991 [2:38:57<1:43:29, 15.76s/batch, batch_loss=20.5, batch_index=597, batch_size=256]

Epoch 1/10:  60%|███████▏    | 597/991 [2:39:13<1:43:29, 15.76s/batch, batch_loss=9.06, batch_index=598, batch_size=256]

Epoch 1/10:  60%|███████▏    | 598/991 [2:39:13<1:43:35, 15.82s/batch, batch_loss=9.06, batch_index=598, batch_size=256]

Epoch 1/10:  60%|███████▏    | 598/991 [2:39:28<1:43:35, 15.82s/batch, batch_loss=16.6, batch_index=599, batch_size=256]

Epoch 1/10:  60%|███████▎    | 599/991 [2:39:28<1:42:01, 15.62s/batch, batch_loss=16.6, batch_index=599, batch_size=256]

Epoch 1/10:  60%|███████▎    | 599/991 [2:39:44<1:42:01, 15.62s/batch, batch_loss=13.4, batch_index=600, batch_size=256]

Epoch 1/10:  61%|███████▎    | 600/991 [2:39:44<1:43:24, 15.87s/batch, batch_loss=13.4, batch_index=600, batch_size=256]

Epoch 1/10:  61%|███████▎    | 600/991 [2:40:01<1:43:24, 15.87s/batch, batch_loss=14.1, batch_index=601, batch_size=256]

Epoch 1/10:  61%|███████▎    | 601/991 [2:40:01<1:44:28, 16.07s/batch, batch_loss=14.1, batch_index=601, batch_size=256]

Epoch 1/10:  61%|███████▎    | 601/991 [2:40:17<1:44:28, 16.07s/batch, batch_loss=9.21, batch_index=602, batch_size=256]

Epoch 1/10:  61%|███████▎    | 602/991 [2:40:17<1:43:45, 16.00s/batch, batch_loss=9.21, batch_index=602, batch_size=256]

Epoch 1/10:  61%|███████▎    | 602/991 [2:40:33<1:43:45, 16.00s/batch, batch_loss=7.18, batch_index=603, batch_size=256]

Epoch 1/10:  61%|███████▎    | 603/991 [2:40:33<1:43:41, 16.04s/batch, batch_loss=7.18, batch_index=603, batch_size=256]

Epoch 1/10:  61%|█████▍   | 603/991 [2:40:47<1:43:41, 16.04s/batch, batch_loss=1.01e+4, batch_index=604, batch_size=256]

Epoch 1/10:  61%|█████▍   | 604/991 [2:40:47<1:40:56, 15.65s/batch, batch_loss=1.01e+4, batch_index=604, batch_size=256]

Epoch 1/10:  61%|███████▎    | 604/991 [2:41:03<1:40:56, 15.65s/batch, batch_loss=10.6, batch_index=605, batch_size=256]

Epoch 1/10:  61%|███████▎    | 605/991 [2:41:03<1:40:34, 15.63s/batch, batch_loss=10.6, batch_index=605, batch_size=256]

Epoch 1/10:  61%|███████▎    | 605/991 [2:41:21<1:40:34, 15.63s/batch, batch_loss=6.81, batch_index=606, batch_size=256]

Epoch 1/10:  61%|███████▎    | 606/991 [2:41:21<1:44:34, 16.30s/batch, batch_loss=6.81, batch_index=606, batch_size=256]

Epoch 1/10:  61%|███████▎    | 606/991 [2:41:37<1:44:34, 16.30s/batch, batch_loss=9.94, batch_index=607, batch_size=256]

Epoch 1/10:  61%|███████▎    | 607/991 [2:41:37<1:42:56, 16.09s/batch, batch_loss=9.94, batch_index=607, batch_size=256]

Epoch 1/10:  61%|███████▎    | 607/991 [2:41:52<1:42:56, 16.09s/batch, batch_loss=14.5, batch_index=608, batch_size=256]

Epoch 1/10:  61%|███████▎    | 608/991 [2:41:52<1:41:52, 15.96s/batch, batch_loss=14.5, batch_index=608, batch_size=256]

Epoch 1/10:  61%|███████▎    | 608/991 [2:42:08<1:41:52, 15.96s/batch, batch_loss=15.1, batch_index=609, batch_size=256]

Epoch 1/10:  61%|███████▎    | 609/991 [2:42:08<1:41:09, 15.89s/batch, batch_loss=15.1, batch_index=609, batch_size=256]

Epoch 1/10:  61%|███████▎    | 609/991 [2:42:24<1:41:09, 15.89s/batch, batch_loss=15.7, batch_index=610, batch_size=256]

Epoch 1/10:  62%|███████▍    | 610/991 [2:42:24<1:41:08, 15.93s/batch, batch_loss=15.7, batch_index=610, batch_size=256]

Epoch 1/10:  62%|███████▍    | 610/991 [2:42:40<1:41:08, 15.93s/batch, batch_loss=22.9, batch_index=611, batch_size=256]

Epoch 1/10:  62%|███████▍    | 611/991 [2:42:40<1:40:19, 15.84s/batch, batch_loss=22.9, batch_index=611, batch_size=256]

Epoch 1/10:  62%|███████▍    | 611/991 [2:42:57<1:40:19, 15.84s/batch, batch_loss=7.12, batch_index=612, batch_size=256]

Epoch 1/10:  62%|███████▍    | 612/991 [2:42:57<1:42:16, 16.19s/batch, batch_loss=7.12, batch_index=612, batch_size=256]

Epoch 1/10:  62%|███████▍    | 612/991 [2:43:14<1:42:16, 16.19s/batch, batch_loss=12.8, batch_index=613, batch_size=256]

Epoch 1/10:  62%|███████▍    | 613/991 [2:43:14<1:44:28, 16.58s/batch, batch_loss=12.8, batch_index=613, batch_size=256]

Epoch 1/10:  62%|█████▌   | 613/991 [2:43:30<1:44:28, 16.58s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 1/10:  62%|█████▌   | 614/991 [2:43:30<1:43:12, 16.43s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 1/10:  62%|████████     | 614/991 [2:43:46<1:43:12, 16.43s/batch, batch_loss=985, batch_index=615, batch_size=256]

Epoch 1/10:  62%|████████     | 615/991 [2:43:46<1:42:48, 16.41s/batch, batch_loss=985, batch_index=615, batch_size=256]

Epoch 1/10:  62%|███████▍    | 615/991 [2:44:03<1:42:48, 16.41s/batch, batch_loss=9.54, batch_index=616, batch_size=256]

Epoch 1/10:  62%|███████▍    | 616/991 [2:44:03<1:42:32, 16.41s/batch, batch_loss=9.54, batch_index=616, batch_size=256]

Epoch 1/10:  62%|███████▍    | 616/991 [2:44:19<1:42:32, 16.41s/batch, batch_loss=18.8, batch_index=617, batch_size=256]

Epoch 1/10:  62%|███████▍    | 617/991 [2:44:19<1:42:34, 16.45s/batch, batch_loss=18.8, batch_index=617, batch_size=256]

Epoch 1/10:  62%|███████▍    | 617/991 [2:44:36<1:42:34, 16.45s/batch, batch_loss=13.6, batch_index=618, batch_size=256]

Epoch 1/10:  62%|███████▍    | 618/991 [2:44:36<1:42:39, 16.51s/batch, batch_loss=13.6, batch_index=618, batch_size=256]

Epoch 1/10:  62%|███████▍    | 618/991 [2:44:52<1:42:39, 16.51s/batch, batch_loss=19.1, batch_index=619, batch_size=256]

Epoch 1/10:  62%|███████▍    | 619/991 [2:44:52<1:41:49, 16.42s/batch, batch_loss=19.1, batch_index=619, batch_size=256]

Epoch 1/10:  62%|████████▋     | 619/991 [2:45:09<1:41:49, 16.42s/batch, batch_loss=13, batch_index=620, batch_size=256]

Epoch 1/10:  63%|████████▊     | 620/991 [2:45:09<1:41:05, 16.35s/batch, batch_loss=13, batch_index=620, batch_size=256]

Epoch 1/10:  63%|███████▌    | 620/991 [2:45:24<1:41:05, 16.35s/batch, batch_loss=11.7, batch_index=621, batch_size=256]

Epoch 1/10:  63%|███████▌    | 621/991 [2:45:24<1:39:54, 16.20s/batch, batch_loss=11.7, batch_index=621, batch_size=256]

Epoch 1/10:  63%|█████▋   | 621/991 [2:45:40<1:39:54, 16.20s/batch, batch_loss=5.49e+3, batch_index=622, batch_size=256]

Epoch 1/10:  63%|█████▋   | 622/991 [2:45:40<1:38:57, 16.09s/batch, batch_loss=5.49e+3, batch_index=622, batch_size=256]

Epoch 1/10:  63%|███████▌    | 622/991 [2:45:56<1:38:57, 16.09s/batch, batch_loss=20.4, batch_index=623, batch_size=256]

Epoch 1/10:  63%|███████▌    | 623/991 [2:45:56<1:38:07, 16.00s/batch, batch_loss=20.4, batch_index=623, batch_size=256]

Epoch 1/10:  63%|██████▎   | 623/991 [2:46:12<1:38:07, 16.00s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 1/10:  63%|██████▎   | 624/991 [2:46:12<1:37:48, 15.99s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 1/10:  63%|███████▌    | 624/991 [2:46:28<1:37:48, 15.99s/batch, batch_loss=10.9, batch_index=625, batch_size=256]

Epoch 1/10:  63%|███████▌    | 625/991 [2:46:28<1:37:13, 15.94s/batch, batch_loss=10.9, batch_index=625, batch_size=256]

Epoch 1/10:  63%|████████▏    | 625/991 [2:46:44<1:37:13, 15.94s/batch, batch_loss=6.4, batch_index=626, batch_size=256]

Epoch 1/10:  63%|████████▏    | 626/991 [2:46:44<1:36:38, 15.89s/batch, batch_loss=6.4, batch_index=626, batch_size=256]

Epoch 1/10:  63%|█████▋   | 626/991 [2:46:59<1:36:38, 15.89s/batch, batch_loss=4.31e+3, batch_index=627, batch_size=256]

Epoch 1/10:  63%|█████▋   | 627/991 [2:46:59<1:34:53, 15.64s/batch, batch_loss=4.31e+3, batch_index=627, batch_size=256]

Epoch 1/10:  63%|█████▋   | 627/991 [2:47:16<1:34:53, 15.64s/batch, batch_loss=1.05e+3, batch_index=628, batch_size=256]

Epoch 1/10:  63%|█████▋   | 628/991 [2:47:16<1:37:19, 16.09s/batch, batch_loss=1.05e+3, batch_index=628, batch_size=256]

Epoch 1/10:  63%|███████▌    | 628/991 [2:47:30<1:37:19, 16.09s/batch, batch_loss=13.1, batch_index=629, batch_size=256]

Epoch 1/10:  63%|███████▌    | 629/991 [2:47:30<1:34:26, 15.65s/batch, batch_loss=13.1, batch_index=629, batch_size=256]

Epoch 1/10:  63%|████████▉     | 629/991 [2:47:46<1:34:26, 15.65s/batch, batch_loss=18, batch_index=630, batch_size=256]

Epoch 1/10:  64%|████████▉     | 630/991 [2:47:46<1:34:38, 15.73s/batch, batch_loss=18, batch_index=630, batch_size=256]

Epoch 1/10:  64%|███████▋    | 630/991 [2:48:02<1:34:38, 15.73s/batch, batch_loss=14.7, batch_index=631, batch_size=256]

Epoch 1/10:  64%|███████▋    | 631/991 [2:48:02<1:33:51, 15.64s/batch, batch_loss=14.7, batch_index=631, batch_size=256]

Epoch 1/10:  64%|█████████▌     | 631/991 [2:48:17<1:33:51, 15.64s/batch, batch_loss=4, batch_index=632, batch_size=256]

Epoch 1/10:  64%|█████████▌     | 632/991 [2:48:17<1:32:35, 15.47s/batch, batch_loss=4, batch_index=632, batch_size=256]

Epoch 1/10:  64%|███████▋    | 632/991 [2:48:33<1:32:35, 15.47s/batch, batch_loss=21.8, batch_index=633, batch_size=256]

Epoch 1/10:  64%|███████▋    | 633/991 [2:48:33<1:33:33, 15.68s/batch, batch_loss=21.8, batch_index=633, batch_size=256]

Epoch 1/10:  64%|███████▋    | 633/991 [2:48:48<1:33:33, 15.68s/batch, batch_loss=23.7, batch_index=634, batch_size=256]

Epoch 1/10:  64%|███████▋    | 634/991 [2:48:48<1:32:17, 15.51s/batch, batch_loss=23.7, batch_index=634, batch_size=256]

Epoch 1/10:  64%|███████▋    | 634/991 [2:49:06<1:32:17, 15.51s/batch, batch_loss=24.3, batch_index=635, batch_size=256]

Epoch 1/10:  64%|███████▋    | 635/991 [2:49:06<1:35:50, 16.15s/batch, batch_loss=24.3, batch_index=635, batch_size=256]

Epoch 1/10:  64%|███████▋    | 635/991 [2:49:21<1:35:50, 16.15s/batch, batch_loss=17.3, batch_index=636, batch_size=256]

Epoch 1/10:  64%|███████▋    | 636/991 [2:49:21<1:34:07, 15.91s/batch, batch_loss=17.3, batch_index=636, batch_size=256]

Epoch 1/10:  64%|███████▋    | 636/991 [2:49:37<1:34:07, 15.91s/batch, batch_loss=18.7, batch_index=637, batch_size=256]

Epoch 1/10:  64%|███████▋    | 637/991 [2:49:37<1:33:59, 15.93s/batch, batch_loss=18.7, batch_index=637, batch_size=256]

Epoch 1/10:  64%|███████▋    | 637/991 [2:49:53<1:33:59, 15.93s/batch, batch_loss=15.8, batch_index=638, batch_size=256]

Epoch 1/10:  64%|███████▋    | 638/991 [2:49:53<1:33:32, 15.90s/batch, batch_loss=15.8, batch_index=638, batch_size=256]

Epoch 1/10:  64%|█████████     | 638/991 [2:50:09<1:33:32, 15.90s/batch, batch_loss=11, batch_index=639, batch_size=256]

Epoch 1/10:  64%|█████████     | 639/991 [2:50:09<1:33:45, 15.98s/batch, batch_loss=11, batch_index=639, batch_size=256]

Epoch 1/10:  64%|████████▍    | 639/991 [2:50:25<1:33:45, 15.98s/batch, batch_loss=672, batch_index=640, batch_size=256]

Epoch 1/10:  65%|████████▍    | 640/991 [2:50:25<1:34:06, 16.09s/batch, batch_loss=672, batch_index=640, batch_size=256]

Epoch 1/10:  65%|███████▋    | 640/991 [2:50:42<1:34:06, 16.09s/batch, batch_loss=14.9, batch_index=641, batch_size=256]

Epoch 1/10:  65%|███████▊    | 641/991 [2:50:42<1:33:55, 16.10s/batch, batch_loss=14.9, batch_index=641, batch_size=256]

Epoch 1/10:  65%|███████▊    | 641/991 [2:50:57<1:33:55, 16.10s/batch, batch_loss=8.74, batch_index=642, batch_size=256]

Epoch 1/10:  65%|███████▊    | 642/991 [2:50:57<1:31:58, 15.81s/batch, batch_loss=8.74, batch_index=642, batch_size=256]

Epoch 1/10:  65%|█████▊   | 642/991 [2:51:15<1:31:58, 15.81s/batch, batch_loss=2.12e+4, batch_index=643, batch_size=256]

Epoch 1/10:  65%|█████▊   | 643/991 [2:51:15<1:35:30, 16.47s/batch, batch_loss=2.12e+4, batch_index=643, batch_size=256]

Epoch 1/10:  65%|█████▊   | 643/991 [2:51:30<1:35:30, 16.47s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 1/10:  65%|█████▊   | 644/991 [2:51:30<1:33:42, 16.20s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 1/10:  65%|█████▊   | 644/991 [2:51:45<1:33:42, 16.20s/batch, batch_loss=2.19e+3, batch_index=645, batch_size=256]

Epoch 1/10:  65%|█████▊   | 645/991 [2:51:45<1:31:09, 15.81s/batch, batch_loss=2.19e+3, batch_index=645, batch_size=256]

Epoch 1/10:  65%|███████▊    | 645/991 [2:52:01<1:31:09, 15.81s/batch, batch_loss=10.7, batch_index=646, batch_size=256]

Epoch 1/10:  65%|███████▊    | 646/991 [2:52:01<1:30:16, 15.70s/batch, batch_loss=10.7, batch_index=646, batch_size=256]

Epoch 1/10:  65%|███████▊    | 646/991 [2:52:16<1:30:16, 15.70s/batch, batch_loss=12.9, batch_index=647, batch_size=256]

Epoch 1/10:  65%|███████▊    | 647/991 [2:52:16<1:29:57, 15.69s/batch, batch_loss=12.9, batch_index=647, batch_size=256]

Epoch 1/10:  65%|███████▊    | 647/991 [2:52:31<1:29:57, 15.69s/batch, batch_loss=15.4, batch_index=648, batch_size=256]

Epoch 1/10:  65%|███████▊    | 648/991 [2:52:31<1:28:03, 15.40s/batch, batch_loss=15.4, batch_index=648, batch_size=256]

Epoch 1/10:  65%|███████▊    | 648/991 [2:52:46<1:28:03, 15.40s/batch, batch_loss=15.5, batch_index=649, batch_size=256]

Epoch 1/10:  65%|███████▊    | 649/991 [2:52:46<1:27:23, 15.33s/batch, batch_loss=15.5, batch_index=649, batch_size=256]

Epoch 1/10:  65%|█████▉   | 649/991 [2:53:01<1:27:23, 15.33s/batch, batch_loss=1.35e+4, batch_index=650, batch_size=256]

Epoch 1/10:  66%|█████▉   | 650/991 [2:53:01<1:26:03, 15.14s/batch, batch_loss=1.35e+4, batch_index=650, batch_size=256]

Epoch 1/10:  66%|███████▊    | 650/991 [2:53:15<1:26:03, 15.14s/batch, batch_loss=10.1, batch_index=651, batch_size=256]

Epoch 1/10:  66%|███████▉    | 651/991 [2:53:15<1:24:36, 14.93s/batch, batch_loss=10.1, batch_index=651, batch_size=256]

Epoch 1/10:  66%|███████▉    | 651/991 [2:53:31<1:24:36, 14.93s/batch, batch_loss=13.5, batch_index=652, batch_size=256]

Epoch 1/10:  66%|███████▉    | 652/991 [2:53:31<1:24:53, 15.03s/batch, batch_loss=13.5, batch_index=652, batch_size=256]

Epoch 1/10:  66%|█████████▏    | 652/991 [2:53:46<1:24:53, 15.03s/batch, batch_loss=18, batch_index=653, batch_size=256]

Epoch 1/10:  66%|█████████▏    | 653/991 [2:53:46<1:24:37, 15.02s/batch, batch_loss=18, batch_index=653, batch_size=256]

Epoch 1/10:  66%|███████▉    | 653/991 [2:54:01<1:24:37, 15.02s/batch, batch_loss=19.6, batch_index=654, batch_size=256]

Epoch 1/10:  66%|███████▉    | 654/991 [2:54:01<1:24:42, 15.08s/batch, batch_loss=19.6, batch_index=654, batch_size=256]

Epoch 1/10:  66%|█████▉   | 654/991 [2:54:16<1:24:42, 15.08s/batch, batch_loss=3.84e+3, batch_index=655, batch_size=256]

Epoch 1/10:  66%|█████▉   | 655/991 [2:54:16<1:24:56, 15.17s/batch, batch_loss=3.84e+3, batch_index=655, batch_size=256]

Epoch 1/10:  66%|█████▉   | 655/991 [2:54:32<1:24:56, 15.17s/batch, batch_loss=5.18e+3, batch_index=656, batch_size=256]

Epoch 1/10:  66%|█████▉   | 656/991 [2:54:32<1:25:12, 15.26s/batch, batch_loss=5.18e+3, batch_index=656, batch_size=256]

Epoch 1/10:  66%|█████▉   | 656/991 [2:54:49<1:25:12, 15.26s/batch, batch_loss=4.22e+3, batch_index=657, batch_size=256]

Epoch 1/10:  66%|█████▉   | 657/991 [2:54:49<1:28:42, 15.94s/batch, batch_loss=4.22e+3, batch_index=657, batch_size=256]

Epoch 1/10:  66%|██████▋   | 657/991 [2:55:04<1:28:42, 15.94s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 1/10:  66%|██████▋   | 658/991 [2:55:04<1:27:14, 15.72s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 1/10:  66%|███████▉    | 658/991 [2:55:20<1:27:14, 15.72s/batch, batch_loss=4.94, batch_index=659, batch_size=256]

Epoch 1/10:  66%|███████▉    | 659/991 [2:55:20<1:26:57, 15.72s/batch, batch_loss=4.94, batch_index=659, batch_size=256]

Epoch 1/10:  66%|███████▉    | 659/991 [2:55:36<1:26:57, 15.72s/batch, batch_loss=5.16, batch_index=660, batch_size=256]

Epoch 1/10:  67%|███████▉    | 660/991 [2:55:36<1:27:17, 15.82s/batch, batch_loss=5.16, batch_index=660, batch_size=256]

Epoch 1/10:  67%|███████▉    | 660/991 [2:55:51<1:27:17, 15.82s/batch, batch_loss=11.8, batch_index=661, batch_size=256]

Epoch 1/10:  67%|████████    | 661/991 [2:55:51<1:25:09, 15.48s/batch, batch_loss=11.8, batch_index=661, batch_size=256]

Epoch 1/10:  67%|████████    | 661/991 [2:56:05<1:25:09, 15.48s/batch, batch_loss=14.1, batch_index=662, batch_size=256]

Epoch 1/10:  67%|████████    | 662/991 [2:56:05<1:22:53, 15.12s/batch, batch_loss=14.1, batch_index=662, batch_size=256]

Epoch 1/10:  67%|████████    | 662/991 [2:56:20<1:22:53, 15.12s/batch, batch_loss=16.5, batch_index=663, batch_size=256]

Epoch 1/10:  67%|████████    | 663/991 [2:56:20<1:22:20, 15.06s/batch, batch_loss=16.5, batch_index=663, batch_size=256]

Epoch 1/10:  67%|██████   | 663/991 [2:56:37<1:22:20, 15.06s/batch, batch_loss=3.04e+3, batch_index=664, batch_size=256]

Epoch 1/10:  67%|██████   | 664/991 [2:56:37<1:24:40, 15.54s/batch, batch_loss=3.04e+3, batch_index=664, batch_size=256]

Epoch 1/10:  67%|█████████▍    | 664/991 [2:56:54<1:24:40, 15.54s/batch, batch_loss=13, batch_index=665, batch_size=256]

Epoch 1/10:  67%|█████████▍    | 665/991 [2:56:54<1:26:54, 16.00s/batch, batch_loss=13, batch_index=665, batch_size=256]

Epoch 1/10:  67%|██████   | 665/991 [2:57:10<1:26:54, 16.00s/batch, batch_loss=3.06e+3, batch_index=666, batch_size=256]

Epoch 1/10:  67%|██████   | 666/991 [2:57:10<1:27:09, 16.09s/batch, batch_loss=3.06e+3, batch_index=666, batch_size=256]

Epoch 1/10:  67%|████████    | 666/991 [2:57:26<1:27:09, 16.09s/batch, batch_loss=18.2, batch_index=667, batch_size=256]

Epoch 1/10:  67%|████████    | 667/991 [2:57:26<1:26:02, 15.93s/batch, batch_loss=18.2, batch_index=667, batch_size=256]

Epoch 1/10:  67%|████████▋    | 667/991 [2:57:42<1:26:02, 15.93s/batch, batch_loss=375, batch_index=668, batch_size=256]

Epoch 1/10:  67%|████████▊    | 668/991 [2:57:42<1:26:04, 15.99s/batch, batch_loss=375, batch_index=668, batch_size=256]

Epoch 1/10:  67%|██████   | 668/991 [2:57:57<1:26:04, 15.99s/batch, batch_loss=2.97e+3, batch_index=669, batch_size=256]

Epoch 1/10:  68%|██████   | 669/991 [2:57:57<1:25:13, 15.88s/batch, batch_loss=2.97e+3, batch_index=669, batch_size=256]

Epoch 1/10:  68%|██████   | 669/991 [2:58:14<1:25:13, 15.88s/batch, batch_loss=1.01e+3, batch_index=670, batch_size=256]

Epoch 1/10:  68%|██████   | 670/991 [2:58:14<1:26:20, 16.14s/batch, batch_loss=1.01e+3, batch_index=670, batch_size=256]

Epoch 1/10:  68%|█████████▍    | 670/991 [2:58:30<1:26:20, 16.14s/batch, batch_loss=10, batch_index=671, batch_size=256]

Epoch 1/10:  68%|█████████▍    | 671/991 [2:58:30<1:25:35, 16.05s/batch, batch_loss=10, batch_index=671, batch_size=256]

Epoch 1/10:  68%|████████▏   | 671/991 [2:58:45<1:25:35, 16.05s/batch, batch_loss=14.2, batch_index=672, batch_size=256]

Epoch 1/10:  68%|████████▏   | 672/991 [2:58:45<1:23:27, 15.70s/batch, batch_loss=14.2, batch_index=672, batch_size=256]

Epoch 1/10:  68%|████████▏   | 672/991 [2:58:59<1:23:27, 15.70s/batch, batch_loss=17.5, batch_index=673, batch_size=256]

Epoch 1/10:  68%|████████▏   | 673/991 [2:58:59<1:21:01, 15.29s/batch, batch_loss=17.5, batch_index=673, batch_size=256]

Epoch 1/10:  68%|████████▏   | 673/991 [2:59:17<1:21:01, 15.29s/batch, batch_loss=14.7, batch_index=674, batch_size=256]

Epoch 1/10:  68%|████████▏   | 674/991 [2:59:17<1:25:34, 16.20s/batch, batch_loss=14.7, batch_index=674, batch_size=256]

Epoch 1/10:  68%|████████▏   | 674/991 [2:59:33<1:25:34, 16.20s/batch, batch_loss=4.53, batch_index=675, batch_size=256]

Epoch 1/10:  68%|████████▏   | 675/991 [2:59:33<1:24:23, 16.02s/batch, batch_loss=4.53, batch_index=675, batch_size=256]

Epoch 1/10:  68%|████████▏   | 675/991 [2:59:49<1:24:23, 16.02s/batch, batch_loss=10.4, batch_index=676, batch_size=256]

Epoch 1/10:  68%|████████▏   | 676/991 [2:59:49<1:24:19, 16.06s/batch, batch_loss=10.4, batch_index=676, batch_size=256]

Epoch 1/10:  68%|█████████▌    | 676/991 [3:00:05<1:24:19, 16.06s/batch, batch_loss=17, batch_index=677, batch_size=256]

Epoch 1/10:  68%|█████████▌    | 677/991 [3:00:05<1:22:50, 15.83s/batch, batch_loss=17, batch_index=677, batch_size=256]

Epoch 1/10:  68%|████████▏   | 677/991 [3:00:20<1:22:50, 15.83s/batch, batch_loss=6.87, batch_index=678, batch_size=256]

Epoch 1/10:  68%|████████▏   | 678/991 [3:00:20<1:21:44, 15.67s/batch, batch_loss=6.87, batch_index=678, batch_size=256]

Epoch 1/10:  68%|██████▏  | 678/991 [3:00:36<1:21:44, 15.67s/batch, batch_loss=3.82e+3, batch_index=679, batch_size=256]

Epoch 1/10:  69%|██████▏  | 679/991 [3:00:36<1:21:36, 15.69s/batch, batch_loss=3.82e+3, batch_index=679, batch_size=256]

Epoch 1/10:  69%|██████▏  | 679/991 [3:00:50<1:21:36, 15.69s/batch, batch_loss=6.11e+3, batch_index=680, batch_size=256]

Epoch 1/10:  69%|██████▏  | 680/991 [3:00:50<1:19:38, 15.36s/batch, batch_loss=6.11e+3, batch_index=680, batch_size=256]

Epoch 1/10:  69%|██████▏  | 680/991 [3:01:09<1:19:38, 15.36s/batch, batch_loss=7.23e+4, batch_index=681, batch_size=256]

Epoch 1/10:  69%|██████▏  | 681/991 [3:01:09<1:24:04, 16.27s/batch, batch_loss=7.23e+4, batch_index=681, batch_size=256]

Epoch 1/10:  69%|████████▏   | 681/991 [3:01:24<1:24:04, 16.27s/batch, batch_loss=11.1, batch_index=682, batch_size=256]

Epoch 1/10:  69%|████████▎   | 682/991 [3:01:24<1:22:45, 16.07s/batch, batch_loss=11.1, batch_index=682, batch_size=256]

Epoch 1/10:  69%|████████▉    | 682/991 [3:01:40<1:22:45, 16.07s/batch, batch_loss=378, batch_index=683, batch_size=256]

Epoch 1/10:  69%|████████▉    | 683/991 [3:01:40<1:22:41, 16.11s/batch, batch_loss=378, batch_index=683, batch_size=256]

Epoch 1/10:  69%|████████▎   | 683/991 [3:01:56<1:22:41, 16.11s/batch, batch_loss=4.47, batch_index=684, batch_size=256]

Epoch 1/10:  69%|████████▎   | 684/991 [3:01:56<1:21:31, 15.93s/batch, batch_loss=4.47, batch_index=684, batch_size=256]

Epoch 1/10:  69%|████████▎   | 684/991 [3:02:11<1:21:31, 15.93s/batch, batch_loss=14.4, batch_index=685, batch_size=256]

Epoch 1/10:  69%|████████▎   | 685/991 [3:02:11<1:20:36, 15.81s/batch, batch_loss=14.4, batch_index=685, batch_size=256]

Epoch 1/10:  69%|████████▎   | 685/991 [3:02:27<1:20:36, 15.81s/batch, batch_loss=13.8, batch_index=686, batch_size=256]

Epoch 1/10:  69%|████████▎   | 686/991 [3:02:27<1:20:36, 15.86s/batch, batch_loss=13.8, batch_index=686, batch_size=256]

Epoch 1/10:  69%|████████▉    | 686/991 [3:02:47<1:20:36, 15.86s/batch, batch_loss=534, batch_index=687, batch_size=256]

Epoch 1/10:  69%|█████████    | 687/991 [3:02:47<1:26:26, 17.06s/batch, batch_loss=534, batch_index=687, batch_size=256]

Epoch 1/10:  69%|████████▎   | 687/991 [3:03:04<1:26:26, 17.06s/batch, batch_loss=5.21, batch_index=688, batch_size=256]

Epoch 1/10:  69%|████████▎   | 688/991 [3:03:04<1:26:05, 17.05s/batch, batch_loss=5.21, batch_index=688, batch_size=256]

Epoch 1/10:  69%|████████▎   | 688/991 [3:03:20<1:26:05, 17.05s/batch, batch_loss=5.86, batch_index=689, batch_size=256]

Epoch 1/10:  70%|████████▎   | 689/991 [3:03:20<1:23:48, 16.65s/batch, batch_loss=5.86, batch_index=689, batch_size=256]

Epoch 1/10:  70%|█████████▋    | 689/991 [3:03:36<1:23:48, 16.65s/batch, batch_loss=12, batch_index=690, batch_size=256]

Epoch 1/10:  70%|█████████▋    | 690/991 [3:03:36<1:22:50, 16.51s/batch, batch_loss=12, batch_index=690, batch_size=256]

Epoch 1/10:  70%|█████████▋    | 690/991 [3:03:52<1:22:50, 16.51s/batch, batch_loss=15, batch_index=691, batch_size=256]

Epoch 1/10:  70%|█████████▊    | 691/991 [3:03:52<1:21:23, 16.28s/batch, batch_loss=15, batch_index=691, batch_size=256]

Epoch 1/10:  70%|████████▎   | 691/991 [3:04:08<1:21:23, 16.28s/batch, batch_loss=6.95, batch_index=692, batch_size=256]

Epoch 1/10:  70%|████████▍   | 692/991 [3:04:08<1:21:21, 16.33s/batch, batch_loss=6.95, batch_index=692, batch_size=256]

Epoch 1/10:  70%|██████▎  | 692/991 [3:04:24<1:21:21, 16.33s/batch, batch_loss=4.66e+3, batch_index=693, batch_size=256]

Epoch 1/10:  70%|██████▎  | 693/991 [3:04:24<1:20:23, 16.19s/batch, batch_loss=4.66e+3, batch_index=693, batch_size=256]

Epoch 1/10:  70%|█████████    | 693/991 [3:04:40<1:20:23, 16.19s/batch, batch_loss=450, batch_index=694, batch_size=256]

Epoch 1/10:  70%|█████████    | 694/991 [3:04:40<1:19:07, 15.98s/batch, batch_loss=450, batch_index=694, batch_size=256]

Epoch 1/10:  70%|█████████    | 694/991 [3:04:56<1:19:07, 15.98s/batch, batch_loss=778, batch_index=695, batch_size=256]

Epoch 1/10:  70%|█████████    | 695/991 [3:04:56<1:19:52, 16.19s/batch, batch_loss=778, batch_index=695, batch_size=256]

Epoch 1/10:  70%|████████▍   | 695/991 [3:05:12<1:19:52, 16.19s/batch, batch_loss=9.59, batch_index=696, batch_size=256]

Epoch 1/10:  70%|████████▍   | 696/991 [3:05:12<1:18:51, 16.04s/batch, batch_loss=9.59, batch_index=696, batch_size=256]

Epoch 1/10:  70%|███████   | 696/991 [3:05:28<1:18:51, 16.04s/batch, batch_loss=6.8e+3, batch_index=697, batch_size=256]

Epoch 1/10:  70%|███████   | 697/991 [3:05:28<1:19:01, 16.13s/batch, batch_loss=6.8e+3, batch_index=697, batch_size=256]

Epoch 1/10:  70%|████████▍   | 697/991 [3:05:46<1:19:01, 16.13s/batch, batch_loss=11.9, batch_index=698, batch_size=256]

Epoch 1/10:  70%|████████▍   | 698/991 [3:05:46<1:20:23, 16.46s/batch, batch_loss=11.9, batch_index=698, batch_size=256]

Epoch 1/10:  70%|████████▍   | 698/991 [3:06:02<1:20:23, 16.46s/batch, batch_loss=6.84, batch_index=699, batch_size=256]

Epoch 1/10:  71%|████████▍   | 699/991 [3:06:02<1:20:38, 16.57s/batch, batch_loss=6.84, batch_index=699, batch_size=256]

Epoch 1/10:  71%|████████▍   | 699/991 [3:06:19<1:20:38, 16.57s/batch, batch_loss=9.19, batch_index=700, batch_size=256]

Epoch 1/10:  71%|████████▍   | 700/991 [3:06:19<1:19:49, 16.46s/batch, batch_loss=9.19, batch_index=700, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 700/991 [3:06:33<1:19:49, 16.46s/batch, batch_loss=212, batch_index=701, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 701/991 [3:06:33<1:17:07, 15.96s/batch, batch_loss=212, batch_index=701, batch_size=256]

Epoch 1/10:  71%|████████▍   | 701/991 [3:06:48<1:17:07, 15.96s/batch, batch_loss=19.4, batch_index=702, batch_size=256]

Epoch 1/10:  71%|████████▌   | 702/991 [3:06:48<1:15:20, 15.64s/batch, batch_loss=19.4, batch_index=702, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 702/991 [3:07:06<1:15:20, 15.64s/batch, batch_loss=273, batch_index=703, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 703/991 [3:07:06<1:17:47, 16.21s/batch, batch_loss=273, batch_index=703, batch_size=256]

Epoch 1/10:  71%|████████▌   | 703/991 [3:07:21<1:17:47, 16.21s/batch, batch_loss=7.76, batch_index=704, batch_size=256]

Epoch 1/10:  71%|████████▌   | 704/991 [3:07:21<1:15:21, 15.75s/batch, batch_loss=7.76, batch_index=704, batch_size=256]

Epoch 1/10:  71%|████████▌   | 704/991 [3:07:37<1:15:21, 15.75s/batch, batch_loss=9.99, batch_index=705, batch_size=256]

Epoch 1/10:  71%|████████▌   | 705/991 [3:07:37<1:15:28, 15.83s/batch, batch_loss=9.99, batch_index=705, batch_size=256]

Epoch 1/10:  71%|████████▌   | 705/991 [3:07:52<1:15:28, 15.83s/batch, batch_loss=16.8, batch_index=706, batch_size=256]

Epoch 1/10:  71%|████████▌   | 706/991 [3:07:52<1:14:56, 15.78s/batch, batch_loss=16.8, batch_index=706, batch_size=256]

Epoch 1/10:  71%|████████▌   | 706/991 [3:08:07<1:14:56, 15.78s/batch, batch_loss=17.9, batch_index=707, batch_size=256]

Epoch 1/10:  71%|████████▌   | 707/991 [3:08:07<1:13:46, 15.59s/batch, batch_loss=17.9, batch_index=707, batch_size=256]

Epoch 1/10:  71%|████████▌   | 707/991 [3:08:23<1:13:46, 15.59s/batch, batch_loss=9.29, batch_index=708, batch_size=256]

Epoch 1/10:  71%|████████▌   | 708/991 [3:08:23<1:13:56, 15.68s/batch, batch_loss=9.29, batch_index=708, batch_size=256]

Epoch 1/10:  71%|████████▌   | 708/991 [3:08:39<1:13:56, 15.68s/batch, batch_loss=6.48, batch_index=709, batch_size=256]

Epoch 1/10:  72%|████████▌   | 709/991 [3:08:39<1:13:48, 15.70s/batch, batch_loss=6.48, batch_index=709, batch_size=256]

Epoch 1/10:  72%|████████▌   | 709/991 [3:08:54<1:13:48, 15.70s/batch, batch_loss=28.7, batch_index=710, batch_size=256]

Epoch 1/10:  72%|████████▌   | 710/991 [3:08:54<1:13:05, 15.61s/batch, batch_loss=28.7, batch_index=710, batch_size=256]

Epoch 1/10:  72%|████████▌   | 710/991 [3:09:10<1:13:05, 15.61s/batch, batch_loss=97.9, batch_index=711, batch_size=256]

Epoch 1/10:  72%|████████▌   | 711/991 [3:09:10<1:13:23, 15.73s/batch, batch_loss=97.9, batch_index=711, batch_size=256]

Epoch 1/10:  72%|████████▌   | 711/991 [3:09:26<1:13:23, 15.73s/batch, batch_loss=11.4, batch_index=712, batch_size=256]

Epoch 1/10:  72%|████████▌   | 712/991 [3:09:26<1:12:47, 15.65s/batch, batch_loss=11.4, batch_index=712, batch_size=256]

Epoch 1/10:  72%|████████▌   | 712/991 [3:09:42<1:12:47, 15.65s/batch, batch_loss=75.8, batch_index=713, batch_size=256]

Epoch 1/10:  72%|████████▋   | 713/991 [3:09:42<1:13:43, 15.91s/batch, batch_loss=75.8, batch_index=713, batch_size=256]

Epoch 1/10:  72%|████████▋   | 713/991 [3:09:57<1:13:43, 15.91s/batch, batch_loss=23.5, batch_index=714, batch_size=256]

Epoch 1/10:  72%|████████▋   | 714/991 [3:09:57<1:12:14, 15.65s/batch, batch_loss=23.5, batch_index=714, batch_size=256]

Epoch 1/10:  72%|████████▋   | 714/991 [3:10:13<1:12:14, 15.65s/batch, batch_loss=16.8, batch_index=715, batch_size=256]

Epoch 1/10:  72%|████████▋   | 715/991 [3:10:13<1:11:25, 15.53s/batch, batch_loss=16.8, batch_index=715, batch_size=256]

Epoch 1/10:  72%|████████▋   | 715/991 [3:10:29<1:11:25, 15.53s/batch, batch_loss=14.4, batch_index=716, batch_size=256]

Epoch 1/10:  72%|████████▋   | 716/991 [3:10:29<1:11:42, 15.64s/batch, batch_loss=14.4, batch_index=716, batch_size=256]

Epoch 1/10:  72%|██████████    | 716/991 [3:10:45<1:11:42, 15.64s/batch, batch_loss=18, batch_index=717, batch_size=256]

Epoch 1/10:  72%|██████████▏   | 717/991 [3:10:45<1:12:46, 15.94s/batch, batch_loss=18, batch_index=717, batch_size=256]

Epoch 1/10:  72%|████████▋   | 717/991 [3:11:05<1:12:46, 15.94s/batch, batch_loss=18.8, batch_index=718, batch_size=256]

Epoch 1/10:  72%|████████▋   | 718/991 [3:11:05<1:17:07, 16.95s/batch, batch_loss=18.8, batch_index=718, batch_size=256]

Epoch 1/10:  72%|████████▋   | 718/991 [3:11:21<1:17:07, 16.95s/batch, batch_loss=10.4, batch_index=719, batch_size=256]

Epoch 1/10:  73%|████████▋   | 719/991 [3:11:21<1:16:44, 16.93s/batch, batch_loss=10.4, batch_index=719, batch_size=256]

Epoch 1/10:  73%|████████▋   | 719/991 [3:11:38<1:16:44, 16.93s/batch, batch_loss=10.4, batch_index=720, batch_size=256]

Epoch 1/10:  73%|████████▋   | 720/991 [3:11:38<1:15:38, 16.75s/batch, batch_loss=10.4, batch_index=720, batch_size=256]

Epoch 1/10:  73%|████████▋   | 720/991 [3:11:53<1:15:38, 16.75s/batch, batch_loss=15.5, batch_index=721, batch_size=256]

Epoch 1/10:  73%|████████▋   | 721/991 [3:11:53<1:12:57, 16.21s/batch, batch_loss=15.5, batch_index=721, batch_size=256]

Epoch 1/10:  73%|████████▋   | 721/991 [3:12:09<1:12:57, 16.21s/batch, batch_loss=19.4, batch_index=722, batch_size=256]

Epoch 1/10:  73%|████████▋   | 722/991 [3:12:09<1:12:51, 16.25s/batch, batch_loss=19.4, batch_index=722, batch_size=256]

Epoch 1/10:  73%|██████▌  | 722/991 [3:12:26<1:12:51, 16.25s/batch, batch_loss=7.24e+3, batch_index=723, batch_size=256]

Epoch 1/10:  73%|██████▌  | 723/991 [3:12:26<1:12:56, 16.33s/batch, batch_loss=7.24e+3, batch_index=723, batch_size=256]

Epoch 1/10:  73%|████████▊   | 723/991 [3:12:41<1:12:56, 16.33s/batch, batch_loss=4.04, batch_index=724, batch_size=256]

Epoch 1/10:  73%|████████▊   | 724/991 [3:12:41<1:11:02, 15.96s/batch, batch_loss=4.04, batch_index=724, batch_size=256]

Epoch 1/10:  73%|████████▊   | 724/991 [3:12:59<1:11:02, 15.96s/batch, batch_loss=12.6, batch_index=725, batch_size=256]

Epoch 1/10:  73%|████████▊   | 725/991 [3:12:59<1:14:33, 16.82s/batch, batch_loss=12.6, batch_index=725, batch_size=256]

Epoch 1/10:  73%|████████▊   | 725/991 [3:13:15<1:14:33, 16.82s/batch, batch_loss=10.7, batch_index=726, batch_size=256]

Epoch 1/10:  73%|████████▊   | 726/991 [3:13:15<1:12:31, 16.42s/batch, batch_loss=10.7, batch_index=726, batch_size=256]

Epoch 1/10:  73%|██████▌  | 726/991 [3:13:31<1:12:31, 16.42s/batch, batch_loss=1.29e+4, batch_index=727, batch_size=256]

Epoch 1/10:  73%|██████▌  | 727/991 [3:13:31<1:11:59, 16.36s/batch, batch_loss=1.29e+4, batch_index=727, batch_size=256]

Epoch 1/10:  73%|████████▊   | 727/991 [3:13:46<1:11:59, 16.36s/batch, batch_loss=11.1, batch_index=728, batch_size=256]

Epoch 1/10:  73%|████████▊   | 728/991 [3:13:46<1:10:09, 16.00s/batch, batch_loss=11.1, batch_index=728, batch_size=256]

Epoch 1/10:  73%|█████████▌   | 728/991 [3:14:02<1:10:09, 16.00s/batch, batch_loss=128, batch_index=729, batch_size=256]

Epoch 1/10:  74%|█████████▌   | 729/991 [3:14:02<1:09:10, 15.84s/batch, batch_loss=128, batch_index=729, batch_size=256]

Epoch 1/10:  74%|████████▊   | 729/991 [3:14:17<1:09:10, 15.84s/batch, batch_loss=10.4, batch_index=730, batch_size=256]

Epoch 1/10:  74%|████████▊   | 730/991 [3:14:17<1:08:41, 15.79s/batch, batch_loss=10.4, batch_index=730, batch_size=256]

Epoch 1/10:  74%|█████████▌   | 730/991 [3:14:32<1:08:41, 15.79s/batch, batch_loss=107, batch_index=731, batch_size=256]

Epoch 1/10:  74%|█████████▌   | 731/991 [3:14:32<1:07:16, 15.53s/batch, batch_loss=107, batch_index=731, batch_size=256]

Epoch 1/10:  74%|██████▋  | 731/991 [3:14:48<1:07:16, 15.53s/batch, batch_loss=13914.0, batch_index=732, batch_size=256]

Epoch 1/10:  74%|██████▋  | 732/991 [3:14:48<1:06:41, 15.45s/batch, batch_loss=13914.0, batch_index=732, batch_size=256]

Epoch 1/10:  74%|████████▊   | 732/991 [3:15:03<1:06:41, 15.45s/batch, batch_loss=12.6, batch_index=733, batch_size=256]

Epoch 1/10:  74%|████████▉   | 733/991 [3:15:03<1:06:49, 15.54s/batch, batch_loss=12.6, batch_index=733, batch_size=256]

Epoch 1/10:  74%|███████▍  | 733/991 [3:15:21<1:06:49, 15.54s/batch, batch_loss=6.8e+3, batch_index=734, batch_size=256]

Epoch 1/10:  74%|███████▍  | 734/991 [3:15:21<1:08:45, 16.05s/batch, batch_loss=6.8e+3, batch_index=734, batch_size=256]

Epoch 1/10:  74%|████████▉   | 734/991 [3:15:36<1:08:45, 16.05s/batch, batch_loss=11.4, batch_index=735, batch_size=256]

Epoch 1/10:  74%|████████▉   | 735/991 [3:15:36<1:08:03, 15.95s/batch, batch_loss=11.4, batch_index=735, batch_size=256]

Epoch 1/10:  74%|████████▉   | 735/991 [3:15:52<1:08:03, 15.95s/batch, batch_loss=13.3, batch_index=736, batch_size=256]

Epoch 1/10:  74%|████████▉   | 736/991 [3:15:52<1:07:30, 15.89s/batch, batch_loss=13.3, batch_index=736, batch_size=256]

Epoch 1/10:  74%|████████▉   | 736/991 [3:16:08<1:07:30, 15.89s/batch, batch_loss=9.52, batch_index=737, batch_size=256]

Epoch 1/10:  74%|████████▉   | 737/991 [3:16:08<1:07:23, 15.92s/batch, batch_loss=9.52, batch_index=737, batch_size=256]

Epoch 1/10:  74%|██████▋  | 737/991 [3:16:23<1:07:23, 15.92s/batch, batch_loss=1.47e+3, batch_index=738, batch_size=256]

Epoch 1/10:  74%|██████▋  | 738/991 [3:16:23<1:06:19, 15.73s/batch, batch_loss=1.47e+3, batch_index=738, batch_size=256]

Epoch 1/10:  74%|████████▉   | 738/991 [3:16:39<1:06:19, 15.73s/batch, batch_loss=24.6, batch_index=739, batch_size=256]

Epoch 1/10:  75%|████████▉   | 739/991 [3:16:39<1:06:10, 15.75s/batch, batch_loss=24.6, batch_index=739, batch_size=256]

Epoch 1/10:  75%|████████▉   | 739/991 [3:16:55<1:06:10, 15.75s/batch, batch_loss=8.92, batch_index=740, batch_size=256]

Epoch 1/10:  75%|████████▉   | 740/991 [3:16:55<1:06:01, 15.78s/batch, batch_loss=8.92, batch_index=740, batch_size=256]

Epoch 1/10:  75%|███████▍  | 740/991 [3:17:11<1:06:01, 15.78s/batch, batch_loss=1.8e+4, batch_index=741, batch_size=256]

Epoch 1/10:  75%|███████▍  | 741/991 [3:17:11<1:05:56, 15.83s/batch, batch_loss=1.8e+4, batch_index=741, batch_size=256]

Epoch 1/10:  75%|██████▋  | 741/991 [3:17:26<1:05:56, 15.83s/batch, batch_loss=2.28e+3, batch_index=742, batch_size=256]

Epoch 1/10:  75%|██████▋  | 742/991 [3:17:26<1:04:09, 15.46s/batch, batch_loss=2.28e+3, batch_index=742, batch_size=256]

Epoch 1/10:  75%|████████▉   | 742/991 [3:17:42<1:04:09, 15.46s/batch, batch_loss=9.08, batch_index=743, batch_size=256]

Epoch 1/10:  75%|████████▉   | 743/991 [3:17:42<1:04:48, 15.68s/batch, batch_loss=9.08, batch_index=743, batch_size=256]

Epoch 1/10:  75%|████████▉   | 743/991 [3:17:57<1:04:48, 15.68s/batch, batch_loss=11.9, batch_index=744, batch_size=256]

Epoch 1/10:  75%|█████████   | 744/991 [3:17:57<1:03:42, 15.47s/batch, batch_loss=11.9, batch_index=744, batch_size=256]

Epoch 1/10:  75%|█████████   | 744/991 [3:18:12<1:03:42, 15.47s/batch, batch_loss=15.7, batch_index=745, batch_size=256]

Epoch 1/10:  75%|█████████   | 745/991 [3:18:12<1:03:33, 15.50s/batch, batch_loss=15.7, batch_index=745, batch_size=256]

Epoch 1/10:  75%|██████▊  | 745/991 [3:18:28<1:03:33, 15.50s/batch, batch_loss=1.15e+3, batch_index=746, batch_size=256]

Epoch 1/10:  75%|██████▊  | 746/991 [3:18:28<1:03:16, 15.50s/batch, batch_loss=1.15e+3, batch_index=746, batch_size=256]

Epoch 1/10:  75%|██████▊  | 746/991 [3:18:43<1:03:16, 15.50s/batch, batch_loss=3.87e+3, batch_index=747, batch_size=256]

Epoch 1/10:  75%|██████▊  | 747/991 [3:18:43<1:02:23, 15.34s/batch, batch_loss=3.87e+3, batch_index=747, batch_size=256]

Epoch 1/10:  75%|█████████   | 747/991 [3:18:58<1:02:23, 15.34s/batch, batch_loss=11.8, batch_index=748, batch_size=256]

Epoch 1/10:  75%|█████████   | 748/991 [3:18:58<1:02:08, 15.34s/batch, batch_loss=11.8, batch_index=748, batch_size=256]

Epoch 1/10:  75%|█████████   | 748/991 [3:19:13<1:02:08, 15.34s/batch, batch_loss=12.2, batch_index=749, batch_size=256]

Epoch 1/10:  76%|█████████   | 749/991 [3:19:13<1:01:27, 15.24s/batch, batch_loss=12.2, batch_index=749, batch_size=256]

Epoch 1/10:  76%|█████████▊   | 749/991 [3:19:27<1:01:27, 15.24s/batch, batch_loss=9.6, batch_index=750, batch_size=256]

Epoch 1/10:  76%|█████████▊   | 750/991 [3:19:27<1:00:05, 14.96s/batch, batch_loss=9.6, batch_index=750, batch_size=256]

Epoch 1/10:  76%|█████████   | 750/991 [3:19:43<1:00:05, 14.96s/batch, batch_loss=11.9, batch_index=751, batch_size=256]

Epoch 1/10:  76%|█████████   | 751/991 [3:19:43<1:00:25, 15.11s/batch, batch_loss=11.9, batch_index=751, batch_size=256]

Epoch 1/10:  76%|█████████   | 751/991 [3:19:58<1:00:25, 15.11s/batch, batch_loss=6.64, batch_index=752, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 752/991 [3:19:58<59:46, 15.00s/batch, batch_loss=6.64, batch_index=752, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 752/991 [3:20:13<59:46, 15.00s/batch, batch_loss=7.32, batch_index=753, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 753/991 [3:20:13<59:31, 15.01s/batch, batch_loss=7.32, batch_index=753, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 753/991 [3:20:28<59:31, 15.01s/batch, batch_loss=5.73, batch_index=754, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 754/991 [3:20:28<59:36, 15.09s/batch, batch_loss=5.73, batch_index=754, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 754/991 [3:20:43<59:36, 15.09s/batch, batch_loss=14.2, batch_index=755, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 755/991 [3:20:43<59:51, 15.22s/batch, batch_loss=14.2, batch_index=755, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 755/991 [3:20:59<59:51, 15.22s/batch, batch_loss=11.8, batch_index=756, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 756/991 [3:20:59<59:34, 15.21s/batch, batch_loss=11.8, batch_index=756, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 756/991 [3:21:14<59:34, 15.21s/batch, batch_loss=4.52, batch_index=757, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 757/991 [3:21:14<59:26, 15.24s/batch, batch_loss=4.52, batch_index=757, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 757/991 [3:21:29<59:26, 15.24s/batch, batch_loss=15.5, batch_index=758, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 758/991 [3:21:29<58:46, 15.14s/batch, batch_loss=15.5, batch_index=758, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 758/991 [3:21:44<58:46, 15.14s/batch, batch_loss=14.7, batch_index=759, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 759/991 [3:21:44<58:57, 15.25s/batch, batch_loss=14.7, batch_index=759, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 759/991 [3:22:00<58:57, 15.25s/batch, batch_loss=16.2, batch_index=760, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 760/991 [3:22:00<58:55, 15.31s/batch, batch_loss=16.2, batch_index=760, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 760/991 [3:22:15<58:55, 15.31s/batch, batch_loss=14.8, batch_index=761, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 761/991 [3:22:15<58:09, 15.17s/batch, batch_loss=14.8, batch_index=761, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 761/991 [3:22:30<58:09, 15.17s/batch, batch_loss=23.6, batch_index=762, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 762/991 [3:22:30<57:42, 15.12s/batch, batch_loss=23.6, batch_index=762, batch_size=256]

Epoch 1/10:  77%|███████████▌   | 762/991 [3:22:46<57:42, 15.12s/batch, batch_loss=509, batch_index=763, batch_size=256]

Epoch 1/10:  77%|███████████▌   | 763/991 [3:22:46<58:36, 15.42s/batch, batch_loss=509, batch_index=763, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 763/991 [3:23:02<58:36, 15.42s/batch, batch_loss=9.69, batch_index=764, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 764/991 [3:23:02<59:17, 15.67s/batch, batch_loss=9.69, batch_index=764, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 764/991 [3:23:18<59:17, 15.67s/batch, batch_loss=2.89, batch_index=765, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 765/991 [3:23:18<59:26, 15.78s/batch, batch_loss=2.89, batch_index=765, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 765/991 [3:23:34<59:26, 15.78s/batch, batch_loss=13.2, batch_index=766, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 766/991 [3:23:34<59:05, 15.76s/batch, batch_loss=13.2, batch_index=766, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 766/991 [3:23:50<59:05, 15.76s/batch, batch_loss=13.2, batch_index=767, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 767/991 [3:23:50<59:00, 15.81s/batch, batch_loss=13.2, batch_index=767, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 767/991 [3:24:06<59:00, 15.81s/batch, batch_loss=3.47, batch_index=768, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 768/991 [3:24:06<58:52, 15.84s/batch, batch_loss=3.47, batch_index=768, batch_size=256]

Epoch 1/10:  77%|█████████████▏   | 768/991 [3:24:21<58:52, 15.84s/batch, batch_loss=2, batch_index=769, batch_size=256]

Epoch 1/10:  78%|█████████████▏   | 769/991 [3:24:21<57:49, 15.63s/batch, batch_loss=2, batch_index=769, batch_size=256]

Epoch 1/10:  78%|████████████▍   | 769/991 [3:24:36<57:49, 15.63s/batch, batch_loss=11, batch_index=770, batch_size=256]

Epoch 1/10:  78%|████████████▍   | 770/991 [3:24:36<57:11, 15.53s/batch, batch_loss=11, batch_index=770, batch_size=256]

Epoch 1/10:  78%|████████▌  | 770/991 [3:24:55<57:11, 15.53s/batch, batch_loss=2.74e+3, batch_index=771, batch_size=256]

Epoch 1/10:  78%|███████  | 771/991 [3:24:55<1:00:29, 16.50s/batch, batch_loss=2.74e+3, batch_index=771, batch_size=256]

Epoch 1/10:  78%|█████████▎  | 771/991 [3:25:11<1:00:29, 16.50s/batch, batch_loss=5.99, batch_index=772, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 772/991 [3:25:11<59:49, 16.39s/batch, batch_loss=5.99, batch_index=772, batch_size=256]

Epoch 1/10:  78%|███████████▋   | 772/991 [3:25:28<59:49, 16.39s/batch, batch_loss=1.6, batch_index=773, batch_size=256]

Epoch 1/10:  78%|██████████▏  | 773/991 [3:25:28<1:00:14, 16.58s/batch, batch_loss=1.6, batch_index=773, batch_size=256]

Epoch 1/10:  78%|█████████▎  | 773/991 [3:25:44<1:00:14, 16.58s/batch, batch_loss=8.53, batch_index=774, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 774/991 [3:25:44<59:26, 16.44s/batch, batch_loss=8.53, batch_index=774, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 774/991 [3:26:00<59:26, 16.44s/batch, batch_loss=7.16, batch_index=775, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 775/991 [3:26:00<59:00, 16.39s/batch, batch_loss=7.16, batch_index=775, batch_size=256]

Epoch 1/10:  78%|███████████▋   | 775/991 [3:26:14<59:00, 16.39s/batch, batch_loss=256, batch_index=776, batch_size=256]

Epoch 1/10:  78%|███████████▋   | 776/991 [3:26:14<56:10, 15.68s/batch, batch_loss=256, batch_index=776, batch_size=256]

Epoch 1/10:  78%|██████████▏  | 776/991 [3:26:30<56:10, 15.68s/batch, batch_loss=0.555, batch_index=777, batch_size=256]

Epoch 1/10:  78%|██████████▏  | 777/991 [3:26:30<55:39, 15.61s/batch, batch_loss=0.555, batch_index=777, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 777/991 [3:26:47<55:39, 15.61s/batch, batch_loss=0.84, batch_index=778, batch_size=256]

Epoch 1/10:  79%|██████████▉   | 778/991 [3:26:47<57:25, 16.18s/batch, batch_loss=0.84, batch_index=778, batch_size=256]

Epoch 1/10:  79%|██████████▉   | 778/991 [3:27:03<57:25, 16.18s/batch, batch_loss=4.67, batch_index=779, batch_size=256]

Epoch 1/10:  79%|███████████   | 779/991 [3:27:03<57:03, 16.15s/batch, batch_loss=4.67, batch_index=779, batch_size=256]

Epoch 1/10:  79%|███████████   | 779/991 [3:27:20<57:03, 16.15s/batch, batch_loss=3.26, batch_index=780, batch_size=256]

Epoch 1/10:  79%|███████████   | 780/991 [3:27:20<56:51, 16.17s/batch, batch_loss=3.26, batch_index=780, batch_size=256]

Epoch 1/10:  79%|███████████   | 780/991 [3:27:35<56:51, 16.17s/batch, batch_loss=3.74, batch_index=781, batch_size=256]

Epoch 1/10:  79%|███████████   | 781/991 [3:27:35<55:32, 15.87s/batch, batch_loss=3.74, batch_index=781, batch_size=256]

Epoch 1/10:  79%|████████▋  | 781/991 [3:27:51<55:32, 15.87s/batch, batch_loss=2.51e+4, batch_index=782, batch_size=256]

Epoch 1/10:  79%|████████▋  | 782/991 [3:27:51<55:25, 15.91s/batch, batch_loss=2.51e+4, batch_index=782, batch_size=256]

Epoch 1/10:  79%|███████████   | 782/991 [3:28:06<55:25, 15.91s/batch, batch_loss=17.3, batch_index=783, batch_size=256]

Epoch 1/10:  79%|███████████   | 783/991 [3:28:06<54:47, 15.81s/batch, batch_loss=17.3, batch_index=783, batch_size=256]

Epoch 1/10:  79%|███████████   | 783/991 [3:28:21<54:47, 15.81s/batch, batch_loss=14.8, batch_index=784, batch_size=256]

Epoch 1/10:  79%|███████████   | 784/991 [3:28:21<53:01, 15.37s/batch, batch_loss=14.8, batch_index=784, batch_size=256]

Epoch 1/10:  79%|███████████   | 784/991 [3:28:36<53:01, 15.37s/batch, batch_loss=12.3, batch_index=785, batch_size=256]

Epoch 1/10:  79%|███████████   | 785/991 [3:28:36<52:52, 15.40s/batch, batch_loss=12.3, batch_index=785, batch_size=256]

Epoch 1/10:  79%|███████████   | 785/991 [3:28:55<52:52, 15.40s/batch, batch_loss=9.19, batch_index=786, batch_size=256]

Epoch 1/10:  79%|███████████   | 786/991 [3:28:55<56:23, 16.50s/batch, batch_loss=9.19, batch_index=786, batch_size=256]

Epoch 1/10:  79%|████████▋  | 786/991 [3:29:10<56:23, 16.50s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 1/10:  79%|████████▋  | 787/991 [3:29:10<54:35, 16.06s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 1/10:  79%|███████████▉   | 787/991 [3:29:26<54:35, 16.06s/batch, batch_loss=686, batch_index=788, batch_size=256]

Epoch 1/10:  80%|███████████▉   | 788/991 [3:29:26<53:39, 15.86s/batch, batch_loss=686, batch_index=788, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 788/991 [3:29:41<53:39, 15.86s/batch, batch_loss=16.9, batch_index=789, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 789/991 [3:29:41<52:58, 15.73s/batch, batch_loss=16.9, batch_index=789, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 789/991 [3:29:57<52:58, 15.73s/batch, batch_loss=11.9, batch_index=790, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 790/991 [3:29:57<52:49, 15.77s/batch, batch_loss=11.9, batch_index=790, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 790/991 [3:30:13<52:49, 15.77s/batch, batch_loss=12.9, batch_index=791, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 791/991 [3:30:13<52:32, 15.76s/batch, batch_loss=12.9, batch_index=791, batch_size=256]

Epoch 1/10:  80%|████████▊  | 791/991 [3:30:28<52:32, 15.76s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 1/10:  80%|████████▊  | 792/991 [3:30:28<51:47, 15.62s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 792/991 [3:30:44<51:47, 15.62s/batch, batch_loss=8.03, batch_index=793, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 793/991 [3:30:44<51:24, 15.58s/batch, batch_loss=8.03, batch_index=793, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 793/991 [3:31:02<51:24, 15.58s/batch, batch_loss=2.07, batch_index=794, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 794/991 [3:31:02<53:42, 16.36s/batch, batch_loss=2.07, batch_index=794, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 794/991 [3:31:18<53:42, 16.36s/batch, batch_loss=6.61, batch_index=795, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 795/991 [3:31:18<53:08, 16.27s/batch, batch_loss=6.61, batch_index=795, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 795/991 [3:31:34<53:08, 16.27s/batch, batch_loss=10.6, batch_index=796, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 796/991 [3:31:34<52:47, 16.25s/batch, batch_loss=10.6, batch_index=796, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 796/991 [3:31:51<52:47, 16.25s/batch, batch_loss=17.7, batch_index=797, batch_size=256]

Epoch 1/10:  80%|███████████▎  | 797/991 [3:31:51<53:18, 16.49s/batch, batch_loss=17.7, batch_index=797, batch_size=256]

Epoch 1/10:  80%|████████████   | 797/991 [3:32:07<53:18, 16.49s/batch, batch_loss=333, batch_index=798, batch_size=256]

Epoch 1/10:  81%|████████████   | 798/991 [3:32:07<52:22, 16.28s/batch, batch_loss=333, batch_index=798, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 798/991 [3:32:23<52:22, 16.28s/batch, batch_loss=8.25, batch_index=799, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 799/991 [3:32:23<52:05, 16.28s/batch, batch_loss=8.25, batch_index=799, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 799/991 [3:32:39<52:05, 16.28s/batch, batch_loss=14.2, batch_index=800, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 800/991 [3:32:39<51:33, 16.20s/batch, batch_loss=14.2, batch_index=800, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 800/991 [3:32:55<51:33, 16.20s/batch, batch_loss=11.2, batch_index=801, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 801/991 [3:32:55<51:27, 16.25s/batch, batch_loss=11.2, batch_index=801, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 801/991 [3:33:12<51:27, 16.25s/batch, batch_loss=14.4, batch_index=802, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 802/991 [3:33:12<51:35, 16.38s/batch, batch_loss=14.4, batch_index=802, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 802/991 [3:33:28<51:35, 16.38s/batch, batch_loss=6.19, batch_index=803, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 803/991 [3:33:28<51:13, 16.35s/batch, batch_loss=6.19, batch_index=803, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 803/991 [3:33:47<51:13, 16.35s/batch, batch_loss=13.2, batch_index=804, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 804/991 [3:33:47<53:11, 17.07s/batch, batch_loss=13.2, batch_index=804, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 804/991 [3:34:03<53:11, 17.07s/batch, batch_loss=6.31, batch_index=805, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 805/991 [3:34:03<51:51, 16.73s/batch, batch_loss=6.31, batch_index=805, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 805/991 [3:34:18<51:51, 16.73s/batch, batch_loss=10.1, batch_index=806, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 806/991 [3:34:18<50:18, 16.32s/batch, batch_loss=10.1, batch_index=806, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 806/991 [3:34:34<50:18, 16.32s/batch, batch_loss=8.58, batch_index=807, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 807/991 [3:34:34<49:10, 16.04s/batch, batch_loss=8.58, batch_index=807, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 807/991 [3:34:51<49:10, 16.04s/batch, batch_loss=15.2, batch_index=808, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 808/991 [3:34:51<49:36, 16.26s/batch, batch_loss=15.2, batch_index=808, batch_size=256]

Epoch 1/10:  82%|████████▉  | 808/991 [3:35:05<49:36, 16.26s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 1/10:  82%|████████▉  | 809/991 [3:35:05<47:33, 15.68s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 809/991 [3:35:20<47:33, 15.68s/batch, batch_loss=13.9, batch_index=810, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 810/991 [3:35:20<46:24, 15.38s/batch, batch_loss=13.9, batch_index=810, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 810/991 [3:35:35<46:24, 15.38s/batch, batch_loss=7.07, batch_index=811, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 811/991 [3:35:35<46:04, 15.36s/batch, batch_loss=7.07, batch_index=811, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 811/991 [3:35:50<46:04, 15.36s/batch, batch_loss=5.99, batch_index=812, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 812/991 [3:35:50<45:33, 15.27s/batch, batch_loss=5.99, batch_index=812, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 812/991 [3:36:06<45:33, 15.27s/batch, batch_loss=9.22, batch_index=813, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 813/991 [3:36:06<46:03, 15.52s/batch, batch_loss=9.22, batch_index=813, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 813/991 [3:36:21<46:03, 15.52s/batch, batch_loss=13.6, batch_index=814, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 814/991 [3:36:21<45:28, 15.41s/batch, batch_loss=13.6, batch_index=814, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 814/991 [3:36:36<45:28, 15.41s/batch, batch_loss=8.62, batch_index=815, batch_size=256]

Epoch 1/10:  82%|███████████▌  | 815/991 [3:36:36<44:48, 15.28s/batch, batch_loss=8.62, batch_index=815, batch_size=256]

Epoch 1/10:  82%|█████████████▏  | 815/991 [3:36:52<44:48, 15.28s/batch, batch_loss=91, batch_index=816, batch_size=256]

Epoch 1/10:  82%|█████████████▏  | 816/991 [3:36:52<44:34, 15.28s/batch, batch_loss=91, batch_index=816, batch_size=256]

Epoch 1/10:  82%|████████████▎  | 816/991 [3:37:06<44:34, 15.28s/batch, batch_loss=360, batch_index=817, batch_size=256]

Epoch 1/10:  82%|████████████▎  | 817/991 [3:37:06<43:58, 15.17s/batch, batch_loss=360, batch_index=817, batch_size=256]

Epoch 1/10:  82%|████████████▎  | 817/991 [3:37:21<43:58, 15.17s/batch, batch_loss=370, batch_index=818, batch_size=256]

Epoch 1/10:  83%|████████████▍  | 818/991 [3:37:21<43:33, 15.11s/batch, batch_loss=370, batch_index=818, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 818/991 [3:37:36<43:33, 15.11s/batch, batch_loss=12.6, batch_index=819, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 819/991 [3:37:36<42:54, 14.97s/batch, batch_loss=12.6, batch_index=819, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 819/991 [3:37:51<42:54, 14.97s/batch, batch_loss=7.98, batch_index=820, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 820/991 [3:37:51<43:00, 15.09s/batch, batch_loss=7.98, batch_index=820, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 820/991 [3:38:07<43:00, 15.09s/batch, batch_loss=7.26, batch_index=821, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 821/991 [3:38:07<43:17, 15.28s/batch, batch_loss=7.26, batch_index=821, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 821/991 [3:38:22<43:17, 15.28s/batch, batch_loss=8.78, batch_index=822, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 822/991 [3:38:22<42:25, 15.06s/batch, batch_loss=8.78, batch_index=822, batch_size=256]

Epoch 1/10:  83%|████████████▍  | 822/991 [3:38:38<42:25, 15.06s/batch, batch_loss=153, batch_index=823, batch_size=256]

Epoch 1/10:  83%|████████████▍  | 823/991 [3:38:38<42:53, 15.32s/batch, batch_loss=153, batch_index=823, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 823/991 [3:38:54<42:53, 15.32s/batch, batch_loss=6.99, batch_index=824, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 824/991 [3:38:54<43:26, 15.61s/batch, batch_loss=6.99, batch_index=824, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 824/991 [3:39:12<43:26, 15.61s/batch, batch_loss=13.2, batch_index=825, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 825/991 [3:39:12<45:37, 16.49s/batch, batch_loss=13.2, batch_index=825, batch_size=256]

Epoch 1/10:  83%|█████████▉  | 825/991 [3:39:27<45:37, 16.49s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 1/10:  83%|██████████  | 826/991 [3:39:27<44:05, 16.04s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 826/991 [3:39:43<44:05, 16.04s/batch, batch_loss=21.7, batch_index=827, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 827/991 [3:39:43<43:46, 16.01s/batch, batch_loss=21.7, batch_index=827, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 827/991 [3:39:59<43:46, 16.01s/batch, batch_loss=15.7, batch_index=828, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 828/991 [3:39:59<43:05, 15.86s/batch, batch_loss=15.7, batch_index=828, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 828/991 [3:40:14<43:05, 15.86s/batch, batch_loss=7.82, batch_index=829, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 829/991 [3:40:14<42:00, 15.56s/batch, batch_loss=7.82, batch_index=829, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 829/991 [3:40:29<42:00, 15.56s/batch, batch_loss=12.6, batch_index=830, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 830/991 [3:40:29<41:25, 15.44s/batch, batch_loss=12.6, batch_index=830, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 830/991 [3:40:44<41:25, 15.44s/batch, batch_loss=8.78, batch_index=831, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 831/991 [3:40:44<41:01, 15.38s/batch, batch_loss=8.78, batch_index=831, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 831/991 [3:40:59<41:01, 15.38s/batch, batch_loss=13.5, batch_index=832, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 832/991 [3:40:59<40:34, 15.31s/batch, batch_loss=13.5, batch_index=832, batch_size=256]

Epoch 1/10:  84%|████████████▌  | 832/991 [3:41:15<40:34, 15.31s/batch, batch_loss=216, batch_index=833, batch_size=256]

Epoch 1/10:  84%|████████████▌  | 833/991 [3:41:15<40:17, 15.30s/batch, batch_loss=216, batch_index=833, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 833/991 [3:41:34<40:17, 15.30s/batch, batch_loss=14.6, batch_index=834, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 834/991 [3:41:34<43:13, 16.52s/batch, batch_loss=14.6, batch_index=834, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 834/991 [3:41:49<43:13, 16.52s/batch, batch_loss=11.6, batch_index=835, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 835/991 [3:41:49<41:48, 16.08s/batch, batch_loss=11.6, batch_index=835, batch_size=256]

Epoch 1/10:  84%|█████████▎ | 835/991 [3:42:05<41:48, 16.08s/batch, batch_loss=3.27e+3, batch_index=836, batch_size=256]

Epoch 1/10:  84%|█████████▎ | 836/991 [3:42:05<41:33, 16.09s/batch, batch_loss=3.27e+3, batch_index=836, batch_size=256]

Epoch 1/10:  84%|██████████  | 836/991 [3:42:21<41:33, 16.09s/batch, batch_loss=4.9e+3, batch_index=837, batch_size=256]

Epoch 1/10:  84%|██████████▏ | 837/991 [3:42:21<40:48, 15.90s/batch, batch_loss=4.9e+3, batch_index=837, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 837/991 [3:42:36<40:48, 15.90s/batch, batch_loss=14.3, batch_index=838, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 838/991 [3:42:36<40:29, 15.88s/batch, batch_loss=14.3, batch_index=838, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 838/991 [3:42:53<40:29, 15.88s/batch, batch_loss=4.96, batch_index=839, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 839/991 [3:42:53<40:34, 16.02s/batch, batch_loss=4.96, batch_index=839, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 839/991 [3:43:08<40:34, 16.02s/batch, batch_loss=4.72, batch_index=840, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 840/991 [3:43:08<39:56, 15.87s/batch, batch_loss=4.72, batch_index=840, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 840/991 [3:43:24<39:56, 15.87s/batch, batch_loss=13.5, batch_index=841, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 841/991 [3:43:24<39:41, 15.88s/batch, batch_loss=13.5, batch_index=841, batch_size=256]

Epoch 1/10:  85%|█████████████▌  | 841/991 [3:43:41<39:41, 15.88s/batch, batch_loss=15, batch_index=842, batch_size=256]

Epoch 1/10:  85%|█████████████▌  | 842/991 [3:43:41<40:05, 16.15s/batch, batch_loss=15, batch_index=842, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 842/991 [3:43:58<40:05, 16.15s/batch, batch_loss=8.64, batch_index=843, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 843/991 [3:43:58<40:23, 16.38s/batch, batch_loss=8.64, batch_index=843, batch_size=256]

Epoch 1/10:  85%|█████████▎ | 843/991 [3:44:13<40:23, 16.38s/batch, batch_loss=1.69e+3, batch_index=844, batch_size=256]

Epoch 1/10:  85%|█████████▎ | 844/991 [3:44:13<39:13, 16.01s/batch, batch_loss=1.69e+3, batch_index=844, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 844/991 [3:44:30<39:13, 16.01s/batch, batch_loss=14.1, batch_index=845, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 845/991 [3:44:30<39:30, 16.23s/batch, batch_loss=14.1, batch_index=845, batch_size=256]

Epoch 1/10:  85%|█████████▍ | 845/991 [3:44:47<39:30, 16.23s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 1/10:  85%|█████████▍ | 846/991 [3:44:47<40:02, 16.57s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 846/991 [3:45:04<40:02, 16.57s/batch, batch_loss=18.3, batch_index=847, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 847/991 [3:45:04<39:44, 16.56s/batch, batch_loss=18.3, batch_index=847, batch_size=256]

Epoch 1/10:  85%|█████████████▋  | 847/991 [3:45:20<39:44, 16.56s/batch, batch_loss=27, batch_index=848, batch_size=256]

Epoch 1/10:  86%|█████████████▋  | 848/991 [3:45:20<39:06, 16.41s/batch, batch_loss=27, batch_index=848, batch_size=256]

Epoch 1/10:  86%|███████████▉  | 848/991 [3:45:35<39:06, 16.41s/batch, batch_loss=1e+3, batch_index=849, batch_size=256]

Epoch 1/10:  86%|███████████▉  | 849/991 [3:45:35<37:57, 16.04s/batch, batch_loss=1e+3, batch_index=849, batch_size=256]

Epoch 1/10:  86%|███████████▉  | 849/991 [3:45:51<37:57, 16.04s/batch, batch_loss=7.75, batch_index=850, batch_size=256]

Epoch 1/10:  86%|████████████  | 850/991 [3:45:51<37:55, 16.14s/batch, batch_loss=7.75, batch_index=850, batch_size=256]

Epoch 1/10:  86%|████████████  | 850/991 [3:46:07<37:55, 16.14s/batch, batch_loss=13.6, batch_index=851, batch_size=256]

Epoch 1/10:  86%|████████████  | 851/991 [3:46:07<37:37, 16.13s/batch, batch_loss=13.6, batch_index=851, batch_size=256]

Epoch 1/10:  86%|████████████  | 851/991 [3:46:23<37:37, 16.13s/batch, batch_loss=16.8, batch_index=852, batch_size=256]

Epoch 1/10:  86%|████████████  | 852/991 [3:46:23<37:15, 16.08s/batch, batch_loss=16.8, batch_index=852, batch_size=256]

Epoch 1/10:  86%|█████████▍ | 852/991 [3:46:39<37:15, 16.08s/batch, batch_loss=7.66e+3, batch_index=853, batch_size=256]

Epoch 1/10:  86%|█████████▍ | 853/991 [3:46:39<36:53, 16.04s/batch, batch_loss=7.66e+3, batch_index=853, batch_size=256]

Epoch 1/10:  86%|████████████  | 853/991 [3:46:55<36:53, 16.04s/batch, batch_loss=15.3, batch_index=854, batch_size=256]

Epoch 1/10:  86%|████████████  | 854/991 [3:46:55<36:32, 16.01s/batch, batch_loss=15.3, batch_index=854, batch_size=256]

Epoch 1/10:  86%|████████████▉  | 854/991 [3:47:14<36:32, 16.01s/batch, batch_loss=7.5, batch_index=855, batch_size=256]

Epoch 1/10:  86%|████████████▉  | 855/991 [3:47:14<38:02, 16.78s/batch, batch_loss=7.5, batch_index=855, batch_size=256]

Epoch 1/10:  86%|████████████  | 855/991 [3:47:29<38:02, 16.78s/batch, batch_loss=8.48, batch_index=856, batch_size=256]

Epoch 1/10:  86%|████████████  | 856/991 [3:47:29<36:55, 16.41s/batch, batch_loss=8.48, batch_index=856, batch_size=256]

Epoch 1/10:  86%|████████████  | 856/991 [3:47:45<36:55, 16.41s/batch, batch_loss=8.27, batch_index=857, batch_size=256]

Epoch 1/10:  86%|████████████  | 857/991 [3:47:45<36:13, 16.22s/batch, batch_loss=8.27, batch_index=857, batch_size=256]

Epoch 1/10:  86%|████████████  | 857/991 [3:48:01<36:13, 16.22s/batch, batch_loss=17.6, batch_index=858, batch_size=256]

Epoch 1/10:  87%|████████████  | 858/991 [3:48:01<35:47, 16.14s/batch, batch_loss=17.6, batch_index=858, batch_size=256]

Epoch 1/10:  87%|████████████  | 858/991 [3:48:16<35:47, 16.14s/batch, batch_loss=12.4, batch_index=859, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 859/991 [3:48:16<34:57, 15.89s/batch, batch_loss=12.4, batch_index=859, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 859/991 [3:48:33<34:57, 15.89s/batch, batch_loss=16.8, batch_index=860, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 860/991 [3:48:33<34:54, 15.99s/batch, batch_loss=16.8, batch_index=860, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 860/991 [3:48:48<34:54, 15.99s/batch, batch_loss=8.09, batch_index=861, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 861/991 [3:48:48<34:04, 15.73s/batch, batch_loss=8.09, batch_index=861, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 861/991 [3:49:07<34:04, 15.73s/batch, batch_loss=17.9, batch_index=862, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 862/991 [3:49:07<35:56, 16.71s/batch, batch_loss=17.9, batch_index=862, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 862/991 [3:49:23<35:56, 16.71s/batch, batch_loss=26.5, batch_index=863, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 863/991 [3:49:23<35:38, 16.71s/batch, batch_loss=26.5, batch_index=863, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 863/991 [3:49:40<35:38, 16.71s/batch, batch_loss=8.98, batch_index=864, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 864/991 [3:49:40<35:07, 16.60s/batch, batch_loss=8.98, batch_index=864, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 864/991 [3:49:55<35:07, 16.60s/batch, batch_loss=15.3, batch_index=865, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 865/991 [3:49:55<34:16, 16.32s/batch, batch_loss=15.3, batch_index=865, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 865/991 [3:50:11<34:16, 16.32s/batch, batch_loss=19.1, batch_index=866, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 866/991 [3:50:11<33:32, 16.10s/batch, batch_loss=19.1, batch_index=866, batch_size=256]

Epoch 1/10:  87%|█████████████▉  | 866/991 [3:50:26<33:32, 16.10s/batch, batch_loss=19, batch_index=867, batch_size=256]

Epoch 1/10:  87%|█████████████▉  | 867/991 [3:50:26<32:51, 15.90s/batch, batch_loss=19, batch_index=867, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 867/991 [3:50:41<32:51, 15.90s/batch, batch_loss=20.3, batch_index=868, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 868/991 [3:50:41<32:01, 15.62s/batch, batch_loss=20.3, batch_index=868, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 868/991 [3:50:56<32:01, 15.62s/batch, batch_loss=10.2, batch_index=869, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 869/991 [3:50:56<31:04, 15.28s/batch, batch_loss=10.2, batch_index=869, batch_size=256]

Epoch 1/10:  88%|██████████████  | 869/991 [3:51:14<31:04, 15.28s/batch, batch_loss=12, batch_index=870, batch_size=256]

Epoch 1/10:  88%|██████████████  | 870/991 [3:51:14<32:23, 16.07s/batch, batch_loss=12, batch_index=870, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 870/991 [3:51:29<32:23, 16.07s/batch, batch_loss=7.92, batch_index=871, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 871/991 [3:51:29<31:36, 15.81s/batch, batch_loss=7.92, batch_index=871, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 871/991 [3:51:44<31:36, 15.81s/batch, batch_loss=17.3, batch_index=872, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 872/991 [3:51:44<30:58, 15.62s/batch, batch_loss=17.3, batch_index=872, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 872/991 [3:52:00<30:58, 15.62s/batch, batch_loss=12.8, batch_index=873, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 873/991 [3:52:00<30:41, 15.61s/batch, batch_loss=12.8, batch_index=873, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 873/991 [3:52:15<30:41, 15.61s/batch, batch_loss=7.07, batch_index=874, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 874/991 [3:52:15<30:19, 15.55s/batch, batch_loss=7.07, batch_index=874, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 874/991 [3:52:31<30:19, 15.55s/batch, batch_loss=13.3, batch_index=875, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 875/991 [3:52:31<30:21, 15.70s/batch, batch_loss=13.3, batch_index=875, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 875/991 [3:52:47<30:21, 15.70s/batch, batch_loss=19.6, batch_index=876, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 876/991 [3:52:47<29:54, 15.60s/batch, batch_loss=19.6, batch_index=876, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 876/991 [3:53:02<29:54, 15.60s/batch, batch_loss=15.8, batch_index=877, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 877/991 [3:53:02<29:42, 15.64s/batch, batch_loss=15.8, batch_index=877, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 877/991 [3:53:18<29:42, 15.64s/batch, batch_loss=21.6, batch_index=878, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 878/991 [3:53:18<29:27, 15.64s/batch, batch_loss=21.6, batch_index=878, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 878/991 [3:53:34<29:27, 15.64s/batch, batch_loss=18.1, batch_index=879, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 879/991 [3:53:34<29:14, 15.66s/batch, batch_loss=18.1, batch_index=879, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 879/991 [3:53:49<29:14, 15.66s/batch, batch_loss=13.8, batch_index=880, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 880/991 [3:53:49<28:54, 15.62s/batch, batch_loss=13.8, batch_index=880, batch_size=256]

Epoch 1/10:  89%|█████████▊ | 880/991 [3:54:05<28:54, 15.62s/batch, batch_loss=5.12e+3, batch_index=881, batch_size=256]

Epoch 1/10:  89%|█████████▊ | 881/991 [3:54:05<28:57, 15.79s/batch, batch_loss=5.12e+3, batch_index=881, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 881/991 [3:54:21<28:57, 15.79s/batch, batch_loss=14.3, batch_index=882, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 882/991 [3:54:21<28:27, 15.66s/batch, batch_loss=14.3, batch_index=882, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 882/991 [3:54:38<28:27, 15.66s/batch, batch_loss=15.6, batch_index=883, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 883/991 [3:54:38<28:56, 16.08s/batch, batch_loss=15.6, batch_index=883, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 883/991 [3:54:56<28:56, 16.08s/batch, batch_loss=9.05, batch_index=884, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 884/991 [3:54:56<29:49, 16.72s/batch, batch_loss=9.05, batch_index=884, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 884/991 [3:55:13<29:49, 16.72s/batch, batch_loss=12.2, batch_index=885, batch_size=256]

Epoch 1/10:  89%|████████████▌ | 885/991 [3:55:13<29:34, 16.74s/batch, batch_loss=12.2, batch_index=885, batch_size=256]

Epoch 1/10:  89%|████████████▌ | 885/991 [3:55:28<29:34, 16.74s/batch, batch_loss=16.5, batch_index=886, batch_size=256]

Epoch 1/10:  89%|████████████▌ | 886/991 [3:55:28<28:33, 16.31s/batch, batch_loss=16.5, batch_index=886, batch_size=256]

Epoch 1/10:  89%|█████████▊ | 886/991 [3:55:43<28:33, 16.31s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 1/10:  90%|█████████▊ | 887/991 [3:55:43<27:43, 16.00s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 887/991 [3:55:58<27:43, 16.00s/batch, batch_loss=15.5, batch_index=888, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 888/991 [3:55:58<26:34, 15.48s/batch, batch_loss=15.5, batch_index=888, batch_size=256]

Epoch 1/10:  90%|██████████████▎ | 888/991 [3:56:12<26:34, 15.48s/batch, batch_loss=19, batch_index=889, batch_size=256]

Epoch 1/10:  90%|██████████████▎ | 889/991 [3:56:12<25:51, 15.21s/batch, batch_loss=19, batch_index=889, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 889/991 [3:56:29<25:51, 15.21s/batch, batch_loss=11.4, batch_index=890, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 890/991 [3:56:29<26:15, 15.60s/batch, batch_loss=11.4, batch_index=890, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 890/991 [3:56:49<26:15, 15.60s/batch, batch_loss=13.6, batch_index=891, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 891/991 [3:56:49<28:32, 17.13s/batch, batch_loss=13.6, batch_index=891, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 891/991 [3:57:05<28:32, 17.13s/batch, batch_loss=15.9, batch_index=892, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 892/991 [3:57:05<27:41, 16.78s/batch, batch_loss=15.9, batch_index=892, batch_size=256]

Epoch 1/10:  90%|██████████▊ | 892/991 [3:57:22<27:41, 16.78s/batch, batch_loss=3.7e+3, batch_index=893, batch_size=256]

Epoch 1/10:  90%|██████████▊ | 893/991 [3:57:22<27:22, 16.76s/batch, batch_loss=3.7e+3, batch_index=893, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 893/991 [3:57:39<27:22, 16.76s/batch, batch_loss=8.48, batch_index=894, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 894/991 [3:57:39<27:05, 16.76s/batch, batch_loss=8.48, batch_index=894, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 894/991 [3:57:55<27:05, 16.76s/batch, batch_loss=13.5, batch_index=895, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 895/991 [3:57:55<26:36, 16.63s/batch, batch_loss=13.5, batch_index=895, batch_size=256]

Epoch 1/10:  90%|█████████████▌ | 895/991 [3:58:12<26:36, 16.63s/batch, batch_loss=9.9, batch_index=896, batch_size=256]

Epoch 1/10:  90%|█████████████▌ | 896/991 [3:58:12<26:20, 16.63s/batch, batch_loss=9.9, batch_index=896, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 896/991 [3:58:27<26:20, 16.63s/batch, batch_loss=15.3, batch_index=897, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 897/991 [3:58:27<25:26, 16.24s/batch, batch_loss=15.3, batch_index=897, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 897/991 [3:58:43<25:26, 16.24s/batch, batch_loss=18.6, batch_index=898, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 898/991 [3:58:43<24:50, 16.03s/batch, batch_loss=18.6, batch_index=898, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 898/991 [3:58:59<24:50, 16.03s/batch, batch_loss=15.3, batch_index=899, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 899/991 [3:58:59<24:29, 15.97s/batch, batch_loss=15.3, batch_index=899, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 899/991 [3:59:17<24:29, 15.97s/batch, batch_loss=16.1, batch_index=900, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 900/991 [3:59:17<25:22, 16.73s/batch, batch_loss=16.1, batch_index=900, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 900/991 [3:59:33<25:22, 16.73s/batch, batch_loss=14.5, batch_index=901, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 901/991 [3:59:33<24:43, 16.49s/batch, batch_loss=14.5, batch_index=901, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 901/991 [3:59:48<24:43, 16.49s/batch, batch_loss=10.3, batch_index=902, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 902/991 [3:59:48<24:00, 16.19s/batch, batch_loss=10.3, batch_index=902, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 902/991 [4:00:04<24:00, 16.19s/batch, batch_loss=6.93, batch_index=903, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 903/991 [4:00:04<23:38, 16.12s/batch, batch_loss=6.93, batch_index=903, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 903/991 [4:00:21<23:38, 16.12s/batch, batch_loss=8.02, batch_index=904, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 904/991 [4:00:21<23:28, 16.19s/batch, batch_loss=8.02, batch_index=904, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 904/991 [4:00:36<23:28, 16.19s/batch, batch_loss=24.3, batch_index=905, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 905/991 [4:00:36<22:47, 15.90s/batch, batch_loss=24.3, batch_index=905, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 905/991 [4:00:52<22:47, 15.90s/batch, batch_loss=17.9, batch_index=906, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 906/991 [4:00:52<22:33, 15.92s/batch, batch_loss=17.9, batch_index=906, batch_size=256]

Epoch 1/10:  91%|██████████████▋ | 906/991 [4:01:11<22:33, 15.92s/batch, batch_loss=17, batch_index=907, batch_size=256]

Epoch 1/10:  92%|██████████████▋ | 907/991 [4:01:11<23:25, 16.73s/batch, batch_loss=17, batch_index=907, batch_size=256]

Epoch 1/10:  92%|██████████████▋ | 907/991 [4:01:26<23:25, 16.73s/batch, batch_loss=12, batch_index=908, batch_size=256]

Epoch 1/10:  92%|██████████████▋ | 908/991 [4:01:26<22:29, 16.26s/batch, batch_loss=12, batch_index=908, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 908/991 [4:01:41<22:29, 16.26s/batch, batch_loss=5.19, batch_index=909, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 909/991 [4:01:41<21:55, 16.04s/batch, batch_loss=5.19, batch_index=909, batch_size=256]

Epoch 1/10:  92%|█████████████▊ | 909/991 [4:01:56<21:55, 16.04s/batch, batch_loss=684, batch_index=910, batch_size=256]

Epoch 1/10:  92%|█████████████▊ | 910/991 [4:01:56<21:19, 15.79s/batch, batch_loss=684, batch_index=910, batch_size=256]

Epoch 1/10:  92%|██████████ | 910/991 [4:02:12<21:19, 15.79s/batch, batch_loss=1.02e+3, batch_index=911, batch_size=256]

Epoch 1/10:  92%|██████████ | 911/991 [4:02:12<20:53, 15.67s/batch, batch_loss=1.02e+3, batch_index=911, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 911/991 [4:02:28<20:53, 15.67s/batch, batch_loss=22.6, batch_index=912, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 912/991 [4:02:28<20:49, 15.81s/batch, batch_loss=22.6, batch_index=912, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 912/991 [4:02:48<20:49, 15.81s/batch, batch_loss=21.7, batch_index=913, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 913/991 [4:02:48<22:20, 17.18s/batch, batch_loss=21.7, batch_index=913, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 913/991 [4:03:06<22:20, 17.18s/batch, batch_loss=19.1, batch_index=914, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 914/991 [4:03:06<22:04, 17.20s/batch, batch_loss=19.1, batch_index=914, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 914/991 [4:03:23<22:04, 17.20s/batch, batch_loss=17.3, batch_index=915, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 915/991 [4:03:23<21:54, 17.30s/batch, batch_loss=17.3, batch_index=915, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 915/991 [4:03:39<21:54, 17.30s/batch, batch_loss=15.5, batch_index=916, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 916/991 [4:03:39<21:13, 16.98s/batch, batch_loss=15.5, batch_index=916, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 916/991 [4:03:56<21:13, 16.98s/batch, batch_loss=9.18, batch_index=917, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 917/991 [4:03:56<20:41, 16.77s/batch, batch_loss=9.18, batch_index=917, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 917/991 [4:04:12<20:41, 16.77s/batch, batch_loss=12, batch_index=918, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 918/991 [4:04:12<20:09, 16.57s/batch, batch_loss=12, batch_index=918, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 918/991 [4:04:27<20:09, 16.57s/batch, batch_loss=15, batch_index=919, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 919/991 [4:04:27<19:33, 16.30s/batch, batch_loss=15, batch_index=919, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 919/991 [4:04:44<19:33, 16.30s/batch, batch_loss=13.7, batch_index=920, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 920/991 [4:04:44<19:12, 16.23s/batch, batch_loss=13.7, batch_index=920, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 920/991 [4:04:59<19:12, 16.23s/batch, batch_loss=19.5, batch_index=921, batch_size=256]

Epoch 1/10:  93%|█████████████ | 921/991 [4:04:59<18:36, 15.95s/batch, batch_loss=19.5, batch_index=921, batch_size=256]

Epoch 1/10:  93%|█████████████ | 921/991 [4:05:15<18:36, 15.95s/batch, batch_loss=21.4, batch_index=922, batch_size=256]

Epoch 1/10:  93%|█████████████ | 922/991 [4:05:15<18:21, 15.97s/batch, batch_loss=21.4, batch_index=922, batch_size=256]

Epoch 1/10:  93%|█████████████ | 922/991 [4:05:31<18:21, 15.97s/batch, batch_loss=5.98, batch_index=923, batch_size=256]

Epoch 1/10:  93%|█████████████ | 923/991 [4:05:31<18:14, 16.10s/batch, batch_loss=5.98, batch_index=923, batch_size=256]

Epoch 1/10:  93%|█████████████ | 923/991 [4:05:49<18:14, 16.10s/batch, batch_loss=10.8, batch_index=924, batch_size=256]

Epoch 1/10:  93%|█████████████ | 924/991 [4:05:49<18:23, 16.47s/batch, batch_loss=10.8, batch_index=924, batch_size=256]

Epoch 1/10:  93%|█████████████ | 924/991 [4:06:05<18:23, 16.47s/batch, batch_loss=9.82, batch_index=925, batch_size=256]

Epoch 1/10:  93%|█████████████ | 925/991 [4:06:05<18:03, 16.42s/batch, batch_loss=9.82, batch_index=925, batch_size=256]

Epoch 1/10:  93%|█████████████ | 925/991 [4:06:21<18:03, 16.42s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 1/10:  93%|█████████████ | 926/991 [4:06:21<17:39, 16.30s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 1/10:  93%|█████████████ | 926/991 [4:06:36<17:39, 16.30s/batch, batch_loss=6.72, batch_index=927, batch_size=256]

Epoch 1/10:  94%|█████████████ | 927/991 [4:06:36<17:02, 15.98s/batch, batch_loss=6.72, batch_index=927, batch_size=256]

Epoch 1/10:  94%|██████████████ | 927/991 [4:06:52<17:02, 15.98s/batch, batch_loss=852, batch_index=928, batch_size=256]

Epoch 1/10:  94%|██████████████ | 928/991 [4:06:52<16:35, 15.80s/batch, batch_loss=852, batch_index=928, batch_size=256]

Epoch 1/10:  94%|█████████████ | 928/991 [4:07:09<16:35, 15.80s/batch, batch_loss=10.3, batch_index=929, batch_size=256]

Epoch 1/10:  94%|█████████████ | 929/991 [4:07:09<16:59, 16.45s/batch, batch_loss=10.3, batch_index=929, batch_size=256]

Epoch 1/10:  94%|██████████████ | 929/991 [4:07:26<16:59, 16.45s/batch, batch_loss=8.6, batch_index=930, batch_size=256]

Epoch 1/10:  94%|██████████████ | 930/991 [4:07:26<16:39, 16.38s/batch, batch_loss=8.6, batch_index=930, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 930/991 [4:07:41<16:39, 16.38s/batch, batch_loss=11.7, batch_index=931, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 931/991 [4:07:41<15:59, 15.98s/batch, batch_loss=11.7, batch_index=931, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 931/991 [4:07:57<15:59, 15.98s/batch, batch_loss=10.5, batch_index=932, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 932/991 [4:07:57<15:44, 16.02s/batch, batch_loss=10.5, batch_index=932, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 932/991 [4:08:12<15:44, 16.02s/batch, batch_loss=10.8, batch_index=933, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 933/991 [4:08:12<15:19, 15.86s/batch, batch_loss=10.8, batch_index=933, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 933/991 [4:08:28<15:19, 15.86s/batch, batch_loss=1.39, batch_index=934, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 934/991 [4:08:28<15:08, 15.93s/batch, batch_loss=1.39, batch_index=934, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 934/991 [4:08:44<15:08, 15.93s/batch, batch_loss=1.42, batch_index=935, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 935/991 [4:08:44<14:47, 15.84s/batch, batch_loss=1.42, batch_index=935, batch_size=256]

Epoch 1/10:  94%|██████████████▏| 935/991 [4:08:59<14:47, 15.84s/batch, batch_loss=163, batch_index=936, batch_size=256]

Epoch 1/10:  94%|██████████████▏| 936/991 [4:08:59<14:20, 15.64s/batch, batch_loss=163, batch_index=936, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 936/991 [4:09:14<14:20, 15.64s/batch, batch_loss=37.3, batch_index=937, batch_size=256]

Epoch 1/10:  95%|█████████████▏| 937/991 [4:09:14<13:53, 15.44s/batch, batch_loss=37.3, batch_index=937, batch_size=256]

Epoch 1/10:  95%|██████████████▏| 937/991 [4:09:31<13:53, 15.44s/batch, batch_loss=9.3, batch_index=938, batch_size=256]

Epoch 1/10:  95%|██████████████▏| 938/991 [4:09:31<13:57, 15.81s/batch, batch_loss=9.3, batch_index=938, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 938/991 [4:09:46<13:57, 15.81s/batch, batch_loss=8.05, batch_index=939, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 939/991 [4:09:46<13:26, 15.51s/batch, batch_loss=8.05, batch_index=939, batch_size=256]

Epoch 1/10:  95%|██████████████▏| 939/991 [4:10:00<13:26, 15.51s/batch, batch_loss=419, batch_index=940, batch_size=256]

Epoch 1/10:  95%|██████████████▏| 940/991 [4:10:00<12:59, 15.29s/batch, batch_loss=419, batch_index=940, batch_size=256]

Epoch 1/10:  95%|███████████████▏| 940/991 [4:10:15<12:59, 15.29s/batch, batch_loss=17, batch_index=941, batch_size=256]

Epoch 1/10:  95%|███████████████▏| 941/991 [4:10:15<12:39, 15.19s/batch, batch_loss=17, batch_index=941, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 941/991 [4:10:31<12:39, 15.19s/batch, batch_loss=13.3, batch_index=942, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 942/991 [4:10:31<12:26, 15.23s/batch, batch_loss=13.3, batch_index=942, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 942/991 [4:10:46<12:26, 15.23s/batch, batch_loss=10.1, batch_index=943, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 943/991 [4:10:46<12:10, 15.22s/batch, batch_loss=10.1, batch_index=943, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 943/991 [4:11:05<12:10, 15.22s/batch, batch_loss=14.1, batch_index=944, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 944/991 [4:11:05<12:46, 16.30s/batch, batch_loss=14.1, batch_index=944, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 944/991 [4:11:20<12:46, 16.30s/batch, batch_loss=1.59, batch_index=945, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 945/991 [4:11:20<12:19, 16.07s/batch, batch_loss=1.59, batch_index=945, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 945/991 [4:11:37<12:19, 16.07s/batch, batch_loss=12.2, batch_index=946, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 946/991 [4:11:37<12:09, 16.21s/batch, batch_loss=12.2, batch_index=946, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 946/991 [4:11:54<12:09, 16.21s/batch, batch_loss=12.7, batch_index=947, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 947/991 [4:11:54<11:59, 16.36s/batch, batch_loss=12.7, batch_index=947, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 947/991 [4:12:09<11:59, 16.36s/batch, batch_loss=10.1, batch_index=948, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 948/991 [4:12:09<11:36, 16.20s/batch, batch_loss=10.1, batch_index=948, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 948/991 [4:12:25<11:36, 16.20s/batch, batch_loss=5.79, batch_index=949, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 949/991 [4:12:25<11:18, 16.16s/batch, batch_loss=5.79, batch_index=949, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 949/991 [4:12:40<11:18, 16.16s/batch, batch_loss=8.29, batch_index=950, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 950/991 [4:12:40<10:48, 15.82s/batch, batch_loss=8.29, batch_index=950, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 950/991 [4:12:59<10:48, 15.82s/batch, batch_loss=16.1, batch_index=951, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 951/991 [4:12:59<11:01, 16.54s/batch, batch_loss=16.1, batch_index=951, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 951/991 [4:13:15<11:01, 16.54s/batch, batch_loss=15.5, batch_index=952, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 952/991 [4:13:15<10:40, 16.42s/batch, batch_loss=15.5, batch_index=952, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 952/991 [4:13:31<10:40, 16.42s/batch, batch_loss=6.33, batch_index=953, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 953/991 [4:13:31<10:23, 16.42s/batch, batch_loss=6.33, batch_index=953, batch_size=256]

Epoch 1/10:  96%|██████████████▍| 953/991 [4:13:47<10:23, 16.42s/batch, batch_loss=331, batch_index=954, batch_size=256]

Epoch 1/10:  96%|██████████████▍| 954/991 [4:13:47<10:05, 16.37s/batch, batch_loss=331, batch_index=954, batch_size=256]

Epoch 1/10:  96%|███████████████▍| 954/991 [4:14:04<10:05, 16.37s/batch, batch_loss=12, batch_index=955, batch_size=256]

Epoch 1/10:  96%|███████████████▍| 955/991 [4:14:04<09:48, 16.35s/batch, batch_loss=12, batch_index=955, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 955/991 [4:14:20<09:48, 16.35s/batch, batch_loss=14.1, batch_index=956, batch_size=256]

Epoch 1/10:  96%|█████████████▌| 956/991 [4:14:20<09:34, 16.43s/batch, batch_loss=14.1, batch_index=956, batch_size=256]

Epoch 1/10:  96%|█████████████▌| 956/991 [4:14:37<09:34, 16.43s/batch, batch_loss=14.5, batch_index=957, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 957/991 [4:14:37<09:15, 16.35s/batch, batch_loss=14.5, batch_index=957, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 957/991 [4:14:52<09:15, 16.35s/batch, batch_loss=11.6, batch_index=958, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 958/991 [4:14:52<08:53, 16.16s/batch, batch_loss=11.6, batch_index=958, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 958/991 [4:15:09<08:53, 16.16s/batch, batch_loss=7.19, batch_index=959, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 959/991 [4:15:09<08:46, 16.46s/batch, batch_loss=7.19, batch_index=959, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 959/991 [4:15:26<08:46, 16.46s/batch, batch_loss=10.6, batch_index=960, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 960/991 [4:15:26<08:33, 16.57s/batch, batch_loss=10.6, batch_index=960, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 960/991 [4:15:43<08:33, 16.57s/batch, batch_loss=14.1, batch_index=961, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 961/991 [4:15:43<08:16, 16.54s/batch, batch_loss=14.1, batch_index=961, batch_size=256]

Epoch 1/10:  97%|████████████████▍| 961/991 [4:16:00<08:16, 16.54s/batch, batch_loss=5, batch_index=962, batch_size=256]

Epoch 1/10:  97%|████████████████▌| 962/991 [4:16:00<08:06, 16.76s/batch, batch_loss=5, batch_index=962, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 962/991 [4:16:16<08:06, 16.76s/batch, batch_loss=5.96, batch_index=963, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 963/991 [4:16:16<07:42, 16.53s/batch, batch_loss=5.96, batch_index=963, batch_size=256]

Epoch 1/10:  97%|██████████▋| 963/991 [4:16:33<07:42, 16.53s/batch, batch_loss=9.44e+3, batch_index=964, batch_size=256]

Epoch 1/10:  97%|██████████▋| 964/991 [4:16:33<07:30, 16.67s/batch, batch_loss=9.44e+3, batch_index=964, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 964/991 [4:16:49<07:30, 16.67s/batch, batch_loss=16.9, batch_index=965, batch_size=256]

Epoch 1/10:  97%|█████████████▋| 965/991 [4:16:49<07:08, 16.47s/batch, batch_loss=16.9, batch_index=965, batch_size=256]

Epoch 1/10:  97%|█████████████▋| 965/991 [4:17:04<07:08, 16.47s/batch, batch_loss=14.7, batch_index=966, batch_size=256]

Epoch 1/10:  97%|█████████████▋| 966/991 [4:17:04<06:43, 16.14s/batch, batch_loss=14.7, batch_index=966, batch_size=256]

Epoch 1/10:  97%|██████████▋| 966/991 [4:17:21<06:43, 16.14s/batch, batch_loss=2.41e+4, batch_index=967, batch_size=256]

Epoch 1/10:  98%|██████████▋| 967/991 [4:17:21<06:29, 16.23s/batch, batch_loss=2.41e+4, batch_index=967, batch_size=256]

Epoch 1/10:  98%|██████████████▋| 967/991 [4:17:38<06:29, 16.23s/batch, batch_loss=411, batch_index=968, batch_size=256]

Epoch 1/10:  98%|██████████████▋| 968/991 [4:17:38<06:19, 16.49s/batch, batch_loss=411, batch_index=968, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 968/991 [4:17:55<06:19, 16.49s/batch, batch_loss=19.2, batch_index=969, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 969/991 [4:17:55<06:04, 16.55s/batch, batch_loss=19.2, batch_index=969, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 969/991 [4:18:10<06:04, 16.55s/batch, batch_loss=1.02, batch_index=970, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 970/991 [4:18:10<05:43, 16.34s/batch, batch_loss=1.02, batch_index=970, batch_size=256]

Epoch 1/10:  98%|██████████████▋| 970/991 [4:18:26<05:43, 16.34s/batch, batch_loss=9.8, batch_index=971, batch_size=256]

Epoch 1/10:  98%|██████████████▋| 971/991 [4:18:26<05:24, 16.21s/batch, batch_loss=9.8, batch_index=971, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 971/991 [4:18:45<05:24, 16.21s/batch, batch_loss=23.8, batch_index=972, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 972/991 [4:18:45<05:19, 16.80s/batch, batch_loss=23.8, batch_index=972, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 972/991 [4:19:01<05:19, 16.80s/batch, batch_loss=18.5, batch_index=973, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 973/991 [4:19:01<05:02, 16.80s/batch, batch_loss=18.5, batch_index=973, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 973/991 [4:19:18<05:02, 16.80s/batch, batch_loss=14.8, batch_index=974, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 974/991 [4:19:18<04:44, 16.73s/batch, batch_loss=14.8, batch_index=974, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 974/991 [4:19:34<04:44, 16.73s/batch, batch_loss=7.61, batch_index=975, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 975/991 [4:19:34<04:23, 16.50s/batch, batch_loss=7.61, batch_index=975, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 975/991 [4:19:50<04:23, 16.50s/batch, batch_loss=30.9, batch_index=976, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 976/991 [4:19:50<04:06, 16.41s/batch, batch_loss=30.9, batch_index=976, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 976/991 [4:20:03<04:06, 16.41s/batch, batch_loss=19.5, batch_index=977, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 977/991 [4:20:03<03:36, 15.45s/batch, batch_loss=19.5, batch_index=977, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 977/991 [4:20:17<03:36, 15.45s/batch, batch_loss=18.6, batch_index=978, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 978/991 [4:20:17<03:15, 15.07s/batch, batch_loss=18.6, batch_index=978, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 978/991 [4:20:31<03:15, 15.07s/batch, batch_loss=17.5, batch_index=979, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 979/991 [4:20:31<02:56, 14.70s/batch, batch_loss=17.5, batch_index=979, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 979/991 [4:20:45<02:56, 14.70s/batch, batch_loss=15.3, batch_index=980, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 980/991 [4:20:45<02:39, 14.50s/batch, batch_loss=15.3, batch_index=980, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 980/991 [4:21:00<02:39, 14.50s/batch, batch_loss=11.2, batch_index=981, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 981/991 [4:21:00<02:25, 14.58s/batch, batch_loss=11.2, batch_index=981, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 981/991 [4:21:14<02:25, 14.58s/batch, batch_loss=5.67, batch_index=982, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 982/991 [4:21:14<02:10, 14.50s/batch, batch_loss=5.67, batch_index=982, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 982/991 [4:21:29<02:10, 14.50s/batch, batch_loss=2.14, batch_index=983, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 983/991 [4:21:29<01:56, 14.54s/batch, batch_loss=2.14, batch_index=983, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 983/991 [4:21:44<01:56, 14.54s/batch, batch_loss=1.04, batch_index=984, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 984/991 [4:21:44<01:42, 14.64s/batch, batch_loss=1.04, batch_index=984, batch_size=256]

Epoch 1/10:  99%|████████████▉| 984/991 [4:21:58<01:42, 14.64s/batch, batch_loss=0.422, batch_index=985, batch_size=256]

Epoch 1/10:  99%|████████████▉| 985/991 [4:21:58<01:27, 14.58s/batch, batch_loss=0.422, batch_index=985, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 985/991 [4:22:13<01:27, 14.58s/batch, batch_loss=0.18, batch_index=986, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 986/991 [4:22:13<01:12, 14.50s/batch, batch_loss=0.18, batch_index=986, batch_size=256]

Epoch 1/10:  99%|████████████▉| 986/991 [4:22:26<01:12, 14.50s/batch, batch_loss=0.147, batch_index=987, batch_size=256]

Epoch 1/10: 100%|████████████▉| 987/991 [4:22:26<00:57, 14.25s/batch, batch_loss=0.147, batch_index=987, batch_size=256]

Epoch 1/10: 100%|████████████▉| 987/991 [4:22:40<00:57, 14.25s/batch, batch_loss=0.201, batch_index=988, batch_size=256]

Epoch 1/10: 100%|████████████▉| 988/991 [4:22:40<00:41, 13.98s/batch, batch_loss=0.201, batch_index=988, batch_size=256]

Epoch 1/10: 100%|████████████▉| 988/991 [4:22:53<00:41, 13.98s/batch, batch_loss=0.272, batch_index=989, batch_size=256]

Epoch 1/10: 100%|████████████▉| 989/991 [4:22:53<00:27, 13.90s/batch, batch_loss=0.272, batch_index=989, batch_size=256]

Epoch 1/10: 100%|████████████▉| 989/991 [4:23:07<00:27, 13.90s/batch, batch_loss=0.329, batch_index=990, batch_size=256]

Epoch 1/10: 100%|████████████▉| 990/991 [4:23:07<00:13, 13.69s/batch, batch_loss=0.329, batch_index=990, batch_size=256]

Epoch 1/10: 100%|████████████▉| 990/991 [4:23:18<00:13, 13.69s/batch, batch_loss=0.361, batch_index=991, batch_size=220]

Epoch 1/10: 100%|█████████████| 991/991 [4:23:18<00:00, 12.88s/batch, batch_loss=0.361, batch_index=991, batch_size=220]

Epoch 1/10: 100%|█████████████| 991/991 [4:23:18<00:00, 15.94s/batch, batch_loss=0.361, batch_index=991, batch_size=220]




Epoch 1, Loss: 986.7976


Validation:   0%|                                                                            | 0/743 [00:00<?, ?batch/s]

Validation:   0%|                            | 0/743 [00:15<?, ?batch/s, batch_loss=19.4, batch_index=1, batch_size=256]

Validation:   0%|                  | 1/743 [00:15<3:17:51, 16.00s/batch, batch_loss=19.4, batch_index=1, batch_size=256]

Validation:   0%|                  | 1/743 [00:32<3:17:51, 16.00s/batch, batch_loss=21.8, batch_index=2, batch_size=256]

Validation:   0%|                  | 2/743 [00:32<3:19:02, 16.12s/batch, batch_loss=21.8, batch_index=2, batch_size=256]

Validation:   0%|                  | 2/743 [00:46<3:19:02, 16.12s/batch, batch_loss=17.4, batch_index=3, batch_size=256]

Validation:   0%|                  | 3/743 [00:46<3:09:43, 15.38s/batch, batch_loss=17.4, batch_index=3, batch_size=256]

Validation:   0%|                  | 3/743 [01:01<3:09:43, 15.38s/batch, batch_loss=11.7, batch_index=4, batch_size=256]

Validation:   1%|                  | 4/743 [01:01<3:06:14, 15.12s/batch, batch_loss=11.7, batch_index=4, batch_size=256]

Validation:   1%|                  | 4/743 [01:17<3:06:14, 15.12s/batch, batch_loss=20.6, batch_index=5, batch_size=256]

Validation:   1%|                  | 5/743 [01:17<3:09:52, 15.44s/batch, batch_loss=20.6, batch_index=5, batch_size=256]

Validation:   1%|▏                   | 5/743 [01:35<3:09:52, 15.44s/batch, batch_loss=21, batch_index=6, batch_size=256]

Validation:   1%|▏                   | 6/743 [01:35<3:20:53, 16.35s/batch, batch_loss=21, batch_index=6, batch_size=256]

Validation:   1%|▏                  | 6/743 [01:51<3:20:53, 16.35s/batch, batch_loss=556, batch_index=7, batch_size=256]

Validation:   1%|▏                  | 7/743 [01:51<3:20:39, 16.36s/batch, batch_loss=556, batch_index=7, batch_size=256]

Validation:   1%|▏                 | 7/743 [02:07<3:20:39, 16.36s/batch, batch_loss=18.2, batch_index=8, batch_size=256]

Validation:   1%|▏                 | 8/743 [02:07<3:18:17, 16.19s/batch, batch_loss=18.2, batch_index=8, batch_size=256]

Validation:   1%|▏                 | 8/743 [02:23<3:18:17, 16.19s/batch, batch_loss=16.2, batch_index=9, batch_size=256]

Validation:   1%|▏                 | 9/743 [02:23<3:18:06, 16.19s/batch, batch_loss=16.2, batch_index=9, batch_size=256]

Validation:   1%|▏                | 9/743 [02:40<3:18:06, 16.19s/batch, batch_loss=15.2, batch_index=10, batch_size=256]

Validation:   1%|▏               | 10/743 [02:40<3:17:52, 16.20s/batch, batch_loss=15.2, batch_index=10, batch_size=256]

Validation:   1%|▏               | 10/743 [02:54<3:17:52, 16.20s/batch, batch_loss=13.1, batch_index=11, batch_size=256]

Validation:   1%|▏               | 11/743 [02:54<3:10:23, 15.61s/batch, batch_loss=13.1, batch_index=11, batch_size=256]

Validation:   1%|▏            | 11/743 [03:09<3:10:23, 15.61s/batch, batch_loss=2.18e+3, batch_index=12, batch_size=256]

Validation:   2%|▏            | 12/743 [03:09<3:07:49, 15.42s/batch, batch_loss=2.18e+3, batch_index=12, batch_size=256]

Validation:   2%|▎               | 12/743 [03:27<3:07:49, 15.42s/batch, batch_loss=14.8, batch_index=13, batch_size=256]

Validation:   2%|▎               | 13/743 [03:27<3:18:33, 16.32s/batch, batch_loss=14.8, batch_index=13, batch_size=256]

Validation:   2%|▎               | 13/743 [03:45<3:18:33, 16.32s/batch, batch_loss=10.3, batch_index=14, batch_size=256]

Validation:   2%|▎               | 14/743 [03:45<3:21:49, 16.61s/batch, batch_loss=10.3, batch_index=14, batch_size=256]

Validation:   2%|▎               | 14/743 [04:00<3:21:49, 16.61s/batch, batch_loss=19.4, batch_index=15, batch_size=256]

Validation:   2%|▎               | 15/743 [04:00<3:18:13, 16.34s/batch, batch_loss=19.4, batch_index=15, batch_size=256]

Validation:   2%|▎               | 15/743 [04:17<3:18:13, 16.34s/batch, batch_loss=15.8, batch_index=16, batch_size=256]

Validation:   2%|▎               | 16/743 [04:17<3:18:25, 16.38s/batch, batch_loss=15.8, batch_index=16, batch_size=256]

Validation:   2%|▎               | 16/743 [04:34<3:18:25, 16.38s/batch, batch_loss=12.6, batch_index=17, batch_size=256]

Validation:   2%|▎               | 17/743 [04:34<3:21:57, 16.69s/batch, batch_loss=12.6, batch_index=17, batch_size=256]

Validation:   2%|▎            | 17/743 [04:50<3:21:57, 16.69s/batch, batch_loss=4.53e+3, batch_index=18, batch_size=256]

Validation:   2%|▎            | 18/743 [04:50<3:17:58, 16.38s/batch, batch_loss=4.53e+3, batch_index=18, batch_size=256]

Validation:   2%|▍               | 18/743 [05:06<3:17:58, 16.38s/batch, batch_loss=12.2, batch_index=19, batch_size=256]

Validation:   3%|▍               | 19/743 [05:06<3:17:52, 16.40s/batch, batch_loss=12.2, batch_index=19, batch_size=256]

Validation:   3%|▍               | 19/743 [05:22<3:17:52, 16.40s/batch, batch_loss=16.5, batch_index=20, batch_size=256]

Validation:   3%|▍               | 20/743 [05:22<3:16:41, 16.32s/batch, batch_loss=16.5, batch_index=20, batch_size=256]

Validation:   3%|▍                | 20/743 [05:40<3:16:41, 16.32s/batch, batch_loss=959, batch_index=21, batch_size=256]

Validation:   3%|▍                | 21/743 [05:40<3:21:38, 16.76s/batch, batch_loss=959, batch_index=21, batch_size=256]

Validation:   3%|▍               | 21/743 [05:56<3:21:38, 16.76s/batch, batch_loss=15.9, batch_index=22, batch_size=256]

Validation:   3%|▍               | 22/743 [05:56<3:18:00, 16.48s/batch, batch_loss=15.9, batch_index=22, batch_size=256]

Validation:   3%|▍               | 22/743 [06:11<3:18:00, 16.48s/batch, batch_loss=8.21, batch_index=23, batch_size=256]

Validation:   3%|▍               | 23/743 [06:11<3:12:51, 16.07s/batch, batch_loss=8.21, batch_index=23, batch_size=256]

Validation:   3%|▍               | 23/743 [06:27<3:12:51, 16.07s/batch, batch_loss=17.3, batch_index=24, batch_size=256]

Validation:   3%|▌               | 24/743 [06:27<3:11:45, 16.00s/batch, batch_loss=17.3, batch_index=24, batch_size=256]

Validation:   3%|▌               | 24/743 [06:42<3:11:45, 16.00s/batch, batch_loss=15.3, batch_index=25, batch_size=256]

Validation:   3%|▌               | 25/743 [06:42<3:08:27, 15.75s/batch, batch_loss=15.3, batch_index=25, batch_size=256]

Validation:   3%|▌               | 25/743 [06:57<3:08:27, 15.75s/batch, batch_loss=21.7, batch_index=26, batch_size=256]

Validation:   3%|▌               | 26/743 [06:57<3:04:56, 15.48s/batch, batch_loss=21.7, batch_index=26, batch_size=256]

Validation:   3%|▍            | 26/743 [07:12<3:04:56, 15.48s/batch, batch_loss=1.65e+3, batch_index=27, batch_size=256]

Validation:   4%|▍            | 27/743 [07:12<3:01:13, 15.19s/batch, batch_loss=1.65e+3, batch_index=27, batch_size=256]

Validation:   4%|▌               | 27/743 [07:26<3:01:13, 15.19s/batch, batch_loss=18.4, batch_index=28, batch_size=256]

Validation:   4%|▌               | 28/743 [07:26<3:00:02, 15.11s/batch, batch_loss=18.4, batch_index=28, batch_size=256]

Validation:   4%|▌               | 28/743 [07:44<3:00:02, 15.11s/batch, batch_loss=16.6, batch_index=29, batch_size=256]

Validation:   4%|▌               | 29/743 [07:44<3:09:19, 15.91s/batch, batch_loss=16.6, batch_index=29, batch_size=256]

Validation:   4%|▌            | 29/743 [07:59<3:09:19, 15.91s/batch, batch_loss=1.18e+4, batch_index=30, batch_size=256]

Validation:   4%|▌            | 30/743 [07:59<3:06:13, 15.67s/batch, batch_loss=1.18e+4, batch_index=30, batch_size=256]

Validation:   4%|▋               | 30/743 [08:15<3:06:13, 15.67s/batch, batch_loss=21.4, batch_index=31, batch_size=256]

Validation:   4%|▋               | 31/743 [08:15<3:04:55, 15.58s/batch, batch_loss=21.4, batch_index=31, batch_size=256]

Validation:   4%|▋               | 31/743 [08:30<3:04:55, 15.58s/batch, batch_loss=15.6, batch_index=32, batch_size=256]

Validation:   4%|▋               | 32/743 [08:30<3:04:04, 15.53s/batch, batch_loss=15.6, batch_index=32, batch_size=256]

Validation:   4%|▊                 | 32/743 [08:45<3:04:04, 15.53s/batch, batch_loss=19, batch_index=33, batch_size=256]

Validation:   4%|▊                 | 33/743 [08:45<3:03:13, 15.48s/batch, batch_loss=19, batch_index=33, batch_size=256]

Validation:   4%|▊                 | 33/743 [09:00<3:03:13, 15.48s/batch, batch_loss=17, batch_index=34, batch_size=256]

Validation:   5%|▊                 | 34/743 [09:00<3:00:21, 15.26s/batch, batch_loss=17, batch_index=34, batch_size=256]

Validation:   5%|▌            | 34/743 [09:16<3:00:21, 15.26s/batch, batch_loss=2.82e+3, batch_index=35, batch_size=256]

Validation:   5%|▌            | 35/743 [09:16<3:01:21, 15.37s/batch, batch_loss=2.82e+3, batch_index=35, batch_size=256]

Validation:   5%|▊               | 35/743 [09:33<3:01:21, 15.37s/batch, batch_loss=16.7, batch_index=36, batch_size=256]

Validation:   5%|▊               | 36/743 [09:33<3:05:56, 15.78s/batch, batch_loss=16.7, batch_index=36, batch_size=256]

Validation:   5%|▊                | 36/743 [09:48<3:05:56, 15.78s/batch, batch_loss=164, batch_index=37, batch_size=256]

Validation:   5%|▊                | 37/743 [09:48<3:03:42, 15.61s/batch, batch_loss=164, batch_index=37, batch_size=256]

Validation:   5%|▋            | 37/743 [10:03<3:03:42, 15.61s/batch, batch_loss=6.51e+3, batch_index=38, batch_size=256]

Validation:   5%|▋            | 38/743 [10:03<3:00:54, 15.40s/batch, batch_loss=6.51e+3, batch_index=38, batch_size=256]

Validation:   5%|▊               | 38/743 [10:17<3:00:54, 15.40s/batch, batch_loss=14.5, batch_index=39, batch_size=256]

Validation:   5%|▊               | 39/743 [10:17<2:57:18, 15.11s/batch, batch_loss=14.5, batch_index=39, batch_size=256]

Validation:   5%|▊               | 39/743 [10:32<2:57:18, 15.11s/batch, batch_loss=18.3, batch_index=40, batch_size=256]

Validation:   5%|▊               | 40/743 [10:32<2:54:27, 14.89s/batch, batch_loss=18.3, batch_index=40, batch_size=256]

Validation:   5%|▊               | 40/743 [10:47<2:54:27, 14.89s/batch, batch_loss=15.1, batch_index=41, batch_size=256]

Validation:   6%|▉               | 41/743 [10:47<2:57:18, 15.15s/batch, batch_loss=15.1, batch_index=41, batch_size=256]

Validation:   6%|▉               | 41/743 [11:03<2:57:18, 15.15s/batch, batch_loss=15.6, batch_index=42, batch_size=256]

Validation:   6%|▉               | 42/743 [11:03<2:59:30, 15.36s/batch, batch_loss=15.6, batch_index=42, batch_size=256]

Validation:   6%|▉               | 42/743 [11:18<2:59:30, 15.36s/batch, batch_loss=10.7, batch_index=43, batch_size=256]

Validation:   6%|▉               | 43/743 [11:18<2:59:05, 15.35s/batch, batch_loss=10.7, batch_index=43, batch_size=256]

Validation:   6%|▉               | 43/743 [11:36<2:59:05, 15.35s/batch, batch_loss=15.8, batch_index=44, batch_size=256]

Validation:   6%|▉               | 44/743 [11:36<3:07:40, 16.11s/batch, batch_loss=15.8, batch_index=44, batch_size=256]

Validation:   6%|█                 | 44/743 [11:52<3:07:40, 16.11s/batch, batch_loss=20, batch_index=45, batch_size=256]

Validation:   6%|█                 | 45/743 [11:52<3:06:51, 16.06s/batch, batch_loss=20, batch_index=45, batch_size=256]

Validation:   6%|▉               | 45/743 [12:07<3:06:51, 16.06s/batch, batch_loss=10.7, batch_index=46, batch_size=256]

Validation:   6%|▉               | 46/743 [12:07<3:02:53, 15.74s/batch, batch_loss=10.7, batch_index=46, batch_size=256]

Validation:   6%|▉               | 46/743 [12:22<3:02:53, 15.74s/batch, batch_loss=19.1, batch_index=47, batch_size=256]

Validation:   6%|█               | 47/743 [12:22<2:59:51, 15.51s/batch, batch_loss=19.1, batch_index=47, batch_size=256]

Validation:   6%|█               | 47/743 [12:38<2:59:51, 15.51s/batch, batch_loss=18.4, batch_index=48, batch_size=256]

Validation:   6%|█               | 48/743 [12:38<2:59:01, 15.46s/batch, batch_loss=18.4, batch_index=48, batch_size=256]

Validation:   6%|█▏                | 48/743 [12:54<2:59:01, 15.46s/batch, batch_loss=19, batch_index=49, batch_size=256]

Validation:   7%|█▏                | 49/743 [12:54<3:00:29, 15.60s/batch, batch_loss=19, batch_index=49, batch_size=256]

Validation:   7%|█               | 49/743 [13:10<3:00:29, 15.60s/batch, batch_loss=15.4, batch_index=50, batch_size=256]

Validation:   7%|█               | 50/743 [13:10<3:03:27, 15.88s/batch, batch_loss=15.4, batch_index=50, batch_size=256]

Validation:   7%|█               | 50/743 [13:26<3:03:27, 15.88s/batch, batch_loss=15.9, batch_index=51, batch_size=256]

Validation:   7%|█               | 51/743 [13:26<3:03:04, 15.87s/batch, batch_loss=15.9, batch_index=51, batch_size=256]

Validation:   7%|█               | 51/743 [13:42<3:03:04, 15.87s/batch, batch_loss=17.2, batch_index=52, batch_size=256]

Validation:   7%|█               | 52/743 [13:42<3:04:43, 16.04s/batch, batch_loss=17.2, batch_index=52, batch_size=256]

Validation:   7%|█               | 52/743 [13:58<3:04:43, 16.04s/batch, batch_loss=22.8, batch_index=53, batch_size=256]

Validation:   7%|█▏              | 53/743 [13:58<3:02:04, 15.83s/batch, batch_loss=22.8, batch_index=53, batch_size=256]

Validation:   7%|█▏              | 53/743 [14:13<3:02:04, 15.83s/batch, batch_loss=14.7, batch_index=54, batch_size=256]

Validation:   7%|█▏              | 54/743 [14:13<2:58:22, 15.53s/batch, batch_loss=14.7, batch_index=54, batch_size=256]

Validation:   7%|█▏              | 54/743 [14:27<2:58:22, 15.53s/batch, batch_loss=20.1, batch_index=55, batch_size=256]

Validation:   7%|█▏              | 55/743 [14:27<2:55:01, 15.26s/batch, batch_loss=20.1, batch_index=55, batch_size=256]

Validation:   7%|█▏              | 55/743 [14:43<2:55:01, 15.26s/batch, batch_loss=17.3, batch_index=56, batch_size=256]

Validation:   8%|█▏              | 56/743 [14:43<2:57:29, 15.50s/batch, batch_loss=17.3, batch_index=56, batch_size=256]

Validation:   8%|█▏              | 56/743 [14:59<2:57:29, 15.50s/batch, batch_loss=13.7, batch_index=57, batch_size=256]

Validation:   8%|█▏              | 57/743 [14:59<2:57:24, 15.52s/batch, batch_loss=13.7, batch_index=57, batch_size=256]

Validation:   8%|█▏              | 57/743 [15:14<2:57:24, 15.52s/batch, batch_loss=20.7, batch_index=58, batch_size=256]

Validation:   8%|█▏              | 58/743 [15:14<2:57:03, 15.51s/batch, batch_loss=20.7, batch_index=58, batch_size=256]

Validation:   8%|█▎               | 58/743 [15:30<2:57:03, 15.51s/batch, batch_loss=107, batch_index=59, batch_size=256]

Validation:   8%|█▎               | 59/743 [15:30<2:56:20, 15.47s/batch, batch_loss=107, batch_index=59, batch_size=256]

Validation:   8%|█            | 59/743 [15:47<2:56:20, 15.47s/batch, batch_loss=6.14e+3, batch_index=60, batch_size=256]

Validation:   8%|█            | 60/743 [15:47<3:02:33, 16.04s/batch, batch_loss=6.14e+3, batch_index=60, batch_size=256]

Validation:   8%|█▎              | 60/743 [16:02<3:02:33, 16.04s/batch, batch_loss=8.74, batch_index=61, batch_size=256]

Validation:   8%|█▎              | 61/743 [16:02<2:58:39, 15.72s/batch, batch_loss=8.74, batch_index=61, batch_size=256]

Validation:   8%|█▎              | 61/743 [16:16<2:58:39, 15.72s/batch, batch_loss=9.96, batch_index=62, batch_size=256]

Validation:   8%|█▎              | 62/743 [16:16<2:53:49, 15.32s/batch, batch_loss=9.96, batch_index=62, batch_size=256]

Validation:   8%|█▎              | 62/743 [16:31<2:53:49, 15.32s/batch, batch_loss=20.9, batch_index=63, batch_size=256]

Validation:   8%|█▎              | 63/743 [16:31<2:51:30, 15.13s/batch, batch_loss=20.9, batch_index=63, batch_size=256]

Validation:   8%|█▎              | 63/743 [16:46<2:51:30, 15.13s/batch, batch_loss=13.2, batch_index=64, batch_size=256]

Validation:   9%|█▍              | 64/743 [16:46<2:50:10, 15.04s/batch, batch_loss=13.2, batch_index=64, batch_size=256]

Validation:   9%|█▍              | 64/743 [17:00<2:50:10, 15.04s/batch, batch_loss=16.5, batch_index=65, batch_size=256]

Validation:   9%|█▍              | 65/743 [17:00<2:48:13, 14.89s/batch, batch_loss=16.5, batch_index=65, batch_size=256]

Validation:   9%|█▏           | 65/743 [17:16<2:48:13, 14.89s/batch, batch_loss=1.27e+3, batch_index=66, batch_size=256]

Validation:   9%|█▏           | 66/743 [17:16<2:48:43, 14.95s/batch, batch_loss=1.27e+3, batch_index=66, batch_size=256]

Validation:   9%|█▍              | 66/743 [17:30<2:48:43, 14.95s/batch, batch_loss=13.8, batch_index=67, batch_size=256]

Validation:   9%|█▍              | 67/743 [17:30<2:46:29, 14.78s/batch, batch_loss=13.8, batch_index=67, batch_size=256]

Validation:   9%|█▍              | 67/743 [17:45<2:46:29, 14.78s/batch, batch_loss=14.7, batch_index=68, batch_size=256]

Validation:   9%|█▍              | 68/743 [17:45<2:46:44, 14.82s/batch, batch_loss=14.7, batch_index=68, batch_size=256]

Validation:   9%|█▍              | 68/743 [18:03<2:46:44, 14.82s/batch, batch_loss=11.8, batch_index=69, batch_size=256]

Validation:   9%|█▍              | 69/743 [18:03<2:57:08, 15.77s/batch, batch_loss=11.8, batch_index=69, batch_size=256]

Validation:   9%|█▍              | 69/743 [18:18<2:57:08, 15.77s/batch, batch_loss=16.2, batch_index=70, batch_size=256]

Validation:   9%|█▌              | 70/743 [18:18<2:53:52, 15.50s/batch, batch_loss=16.2, batch_index=70, batch_size=256]

Validation:   9%|█▌              | 70/743 [18:33<2:53:52, 15.50s/batch, batch_loss=10.3, batch_index=71, batch_size=256]

Validation:  10%|█▌              | 71/743 [18:33<2:51:51, 15.34s/batch, batch_loss=10.3, batch_index=71, batch_size=256]

Validation:  10%|█▌              | 71/743 [18:48<2:51:51, 15.34s/batch, batch_loss=13.5, batch_index=72, batch_size=256]

Validation:  10%|█▌              | 72/743 [18:48<2:50:13, 15.22s/batch, batch_loss=13.5, batch_index=72, batch_size=256]

Validation:  10%|█▋                | 72/743 [19:04<2:50:13, 15.22s/batch, batch_loss=15, batch_index=73, batch_size=256]

Validation:  10%|█▊                | 73/743 [19:04<2:53:21, 15.52s/batch, batch_loss=15, batch_index=73, batch_size=256]

Validation:  10%|█▌              | 73/743 [19:20<2:53:21, 15.52s/batch, batch_loss=17.4, batch_index=74, batch_size=256]

Validation:  10%|█▌              | 74/743 [19:20<2:56:23, 15.82s/batch, batch_loss=17.4, batch_index=74, batch_size=256]

Validation:  10%|█▌              | 74/743 [19:36<2:56:23, 15.82s/batch, batch_loss=13.9, batch_index=75, batch_size=256]

Validation:  10%|█▌              | 75/743 [19:36<2:54:33, 15.68s/batch, batch_loss=13.9, batch_index=75, batch_size=256]

Validation:  10%|█▌              | 75/743 [19:50<2:54:33, 15.68s/batch, batch_loss=15.1, batch_index=76, batch_size=256]

Validation:  10%|█▋              | 76/743 [19:50<2:51:21, 15.41s/batch, batch_loss=15.1, batch_index=76, batch_size=256]

Validation:  10%|█▋              | 76/743 [20:06<2:51:21, 15.41s/batch, batch_loss=13.4, batch_index=77, batch_size=256]

Validation:  10%|█▋              | 77/743 [20:06<2:51:34, 15.46s/batch, batch_loss=13.4, batch_index=77, batch_size=256]

Validation:  10%|█▋              | 77/743 [20:21<2:51:34, 15.46s/batch, batch_loss=17.2, batch_index=78, batch_size=256]

Validation:  10%|█▋              | 78/743 [20:21<2:51:19, 15.46s/batch, batch_loss=17.2, batch_index=78, batch_size=256]

Validation:  10%|█▋              | 78/743 [20:38<2:51:19, 15.46s/batch, batch_loss=11.8, batch_index=79, batch_size=256]

Validation:  11%|█▋              | 79/743 [20:38<2:53:46, 15.70s/batch, batch_loss=11.8, batch_index=79, batch_size=256]

Validation:  11%|█▋              | 79/743 [20:54<2:53:46, 15.70s/batch, batch_loss=8.11, batch_index=80, batch_size=256]

Validation:  11%|█▋              | 80/743 [20:54<2:56:36, 15.98s/batch, batch_loss=8.11, batch_index=80, batch_size=256]

Validation:  11%|█▊               | 80/743 [21:10<2:56:36, 15.98s/batch, batch_loss=150, batch_index=81, batch_size=256]

Validation:  11%|█▊               | 81/743 [21:10<2:54:15, 15.79s/batch, batch_loss=150, batch_index=81, batch_size=256]

Validation:  11%|█▍           | 81/743 [21:24<2:54:15, 15.79s/batch, batch_loss=1.51e+3, batch_index=82, batch_size=256]

Validation:  11%|█▍           | 82/743 [21:24<2:49:50, 15.42s/batch, batch_loss=1.51e+3, batch_index=82, batch_size=256]

Validation:  11%|█▊              | 82/743 [21:40<2:49:50, 15.42s/batch, batch_loss=28.8, batch_index=83, batch_size=256]

Validation:  11%|█▊              | 83/743 [21:40<2:50:24, 15.49s/batch, batch_loss=28.8, batch_index=83, batch_size=256]

Validation:  11%|█▊              | 83/743 [21:55<2:50:24, 15.49s/batch, batch_loss=17.3, batch_index=84, batch_size=256]

Validation:  11%|█▊              | 84/743 [21:55<2:50:08, 15.49s/batch, batch_loss=17.3, batch_index=84, batch_size=256]

Validation:  11%|█▊              | 84/743 [22:11<2:50:08, 15.49s/batch, batch_loss=20.6, batch_index=85, batch_size=256]

Validation:  11%|█▊              | 85/743 [22:11<2:51:05, 15.60s/batch, batch_loss=20.6, batch_index=85, batch_size=256]

Validation:  11%|██                | 85/743 [22:27<2:51:05, 15.60s/batch, batch_loss=25, batch_index=86, batch_size=256]

Validation:  12%|██                | 86/743 [22:27<2:51:27, 15.66s/batch, batch_loss=25, batch_index=86, batch_size=256]

Validation:  12%|█▊              | 86/743 [22:43<2:51:27, 15.66s/batch, batch_loss=31.5, batch_index=87, batch_size=256]

Validation:  12%|█▊              | 87/743 [22:43<2:52:07, 15.74s/batch, batch_loss=31.5, batch_index=87, batch_size=256]

Validation:  12%|█▊              | 87/743 [22:58<2:52:07, 15.74s/batch, batch_loss=21.8, batch_index=88, batch_size=256]

Validation:  12%|█▉              | 88/743 [22:58<2:49:29, 15.53s/batch, batch_loss=21.8, batch_index=88, batch_size=256]

Validation:  12%|█▌           | 88/743 [23:13<2:49:29, 15.53s/batch, batch_loss=1.46e+4, batch_index=89, batch_size=256]

Validation:  12%|█▌           | 89/743 [23:13<2:48:47, 15.49s/batch, batch_loss=1.46e+4, batch_index=89, batch_size=256]

Validation:  12%|█▉              | 89/743 [23:29<2:48:47, 15.49s/batch, batch_loss=8.72, batch_index=90, batch_size=256]

Validation:  12%|█▉              | 90/743 [23:29<2:47:48, 15.42s/batch, batch_loss=8.72, batch_index=90, batch_size=256]

Validation:  12%|█▉              | 90/743 [23:47<2:47:48, 15.42s/batch, batch_loss=31.7, batch_index=91, batch_size=256]

Validation:  12%|█▉              | 91/743 [23:47<2:55:35, 16.16s/batch, batch_loss=31.7, batch_index=91, batch_size=256]

Validation:  12%|█▉              | 91/743 [24:01<2:55:35, 16.16s/batch, batch_loss=29.5, batch_index=92, batch_size=256]

Validation:  12%|█▉              | 92/743 [24:01<2:50:42, 15.73s/batch, batch_loss=29.5, batch_index=92, batch_size=256]

Validation:  12%|█▉              | 92/743 [24:16<2:50:42, 15.73s/batch, batch_loss=22.8, batch_index=93, batch_size=256]

Validation:  13%|██              | 93/743 [24:16<2:47:26, 15.46s/batch, batch_loss=22.8, batch_index=93, batch_size=256]

Validation:  13%|██              | 93/743 [24:31<2:47:26, 15.46s/batch, batch_loss=30.7, batch_index=94, batch_size=256]

Validation:  13%|██              | 94/743 [24:31<2:46:23, 15.38s/batch, batch_loss=30.7, batch_index=94, batch_size=256]

Validation:  13%|██▎               | 94/743 [24:46<2:46:23, 15.38s/batch, batch_loss=14, batch_index=95, batch_size=256]

Validation:  13%|██▎               | 95/743 [24:46<2:44:47, 15.26s/batch, batch_loss=14, batch_index=95, batch_size=256]

Validation:  13%|██              | 95/743 [25:02<2:44:47, 15.26s/batch, batch_loss=18.5, batch_index=96, batch_size=256]

Validation:  13%|██              | 96/743 [25:02<2:45:12, 15.32s/batch, batch_loss=18.5, batch_index=96, batch_size=256]

Validation:  13%|██▎               | 96/743 [25:17<2:45:12, 15.32s/batch, batch_loss=29, batch_index=97, batch_size=256]

Validation:  13%|██▎               | 97/743 [25:17<2:43:39, 15.20s/batch, batch_loss=29, batch_index=97, batch_size=256]

Validation:  13%|██              | 97/743 [25:32<2:43:39, 15.20s/batch, batch_loss=19.6, batch_index=98, batch_size=256]

Validation:  13%|██              | 98/743 [25:32<2:42:13, 15.09s/batch, batch_loss=19.6, batch_index=98, batch_size=256]

Validation:  13%|██              | 98/743 [25:49<2:42:13, 15.09s/batch, batch_loss=19.7, batch_index=99, batch_size=256]

Validation:  13%|██▏             | 99/743 [25:49<2:49:20, 15.78s/batch, batch_loss=19.7, batch_index=99, batch_size=256]

Validation:  13%|█▉             | 99/743 [26:05<2:49:20, 15.78s/batch, batch_loss=11.6, batch_index=100, batch_size=256]

Validation:  13%|█▉            | 100/743 [26:05<2:50:29, 15.91s/batch, batch_loss=11.6, batch_index=100, batch_size=256]

Validation:  13%|█▉            | 100/743 [26:20<2:50:29, 15.91s/batch, batch_loss=17.5, batch_index=101, batch_size=256]

Validation:  14%|█▉            | 101/743 [26:20<2:47:07, 15.62s/batch, batch_loss=17.5, batch_index=101, batch_size=256]

Validation:  14%|█▉            | 101/743 [26:34<2:47:07, 15.62s/batch, batch_loss=14.1, batch_index=102, batch_size=256]

Validation:  14%|█▉            | 102/743 [26:34<2:42:12, 15.18s/batch, batch_loss=14.1, batch_index=102, batch_size=256]

Validation:  14%|█▌         | 102/743 [26:49<2:42:12, 15.18s/batch, batch_loss=3.39e+3, batch_index=103, batch_size=256]

Validation:  14%|█▌         | 103/743 [26:49<2:40:02, 15.00s/batch, batch_loss=3.39e+3, batch_index=103, batch_size=256]

Validation:  14%|█▉            | 103/743 [27:04<2:40:02, 15.00s/batch, batch_loss=14.5, batch_index=104, batch_size=256]

Validation:  14%|█▉            | 104/743 [27:04<2:40:03, 15.03s/batch, batch_loss=14.5, batch_index=104, batch_size=256]

Validation:  14%|█▉            | 104/743 [27:20<2:40:03, 15.03s/batch, batch_loss=10.3, batch_index=105, batch_size=256]

Validation:  14%|█▉            | 105/743 [27:20<2:42:30, 15.28s/batch, batch_loss=10.3, batch_index=105, batch_size=256]

Validation:  14%|█▉            | 105/743 [27:36<2:42:30, 15.28s/batch, batch_loss=17.5, batch_index=106, batch_size=256]

Validation:  14%|█▉            | 106/743 [27:36<2:45:30, 15.59s/batch, batch_loss=17.5, batch_index=106, batch_size=256]

Validation:  14%|██▏            | 106/743 [27:54<2:45:30, 15.59s/batch, batch_loss=712, batch_index=107, batch_size=256]

Validation:  14%|██▏            | 107/743 [27:54<2:53:33, 16.37s/batch, batch_loss=712, batch_index=107, batch_size=256]

Validation:  14%|█▋          | 107/743 [28:10<2:53:33, 16.37s/batch, batch_loss=1.6e+3, batch_index=108, batch_size=256]

Validation:  15%|█▋          | 108/743 [28:10<2:51:38, 16.22s/batch, batch_loss=1.6e+3, batch_index=108, batch_size=256]

Validation:  15%|██▏            | 108/743 [28:27<2:51:38, 16.22s/batch, batch_loss=202, batch_index=109, batch_size=256]

Validation:  15%|██▏            | 109/743 [28:27<2:53:29, 16.42s/batch, batch_loss=202, batch_index=109, batch_size=256]

Validation:  15%|██            | 109/743 [28:43<2:53:29, 16.42s/batch, batch_loss=26.7, batch_index=110, batch_size=256]

Validation:  15%|██            | 110/743 [28:43<2:52:30, 16.35s/batch, batch_loss=26.7, batch_index=110, batch_size=256]

Validation:  15%|██            | 110/743 [29:00<2:52:30, 16.35s/batch, batch_loss=15.5, batch_index=111, batch_size=256]

Validation:  15%|██            | 111/743 [29:00<2:52:21, 16.36s/batch, batch_loss=15.5, batch_index=111, batch_size=256]

Validation:  15%|██            | 111/743 [29:15<2:52:21, 16.36s/batch, batch_loss=22.8, batch_index=112, batch_size=256]

Validation:  15%|██            | 112/743 [29:15<2:49:31, 16.12s/batch, batch_loss=22.8, batch_index=112, batch_size=256]

Validation:  15%|█▋         | 112/743 [29:30<2:49:31, 16.12s/batch, batch_loss=1.06e+4, batch_index=113, batch_size=256]

Validation:  15%|█▋         | 113/743 [29:30<2:45:11, 15.73s/batch, batch_loss=1.06e+4, batch_index=113, batch_size=256]

Validation:  15%|██▏           | 113/743 [29:45<2:45:11, 15.73s/batch, batch_loss=19.3, batch_index=114, batch_size=256]

Validation:  15%|██▏           | 114/743 [29:45<2:43:20, 15.58s/batch, batch_loss=19.3, batch_index=114, batch_size=256]

Validation:  15%|██▏           | 114/743 [30:01<2:43:20, 15.58s/batch, batch_loss=20.2, batch_index=115, batch_size=256]

Validation:  15%|██▏           | 115/743 [30:01<2:43:41, 15.64s/batch, batch_loss=20.2, batch_index=115, batch_size=256]

Validation:  15%|██▏           | 115/743 [30:17<2:43:41, 15.64s/batch, batch_loss=16.2, batch_index=116, batch_size=256]

Validation:  16%|██▏           | 116/743 [30:17<2:44:39, 15.76s/batch, batch_loss=16.2, batch_index=116, batch_size=256]

Validation:  16%|██▏           | 116/743 [30:33<2:44:39, 15.76s/batch, batch_loss=24.1, batch_index=117, batch_size=256]

Validation:  16%|██▏           | 117/743 [30:33<2:45:35, 15.87s/batch, batch_loss=24.1, batch_index=117, batch_size=256]

Validation:  16%|██▏           | 117/743 [30:50<2:45:35, 15.87s/batch, batch_loss=23.6, batch_index=118, batch_size=256]

Validation:  16%|██▏           | 118/743 [30:50<2:48:39, 16.19s/batch, batch_loss=23.6, batch_index=118, batch_size=256]

Validation:  16%|██▏           | 118/743 [31:05<2:48:39, 16.19s/batch, batch_loss=15.2, batch_index=119, batch_size=256]

Validation:  16%|██▏           | 119/743 [31:05<2:44:53, 15.85s/batch, batch_loss=15.2, batch_index=119, batch_size=256]

Validation:  16%|██▏           | 119/743 [31:21<2:44:53, 15.85s/batch, batch_loss=18.8, batch_index=120, batch_size=256]

Validation:  16%|██▎           | 120/743 [31:21<2:43:11, 15.72s/batch, batch_loss=18.8, batch_index=120, batch_size=256]

Validation:  16%|██▎           | 120/743 [31:38<2:43:11, 15.72s/batch, batch_loss=13.1, batch_index=121, batch_size=256]

Validation:  16%|██▎           | 121/743 [31:38<2:48:20, 16.24s/batch, batch_loss=13.1, batch_index=121, batch_size=256]

Validation:  16%|██▎           | 121/743 [31:54<2:48:20, 16.24s/batch, batch_loss=6.76, batch_index=122, batch_size=256]

Validation:  16%|██▎           | 122/743 [31:54<2:46:17, 16.07s/batch, batch_loss=6.76, batch_index=122, batch_size=256]

Validation:  16%|██▎           | 122/743 [32:08<2:46:17, 16.07s/batch, batch_loss=9.62, batch_index=123, batch_size=256]

Validation:  17%|██▎           | 123/743 [32:08<2:42:01, 15.68s/batch, batch_loss=9.62, batch_index=123, batch_size=256]

Validation:  17%|██▎           | 123/743 [32:24<2:42:01, 15.68s/batch, batch_loss=13.1, batch_index=124, batch_size=256]

Validation:  17%|██▎           | 124/743 [32:24<2:41:28, 15.65s/batch, batch_loss=13.1, batch_index=124, batch_size=256]

Validation:  17%|██▎           | 124/743 [32:37<2:41:28, 15.65s/batch, batch_loss=23.6, batch_index=125, batch_size=256]

Validation:  17%|██▎           | 125/743 [32:37<2:33:32, 14.91s/batch, batch_loss=23.6, batch_index=125, batch_size=256]

Validation:  17%|██▎           | 125/743 [32:50<2:33:32, 14.91s/batch, batch_loss=14.7, batch_index=126, batch_size=256]

Validation:  17%|██▎           | 126/743 [32:50<2:27:38, 14.36s/batch, batch_loss=14.7, batch_index=126, batch_size=256]

Validation:  17%|██▋             | 126/743 [33:05<2:27:38, 14.36s/batch, batch_loss=14, batch_index=127, batch_size=256]

Validation:  17%|██▋             | 127/743 [33:05<2:27:39, 14.38s/batch, batch_loss=14, batch_index=127, batch_size=256]

Validation:  17%|██▍           | 127/743 [33:20<2:27:39, 14.38s/batch, batch_loss=22.1, batch_index=128, batch_size=256]

Validation:  17%|██▍           | 128/743 [33:20<2:30:23, 14.67s/batch, batch_loss=22.1, batch_index=128, batch_size=256]

Validation:  17%|██▍           | 128/743 [33:34<2:30:23, 14.67s/batch, batch_loss=12.9, batch_index=129, batch_size=256]

Validation:  17%|██▍           | 129/743 [33:34<2:29:08, 14.57s/batch, batch_loss=12.9, batch_index=129, batch_size=256]

Validation:  17%|██▍           | 129/743 [33:51<2:29:08, 14.57s/batch, batch_loss=19.7, batch_index=130, batch_size=256]

Validation:  17%|██▍           | 130/743 [33:51<2:33:38, 15.04s/batch, batch_loss=19.7, batch_index=130, batch_size=256]

Validation:  17%|██▍           | 130/743 [34:06<2:33:38, 15.04s/batch, batch_loss=21.7, batch_index=131, batch_size=256]

Validation:  18%|██▍           | 131/743 [34:06<2:35:37, 15.26s/batch, batch_loss=21.7, batch_index=131, batch_size=256]

Validation:  18%|██▊             | 131/743 [34:22<2:35:37, 15.26s/batch, batch_loss=23, batch_index=132, batch_size=256]

Validation:  18%|██▊             | 132/743 [34:22<2:35:31, 15.27s/batch, batch_loss=23, batch_index=132, batch_size=256]

Validation:  18%|██▍           | 132/743 [34:36<2:35:31, 15.27s/batch, batch_loss=35.3, batch_index=133, batch_size=256]

Validation:  18%|██▌           | 133/743 [34:36<2:31:43, 14.92s/batch, batch_loss=35.3, batch_index=133, batch_size=256]

Validation:  18%|██▌           | 133/743 [34:50<2:31:43, 14.92s/batch, batch_loss=18.5, batch_index=134, batch_size=256]

Validation:  18%|██▌           | 134/743 [34:50<2:29:38, 14.74s/batch, batch_loss=18.5, batch_index=134, batch_size=256]

Validation:  18%|██▌           | 134/743 [35:05<2:29:38, 14.74s/batch, batch_loss=32.5, batch_index=135, batch_size=256]

Validation:  18%|██▌           | 135/743 [35:05<2:29:51, 14.79s/batch, batch_loss=32.5, batch_index=135, batch_size=256]

Validation:  18%|██▌           | 135/743 [35:21<2:29:51, 14.79s/batch, batch_loss=17.9, batch_index=136, batch_size=256]

Validation:  18%|██▌           | 136/743 [35:21<2:32:02, 15.03s/batch, batch_loss=17.9, batch_index=136, batch_size=256]

Validation:  18%|██▌           | 136/743 [35:36<2:32:02, 15.03s/batch, batch_loss=26.2, batch_index=137, batch_size=256]

Validation:  18%|██▌           | 137/743 [35:36<2:32:56, 15.14s/batch, batch_loss=26.2, batch_index=137, batch_size=256]

Validation:  18%|██▌           | 137/743 [35:54<2:32:56, 15.14s/batch, batch_loss=9.72, batch_index=138, batch_size=256]

Validation:  19%|██▌           | 138/743 [35:54<2:40:55, 15.96s/batch, batch_loss=9.72, batch_index=138, batch_size=256]

Validation:  19%|██▊            | 138/743 [36:08<2:40:55, 15.96s/batch, batch_loss=253, batch_index=139, batch_size=256]

Validation:  19%|██▊            | 139/743 [36:08<2:36:37, 15.56s/batch, batch_loss=253, batch_index=139, batch_size=256]

Validation:  19%|██▌           | 139/743 [36:24<2:36:37, 15.56s/batch, batch_loss=17.2, batch_index=140, batch_size=256]

Validation:  19%|██▋           | 140/743 [36:24<2:37:15, 15.65s/batch, batch_loss=17.2, batch_index=140, batch_size=256]

Validation:  19%|███             | 140/743 [36:39<2:37:15, 15.65s/batch, batch_loss=16, batch_index=141, batch_size=256]

Validation:  19%|███             | 141/743 [36:39<2:35:24, 15.49s/batch, batch_loss=16, batch_index=141, batch_size=256]

Validation:  19%|██▋           | 141/743 [36:54<2:35:24, 15.49s/batch, batch_loss=12.9, batch_index=142, batch_size=256]

Validation:  19%|██▋           | 142/743 [36:54<2:31:48, 15.16s/batch, batch_loss=12.9, batch_index=142, batch_size=256]

Validation:  19%|██▋           | 142/743 [37:09<2:31:48, 15.16s/batch, batch_loss=13.5, batch_index=143, batch_size=256]

Validation:  19%|██▋           | 143/743 [37:09<2:31:53, 15.19s/batch, batch_loss=13.5, batch_index=143, batch_size=256]

Validation:  19%|██▋           | 143/743 [37:24<2:31:53, 15.19s/batch, batch_loss=20.7, batch_index=144, batch_size=256]

Validation:  19%|██▋           | 144/743 [37:24<2:31:44, 15.20s/batch, batch_loss=20.7, batch_index=144, batch_size=256]

Validation:  19%|██▋           | 144/743 [37:40<2:31:44, 15.20s/batch, batch_loss=14.5, batch_index=145, batch_size=256]

Validation:  20%|██▋           | 145/743 [37:40<2:32:16, 15.28s/batch, batch_loss=14.5, batch_index=145, batch_size=256]

Validation:  20%|██▋           | 145/743 [37:57<2:32:16, 15.28s/batch, batch_loss=16.4, batch_index=146, batch_size=256]

Validation:  20%|██▊           | 146/743 [37:57<2:38:13, 15.90s/batch, batch_loss=16.4, batch_index=146, batch_size=256]

Validation:  20%|██▊           | 146/743 [38:13<2:38:13, 15.90s/batch, batch_loss=18.6, batch_index=147, batch_size=256]

Validation:  20%|██▊           | 147/743 [38:13<2:38:48, 15.99s/batch, batch_loss=18.6, batch_index=147, batch_size=256]

Validation:  20%|██▎         | 147/743 [38:29<2:38:48, 15.99s/batch, batch_loss=3.2e+4, batch_index=148, batch_size=256]

Validation:  20%|██▍         | 148/743 [38:29<2:37:44, 15.91s/batch, batch_loss=3.2e+4, batch_index=148, batch_size=256]

Validation:  20%|██▊           | 148/743 [38:44<2:37:44, 15.91s/batch, batch_loss=24.9, batch_index=149, batch_size=256]

Validation:  20%|██▊           | 149/743 [38:44<2:35:42, 15.73s/batch, batch_loss=24.9, batch_index=149, batch_size=256]

Validation:  20%|██▊           | 149/743 [39:01<2:35:42, 15.73s/batch, batch_loss=22.7, batch_index=150, batch_size=256]

Validation:  20%|██▊           | 150/743 [39:01<2:37:42, 15.96s/batch, batch_loss=22.7, batch_index=150, batch_size=256]

Validation:  20%|██▊           | 150/743 [39:18<2:37:42, 15.96s/batch, batch_loss=15.1, batch_index=151, batch_size=256]

Validation:  20%|██▊           | 151/743 [39:18<2:39:48, 16.20s/batch, batch_loss=15.1, batch_index=151, batch_size=256]

Validation:  20%|██▏        | 151/743 [39:37<2:39:48, 16.20s/batch, batch_loss=1.04e+4, batch_index=152, batch_size=256]

Validation:  20%|██▎        | 152/743 [39:37<2:50:03, 17.27s/batch, batch_loss=1.04e+4, batch_index=152, batch_size=256]

Validation:  20%|██▊           | 152/743 [39:54<2:50:03, 17.27s/batch, batch_loss=15.6, batch_index=153, batch_size=256]

Validation:  21%|██▉           | 153/743 [39:54<2:47:38, 17.05s/batch, batch_loss=15.6, batch_index=153, batch_size=256]

Validation:  21%|██▉           | 153/743 [40:10<2:47:38, 17.05s/batch, batch_loss=16.8, batch_index=154, batch_size=256]

Validation:  21%|██▉           | 154/743 [40:10<2:43:46, 16.68s/batch, batch_loss=16.8, batch_index=154, batch_size=256]

Validation:  21%|██▉           | 154/743 [40:26<2:43:46, 16.68s/batch, batch_loss=20.6, batch_index=155, batch_size=256]

Validation:  21%|██▉           | 155/743 [40:26<2:41:22, 16.47s/batch, batch_loss=20.6, batch_index=155, batch_size=256]

Validation:  21%|██▉           | 155/743 [40:42<2:41:22, 16.47s/batch, batch_loss=18.9, batch_index=156, batch_size=256]

Validation:  21%|██▉           | 156/743 [40:42<2:41:15, 16.48s/batch, batch_loss=18.9, batch_index=156, batch_size=256]

Validation:  21%|███▎            | 156/743 [40:58<2:41:15, 16.48s/batch, batch_loss=20, batch_index=157, batch_size=256]

Validation:  21%|███▍            | 157/743 [40:58<2:39:42, 16.35s/batch, batch_loss=20, batch_index=157, batch_size=256]

Validation:  21%|██▉           | 157/743 [41:14<2:39:42, 16.35s/batch, batch_loss=23.4, batch_index=158, batch_size=256]

Validation:  21%|██▉           | 158/743 [41:14<2:37:07, 16.12s/batch, batch_loss=23.4, batch_index=158, batch_size=256]

Validation:  21%|██▉           | 158/743 [41:29<2:37:07, 16.12s/batch, batch_loss=25.6, batch_index=159, batch_size=256]

Validation:  21%|██▉           | 159/743 [41:29<2:33:09, 15.73s/batch, batch_loss=25.6, batch_index=159, batch_size=256]

Validation:  21%|███▍            | 159/743 [41:45<2:33:09, 15.73s/batch, batch_loss=16, batch_index=160, batch_size=256]

Validation:  22%|███▍            | 160/743 [41:45<2:35:02, 15.96s/batch, batch_loss=16, batch_index=160, batch_size=256]

Validation:  22%|███           | 160/743 [42:00<2:35:02, 15.96s/batch, batch_loss=16.2, batch_index=161, batch_size=256]

Validation:  22%|███           | 161/743 [42:00<2:32:46, 15.75s/batch, batch_loss=16.2, batch_index=161, batch_size=256]

Validation:  22%|███           | 161/743 [42:17<2:32:46, 15.75s/batch, batch_loss=20.3, batch_index=162, batch_size=256]

Validation:  22%|███           | 162/743 [42:17<2:34:06, 15.92s/batch, batch_loss=20.3, batch_index=162, batch_size=256]

Validation:  22%|███▍            | 162/743 [42:33<2:34:06, 15.92s/batch, batch_loss=14, batch_index=163, batch_size=256]

Validation:  22%|███▌            | 163/743 [42:33<2:33:40, 15.90s/batch, batch_loss=14, batch_index=163, batch_size=256]

Validation:  22%|███▌            | 163/743 [42:48<2:33:40, 15.90s/batch, batch_loss=11, batch_index=164, batch_size=256]

Validation:  22%|███▌            | 164/743 [42:48<2:32:50, 15.84s/batch, batch_loss=11, batch_index=164, batch_size=256]

Validation:  22%|███▌            | 164/743 [43:04<2:32:50, 15.84s/batch, batch_loss=15, batch_index=165, batch_size=256]

Validation:  22%|███▌            | 165/743 [43:04<2:33:11, 15.90s/batch, batch_loss=15, batch_index=165, batch_size=256]

Validation:  22%|███           | 165/743 [43:20<2:33:11, 15.90s/batch, batch_loss=13.1, batch_index=166, batch_size=256]

Validation:  22%|███▏          | 166/743 [43:20<2:32:05, 15.82s/batch, batch_loss=13.1, batch_index=166, batch_size=256]

Validation:  22%|███▏          | 166/743 [43:35<2:32:05, 15.82s/batch, batch_loss=14.9, batch_index=167, batch_size=256]

Validation:  22%|███▏          | 167/743 [43:35<2:29:09, 15.54s/batch, batch_loss=14.9, batch_index=167, batch_size=256]

Validation:  22%|███▏          | 167/743 [43:50<2:29:09, 15.54s/batch, batch_loss=23.1, batch_index=168, batch_size=256]

Validation:  23%|███▏          | 168/743 [43:50<2:27:57, 15.44s/batch, batch_loss=23.1, batch_index=168, batch_size=256]

Validation:  23%|███▏          | 168/743 [44:07<2:27:57, 15.44s/batch, batch_loss=24.6, batch_index=169, batch_size=256]

Validation:  23%|███▏          | 169/743 [44:07<2:31:19, 15.82s/batch, batch_loss=24.6, batch_index=169, batch_size=256]

Validation:  23%|███▏          | 169/743 [44:22<2:31:19, 15.82s/batch, batch_loss=20.4, batch_index=170, batch_size=256]

Validation:  23%|███▏          | 170/743 [44:22<2:30:40, 15.78s/batch, batch_loss=20.4, batch_index=170, batch_size=256]

Validation:  23%|███▏          | 170/743 [44:38<2:30:40, 15.78s/batch, batch_loss=20.5, batch_index=171, batch_size=256]

Validation:  23%|███▏          | 171/743 [44:38<2:29:33, 15.69s/batch, batch_loss=20.5, batch_index=171, batch_size=256]

Validation:  23%|███▏          | 171/743 [44:53<2:29:33, 15.69s/batch, batch_loss=18.9, batch_index=172, batch_size=256]

Validation:  23%|███▏          | 172/743 [44:53<2:27:32, 15.50s/batch, batch_loss=18.9, batch_index=172, batch_size=256]

Validation:  23%|███▏          | 172/743 [45:09<2:27:32, 15.50s/batch, batch_loss=23.3, batch_index=173, batch_size=256]

Validation:  23%|███▎          | 173/743 [45:09<2:28:44, 15.66s/batch, batch_loss=23.3, batch_index=173, batch_size=256]

Validation:  23%|███▎          | 173/743 [45:24<2:28:44, 15.66s/batch, batch_loss=15.3, batch_index=174, batch_size=256]

Validation:  23%|███▎          | 174/743 [45:24<2:26:47, 15.48s/batch, batch_loss=15.3, batch_index=174, batch_size=256]

Validation:  23%|███▎          | 174/743 [45:40<2:26:47, 15.48s/batch, batch_loss=21.3, batch_index=175, batch_size=256]

Validation:  24%|███▎          | 175/743 [45:40<2:27:53, 15.62s/batch, batch_loss=21.3, batch_index=175, batch_size=256]

Validation:  24%|███▎          | 175/743 [45:55<2:27:53, 15.62s/batch, batch_loss=17.8, batch_index=176, batch_size=256]

Validation:  24%|███▎          | 176/743 [45:55<2:26:12, 15.47s/batch, batch_loss=17.8, batch_index=176, batch_size=256]

Validation:  24%|███▎          | 176/743 [46:10<2:26:12, 15.47s/batch, batch_loss=16.2, batch_index=177, batch_size=256]

Validation:  24%|███▎          | 177/743 [46:10<2:23:33, 15.22s/batch, batch_loss=16.2, batch_index=177, batch_size=256]

Validation:  24%|███▎          | 177/743 [46:26<2:23:33, 15.22s/batch, batch_loss=20.2, batch_index=178, batch_size=256]

Validation:  24%|███▎          | 178/743 [46:26<2:25:23, 15.44s/batch, batch_loss=20.2, batch_index=178, batch_size=256]

Validation:  24%|███▎          | 178/743 [46:41<2:25:23, 15.44s/batch, batch_loss=18.6, batch_index=179, batch_size=256]

Validation:  24%|███▎          | 179/743 [46:41<2:23:39, 15.28s/batch, batch_loss=18.6, batch_index=179, batch_size=256]

Validation:  24%|██▋        | 179/743 [46:57<2:23:39, 15.28s/batch, batch_loss=7.24e+3, batch_index=180, batch_size=256]

Validation:  24%|██▋        | 180/743 [46:57<2:25:27, 15.50s/batch, batch_loss=7.24e+3, batch_index=180, batch_size=256]

Validation:  24%|███▍          | 180/743 [47:12<2:25:27, 15.50s/batch, batch_loss=17.2, batch_index=181, batch_size=256]

Validation:  24%|███▍          | 181/743 [47:12<2:23:34, 15.33s/batch, batch_loss=17.2, batch_index=181, batch_size=256]

Validation:  24%|███▍          | 181/743 [47:26<2:23:34, 15.33s/batch, batch_loss=18.4, batch_index=182, batch_size=256]

Validation:  24%|███▍          | 182/743 [47:26<2:21:49, 15.17s/batch, batch_loss=18.4, batch_index=182, batch_size=256]

Validation:  24%|███▍          | 182/743 [47:41<2:21:49, 15.17s/batch, batch_loss=15.9, batch_index=183, batch_size=256]

Validation:  25%|███▍          | 183/743 [47:41<2:20:09, 15.02s/batch, batch_loss=15.9, batch_index=183, batch_size=256]

Validation:  25%|███▍          | 183/743 [48:00<2:20:09, 15.02s/batch, batch_loss=10.9, batch_index=184, batch_size=256]

Validation:  25%|███▍          | 184/743 [48:00<2:29:45, 16.07s/batch, batch_loss=10.9, batch_index=184, batch_size=256]

Validation:  25%|███▍          | 184/743 [48:15<2:29:45, 16.07s/batch, batch_loss=16.8, batch_index=185, batch_size=256]

Validation:  25%|███▍          | 185/743 [48:15<2:27:05, 15.82s/batch, batch_loss=16.8, batch_index=185, batch_size=256]

Validation:  25%|███▍          | 185/743 [48:31<2:27:05, 15.82s/batch, batch_loss=23.1, batch_index=186, batch_size=256]

Validation:  25%|███▌          | 186/743 [48:31<2:27:06, 15.85s/batch, batch_loss=23.1, batch_index=186, batch_size=256]

Validation:  25%|███▌          | 186/743 [48:46<2:27:06, 15.85s/batch, batch_loss=26.4, batch_index=187, batch_size=256]

Validation:  25%|███▌          | 187/743 [48:46<2:25:43, 15.73s/batch, batch_loss=26.4, batch_index=187, batch_size=256]

Validation:  25%|███▌          | 187/743 [49:01<2:25:43, 15.73s/batch, batch_loss=14.8, batch_index=188, batch_size=256]

Validation:  25%|███▌          | 188/743 [49:01<2:23:25, 15.51s/batch, batch_loss=14.8, batch_index=188, batch_size=256]

Validation:  25%|███▌          | 188/743 [49:16<2:23:25, 15.51s/batch, batch_loss=17.4, batch_index=189, batch_size=256]

Validation:  25%|███▌          | 189/743 [49:16<2:22:46, 15.46s/batch, batch_loss=17.4, batch_index=189, batch_size=256]

Validation:  25%|███▊           | 189/743 [49:35<2:22:46, 15.46s/batch, batch_loss=975, batch_index=190, batch_size=256]

Validation:  26%|███▊           | 190/743 [49:35<2:29:49, 16.26s/batch, batch_loss=975, batch_index=190, batch_size=256]

Validation:  26%|███▌          | 190/743 [49:49<2:29:49, 16.26s/batch, batch_loss=20.5, batch_index=191, batch_size=256]

Validation:  26%|███▌          | 191/743 [49:49<2:25:29, 15.81s/batch, batch_loss=20.5, batch_index=191, batch_size=256]

Validation:  26%|███▌          | 191/743 [50:04<2:25:29, 15.81s/batch, batch_loss=12.6, batch_index=192, batch_size=256]

Validation:  26%|███▌          | 192/743 [50:04<2:21:44, 15.44s/batch, batch_loss=12.6, batch_index=192, batch_size=256]

Validation:  26%|███▌          | 192/743 [50:19<2:21:44, 15.44s/batch, batch_loss=17.6, batch_index=193, batch_size=256]

Validation:  26%|███▋          | 193/743 [50:19<2:19:42, 15.24s/batch, batch_loss=17.6, batch_index=193, batch_size=256]

Validation:  26%|████▏           | 193/743 [50:33<2:19:42, 15.24s/batch, batch_loss=18, batch_index=194, batch_size=256]

Validation:  26%|████▏           | 194/743 [50:33<2:17:49, 15.06s/batch, batch_loss=18, batch_index=194, batch_size=256]

Validation:  26%|███▋          | 194/743 [50:50<2:17:49, 15.06s/batch, batch_loss=11.4, batch_index=195, batch_size=256]

Validation:  26%|███▋          | 195/743 [50:50<2:22:34, 15.61s/batch, batch_loss=11.4, batch_index=195, batch_size=256]

Validation:  26%|███▋          | 195/743 [51:06<2:22:34, 15.61s/batch, batch_loss=18.6, batch_index=196, batch_size=256]

Validation:  26%|███▋          | 196/743 [51:06<2:23:48, 15.77s/batch, batch_loss=18.6, batch_index=196, batch_size=256]

Validation:  26%|███▋          | 196/743 [51:21<2:23:48, 15.77s/batch, batch_loss=9.96, batch_index=197, batch_size=256]

Validation:  27%|███▋          | 197/743 [51:21<2:20:50, 15.48s/batch, batch_loss=9.96, batch_index=197, batch_size=256]

Validation:  27%|███▋          | 197/743 [51:36<2:20:50, 15.48s/batch, batch_loss=20.3, batch_index=198, batch_size=256]

Validation:  27%|███▋          | 198/743 [51:36<2:18:10, 15.21s/batch, batch_loss=20.3, batch_index=198, batch_size=256]

Validation:  27%|███▋          | 198/743 [51:50<2:18:10, 15.21s/batch, batch_loss=19.6, batch_index=199, batch_size=256]

Validation:  27%|███▋          | 199/743 [51:50<2:14:17, 14.81s/batch, batch_loss=19.6, batch_index=199, batch_size=256]

Validation:  27%|████           | 199/743 [52:04<2:14:17, 14.81s/batch, batch_loss=281, batch_index=200, batch_size=256]

Validation:  27%|████           | 200/743 [52:04<2:13:40, 14.77s/batch, batch_loss=281, batch_index=200, batch_size=256]

Validation:  27%|███▊          | 200/743 [52:19<2:13:40, 14.77s/batch, batch_loss=30.9, batch_index=201, batch_size=256]

Validation:  27%|███▊          | 201/743 [52:19<2:12:24, 14.66s/batch, batch_loss=30.9, batch_index=201, batch_size=256]

Validation:  27%|████▎           | 201/743 [52:34<2:12:24, 14.66s/batch, batch_loss=23, batch_index=202, batch_size=256]

Validation:  27%|████▎           | 202/743 [52:34<2:15:05, 14.98s/batch, batch_loss=23, batch_index=202, batch_size=256]

Validation:  27%|███▊          | 202/743 [52:51<2:15:05, 14.98s/batch, batch_loss=16.8, batch_index=203, batch_size=256]

Validation:  27%|███▊          | 203/743 [52:51<2:18:14, 15.36s/batch, batch_loss=16.8, batch_index=203, batch_size=256]

Validation:  27%|███▊          | 203/743 [53:06<2:18:14, 15.36s/batch, batch_loss=19.3, batch_index=204, batch_size=256]

Validation:  27%|███▊          | 204/743 [53:06<2:18:56, 15.47s/batch, batch_loss=19.3, batch_index=204, batch_size=256]

Validation:  27%|███▊          | 204/743 [53:22<2:18:56, 15.47s/batch, batch_loss=20.1, batch_index=205, batch_size=256]

Validation:  28%|███▊          | 205/743 [53:22<2:20:05, 15.62s/batch, batch_loss=20.1, batch_index=205, batch_size=256]

Validation:  28%|███▊          | 205/743 [53:38<2:20:05, 15.62s/batch, batch_loss=13.1, batch_index=206, batch_size=256]

Validation:  28%|███▉          | 206/743 [53:38<2:18:34, 15.48s/batch, batch_loss=13.1, batch_index=206, batch_size=256]

Validation:  28%|███▉          | 206/743 [53:53<2:18:34, 15.48s/batch, batch_loss=20.4, batch_index=207, batch_size=256]

Validation:  28%|███▉          | 207/743 [53:53<2:19:23, 15.60s/batch, batch_loss=20.4, batch_index=207, batch_size=256]

Validation:  28%|███▉          | 207/743 [54:09<2:19:23, 15.60s/batch, batch_loss=18.2, batch_index=208, batch_size=256]

Validation:  28%|███▉          | 208/743 [54:09<2:18:59, 15.59s/batch, batch_loss=18.2, batch_index=208, batch_size=256]

Validation:  28%|███▉          | 208/743 [54:24<2:18:59, 15.59s/batch, batch_loss=8.76, batch_index=209, batch_size=256]

Validation:  28%|███▉          | 209/743 [54:24<2:17:53, 15.49s/batch, batch_loss=8.76, batch_index=209, batch_size=256]

Validation:  28%|███▉          | 209/743 [54:40<2:17:53, 15.49s/batch, batch_loss=11.3, batch_index=210, batch_size=256]

Validation:  28%|███▉          | 210/743 [54:40<2:17:43, 15.50s/batch, batch_loss=11.3, batch_index=210, batch_size=256]

Validation:  28%|███▉          | 210/743 [54:55<2:17:43, 15.50s/batch, batch_loss=15.3, batch_index=211, batch_size=256]

Validation:  28%|███▉          | 211/743 [54:55<2:15:50, 15.32s/batch, batch_loss=15.3, batch_index=211, batch_size=256]

Validation:  28%|████▌           | 211/743 [55:10<2:15:50, 15.32s/batch, batch_loss=15, batch_index=212, batch_size=256]

Validation:  29%|████▌           | 212/743 [55:10<2:14:22, 15.18s/batch, batch_loss=15, batch_index=212, batch_size=256]

Validation:  29%|████▎          | 212/743 [55:25<2:14:22, 15.18s/batch, batch_loss=539, batch_index=213, batch_size=256]

Validation:  29%|████▎          | 213/743 [55:25<2:14:26, 15.22s/batch, batch_loss=539, batch_index=213, batch_size=256]

Validation:  29%|████          | 213/743 [55:40<2:14:26, 15.22s/batch, batch_loss=12.7, batch_index=214, batch_size=256]

Validation:  29%|████          | 214/743 [55:40<2:14:07, 15.21s/batch, batch_loss=12.7, batch_index=214, batch_size=256]

Validation:  29%|████▌           | 214/743 [55:55<2:14:07, 15.21s/batch, batch_loss=15, batch_index=215, batch_size=256]

Validation:  29%|████▋           | 215/743 [55:55<2:13:07, 15.13s/batch, batch_loss=15, batch_index=215, batch_size=256]

Validation:  29%|███▏       | 215/743 [56:09<2:13:07, 15.13s/batch, batch_loss=2.57e+3, batch_index=216, batch_size=256]

Validation:  29%|███▏       | 216/743 [56:09<2:09:14, 14.71s/batch, batch_loss=2.57e+3, batch_index=216, batch_size=256]

Validation:  29%|████          | 216/743 [56:23<2:09:14, 14.71s/batch, batch_loss=20.5, batch_index=217, batch_size=256]

Validation:  29%|████          | 217/743 [56:23<2:06:59, 14.49s/batch, batch_loss=20.5, batch_index=217, batch_size=256]

Validation:  29%|████          | 217/743 [56:37<2:06:59, 14.49s/batch, batch_loss=14.5, batch_index=218, batch_size=256]

Validation:  29%|████          | 218/743 [56:37<2:06:18, 14.44s/batch, batch_loss=14.5, batch_index=218, batch_size=256]

Validation:  29%|████          | 218/743 [56:51<2:06:18, 14.44s/batch, batch_loss=28.6, batch_index=219, batch_size=256]

Validation:  29%|████▏         | 219/743 [56:51<2:06:11, 14.45s/batch, batch_loss=28.6, batch_index=219, batch_size=256]

Validation:  29%|████▏         | 219/743 [57:07<2:06:11, 14.45s/batch, batch_loss=25.6, batch_index=220, batch_size=256]

Validation:  30%|████▏         | 220/743 [57:07<2:07:50, 14.67s/batch, batch_loss=25.6, batch_index=220, batch_size=256]

Validation:  30%|████▋           | 220/743 [57:22<2:07:50, 14.67s/batch, batch_loss=18, batch_index=221, batch_size=256]

Validation:  30%|████▊           | 221/743 [57:22<2:08:59, 14.83s/batch, batch_loss=18, batch_index=221, batch_size=256]

Validation:  30%|████▏         | 221/743 [57:37<2:08:59, 14.83s/batch, batch_loss=12.9, batch_index=222, batch_size=256]

Validation:  30%|████▏         | 222/743 [57:37<2:08:37, 14.81s/batch, batch_loss=12.9, batch_index=222, batch_size=256]

Validation:  30%|████▏         | 222/743 [57:52<2:08:37, 14.81s/batch, batch_loss=12.1, batch_index=223, batch_size=256]

Validation:  30%|████▏         | 223/743 [57:52<2:08:35, 14.84s/batch, batch_loss=12.1, batch_index=223, batch_size=256]

Validation:  30%|████▏         | 223/743 [58:06<2:08:35, 14.84s/batch, batch_loss=11.9, batch_index=224, batch_size=256]

Validation:  30%|████▏         | 224/743 [58:06<2:08:16, 14.83s/batch, batch_loss=11.9, batch_index=224, batch_size=256]

Validation:  30%|███▎       | 224/743 [58:21<2:08:16, 14.83s/batch, batch_loss=4.94e+3, batch_index=225, batch_size=256]

Validation:  30%|███▎       | 225/743 [58:21<2:08:20, 14.87s/batch, batch_loss=4.94e+3, batch_index=225, batch_size=256]

Validation:  30%|████▏         | 225/743 [58:36<2:08:20, 14.87s/batch, batch_loss=17.1, batch_index=226, batch_size=256]

Validation:  30%|████▎         | 226/743 [58:36<2:08:42, 14.94s/batch, batch_loss=17.1, batch_index=226, batch_size=256]

Validation:  30%|████▎         | 226/743 [58:51<2:08:42, 14.94s/batch, batch_loss=17.1, batch_index=227, batch_size=256]

Validation:  31%|████▎         | 227/743 [58:51<2:08:41, 14.96s/batch, batch_loss=17.1, batch_index=227, batch_size=256]

Validation:  31%|████▎         | 227/743 [59:07<2:08:41, 14.96s/batch, batch_loss=15.2, batch_index=228, batch_size=256]

Validation:  31%|████▎         | 228/743 [59:07<2:10:14, 15.17s/batch, batch_loss=15.2, batch_index=228, batch_size=256]

Validation:  31%|████▎         | 228/743 [59:23<2:10:14, 15.17s/batch, batch_loss=18.3, batch_index=229, batch_size=256]

Validation:  31%|████▎         | 229/743 [59:23<2:11:01, 15.29s/batch, batch_loss=18.3, batch_index=229, batch_size=256]

Validation:  31%|████▎         | 229/743 [59:37<2:11:01, 15.29s/batch, batch_loss=22.2, batch_index=230, batch_size=256]

Validation:  31%|████▎         | 230/743 [59:37<2:08:34, 15.04s/batch, batch_loss=22.2, batch_index=230, batch_size=256]

Validation:  31%|███▍       | 230/743 [59:53<2:08:34, 15.04s/batch, batch_loss=3.23e+4, batch_index=231, batch_size=256]

Validation:  31%|███▍       | 231/743 [59:53<2:11:14, 15.38s/batch, batch_loss=3.23e+4, batch_index=231, batch_size=256]

Validation:  31%|███▋        | 231/743 [1:00:09<2:11:14, 15.38s/batch, batch_loss=18.4, batch_index=232, batch_size=256]

Validation:  31%|███▋        | 232/743 [1:00:09<2:12:12, 15.52s/batch, batch_loss=18.4, batch_index=232, batch_size=256]

Validation:  31%|███▋        | 232/743 [1:00:24<2:12:12, 15.52s/batch, batch_loss=10.3, batch_index=233, batch_size=256]

Validation:  31%|███▊        | 233/743 [1:00:24<2:10:03, 15.30s/batch, batch_loss=10.3, batch_index=233, batch_size=256]

Validation:  31%|███▊        | 233/743 [1:00:39<2:10:03, 15.30s/batch, batch_loss=14.4, batch_index=234, batch_size=256]

Validation:  31%|███▊        | 234/743 [1:00:39<2:09:35, 15.28s/batch, batch_loss=14.4, batch_index=234, batch_size=256]

Validation:  31%|███▊        | 234/743 [1:00:55<2:09:35, 15.28s/batch, batch_loss=16.9, batch_index=235, batch_size=256]

Validation:  32%|███▊        | 235/743 [1:00:55<2:10:03, 15.36s/batch, batch_loss=16.9, batch_index=235, batch_size=256]

Validation:  32%|███▊        | 235/743 [1:01:10<2:10:03, 15.36s/batch, batch_loss=4.76, batch_index=236, batch_size=256]

Validation:  32%|███▊        | 236/743 [1:01:10<2:09:53, 15.37s/batch, batch_loss=4.76, batch_index=236, batch_size=256]

Validation:  32%|███▊        | 236/743 [1:01:29<2:09:53, 15.37s/batch, batch_loss=21.2, batch_index=237, batch_size=256]

Validation:  32%|███▊        | 237/743 [1:01:29<2:18:35, 16.43s/batch, batch_loss=21.2, batch_index=237, batch_size=256]

Validation:  32%|███▊        | 237/743 [1:01:44<2:18:35, 16.43s/batch, batch_loss=15.7, batch_index=238, batch_size=256]

Validation:  32%|███▊        | 238/743 [1:01:44<2:15:00, 16.04s/batch, batch_loss=15.7, batch_index=238, batch_size=256]

Validation:  32%|██▉      | 238/743 [1:02:00<2:15:00, 16.04s/batch, batch_loss=4.49e+3, batch_index=239, batch_size=256]

Validation:  32%|██▉      | 239/743 [1:02:00<2:15:02, 16.08s/batch, batch_loss=4.49e+3, batch_index=239, batch_size=256]

Validation:  32%|███▊        | 239/743 [1:02:16<2:15:02, 16.08s/batch, batch_loss=19.7, batch_index=240, batch_size=256]

Validation:  32%|███▉        | 240/743 [1:02:16<2:13:34, 15.93s/batch, batch_loss=19.7, batch_index=240, batch_size=256]

Validation:  32%|███▉        | 240/743 [1:02:34<2:13:34, 15.93s/batch, batch_loss=17.3, batch_index=241, batch_size=256]

Validation:  32%|███▉        | 241/743 [1:02:34<2:18:20, 16.54s/batch, batch_loss=17.3, batch_index=241, batch_size=256]

Validation:  32%|████▏        | 241/743 [1:02:47<2:18:20, 16.54s/batch, batch_loss=234, batch_index=242, batch_size=256]

Validation:  33%|████▏        | 242/743 [1:02:47<2:10:49, 15.67s/batch, batch_loss=234, batch_index=242, batch_size=256]

Validation:  33%|███▉        | 242/743 [1:03:03<2:10:49, 15.67s/batch, batch_loss=12.1, batch_index=243, batch_size=256]

Validation:  33%|███▉        | 243/743 [1:03:03<2:09:23, 15.53s/batch, batch_loss=12.1, batch_index=243, batch_size=256]

Validation:  33%|███▉        | 243/743 [1:03:18<2:09:23, 15.53s/batch, batch_loss=15.9, batch_index=244, batch_size=256]

Validation:  33%|███▉        | 244/743 [1:03:18<2:08:24, 15.44s/batch, batch_loss=15.9, batch_index=244, batch_size=256]

Validation:  33%|███▉        | 244/743 [1:03:37<2:08:24, 15.44s/batch, batch_loss=21.4, batch_index=245, batch_size=256]

Validation:  33%|███▉        | 245/743 [1:03:37<2:16:51, 16.49s/batch, batch_loss=21.4, batch_index=245, batch_size=256]

Validation:  33%|███▉        | 245/743 [1:03:52<2:16:51, 16.49s/batch, batch_loss=6.96, batch_index=246, batch_size=256]

Validation:  33%|███▉        | 246/743 [1:03:52<2:12:45, 16.03s/batch, batch_loss=6.96, batch_index=246, batch_size=256]

Validation:  33%|███▉        | 246/743 [1:04:07<2:12:45, 16.03s/batch, batch_loss=15.8, batch_index=247, batch_size=256]

Validation:  33%|███▉        | 247/743 [1:04:07<2:10:59, 15.85s/batch, batch_loss=15.8, batch_index=247, batch_size=256]

Validation:  33%|███▉        | 247/743 [1:04:22<2:10:59, 15.85s/batch, batch_loss=34.5, batch_index=248, batch_size=256]

Validation:  33%|████        | 248/743 [1:04:22<2:08:24, 15.56s/batch, batch_loss=34.5, batch_index=248, batch_size=256]

Validation:  33%|████        | 248/743 [1:04:37<2:08:24, 15.56s/batch, batch_loss=13.5, batch_index=249, batch_size=256]

Validation:  34%|████        | 249/743 [1:04:37<2:06:36, 15.38s/batch, batch_loss=13.5, batch_index=249, batch_size=256]

Validation:  34%|████        | 249/743 [1:04:53<2:06:36, 15.38s/batch, batch_loss=18.6, batch_index=250, batch_size=256]

Validation:  34%|████        | 250/743 [1:04:53<2:06:39, 15.41s/batch, batch_loss=18.6, batch_index=250, batch_size=256]

Validation:  34%|████        | 250/743 [1:05:08<2:06:39, 15.41s/batch, batch_loss=21.2, batch_index=251, batch_size=256]

Validation:  34%|████        | 251/743 [1:05:08<2:06:29, 15.43s/batch, batch_loss=21.2, batch_index=251, batch_size=256]

Validation:  34%|████        | 251/743 [1:05:23<2:06:29, 15.43s/batch, batch_loss=20.6, batch_index=252, batch_size=256]

Validation:  34%|████        | 252/743 [1:05:23<2:05:46, 15.37s/batch, batch_loss=20.6, batch_index=252, batch_size=256]

Validation:  34%|████        | 252/743 [1:05:40<2:05:46, 15.37s/batch, batch_loss=20.9, batch_index=253, batch_size=256]

Validation:  34%|████        | 253/743 [1:05:40<2:09:54, 15.91s/batch, batch_loss=20.9, batch_index=253, batch_size=256]

Validation:  34%|███      | 253/743 [1:05:54<2:09:54, 15.91s/batch, batch_loss=1.15e+4, batch_index=254, batch_size=256]

Validation:  34%|███      | 254/743 [1:05:54<2:04:52, 15.32s/batch, batch_loss=1.15e+4, batch_index=254, batch_size=256]

Validation:  34%|███      | 254/743 [1:06:09<2:04:52, 15.32s/batch, batch_loss=2.44e+3, batch_index=255, batch_size=256]

Validation:  34%|███      | 255/743 [1:06:09<2:03:40, 15.21s/batch, batch_loss=2.44e+3, batch_index=255, batch_size=256]

Validation:  34%|████▊         | 255/743 [1:06:24<2:03:40, 15.21s/batch, batch_loss=19, batch_index=256, batch_size=256]

Validation:  34%|████▊         | 256/743 [1:06:24<2:02:52, 15.14s/batch, batch_loss=19, batch_index=256, batch_size=256]

Validation:  34%|████▏       | 256/743 [1:06:38<2:02:52, 15.14s/batch, batch_loss=20.8, batch_index=257, batch_size=256]

Validation:  35%|████▏       | 257/743 [1:06:38<1:59:33, 14.76s/batch, batch_loss=20.8, batch_index=257, batch_size=256]

Validation:  35%|████▏       | 257/743 [1:06:53<1:59:33, 14.76s/batch, batch_loss=15.2, batch_index=258, batch_size=256]

Validation:  35%|████▏       | 258/743 [1:06:53<1:58:24, 14.65s/batch, batch_loss=15.2, batch_index=258, batch_size=256]

Validation:  35%|████▏       | 258/743 [1:07:08<1:58:24, 14.65s/batch, batch_loss=3.63, batch_index=259, batch_size=256]

Validation:  35%|████▏       | 259/743 [1:07:08<1:59:34, 14.82s/batch, batch_loss=3.63, batch_index=259, batch_size=256]

Validation:  35%|████▏       | 259/743 [1:07:23<1:59:34, 14.82s/batch, batch_loss=3.22, batch_index=260, batch_size=256]

Validation:  35%|████▏       | 260/743 [1:07:23<2:01:12, 15.06s/batch, batch_loss=3.22, batch_index=260, batch_size=256]

Validation:  35%|████▏       | 260/743 [1:07:41<2:01:12, 15.06s/batch, batch_loss=7.99, batch_index=261, batch_size=256]

Validation:  35%|████▏       | 261/743 [1:07:41<2:07:10, 15.83s/batch, batch_loss=7.99, batch_index=261, batch_size=256]

Validation:  35%|████▏       | 261/743 [1:07:57<2:07:10, 15.83s/batch, batch_loss=26.9, batch_index=262, batch_size=256]

Validation:  35%|████▏       | 262/743 [1:07:57<2:06:37, 15.80s/batch, batch_loss=26.9, batch_index=262, batch_size=256]

Validation:  35%|███▏     | 262/743 [1:08:12<2:06:37, 15.80s/batch, batch_loss=2.72e+3, batch_index=263, batch_size=256]

Validation:  35%|███▏     | 263/743 [1:08:12<2:05:35, 15.70s/batch, batch_loss=2.72e+3, batch_index=263, batch_size=256]

Validation:  35%|████▏       | 263/743 [1:08:28<2:05:35, 15.70s/batch, batch_loss=10.1, batch_index=264, batch_size=256]

Validation:  36%|████▎       | 264/743 [1:08:28<2:04:44, 15.62s/batch, batch_loss=10.1, batch_index=264, batch_size=256]

Validation:  36%|████▎       | 264/743 [1:08:43<2:04:44, 15.62s/batch, batch_loss=19.6, batch_index=265, batch_size=256]

Validation:  36%|████▎       | 265/743 [1:08:43<2:04:28, 15.62s/batch, batch_loss=19.6, batch_index=265, batch_size=256]

Validation:  36%|████▉         | 265/743 [1:08:58<2:04:28, 15.62s/batch, batch_loss=24, batch_index=266, batch_size=256]

Validation:  36%|█████         | 266/743 [1:08:58<2:01:38, 15.30s/batch, batch_loss=24, batch_index=266, batch_size=256]

Validation:  36%|████▎       | 266/743 [1:09:13<2:01:38, 15.30s/batch, batch_loss=20.8, batch_index=267, batch_size=256]

Validation:  36%|████▎       | 267/743 [1:09:13<2:01:31, 15.32s/batch, batch_loss=20.8, batch_index=267, batch_size=256]

Validation:  36%|███▏     | 267/743 [1:09:28<2:01:31, 15.32s/batch, batch_loss=3.01e+3, batch_index=268, batch_size=256]

Validation:  36%|███▏     | 268/743 [1:09:28<2:00:49, 15.26s/batch, batch_loss=3.01e+3, batch_index=268, batch_size=256]

Validation:  36%|████▎       | 268/743 [1:09:47<2:00:49, 15.26s/batch, batch_loss=31.4, batch_index=269, batch_size=256]

Validation:  36%|████▎       | 269/743 [1:09:47<2:07:41, 16.16s/batch, batch_loss=31.4, batch_index=269, batch_size=256]

Validation:  36%|████▎       | 269/743 [1:10:03<2:07:41, 16.16s/batch, batch_loss=30.6, batch_index=270, batch_size=256]

Validation:  36%|████▎       | 270/743 [1:10:03<2:07:41, 16.20s/batch, batch_loss=30.6, batch_index=270, batch_size=256]

Validation:  36%|█████         | 270/743 [1:10:18<2:07:41, 16.20s/batch, batch_loss=26, batch_index=271, batch_size=256]

Validation:  36%|█████         | 271/743 [1:10:18<2:05:10, 15.91s/batch, batch_loss=26, batch_index=271, batch_size=256]

Validation:  36%|███▎     | 271/743 [1:10:33<2:05:10, 15.91s/batch, batch_loss=1.06e+3, batch_index=272, batch_size=256]

Validation:  37%|███▎     | 272/743 [1:10:33<2:02:04, 15.55s/batch, batch_loss=1.06e+3, batch_index=272, batch_size=256]

Validation:  37%|████▍       | 272/743 [1:10:48<2:02:04, 15.55s/batch, batch_loss=18.4, batch_index=273, batch_size=256]

Validation:  37%|████▍       | 273/743 [1:10:48<2:01:01, 15.45s/batch, batch_loss=18.4, batch_index=273, batch_size=256]

Validation:  37%|████▍       | 273/743 [1:11:04<2:01:01, 15.45s/batch, batch_loss=21.9, batch_index=274, batch_size=256]

Validation:  37%|████▍       | 274/743 [1:11:04<2:01:35, 15.56s/batch, batch_loss=21.9, batch_index=274, batch_size=256]

Validation:  37%|████▍       | 274/743 [1:11:21<2:01:35, 15.56s/batch, batch_loss=18.3, batch_index=275, batch_size=256]

Validation:  37%|████▍       | 275/743 [1:11:21<2:04:39, 15.98s/batch, batch_loss=18.3, batch_index=275, batch_size=256]

Validation:  37%|████▍       | 275/743 [1:11:39<2:04:39, 15.98s/batch, batch_loss=13.4, batch_index=276, batch_size=256]

Validation:  37%|████▍       | 276/743 [1:11:39<2:09:49, 16.68s/batch, batch_loss=13.4, batch_index=276, batch_size=256]

Validation:  37%|████▍       | 276/743 [1:11:55<2:09:49, 16.68s/batch, batch_loss=25.1, batch_index=277, batch_size=256]

Validation:  37%|████▍       | 277/743 [1:11:55<2:08:02, 16.49s/batch, batch_loss=25.1, batch_index=277, batch_size=256]

Validation:  37%|████▍       | 277/743 [1:12:10<2:08:02, 16.49s/batch, batch_loss=21.3, batch_index=278, batch_size=256]

Validation:  37%|████▍       | 278/743 [1:12:10<2:03:58, 16.00s/batch, batch_loss=21.3, batch_index=278, batch_size=256]

Validation:  37%|████▍       | 278/743 [1:12:27<2:03:58, 16.00s/batch, batch_loss=8.56, batch_index=279, batch_size=256]

Validation:  38%|████▌       | 279/743 [1:12:27<2:04:52, 16.15s/batch, batch_loss=8.56, batch_index=279, batch_size=256]

Validation:  38%|████▌       | 279/743 [1:12:43<2:04:52, 16.15s/batch, batch_loss=14.6, batch_index=280, batch_size=256]

Validation:  38%|████▌       | 280/743 [1:12:43<2:04:58, 16.20s/batch, batch_loss=14.6, batch_index=280, batch_size=256]

Validation:  38%|████▌       | 280/743 [1:12:58<2:04:58, 16.20s/batch, batch_loss=19.9, batch_index=281, batch_size=256]

Validation:  38%|████▌       | 281/743 [1:12:58<2:03:15, 16.01s/batch, batch_loss=19.9, batch_index=281, batch_size=256]

Validation:  38%|████▌       | 281/743 [1:13:14<2:03:15, 16.01s/batch, batch_loss=22.2, batch_index=282, batch_size=256]

Validation:  38%|████▌       | 282/743 [1:13:14<2:01:25, 15.80s/batch, batch_loss=22.2, batch_index=282, batch_size=256]

Validation:  38%|████▌       | 282/743 [1:13:30<2:01:25, 15.80s/batch, batch_loss=17.3, batch_index=283, batch_size=256]

Validation:  38%|████▌       | 283/743 [1:13:30<2:01:54, 15.90s/batch, batch_loss=17.3, batch_index=283, batch_size=256]

Validation:  38%|█████▎        | 283/743 [1:13:46<2:01:54, 15.90s/batch, batch_loss=15, batch_index=284, batch_size=256]

Validation:  38%|█████▎        | 284/743 [1:13:46<2:02:18, 15.99s/batch, batch_loss=15, batch_index=284, batch_size=256]

Validation:  38%|████▌       | 284/743 [1:14:02<2:02:18, 15.99s/batch, batch_loss=14.2, batch_index=285, batch_size=256]

Validation:  38%|████▌       | 285/743 [1:14:02<2:01:39, 15.94s/batch, batch_loss=14.2, batch_index=285, batch_size=256]

Validation:  38%|████▌       | 285/743 [1:14:19<2:01:39, 15.94s/batch, batch_loss=18.5, batch_index=286, batch_size=256]

Validation:  38%|████▌       | 286/743 [1:14:19<2:03:08, 16.17s/batch, batch_loss=18.5, batch_index=286, batch_size=256]

Validation:  38%|███▍     | 286/743 [1:14:34<2:03:08, 16.17s/batch, batch_loss=1.19e+4, batch_index=287, batch_size=256]

Validation:  39%|███▍     | 287/743 [1:14:34<2:01:12, 15.95s/batch, batch_loss=1.19e+4, batch_index=287, batch_size=256]

Validation:  39%|█████▍        | 287/743 [1:14:49<2:01:12, 15.95s/batch, batch_loss=22, batch_index=288, batch_size=256]

Validation:  39%|█████▍        | 288/743 [1:14:49<1:59:40, 15.78s/batch, batch_loss=22, batch_index=288, batch_size=256]

Validation:  39%|████▋       | 288/743 [1:15:05<1:59:40, 15.78s/batch, batch_loss=21.4, batch_index=289, batch_size=256]

Validation:  39%|████▋       | 289/743 [1:15:05<1:58:59, 15.73s/batch, batch_loss=21.4, batch_index=289, batch_size=256]

Validation:  39%|█████        | 289/743 [1:15:20<1:58:59, 15.73s/batch, batch_loss=484, batch_index=290, batch_size=256]

Validation:  39%|█████        | 290/743 [1:15:20<1:56:29, 15.43s/batch, batch_loss=484, batch_index=290, batch_size=256]

Validation:  39%|███▌     | 290/743 [1:15:36<1:56:29, 15.43s/batch, batch_loss=1.52e+3, batch_index=291, batch_size=256]

Validation:  39%|███▌     | 291/743 [1:15:36<1:57:07, 15.55s/batch, batch_loss=1.52e+3, batch_index=291, batch_size=256]

Validation:  39%|███▉      | 291/743 [1:15:53<1:57:07, 15.55s/batch, batch_loss=1.2e+3, batch_index=292, batch_size=256]

Validation:  39%|███▉      | 292/743 [1:15:53<2:01:14, 16.13s/batch, batch_loss=1.2e+3, batch_index=292, batch_size=256]

Validation:  39%|████▋       | 292/743 [1:16:08<2:01:14, 16.13s/batch, batch_loss=27.9, batch_index=293, batch_size=256]

Validation:  39%|████▋       | 293/743 [1:16:08<1:58:05, 15.74s/batch, batch_loss=27.9, batch_index=293, batch_size=256]

Validation:  39%|███▉      | 293/743 [1:16:23<1:58:05, 15.74s/batch, batch_loss=1.1e+3, batch_index=294, batch_size=256]

Validation:  40%|███▉      | 294/743 [1:16:23<1:57:26, 15.69s/batch, batch_loss=1.1e+3, batch_index=294, batch_size=256]

Validation:  40%|████▋       | 294/743 [1:16:38<1:57:26, 15.69s/batch, batch_loss=18.2, batch_index=295, batch_size=256]

Validation:  40%|████▊       | 295/743 [1:16:38<1:54:45, 15.37s/batch, batch_loss=18.2, batch_index=295, batch_size=256]

Validation:  40%|████▊       | 295/743 [1:16:53<1:54:45, 15.37s/batch, batch_loss=17.3, batch_index=296, batch_size=256]

Validation:  40%|████▊       | 296/743 [1:16:53<1:52:30, 15.10s/batch, batch_loss=17.3, batch_index=296, batch_size=256]

Validation:  40%|████▊       | 296/743 [1:17:07<1:52:30, 15.10s/batch, batch_loss=11.3, batch_index=297, batch_size=256]

Validation:  40%|████▊       | 297/743 [1:17:07<1:50:20, 14.84s/batch, batch_loss=11.3, batch_index=297, batch_size=256]

Validation:  40%|████▊       | 297/743 [1:17:22<1:50:20, 14.84s/batch, batch_loss=23.1, batch_index=298, batch_size=256]

Validation:  40%|████▊       | 298/743 [1:17:22<1:50:26, 14.89s/batch, batch_loss=23.1, batch_index=298, batch_size=256]

Validation:  40%|████▊       | 298/743 [1:17:36<1:50:26, 14.89s/batch, batch_loss=29.6, batch_index=299, batch_size=256]

Validation:  40%|████▊       | 299/743 [1:17:36<1:49:30, 14.80s/batch, batch_loss=29.6, batch_index=299, batch_size=256]

Validation:  40%|█████▋        | 299/743 [1:17:51<1:49:30, 14.80s/batch, batch_loss=34, batch_index=300, batch_size=256]

Validation:  40%|█████▋        | 300/743 [1:17:51<1:49:06, 14.78s/batch, batch_loss=34, batch_index=300, batch_size=256]

Validation:  40%|█████▏       | 300/743 [1:18:09<1:49:06, 14.78s/batch, batch_loss=831, batch_index=301, batch_size=256]

Validation:  41%|█████▎       | 301/743 [1:18:09<1:55:05, 15.62s/batch, batch_loss=831, batch_index=301, batch_size=256]

Validation:  41%|████▊       | 301/743 [1:18:24<1:55:05, 15.62s/batch, batch_loss=11.6, batch_index=302, batch_size=256]

Validation:  41%|████▉       | 302/743 [1:18:24<1:52:59, 15.37s/batch, batch_loss=11.6, batch_index=302, batch_size=256]

Validation:  41%|████▉       | 302/743 [1:18:39<1:52:59, 15.37s/batch, batch_loss=17.8, batch_index=303, batch_size=256]

Validation:  41%|████▉       | 303/743 [1:18:39<1:53:02, 15.42s/batch, batch_loss=17.8, batch_index=303, batch_size=256]

Validation:  41%|█████▋        | 303/743 [1:18:54<1:53:02, 15.42s/batch, batch_loss=18, batch_index=304, batch_size=256]

Validation:  41%|█████▋        | 304/743 [1:18:54<1:51:02, 15.18s/batch, batch_loss=18, batch_index=304, batch_size=256]

Validation:  41%|████▉       | 304/743 [1:19:09<1:51:02, 15.18s/batch, batch_loss=13.4, batch_index=305, batch_size=256]

Validation:  41%|████▉       | 305/743 [1:19:09<1:51:20, 15.25s/batch, batch_loss=13.4, batch_index=305, batch_size=256]

Validation:  41%|████▉       | 305/743 [1:19:23<1:51:20, 15.25s/batch, batch_loss=21.5, batch_index=306, batch_size=256]

Validation:  41%|████▉       | 306/743 [1:19:23<1:49:12, 14.99s/batch, batch_loss=21.5, batch_index=306, batch_size=256]

Validation:  41%|████▉       | 306/743 [1:19:37<1:49:12, 14.99s/batch, batch_loss=19.5, batch_index=307, batch_size=256]

Validation:  41%|████▉       | 307/743 [1:19:37<1:44:50, 14.43s/batch, batch_loss=19.5, batch_index=307, batch_size=256]

Validation:  41%|█████▎       | 307/743 [1:19:51<1:44:50, 14.43s/batch, batch_loss=881, batch_index=308, batch_size=256]

Validation:  41%|█████▍       | 308/743 [1:19:51<1:45:20, 14.53s/batch, batch_loss=881, batch_index=308, batch_size=256]

Validation:  41%|████▉       | 308/743 [1:20:07<1:45:20, 14.53s/batch, batch_loss=25.7, batch_index=309, batch_size=256]

Validation:  42%|████▉       | 309/743 [1:20:07<1:46:33, 14.73s/batch, batch_loss=25.7, batch_index=309, batch_size=256]

Validation:  42%|█████▊        | 309/743 [1:20:21<1:46:33, 14.73s/batch, batch_loss=17, batch_index=310, batch_size=256]

Validation:  42%|█████▊        | 310/743 [1:20:21<1:45:46, 14.66s/batch, batch_loss=17, batch_index=310, batch_size=256]

Validation:  42%|█████       | 310/743 [1:20:35<1:45:46, 14.66s/batch, batch_loss=18.4, batch_index=311, batch_size=256]

Validation:  42%|█████       | 311/743 [1:20:35<1:44:51, 14.56s/batch, batch_loss=18.4, batch_index=311, batch_size=256]

Validation:  42%|█████       | 311/743 [1:20:51<1:44:51, 14.56s/batch, batch_loss=17.4, batch_index=312, batch_size=256]

Validation:  42%|█████       | 312/743 [1:20:51<1:45:50, 14.73s/batch, batch_loss=17.4, batch_index=312, batch_size=256]

Validation:  42%|█████       | 312/743 [1:21:06<1:45:50, 14.73s/batch, batch_loss=8.08, batch_index=313, batch_size=256]

Validation:  42%|█████       | 313/743 [1:21:06<1:47:13, 14.96s/batch, batch_loss=8.08, batch_index=313, batch_size=256]

Validation:  42%|█████       | 313/743 [1:21:20<1:47:13, 14.96s/batch, batch_loss=11.9, batch_index=314, batch_size=256]

Validation:  42%|█████       | 314/743 [1:21:20<1:45:42, 14.79s/batch, batch_loss=11.9, batch_index=314, batch_size=256]

Validation:  42%|█████▉        | 314/743 [1:21:35<1:45:42, 14.79s/batch, batch_loss=22, batch_index=315, batch_size=256]

Validation:  42%|█████▉        | 315/743 [1:21:35<1:44:34, 14.66s/batch, batch_loss=22, batch_index=315, batch_size=256]

Validation:  42%|█████       | 315/743 [1:21:48<1:44:34, 14.66s/batch, batch_loss=21.4, batch_index=316, batch_size=256]

Validation:  43%|█████       | 316/743 [1:21:48<1:42:22, 14.38s/batch, batch_loss=21.4, batch_index=316, batch_size=256]

Validation:  43%|█████       | 316/743 [1:22:03<1:42:22, 14.38s/batch, batch_loss=20.5, batch_index=317, batch_size=256]

Validation:  43%|█████       | 317/743 [1:22:03<1:43:23, 14.56s/batch, batch_loss=20.5, batch_index=317, batch_size=256]

Validation:  43%|█████       | 317/743 [1:22:17<1:43:23, 14.56s/batch, batch_loss=15.4, batch_index=318, batch_size=256]

Validation:  43%|█████▏      | 318/743 [1:22:17<1:41:54, 14.39s/batch, batch_loss=15.4, batch_index=318, batch_size=256]

Validation:  43%|█████▏      | 318/743 [1:22:31<1:41:54, 14.39s/batch, batch_loss=20.6, batch_index=319, batch_size=256]

Validation:  43%|█████▏      | 319/743 [1:22:31<1:40:55, 14.28s/batch, batch_loss=20.6, batch_index=319, batch_size=256]

Validation:  43%|█████▏      | 319/743 [1:22:46<1:40:55, 14.28s/batch, batch_loss=18.2, batch_index=320, batch_size=256]

Validation:  43%|█████▏      | 320/743 [1:22:46<1:41:49, 14.44s/batch, batch_loss=18.2, batch_index=320, batch_size=256]

Validation:  43%|█████▏      | 320/743 [1:23:00<1:41:49, 14.44s/batch, batch_loss=17.6, batch_index=321, batch_size=256]

Validation:  43%|█████▏      | 321/743 [1:23:00<1:40:54, 14.35s/batch, batch_loss=17.6, batch_index=321, batch_size=256]

Validation:  43%|█████▏      | 321/743 [1:23:16<1:40:54, 14.35s/batch, batch_loss=19.2, batch_index=322, batch_size=256]

Validation:  43%|█████▏      | 322/743 [1:23:16<1:44:17, 14.86s/batch, batch_loss=19.2, batch_index=322, batch_size=256]

Validation:  43%|█████▏      | 322/743 [1:23:32<1:44:17, 14.86s/batch, batch_loss=19.9, batch_index=323, batch_size=256]

Validation:  43%|█████▏      | 323/743 [1:23:32<1:45:03, 15.01s/batch, batch_loss=19.9, batch_index=323, batch_size=256]

Validation:  43%|█████▋       | 323/743 [1:23:50<1:45:03, 15.01s/batch, batch_loss=300, batch_index=324, batch_size=256]

Validation:  44%|█████▋       | 324/743 [1:23:50<1:51:53, 16.02s/batch, batch_loss=300, batch_index=324, batch_size=256]

Validation:  44%|██████        | 324/743 [1:24:06<1:51:53, 16.02s/batch, batch_loss=20, batch_index=325, batch_size=256]

Validation:  44%|██████        | 325/743 [1:24:06<1:50:28, 15.86s/batch, batch_loss=20, batch_index=325, batch_size=256]

Validation:  44%|█████▏      | 325/743 [1:24:22<1:50:28, 15.86s/batch, batch_loss=17.1, batch_index=326, batch_size=256]

Validation:  44%|█████▎      | 326/743 [1:24:22<1:51:37, 16.06s/batch, batch_loss=17.1, batch_index=326, batch_size=256]

Validation:  44%|█████▎      | 326/743 [1:24:38<1:51:37, 16.06s/batch, batch_loss=18.6, batch_index=327, batch_size=256]

Validation:  44%|█████▎      | 327/743 [1:24:38<1:49:51, 15.84s/batch, batch_loss=18.6, batch_index=327, batch_size=256]

Validation:  44%|█████▎      | 327/743 [1:24:53<1:49:51, 15.84s/batch, batch_loss=20.2, batch_index=328, batch_size=256]

Validation:  44%|█████▎      | 328/743 [1:24:53<1:49:10, 15.79s/batch, batch_loss=20.2, batch_index=328, batch_size=256]

Validation:  44%|█████▎      | 328/743 [1:25:09<1:49:10, 15.79s/batch, batch_loss=9.27, batch_index=329, batch_size=256]

Validation:  44%|█████▎      | 329/743 [1:25:09<1:49:34, 15.88s/batch, batch_loss=9.27, batch_index=329, batch_size=256]

Validation:  44%|█████▎      | 329/743 [1:25:25<1:49:34, 15.88s/batch, batch_loss=14.8, batch_index=330, batch_size=256]

Validation:  44%|█████▎      | 330/743 [1:25:25<1:48:30, 15.76s/batch, batch_loss=14.8, batch_index=330, batch_size=256]

Validation:  44%|█████▎      | 330/743 [1:25:40<1:48:30, 15.76s/batch, batch_loss=21.1, batch_index=331, batch_size=256]

Validation:  45%|█████▎      | 331/743 [1:25:40<1:46:08, 15.46s/batch, batch_loss=21.1, batch_index=331, batch_size=256]

Validation:  45%|████     | 331/743 [1:25:58<1:46:08, 15.46s/batch, batch_loss=1.15e+4, batch_index=332, batch_size=256]

Validation:  45%|████     | 332/743 [1:25:58<1:51:47, 16.32s/batch, batch_loss=1.15e+4, batch_index=332, batch_size=256]

Validation:  45%|█████▎      | 332/743 [1:26:14<1:51:47, 16.32s/batch, batch_loss=29.3, batch_index=333, batch_size=256]

Validation:  45%|█████▍      | 333/743 [1:26:14<1:50:42, 16.20s/batch, batch_loss=29.3, batch_index=333, batch_size=256]

Validation:  45%|█████▍      | 333/743 [1:26:30<1:50:42, 16.20s/batch, batch_loss=23.5, batch_index=334, batch_size=256]

Validation:  45%|█████▍      | 334/743 [1:26:30<1:50:48, 16.26s/batch, batch_loss=23.5, batch_index=334, batch_size=256]

Validation:  45%|██████▎       | 334/743 [1:26:46<1:50:48, 16.26s/batch, batch_loss=36, batch_index=335, batch_size=256]

Validation:  45%|██████▎       | 335/743 [1:26:46<1:48:59, 16.03s/batch, batch_loss=36, batch_index=335, batch_size=256]

Validation:  45%|█████▍      | 335/743 [1:27:02<1:48:59, 16.03s/batch, batch_loss=13.3, batch_index=336, batch_size=256]

Validation:  45%|█████▍      | 336/743 [1:27:02<1:49:54, 16.20s/batch, batch_loss=13.3, batch_index=336, batch_size=256]

Validation:  45%|█████▍      | 336/743 [1:27:17<1:49:54, 16.20s/batch, batch_loss=22.4, batch_index=337, batch_size=256]

Validation:  45%|█████▍      | 337/743 [1:27:17<1:47:02, 15.82s/batch, batch_loss=22.4, batch_index=337, batch_size=256]

Validation:  45%|█████▍      | 337/743 [1:27:32<1:47:02, 15.82s/batch, batch_loss=32.6, batch_index=338, batch_size=256]

Validation:  45%|█████▍      | 338/743 [1:27:32<1:43:52, 15.39s/batch, batch_loss=32.6, batch_index=338, batch_size=256]

Validation:  45%|██████▎       | 338/743 [1:27:46<1:43:52, 15.39s/batch, batch_loss=30, batch_index=339, batch_size=256]

Validation:  46%|██████▍       | 339/743 [1:27:46<1:41:23, 15.06s/batch, batch_loss=30, batch_index=339, batch_size=256]

Validation:  46%|█████▍      | 339/743 [1:28:03<1:41:23, 15.06s/batch, batch_loss=31.1, batch_index=340, batch_size=256]

Validation:  46%|█████▍      | 340/743 [1:28:03<1:45:52, 15.76s/batch, batch_loss=31.1, batch_index=340, batch_size=256]

Validation:  46%|█████▍      | 340/743 [1:28:18<1:45:52, 15.76s/batch, batch_loss=17.8, batch_index=341, batch_size=256]

Validation:  46%|█████▌      | 341/743 [1:28:18<1:43:12, 15.40s/batch, batch_loss=17.8, batch_index=341, batch_size=256]

Validation:  46%|█████▌      | 341/743 [1:28:33<1:43:12, 15.40s/batch, batch_loss=22.1, batch_index=342, batch_size=256]

Validation:  46%|█████▌      | 342/743 [1:28:33<1:42:05, 15.28s/batch, batch_loss=22.1, batch_index=342, batch_size=256]

Validation:  46%|█████▌      | 342/743 [1:28:48<1:42:05, 15.28s/batch, batch_loss=24.6, batch_index=343, batch_size=256]

Validation:  46%|█████▌      | 343/743 [1:28:48<1:41:43, 15.26s/batch, batch_loss=24.6, batch_index=343, batch_size=256]

Validation:  46%|█████▌      | 343/743 [1:29:02<1:41:43, 15.26s/batch, batch_loss=23.1, batch_index=344, batch_size=256]

Validation:  46%|█████▌      | 344/743 [1:29:02<1:39:42, 14.99s/batch, batch_loss=23.1, batch_index=344, batch_size=256]

Validation:  46%|█████▌      | 344/743 [1:29:17<1:39:42, 14.99s/batch, batch_loss=20.5, batch_index=345, batch_size=256]

Validation:  46%|█████▌      | 345/743 [1:29:17<1:38:58, 14.92s/batch, batch_loss=20.5, batch_index=345, batch_size=256]

Validation:  46%|█████▌      | 345/743 [1:29:32<1:38:58, 14.92s/batch, batch_loss=31.1, batch_index=346, batch_size=256]

Validation:  47%|█████▌      | 346/743 [1:29:32<1:38:25, 14.88s/batch, batch_loss=31.1, batch_index=346, batch_size=256]

Validation:  47%|█████▌      | 346/743 [1:29:47<1:38:25, 14.88s/batch, batch_loss=21.6, batch_index=347, batch_size=256]

Validation:  47%|█████▌      | 347/743 [1:29:47<1:38:52, 14.98s/batch, batch_loss=21.6, batch_index=347, batch_size=256]

Validation:  47%|█████▌      | 347/743 [1:30:02<1:38:52, 14.98s/batch, batch_loss=28.3, batch_index=348, batch_size=256]

Validation:  47%|█████▌      | 348/743 [1:30:02<1:38:01, 14.89s/batch, batch_loss=28.3, batch_index=348, batch_size=256]

Validation:  47%|█████▌      | 348/743 [1:30:17<1:38:01, 14.89s/batch, batch_loss=24.4, batch_index=349, batch_size=256]

Validation:  47%|█████▋      | 349/743 [1:30:17<1:37:26, 14.84s/batch, batch_loss=24.4, batch_index=349, batch_size=256]

Validation:  47%|██████▌       | 349/743 [1:30:31<1:37:26, 14.84s/batch, batch_loss=21, batch_index=350, batch_size=256]

Validation:  47%|██████▌       | 350/743 [1:30:31<1:36:34, 14.74s/batch, batch_loss=21, batch_index=350, batch_size=256]

Validation:  47%|████▏    | 350/743 [1:30:45<1:36:34, 14.74s/batch, batch_loss=1.31e+4, batch_index=351, batch_size=256]

Validation:  47%|████▎    | 351/743 [1:30:45<1:34:06, 14.40s/batch, batch_loss=1.31e+4, batch_index=351, batch_size=256]

Validation:  47%|█████▋      | 351/743 [1:31:00<1:34:06, 14.40s/batch, batch_loss=29.6, batch_index=352, batch_size=256]

Validation:  47%|█████▋      | 352/743 [1:31:00<1:35:12, 14.61s/batch, batch_loss=29.6, batch_index=352, batch_size=256]

Validation:  47%|█████▋      | 352/743 [1:31:16<1:35:12, 14.61s/batch, batch_loss=18.2, batch_index=353, batch_size=256]

Validation:  48%|█████▋      | 353/743 [1:31:16<1:38:03, 15.09s/batch, batch_loss=18.2, batch_index=353, batch_size=256]

Validation:  48%|█████▋      | 353/743 [1:31:33<1:38:03, 15.09s/batch, batch_loss=23.3, batch_index=354, batch_size=256]

Validation:  48%|█████▋      | 354/743 [1:31:33<1:41:41, 15.68s/batch, batch_loss=23.3, batch_index=354, batch_size=256]

Validation:  48%|█████▋      | 354/743 [1:31:49<1:41:41, 15.68s/batch, batch_loss=27.1, batch_index=355, batch_size=256]

Validation:  48%|█████▋      | 355/743 [1:31:49<1:42:29, 15.85s/batch, batch_loss=27.1, batch_index=355, batch_size=256]

Validation:  48%|█████▋      | 355/743 [1:32:06<1:42:29, 15.85s/batch, batch_loss=35.5, batch_index=356, batch_size=256]

Validation:  48%|█████▋      | 356/743 [1:32:06<1:43:15, 16.01s/batch, batch_loss=35.5, batch_index=356, batch_size=256]

Validation:  48%|████▎    | 356/743 [1:32:22<1:43:15, 16.01s/batch, batch_loss=5.99e+4, batch_index=357, batch_size=256]

Validation:  48%|████▎    | 357/743 [1:32:22<1:42:43, 15.97s/batch, batch_loss=5.99e+4, batch_index=357, batch_size=256]

Validation:  48%|█████▊      | 357/743 [1:32:36<1:42:43, 15.97s/batch, batch_loss=15.9, batch_index=358, batch_size=256]

Validation:  48%|█████▊      | 358/743 [1:32:36<1:39:47, 15.55s/batch, batch_loss=15.9, batch_index=358, batch_size=256]

Validation:  48%|█████▊      | 358/743 [1:32:51<1:39:47, 15.55s/batch, batch_loss=12.6, batch_index=359, batch_size=256]

Validation:  48%|█████▊      | 359/743 [1:32:51<1:37:38, 15.26s/batch, batch_loss=12.6, batch_index=359, batch_size=256]

Validation:  48%|█████▊      | 359/743 [1:33:06<1:37:38, 15.26s/batch, batch_loss=23.1, batch_index=360, batch_size=256]

Validation:  48%|█████▊      | 360/743 [1:33:06<1:37:11, 15.23s/batch, batch_loss=23.1, batch_index=360, batch_size=256]

Validation:  48%|██████▊       | 360/743 [1:33:23<1:37:11, 15.23s/batch, batch_loss=18, batch_index=361, batch_size=256]

Validation:  49%|██████▊       | 361/743 [1:33:23<1:39:56, 15.70s/batch, batch_loss=18, batch_index=361, batch_size=256]

Validation:  49%|█████▊      | 361/743 [1:33:40<1:39:56, 15.70s/batch, batch_loss=24.6, batch_index=362, batch_size=256]

Validation:  49%|█████▊      | 362/743 [1:33:40<1:42:33, 16.15s/batch, batch_loss=24.6, batch_index=362, batch_size=256]

Validation:  49%|█████▊      | 362/743 [1:33:55<1:42:33, 16.15s/batch, batch_loss=24.5, batch_index=363, batch_size=256]

Validation:  49%|█████▊      | 363/743 [1:33:55<1:41:00, 15.95s/batch, batch_loss=24.5, batch_index=363, batch_size=256]

Validation:  49%|█████▊      | 363/743 [1:34:11<1:41:00, 15.95s/batch, batch_loss=22.2, batch_index=364, batch_size=256]

Validation:  49%|█████▉      | 364/743 [1:34:11<1:40:04, 15.84s/batch, batch_loss=22.2, batch_index=364, batch_size=256]

Validation:  49%|█████▉      | 364/743 [1:34:27<1:40:04, 15.84s/batch, batch_loss=19.2, batch_index=365, batch_size=256]

Validation:  49%|█████▉      | 365/743 [1:34:27<1:41:08, 16.05s/batch, batch_loss=19.2, batch_index=365, batch_size=256]

Validation:  49%|█████▉      | 365/743 [1:34:44<1:41:08, 16.05s/batch, batch_loss=16.1, batch_index=366, batch_size=256]

Validation:  49%|█████▉      | 366/743 [1:34:44<1:42:02, 16.24s/batch, batch_loss=16.1, batch_index=366, batch_size=256]

Validation:  49%|█████▉      | 366/743 [1:35:01<1:42:02, 16.24s/batch, batch_loss=19.3, batch_index=367, batch_size=256]

Validation:  49%|█████▉      | 367/743 [1:35:01<1:42:59, 16.44s/batch, batch_loss=19.3, batch_index=367, batch_size=256]

Validation:  49%|████▍    | 367/743 [1:35:16<1:42:59, 16.44s/batch, batch_loss=4.83e+3, batch_index=368, batch_size=256]

Validation:  50%|████▍    | 368/743 [1:35:16<1:40:48, 16.13s/batch, batch_loss=4.83e+3, batch_index=368, batch_size=256]

Validation:  50%|█████▉      | 368/743 [1:35:31<1:40:48, 16.13s/batch, batch_loss=16.6, batch_index=369, batch_size=256]

Validation:  50%|█████▉      | 369/743 [1:35:31<1:38:13, 15.76s/batch, batch_loss=16.6, batch_index=369, batch_size=256]

Validation:  50%|██████▉       | 369/743 [1:35:48<1:38:13, 15.76s/batch, batch_loss=24, batch_index=370, batch_size=256]

Validation:  50%|██████▉       | 370/743 [1:35:48<1:39:34, 16.02s/batch, batch_loss=24, batch_index=370, batch_size=256]

Validation:  50%|█████▉      | 370/743 [1:36:04<1:39:34, 16.02s/batch, batch_loss=20.6, batch_index=371, batch_size=256]

Validation:  50%|█████▉      | 371/743 [1:36:04<1:38:36, 15.91s/batch, batch_loss=20.6, batch_index=371, batch_size=256]

Validation:  50%|█████▉      | 371/743 [1:36:18<1:38:36, 15.91s/batch, batch_loss=19.8, batch_index=372, batch_size=256]

Validation:  50%|██████      | 372/743 [1:36:18<1:35:46, 15.49s/batch, batch_loss=19.8, batch_index=372, batch_size=256]

Validation:  50%|██████      | 372/743 [1:36:33<1:35:46, 15.49s/batch, batch_loss=24.3, batch_index=373, batch_size=256]

Validation:  50%|██████      | 373/743 [1:36:33<1:34:19, 15.29s/batch, batch_loss=24.3, batch_index=373, batch_size=256]

Validation:  50%|██████      | 373/743 [1:36:48<1:34:19, 15.29s/batch, batch_loss=16.7, batch_index=374, batch_size=256]

Validation:  50%|██████      | 374/743 [1:36:48<1:32:49, 15.09s/batch, batch_loss=16.7, batch_index=374, batch_size=256]

Validation:  50%|██████      | 374/743 [1:37:02<1:32:49, 15.09s/batch, batch_loss=8.61, batch_index=375, batch_size=256]

Validation:  50%|██████      | 375/743 [1:37:02<1:31:50, 14.97s/batch, batch_loss=8.61, batch_index=375, batch_size=256]

Validation:  50%|██████      | 375/743 [1:37:16<1:31:50, 14.97s/batch, batch_loss=32.5, batch_index=376, batch_size=256]

Validation:  51%|██████      | 376/743 [1:37:16<1:29:22, 14.61s/batch, batch_loss=32.5, batch_index=376, batch_size=256]

Validation:  51%|██████      | 376/743 [1:37:30<1:29:22, 14.61s/batch, batch_loss=12.1, batch_index=377, batch_size=256]

Validation:  51%|██████      | 377/743 [1:37:30<1:28:14, 14.47s/batch, batch_loss=12.1, batch_index=377, batch_size=256]

Validation:  51%|██████      | 377/743 [1:37:45<1:28:14, 14.47s/batch, batch_loss=21.1, batch_index=378, batch_size=256]

Validation:  51%|██████      | 378/743 [1:37:45<1:27:52, 14.45s/batch, batch_loss=21.1, batch_index=378, batch_size=256]

Validation:  51%|██████      | 378/743 [1:38:02<1:27:52, 14.45s/batch, batch_loss=8.06, batch_index=379, batch_size=256]

Validation:  51%|██████      | 379/743 [1:38:02<1:33:07, 15.35s/batch, batch_loss=8.06, batch_index=379, batch_size=256]

Validation:  51%|██████      | 379/743 [1:38:17<1:33:07, 15.35s/batch, batch_loss=8.19, batch_index=380, batch_size=256]

Validation:  51%|██████▏     | 380/743 [1:38:17<1:32:27, 15.28s/batch, batch_loss=8.19, batch_index=380, batch_size=256]

Validation:  51%|████▌    | 380/743 [1:38:32<1:32:27, 15.28s/batch, batch_loss=7.04e+4, batch_index=381, batch_size=256]

Validation:  51%|████▌    | 381/743 [1:38:32<1:31:53, 15.23s/batch, batch_loss=7.04e+4, batch_index=381, batch_size=256]

Validation:  51%|██████▋      | 381/743 [1:38:47<1:31:53, 15.23s/batch, batch_loss=916, batch_index=382, batch_size=256]

Validation:  51%|██████▋      | 382/743 [1:38:47<1:30:48, 15.09s/batch, batch_loss=916, batch_index=382, batch_size=256]

Validation:  51%|██████▋      | 382/743 [1:39:02<1:30:48, 15.09s/batch, batch_loss=210, batch_index=383, batch_size=256]

Validation:  52%|██████▋      | 383/743 [1:39:02<1:29:59, 15.00s/batch, batch_loss=210, batch_index=383, batch_size=256]

Validation:  52%|██████▋      | 383/743 [1:39:17<1:29:59, 15.00s/batch, batch_loss=281, batch_index=384, batch_size=256]

Validation:  52%|██████▋      | 384/743 [1:39:17<1:29:19, 14.93s/batch, batch_loss=281, batch_index=384, batch_size=256]

Validation:  52%|██████▏     | 384/743 [1:39:32<1:29:19, 14.93s/batch, batch_loss=19.4, batch_index=385, batch_size=256]

Validation:  52%|██████▏     | 385/743 [1:39:32<1:29:03, 14.92s/batch, batch_loss=19.4, batch_index=385, batch_size=256]

Validation:  52%|███████▎      | 385/743 [1:39:46<1:29:03, 14.92s/batch, batch_loss=12, batch_index=386, batch_size=256]

Validation:  52%|███████▎      | 386/743 [1:39:46<1:28:04, 14.80s/batch, batch_loss=12, batch_index=386, batch_size=256]

Validation:  52%|██████▏     | 386/743 [1:40:04<1:28:04, 14.80s/batch, batch_loss=10.3, batch_index=387, batch_size=256]

Validation:  52%|██████▎     | 387/743 [1:40:04<1:33:44, 15.80s/batch, batch_loss=10.3, batch_index=387, batch_size=256]

Validation:  52%|██████▎     | 387/743 [1:40:19<1:33:44, 15.80s/batch, batch_loss=17.8, batch_index=388, batch_size=256]

Validation:  52%|██████▎     | 388/743 [1:40:19<1:32:36, 15.65s/batch, batch_loss=17.8, batch_index=388, batch_size=256]

Validation:  52%|██████▎     | 388/743 [1:40:34<1:32:36, 15.65s/batch, batch_loss=13.4, batch_index=389, batch_size=256]

Validation:  52%|██████▎     | 389/743 [1:40:34<1:30:47, 15.39s/batch, batch_loss=13.4, batch_index=389, batch_size=256]

Validation:  52%|███████▎      | 389/743 [1:40:49<1:30:47, 15.39s/batch, batch_loss=19, batch_index=390, batch_size=256]

Validation:  52%|███████▎      | 390/743 [1:40:49<1:29:13, 15.17s/batch, batch_loss=19, batch_index=390, batch_size=256]

Validation:  52%|██████▎     | 390/743 [1:41:03<1:29:13, 15.17s/batch, batch_loss=15.3, batch_index=391, batch_size=256]

Validation:  53%|██████▎     | 391/743 [1:41:03<1:26:23, 14.73s/batch, batch_loss=15.3, batch_index=391, batch_size=256]

Validation:  53%|██████▎     | 391/743 [1:41:18<1:26:23, 14.73s/batch, batch_loss=15.6, batch_index=392, batch_size=256]

Validation:  53%|██████▎     | 392/743 [1:41:18<1:27:14, 14.91s/batch, batch_loss=15.6, batch_index=392, batch_size=256]

Validation:  53%|██████▎     | 392/743 [1:41:34<1:27:14, 14.91s/batch, batch_loss=18.3, batch_index=393, batch_size=256]

Validation:  53%|██████▎     | 393/743 [1:41:34<1:28:27, 15.16s/batch, batch_loss=18.3, batch_index=393, batch_size=256]

Validation:  53%|██████▎     | 393/743 [1:41:49<1:28:27, 15.16s/batch, batch_loss=17.7, batch_index=394, batch_size=256]

Validation:  53%|██████▎     | 394/743 [1:41:49<1:29:01, 15.30s/batch, batch_loss=17.7, batch_index=394, batch_size=256]

Validation:  53%|██████▎     | 394/743 [1:42:05<1:29:01, 15.30s/batch, batch_loss=12.1, batch_index=395, batch_size=256]

Validation:  53%|██████▍     | 395/743 [1:42:05<1:30:09, 15.54s/batch, batch_loss=12.1, batch_index=395, batch_size=256]

Validation:  53%|██████▍     | 395/743 [1:42:20<1:30:09, 15.54s/batch, batch_loss=16.1, batch_index=396, batch_size=256]

Validation:  53%|██████▍     | 396/743 [1:42:20<1:28:48, 15.35s/batch, batch_loss=16.1, batch_index=396, batch_size=256]

Validation:  53%|██████▍     | 396/743 [1:42:36<1:28:48, 15.35s/batch, batch_loss=12.8, batch_index=397, batch_size=256]

Validation:  53%|██████▍     | 397/743 [1:42:36<1:29:24, 15.50s/batch, batch_loss=12.8, batch_index=397, batch_size=256]

Validation:  53%|██████▍     | 397/743 [1:42:52<1:29:24, 15.50s/batch, batch_loss=19.6, batch_index=398, batch_size=256]

Validation:  54%|██████▍     | 398/743 [1:42:52<1:29:50, 15.62s/batch, batch_loss=19.6, batch_index=398, batch_size=256]

Validation:  54%|██████▍     | 398/743 [1:43:08<1:29:50, 15.62s/batch, batch_loss=12.9, batch_index=399, batch_size=256]

Validation:  54%|██████▍     | 399/743 [1:43:08<1:29:14, 15.56s/batch, batch_loss=12.9, batch_index=399, batch_size=256]

Validation:  54%|██████▍     | 399/743 [1:43:23<1:29:14, 15.56s/batch, batch_loss=21.1, batch_index=400, batch_size=256]

Validation:  54%|██████▍     | 400/743 [1:43:23<1:28:24, 15.47s/batch, batch_loss=21.1, batch_index=400, batch_size=256]

Validation:  54%|███████▌      | 400/743 [1:43:38<1:28:24, 15.47s/batch, batch_loss=19, batch_index=401, batch_size=256]

Validation:  54%|███████▌      | 401/743 [1:43:38<1:28:21, 15.50s/batch, batch_loss=19, batch_index=401, batch_size=256]

Validation:  54%|██████▍     | 401/743 [1:43:56<1:28:21, 15.50s/batch, batch_loss=8.35, batch_index=402, batch_size=256]

Validation:  54%|██████▍     | 402/743 [1:43:56<1:32:06, 16.21s/batch, batch_loss=8.35, batch_index=402, batch_size=256]

Validation:  54%|██████▍     | 402/743 [1:44:11<1:32:06, 16.21s/batch, batch_loss=18.3, batch_index=403, batch_size=256]

Validation:  54%|██████▌     | 403/743 [1:44:11<1:29:36, 15.81s/batch, batch_loss=18.3, batch_index=403, batch_size=256]

Validation:  54%|██████▌     | 403/743 [1:44:26<1:29:36, 15.81s/batch, batch_loss=16.6, batch_index=404, batch_size=256]

Validation:  54%|██████▌     | 404/743 [1:44:26<1:27:52, 15.55s/batch, batch_loss=16.6, batch_index=404, batch_size=256]

Validation:  54%|██████▌     | 404/743 [1:44:41<1:27:52, 15.55s/batch, batch_loss=11.5, batch_index=405, batch_size=256]

Validation:  55%|██████▌     | 405/743 [1:44:41<1:26:41, 15.39s/batch, batch_loss=11.5, batch_index=405, batch_size=256]

Validation:  55%|██████▌     | 405/743 [1:44:56<1:26:41, 15.39s/batch, batch_loss=14.3, batch_index=406, batch_size=256]

Validation:  55%|██████▌     | 406/743 [1:44:56<1:26:03, 15.32s/batch, batch_loss=14.3, batch_index=406, batch_size=256]

Validation:  55%|██████▌     | 406/743 [1:45:12<1:26:03, 15.32s/batch, batch_loss=19.4, batch_index=407, batch_size=256]

Validation:  55%|██████▌     | 407/743 [1:45:12<1:26:03, 15.37s/batch, batch_loss=19.4, batch_index=407, batch_size=256]

Validation:  55%|██████▌     | 407/743 [1:45:27<1:26:03, 15.37s/batch, batch_loss=20.2, batch_index=408, batch_size=256]

Validation:  55%|██████▌     | 408/743 [1:45:27<1:25:18, 15.28s/batch, batch_loss=20.2, batch_index=408, batch_size=256]

Validation:  55%|██████▌     | 408/743 [1:45:42<1:25:18, 15.28s/batch, batch_loss=12.8, batch_index=409, batch_size=256]

Validation:  55%|██████▌     | 409/743 [1:45:42<1:24:40, 15.21s/batch, batch_loss=12.8, batch_index=409, batch_size=256]

Validation:  55%|██████▌     | 409/743 [1:45:57<1:24:40, 15.21s/batch, batch_loss=17.7, batch_index=410, batch_size=256]

Validation:  55%|██████▌     | 410/743 [1:45:57<1:24:23, 15.21s/batch, batch_loss=17.7, batch_index=410, batch_size=256]

Validation:  55%|██████▌     | 410/743 [1:46:12<1:24:23, 15.21s/batch, batch_loss=21.2, batch_index=411, batch_size=256]

Validation:  55%|██████▋     | 411/743 [1:46:12<1:24:23, 15.25s/batch, batch_loss=21.2, batch_index=411, batch_size=256]

Validation:  55%|██████▋     | 411/743 [1:46:28<1:24:23, 15.25s/batch, batch_loss=17.3, batch_index=412, batch_size=256]

Validation:  55%|██████▋     | 412/743 [1:46:28<1:24:02, 15.23s/batch, batch_loss=17.3, batch_index=412, batch_size=256]

Validation:  55%|████▉    | 412/743 [1:46:42<1:24:02, 15.23s/batch, batch_loss=1.93e+3, batch_index=413, batch_size=256]

Validation:  56%|█████    | 413/743 [1:46:42<1:22:24, 14.98s/batch, batch_loss=1.93e+3, batch_index=413, batch_size=256]

Validation:  56%|███████▊      | 413/743 [1:46:57<1:22:24, 14.98s/batch, batch_loss=23, batch_index=414, batch_size=256]

Validation:  56%|███████▊      | 414/743 [1:46:57<1:23:01, 15.14s/batch, batch_loss=23, batch_index=414, batch_size=256]

Validation:  56%|███████▊      | 414/743 [1:47:13<1:23:01, 15.14s/batch, batch_loss=27, batch_index=415, batch_size=256]

Validation:  56%|███████▊      | 415/743 [1:47:13<1:22:52, 15.16s/batch, batch_loss=27, batch_index=415, batch_size=256]

Validation:  56%|█████    | 415/743 [1:47:28<1:22:52, 15.16s/batch, batch_loss=6.47e+3, batch_index=416, batch_size=256]

Validation:  56%|█████    | 416/743 [1:47:28<1:22:23, 15.12s/batch, batch_loss=6.47e+3, batch_index=416, batch_size=256]

Validation:  56%|██████▋     | 416/743 [1:47:43<1:22:23, 15.12s/batch, batch_loss=18.7, batch_index=417, batch_size=256]

Validation:  56%|██████▋     | 417/743 [1:47:43<1:22:31, 15.19s/batch, batch_loss=18.7, batch_index=417, batch_size=256]

Validation:  56%|██████▋     | 417/743 [1:48:01<1:22:31, 15.19s/batch, batch_loss=15.4, batch_index=418, batch_size=256]

Validation:  56%|██████▊     | 418/743 [1:48:01<1:27:06, 16.08s/batch, batch_loss=15.4, batch_index=418, batch_size=256]

Validation:  56%|██████▊     | 418/743 [1:48:17<1:27:06, 16.08s/batch, batch_loss=18.1, batch_index=419, batch_size=256]

Validation:  56%|██████▊     | 419/743 [1:48:17<1:25:46, 15.88s/batch, batch_loss=18.1, batch_index=419, batch_size=256]

Validation:  56%|██████▊     | 419/743 [1:48:32<1:25:46, 15.88s/batch, batch_loss=15.6, batch_index=420, batch_size=256]

Validation:  57%|██████▊     | 420/743 [1:48:32<1:25:19, 15.85s/batch, batch_loss=15.6, batch_index=420, batch_size=256]

Validation:  57%|██████▊     | 420/743 [1:48:48<1:25:19, 15.85s/batch, batch_loss=28.6, batch_index=421, batch_size=256]

Validation:  57%|██████▊     | 421/743 [1:48:48<1:24:50, 15.81s/batch, batch_loss=28.6, batch_index=421, batch_size=256]

Validation:  57%|███████▉      | 421/743 [1:49:04<1:24:50, 15.81s/batch, batch_loss=12, batch_index=422, batch_size=256]

Validation:  57%|███████▉      | 422/743 [1:49:04<1:24:11, 15.74s/batch, batch_loss=12, batch_index=422, batch_size=256]

Validation:  57%|██████▊     | 422/743 [1:49:19<1:24:11, 15.74s/batch, batch_loss=22.3, batch_index=423, batch_size=256]

Validation:  57%|██████▊     | 423/743 [1:49:19<1:23:51, 15.72s/batch, batch_loss=22.3, batch_index=423, batch_size=256]

Validation:  57%|███████▍     | 423/743 [1:49:38<1:23:51, 15.72s/batch, batch_loss=322, batch_index=424, batch_size=256]

Validation:  57%|███████▍     | 424/743 [1:49:38<1:27:48, 16.52s/batch, batch_loss=322, batch_index=424, batch_size=256]

Validation:  57%|██████▊     | 424/743 [1:49:53<1:27:48, 16.52s/batch, batch_loss=22.9, batch_index=425, batch_size=256]

Validation:  57%|██████▊     | 425/743 [1:49:53<1:25:17, 16.09s/batch, batch_loss=22.9, batch_index=425, batch_size=256]

Validation:  57%|██████▊     | 425/743 [1:50:09<1:25:17, 16.09s/batch, batch_loss=23.9, batch_index=426, batch_size=256]

Validation:  57%|██████▉     | 426/743 [1:50:09<1:24:51, 16.06s/batch, batch_loss=23.9, batch_index=426, batch_size=256]

Validation:  57%|██████▉     | 426/743 [1:50:24<1:24:51, 16.06s/batch, batch_loss=20.6, batch_index=427, batch_size=256]

Validation:  57%|██████▉     | 427/743 [1:50:24<1:23:28, 15.85s/batch, batch_loss=20.6, batch_index=427, batch_size=256]

Validation:  57%|█████▏   | 427/743 [1:50:40<1:23:28, 15.85s/batch, batch_loss=5.29e+3, batch_index=428, batch_size=256]

Validation:  58%|█████▏   | 428/743 [1:50:40<1:23:18, 15.87s/batch, batch_loss=5.29e+3, batch_index=428, batch_size=256]

Validation:  58%|██████▉     | 428/743 [1:50:58<1:23:18, 15.87s/batch, batch_loss=17.8, batch_index=429, batch_size=256]

Validation:  58%|██████▉     | 429/743 [1:50:58<1:25:38, 16.37s/batch, batch_loss=17.8, batch_index=429, batch_size=256]

Validation:  58%|█████▊    | 429/743 [1:51:13<1:25:38, 16.37s/batch, batch_loss=5.4e+3, batch_index=430, batch_size=256]

Validation:  58%|█████▊    | 430/743 [1:51:13<1:23:16, 15.96s/batch, batch_loss=5.4e+3, batch_index=430, batch_size=256]

Validation:  58%|█████▊    | 430/743 [1:51:27<1:23:16, 15.96s/batch, batch_loss=1.4e+4, batch_index=431, batch_size=256]

Validation:  58%|█████▊    | 431/743 [1:51:27<1:20:44, 15.53s/batch, batch_loss=1.4e+4, batch_index=431, batch_size=256]

Validation:  58%|███████▌     | 431/743 [1:51:42<1:20:44, 15.53s/batch, batch_loss=959, batch_index=432, batch_size=256]

Validation:  58%|███████▌     | 432/743 [1:51:42<1:19:53, 15.41s/batch, batch_loss=959, batch_index=432, batch_size=256]

Validation:  58%|██████▉     | 432/743 [1:51:57<1:19:53, 15.41s/batch, batch_loss=16.6, batch_index=433, batch_size=256]

Validation:  58%|██████▉     | 433/743 [1:51:57<1:18:05, 15.12s/batch, batch_loss=16.6, batch_index=433, batch_size=256]

Validation:  58%|██████▉     | 433/743 [1:52:11<1:18:05, 15.12s/batch, batch_loss=13.1, batch_index=434, batch_size=256]

Validation:  58%|███████     | 434/743 [1:52:11<1:17:06, 14.97s/batch, batch_loss=13.1, batch_index=434, batch_size=256]

Validation:  58%|███████     | 434/743 [1:52:26<1:17:06, 14.97s/batch, batch_loss=17.6, batch_index=435, batch_size=256]

Validation:  59%|███████     | 435/743 [1:52:26<1:16:58, 14.99s/batch, batch_loss=17.6, batch_index=435, batch_size=256]

Validation:  59%|████████▏     | 435/743 [1:52:42<1:16:58, 14.99s/batch, batch_loss=14, batch_index=436, batch_size=256]

Validation:  59%|████████▏     | 436/743 [1:52:42<1:17:21, 15.12s/batch, batch_loss=14, batch_index=436, batch_size=256]

Validation:  59%|███████     | 436/743 [1:52:56<1:17:21, 15.12s/batch, batch_loss=24.4, batch_index=437, batch_size=256]

Validation:  59%|███████     | 437/743 [1:52:56<1:15:51, 14.87s/batch, batch_loss=24.4, batch_index=437, batch_size=256]

Validation:  59%|███████▋     | 437/743 [1:53:12<1:15:51, 14.87s/batch, batch_loss=975, batch_index=438, batch_size=256]

Validation:  59%|███████▋     | 438/743 [1:53:12<1:16:41, 15.09s/batch, batch_loss=975, batch_index=438, batch_size=256]

Validation:  59%|███████▋     | 438/743 [1:53:26<1:16:41, 15.09s/batch, batch_loss=898, batch_index=439, batch_size=256]

Validation:  59%|███████▋     | 439/743 [1:53:26<1:15:17, 14.86s/batch, batch_loss=898, batch_index=439, batch_size=256]

Validation:  59%|███████     | 439/743 [1:53:40<1:15:17, 14.86s/batch, batch_loss=19.4, batch_index=440, batch_size=256]

Validation:  59%|███████     | 440/743 [1:53:40<1:13:26, 14.54s/batch, batch_loss=19.4, batch_index=440, batch_size=256]

Validation:  59%|███████     | 440/743 [1:53:55<1:13:26, 14.54s/batch, batch_loss=16.5, batch_index=441, batch_size=256]

Validation:  59%|███████     | 441/743 [1:53:55<1:13:36, 14.62s/batch, batch_loss=16.5, batch_index=441, batch_size=256]

Validation:  59%|███████     | 441/743 [1:54:10<1:13:36, 14.62s/batch, batch_loss=15.6, batch_index=442, batch_size=256]

Validation:  59%|███████▏    | 442/743 [1:54:10<1:13:43, 14.69s/batch, batch_loss=15.6, batch_index=442, batch_size=256]

Validation:  59%|███████▏    | 442/743 [1:54:25<1:13:43, 14.69s/batch, batch_loss=14.9, batch_index=443, batch_size=256]

Validation:  60%|███████▏    | 443/743 [1:54:25<1:14:21, 14.87s/batch, batch_loss=14.9, batch_index=443, batch_size=256]

Validation:  60%|███████▏    | 443/743 [1:54:40<1:14:21, 14.87s/batch, batch_loss=16.2, batch_index=444, batch_size=256]

Validation:  60%|███████▏    | 444/743 [1:54:40<1:14:50, 15.02s/batch, batch_loss=16.2, batch_index=444, batch_size=256]

Validation:  60%|███████▏    | 444/743 [1:54:55<1:14:50, 15.02s/batch, batch_loss=11.1, batch_index=445, batch_size=256]

Validation:  60%|███████▏    | 445/743 [1:54:55<1:14:23, 14.98s/batch, batch_loss=11.1, batch_index=445, batch_size=256]

Validation:  60%|███████▏    | 445/743 [1:55:10<1:14:23, 14.98s/batch, batch_loss=17.1, batch_index=446, batch_size=256]

Validation:  60%|███████▏    | 446/743 [1:55:10<1:14:04, 14.96s/batch, batch_loss=17.1, batch_index=446, batch_size=256]

Validation:  60%|█████▍   | 446/743 [1:55:25<1:14:04, 14.96s/batch, batch_loss=6.84e+3, batch_index=447, batch_size=256]

Validation:  60%|█████▍   | 447/743 [1:55:25<1:14:02, 15.01s/batch, batch_loss=6.84e+3, batch_index=447, batch_size=256]

Validation:  60%|███████▏    | 447/743 [1:55:40<1:14:02, 15.01s/batch, batch_loss=7.78, batch_index=448, batch_size=256]

Validation:  60%|███████▏    | 448/743 [1:55:40<1:14:14, 15.10s/batch, batch_loss=7.78, batch_index=448, batch_size=256]

Validation:  60%|███████▏    | 448/743 [1:55:56<1:14:14, 15.10s/batch, batch_loss=10.2, batch_index=449, batch_size=256]

Validation:  60%|███████▎    | 449/743 [1:55:56<1:14:11, 15.14s/batch, batch_loss=10.2, batch_index=449, batch_size=256]

Validation:  60%|███████▎    | 449/743 [1:56:11<1:14:11, 15.14s/batch, batch_loss=17.4, batch_index=450, batch_size=256]

Validation:  61%|███████▎    | 450/743 [1:56:11<1:14:40, 15.29s/batch, batch_loss=17.4, batch_index=450, batch_size=256]

Validation:  61%|███████▎    | 450/743 [1:56:26<1:14:40, 15.29s/batch, batch_loss=13.2, batch_index=451, batch_size=256]

Validation:  61%|███████▎    | 451/743 [1:56:26<1:13:54, 15.19s/batch, batch_loss=13.2, batch_index=451, batch_size=256]

Validation:  61%|████████▍     | 451/743 [1:56:41<1:13:54, 15.19s/batch, batch_loss=20, batch_index=452, batch_size=256]

Validation:  61%|████████▌     | 452/743 [1:56:41<1:13:44, 15.21s/batch, batch_loss=20, batch_index=452, batch_size=256]

Validation:  61%|███████▎    | 452/743 [1:56:57<1:13:44, 15.21s/batch, batch_loss=13.7, batch_index=453, batch_size=256]

Validation:  61%|███████▎    | 453/743 [1:56:57<1:13:18, 15.17s/batch, batch_loss=13.7, batch_index=453, batch_size=256]

Validation:  61%|███████▎    | 453/743 [1:57:12<1:13:18, 15.17s/batch, batch_loss=9.73, batch_index=454, batch_size=256]

Validation:  61%|███████▎    | 454/743 [1:57:12<1:13:06, 15.18s/batch, batch_loss=9.73, batch_index=454, batch_size=256]

Validation:  61%|███████▎    | 454/743 [1:57:27<1:13:06, 15.18s/batch, batch_loss=11.5, batch_index=455, batch_size=256]

Validation:  61%|███████▎    | 455/743 [1:57:27<1:13:11, 15.25s/batch, batch_loss=11.5, batch_index=455, batch_size=256]

Validation:  61%|███████▉     | 455/743 [1:57:42<1:13:11, 15.25s/batch, batch_loss=9.5, batch_index=456, batch_size=256]

Validation:  61%|███████▉     | 456/743 [1:57:42<1:12:09, 15.09s/batch, batch_loss=9.5, batch_index=456, batch_size=256]

Validation:  61%|███████▎    | 456/743 [1:57:57<1:12:09, 15.09s/batch, batch_loss=16.4, batch_index=457, batch_size=256]

Validation:  62%|███████▍    | 457/743 [1:57:57<1:11:30, 15.00s/batch, batch_loss=16.4, batch_index=457, batch_size=256]

Validation:  62%|███████▍    | 457/743 [1:58:13<1:11:30, 15.00s/batch, batch_loss=25.5, batch_index=458, batch_size=256]

Validation:  62%|███████▍    | 458/743 [1:58:13<1:12:26, 15.25s/batch, batch_loss=25.5, batch_index=458, batch_size=256]

Validation:  62%|███████▍    | 458/743 [1:58:29<1:12:26, 15.25s/batch, batch_loss=14.9, batch_index=459, batch_size=256]

Validation:  62%|███████▍    | 459/743 [1:58:29<1:13:43, 15.58s/batch, batch_loss=14.9, batch_index=459, batch_size=256]

Validation:  62%|███████▍    | 459/743 [1:58:45<1:13:43, 15.58s/batch, batch_loss=20.5, batch_index=460, batch_size=256]

Validation:  62%|███████▍    | 460/743 [1:58:45<1:14:52, 15.88s/batch, batch_loss=20.5, batch_index=460, batch_size=256]

Validation:  62%|███████▍    | 460/743 [1:59:01<1:14:52, 15.88s/batch, batch_loss=17.6, batch_index=461, batch_size=256]

Validation:  62%|███████▍    | 461/743 [1:59:01<1:14:01, 15.75s/batch, batch_loss=17.6, batch_index=461, batch_size=256]

Validation:  62%|███████▍    | 461/743 [1:59:15<1:14:01, 15.75s/batch, batch_loss=16.6, batch_index=462, batch_size=256]

Validation:  62%|███████▍    | 462/743 [1:59:15<1:11:09, 15.19s/batch, batch_loss=16.6, batch_index=462, batch_size=256]

Validation:  62%|███████▍    | 462/743 [1:59:30<1:11:09, 15.19s/batch, batch_loss=12.2, batch_index=463, batch_size=256]

Validation:  62%|███████▍    | 463/743 [1:59:30<1:10:31, 15.11s/batch, batch_loss=12.2, batch_index=463, batch_size=256]

Validation:  62%|█████▌   | 463/743 [1:59:45<1:10:31, 15.11s/batch, batch_loss=1.34e+4, batch_index=464, batch_size=256]

Validation:  62%|█████▌   | 464/743 [1:59:45<1:09:55, 15.04s/batch, batch_loss=1.34e+4, batch_index=464, batch_size=256]

Validation:  62%|███████▍    | 464/743 [2:00:00<1:09:55, 15.04s/batch, batch_loss=19.4, batch_index=465, batch_size=256]

Validation:  63%|███████▌    | 465/743 [2:00:00<1:10:01, 15.11s/batch, batch_loss=19.4, batch_index=465, batch_size=256]

Validation:  63%|███████▌    | 465/743 [2:00:15<1:10:01, 15.11s/batch, batch_loss=16.4, batch_index=466, batch_size=256]

Validation:  63%|███████▌    | 466/743 [2:00:15<1:09:12, 14.99s/batch, batch_loss=16.4, batch_index=466, batch_size=256]

Validation:  63%|███████▌    | 466/743 [2:00:29<1:09:12, 14.99s/batch, batch_loss=22.5, batch_index=467, batch_size=256]

Validation:  63%|███████▌    | 467/743 [2:00:29<1:07:56, 14.77s/batch, batch_loss=22.5, batch_index=467, batch_size=256]

Validation:  63%|███████▌    | 467/743 [2:00:44<1:07:56, 14.77s/batch, batch_loss=12.9, batch_index=468, batch_size=256]

Validation:  63%|███████▌    | 468/743 [2:00:44<1:07:49, 14.80s/batch, batch_loss=12.9, batch_index=468, batch_size=256]

Validation:  63%|███████▌    | 468/743 [2:00:58<1:07:49, 14.80s/batch, batch_loss=18.7, batch_index=469, batch_size=256]

Validation:  63%|███████▌    | 469/743 [2:00:58<1:07:30, 14.78s/batch, batch_loss=18.7, batch_index=469, batch_size=256]

Validation:  63%|█████▋   | 469/743 [2:01:14<1:07:30, 14.78s/batch, batch_loss=6.35e+4, batch_index=470, batch_size=256]

Validation:  63%|█████▋   | 470/743 [2:01:14<1:08:33, 15.07s/batch, batch_loss=6.35e+4, batch_index=470, batch_size=256]

Validation:  63%|███████▌    | 470/743 [2:01:31<1:08:33, 15.07s/batch, batch_loss=11.8, batch_index=471, batch_size=256]

Validation:  63%|███████▌    | 471/743 [2:01:31<1:11:21, 15.74s/batch, batch_loss=11.8, batch_index=471, batch_size=256]

Validation:  63%|███████▌    | 471/743 [2:01:47<1:11:21, 15.74s/batch, batch_loss=20.8, batch_index=472, batch_size=256]

Validation:  64%|███████▌    | 472/743 [2:01:47<1:10:12, 15.54s/batch, batch_loss=20.8, batch_index=472, batch_size=256]

Validation:  64%|████████▎    | 472/743 [2:02:03<1:10:12, 15.54s/batch, batch_loss=587, batch_index=473, batch_size=256]

Validation:  64%|████████▎    | 473/743 [2:02:03<1:10:38, 15.70s/batch, batch_loss=587, batch_index=473, batch_size=256]

Validation:  64%|███████▋    | 473/743 [2:02:17<1:10:38, 15.70s/batch, batch_loss=15.9, batch_index=474, batch_size=256]

Validation:  64%|███████▋    | 474/743 [2:02:17<1:09:07, 15.42s/batch, batch_loss=15.9, batch_index=474, batch_size=256]

Validation:  64%|███████▋    | 474/743 [2:02:34<1:09:07, 15.42s/batch, batch_loss=20.4, batch_index=475, batch_size=256]

Validation:  64%|███████▋    | 475/743 [2:02:34<1:10:37, 15.81s/batch, batch_loss=20.4, batch_index=475, batch_size=256]

Validation:  64%|███████▋    | 475/743 [2:02:48<1:10:37, 15.81s/batch, batch_loss=11.4, batch_index=476, batch_size=256]

Validation:  64%|███████▋    | 476/743 [2:02:48<1:08:12, 15.33s/batch, batch_loss=11.4, batch_index=476, batch_size=256]

Validation:  64%|███████▋    | 476/743 [2:03:02<1:08:12, 15.33s/batch, batch_loss=15.7, batch_index=477, batch_size=256]

Validation:  64%|███████▋    | 477/743 [2:03:02<1:05:14, 14.72s/batch, batch_loss=15.7, batch_index=477, batch_size=256]

Validation:  64%|█████▊   | 477/743 [2:03:17<1:05:14, 14.72s/batch, batch_loss=2.46e+3, batch_index=478, batch_size=256]

Validation:  64%|█████▊   | 478/743 [2:03:17<1:05:33, 14.84s/batch, batch_loss=2.46e+3, batch_index=478, batch_size=256]

Validation:  64%|█████▊   | 478/743 [2:03:35<1:05:33, 14.84s/batch, batch_loss=2.08e+4, batch_index=479, batch_size=256]

Validation:  64%|█████▊   | 479/743 [2:03:35<1:10:21, 15.99s/batch, batch_loss=2.08e+4, batch_index=479, batch_size=256]

Validation:  64%|█████████     | 479/743 [2:03:51<1:10:21, 15.99s/batch, batch_loss=12, batch_index=480, batch_size=256]

Validation:  65%|█████████     | 480/743 [2:03:51<1:09:27, 15.85s/batch, batch_loss=12, batch_index=480, batch_size=256]

Validation:  65%|███████▊    | 480/743 [2:04:06<1:09:27, 15.85s/batch, batch_loss=13.9, batch_index=481, batch_size=256]

Validation:  65%|███████▊    | 481/743 [2:04:06<1:08:18, 15.64s/batch, batch_loss=13.9, batch_index=481, batch_size=256]

Validation:  65%|█████▊   | 481/743 [2:04:21<1:08:18, 15.64s/batch, batch_loss=6.96e+3, batch_index=482, batch_size=256]

Validation:  65%|█████▊   | 482/743 [2:04:21<1:06:47, 15.36s/batch, batch_loss=6.96e+3, batch_index=482, batch_size=256]

Validation:  65%|███████▊    | 482/743 [2:04:35<1:06:47, 15.36s/batch, batch_loss=19.5, batch_index=483, batch_size=256]

Validation:  65%|███████▊    | 483/743 [2:04:35<1:05:43, 15.17s/batch, batch_loss=19.5, batch_index=483, batch_size=256]

Validation:  65%|█████▊   | 483/743 [2:04:51<1:05:43, 15.17s/batch, batch_loss=2.31e+4, batch_index=484, batch_size=256]

Validation:  65%|█████▊   | 484/743 [2:04:51<1:05:49, 15.25s/batch, batch_loss=2.31e+4, batch_index=484, batch_size=256]

Validation:  65%|█████▊   | 484/743 [2:05:07<1:05:49, 15.25s/batch, batch_loss=3.13e+4, batch_index=485, batch_size=256]

Validation:  65%|█████▊   | 485/743 [2:05:07<1:06:49, 15.54s/batch, batch_loss=3.13e+4, batch_index=485, batch_size=256]

Validation:  65%|███████▊    | 485/743 [2:05:22<1:06:49, 15.54s/batch, batch_loss=17.6, batch_index=486, batch_size=256]

Validation:  65%|███████▊    | 486/743 [2:05:22<1:05:36, 15.32s/batch, batch_loss=17.6, batch_index=486, batch_size=256]

Validation:  65%|███████▊    | 486/743 [2:05:40<1:05:36, 15.32s/batch, batch_loss=34.6, batch_index=487, batch_size=256]

Validation:  66%|███████▊    | 487/743 [2:05:40<1:09:22, 16.26s/batch, batch_loss=34.6, batch_index=487, batch_size=256]

Validation:  66%|█████████▏    | 487/743 [2:05:55<1:09:22, 16.26s/batch, batch_loss=24, batch_index=488, batch_size=256]

Validation:  66%|█████████▏    | 488/743 [2:05:55<1:07:20, 15.84s/batch, batch_loss=24, batch_index=488, batch_size=256]

Validation:  66%|███████▉    | 488/743 [2:06:11<1:07:20, 15.84s/batch, batch_loss=11.2, batch_index=489, batch_size=256]

Validation:  66%|███████▉    | 489/743 [2:06:11<1:06:34, 15.73s/batch, batch_loss=11.2, batch_index=489, batch_size=256]

Validation:  66%|███████▉    | 489/743 [2:06:26<1:06:34, 15.73s/batch, batch_loss=19.6, batch_index=490, batch_size=256]

Validation:  66%|███████▉    | 490/743 [2:06:26<1:05:25, 15.52s/batch, batch_loss=19.6, batch_index=490, batch_size=256]

Validation:  66%|███████▉    | 490/743 [2:06:41<1:05:25, 15.52s/batch, batch_loss=16.6, batch_index=491, batch_size=256]

Validation:  66%|███████▉    | 491/743 [2:06:41<1:04:30, 15.36s/batch, batch_loss=16.6, batch_index=491, batch_size=256]

Validation:  66%|█████▉   | 491/743 [2:06:57<1:04:30, 15.36s/batch, batch_loss=1.04e+3, batch_index=492, batch_size=256]

Validation:  66%|█████▉   | 492/743 [2:06:57<1:05:12, 15.59s/batch, batch_loss=1.04e+3, batch_index=492, batch_size=256]

Validation:  66%|█████▉   | 492/743 [2:07:12<1:05:12, 15.59s/batch, batch_loss=1.43e+4, batch_index=493, batch_size=256]

Validation:  66%|█████▉   | 493/743 [2:07:12<1:04:21, 15.45s/batch, batch_loss=1.43e+4, batch_index=493, batch_size=256]

Validation:  66%|███████▉    | 493/743 [2:07:27<1:04:21, 15.45s/batch, batch_loss=10.5, batch_index=494, batch_size=256]

Validation:  66%|███████▉    | 494/743 [2:07:27<1:04:03, 15.44s/batch, batch_loss=10.5, batch_index=494, batch_size=256]

Validation:  66%|█████▉   | 494/743 [2:07:44<1:04:03, 15.44s/batch, batch_loss=1.18e+4, batch_index=495, batch_size=256]

Validation:  67%|█████▉   | 495/743 [2:07:44<1:05:41, 15.89s/batch, batch_loss=1.18e+4, batch_index=495, batch_size=256]

Validation:  67%|███████▉    | 495/743 [2:07:59<1:05:41, 15.89s/batch, batch_loss=17.9, batch_index=496, batch_size=256]

Validation:  67%|████████    | 496/743 [2:07:59<1:04:29, 15.67s/batch, batch_loss=17.9, batch_index=496, batch_size=256]

Validation:  67%|████████    | 496/743 [2:08:15<1:04:29, 15.67s/batch, batch_loss=14.1, batch_index=497, batch_size=256]

Validation:  67%|████████    | 497/743 [2:08:15<1:04:16, 15.68s/batch, batch_loss=14.1, batch_index=497, batch_size=256]

Validation:  67%|████████    | 497/743 [2:08:30<1:04:16, 15.68s/batch, batch_loss=14.5, batch_index=498, batch_size=256]

Validation:  67%|████████    | 498/743 [2:08:30<1:02:59, 15.43s/batch, batch_loss=14.5, batch_index=498, batch_size=256]

Validation:  67%|████████    | 498/743 [2:08:46<1:02:59, 15.43s/batch, batch_loss=7.18, batch_index=499, batch_size=256]

Validation:  67%|████████    | 499/743 [2:08:46<1:03:00, 15.50s/batch, batch_loss=7.18, batch_index=499, batch_size=256]

Validation:  67%|██████   | 499/743 [2:09:01<1:03:00, 15.50s/batch, batch_loss=2.51e+4, batch_index=500, batch_size=256]

Validation:  67%|██████   | 500/743 [2:09:01<1:02:00, 15.31s/batch, batch_loss=2.51e+4, batch_index=500, batch_size=256]

Validation:  67%|████████    | 500/743 [2:09:16<1:02:00, 15.31s/batch, batch_loss=20.9, batch_index=501, batch_size=256]

Validation:  67%|████████    | 501/743 [2:09:16<1:02:15, 15.44s/batch, batch_loss=20.9, batch_index=501, batch_size=256]

Validation:  67%|██████   | 501/743 [2:09:32<1:02:15, 15.44s/batch, batch_loss=3.14e+3, batch_index=502, batch_size=256]

Validation:  68%|██████   | 502/743 [2:09:32<1:01:51, 15.40s/batch, batch_loss=3.14e+3, batch_index=502, batch_size=256]

Validation:  68%|████████    | 502/743 [2:09:50<1:01:51, 15.40s/batch, batch_loss=15.8, batch_index=503, batch_size=256]

Validation:  68%|████████    | 503/743 [2:09:50<1:05:21, 16.34s/batch, batch_loss=15.8, batch_index=503, batch_size=256]

Validation:  68%|████████    | 503/743 [2:10:06<1:05:21, 16.34s/batch, batch_loss=12.6, batch_index=504, batch_size=256]

Validation:  68%|████████▏   | 504/743 [2:10:06<1:04:42, 16.25s/batch, batch_loss=12.6, batch_index=504, batch_size=256]

Validation:  68%|████████▏   | 504/743 [2:10:22<1:04:42, 16.25s/batch, batch_loss=21.2, batch_index=505, batch_size=256]

Validation:  68%|████████▏   | 505/743 [2:10:22<1:03:52, 16.10s/batch, batch_loss=21.2, batch_index=505, batch_size=256]

Validation:  68%|██████   | 505/743 [2:10:37<1:03:52, 16.10s/batch, batch_loss=2.82e+3, batch_index=506, batch_size=256]

Validation:  68%|██████▏  | 506/743 [2:10:37<1:02:27, 15.81s/batch, batch_loss=2.82e+3, batch_index=506, batch_size=256]

Validation:  68%|██████▏  | 506/743 [2:10:52<1:02:27, 15.81s/batch, batch_loss=1.99e+3, batch_index=507, batch_size=256]

Validation:  68%|██████▏  | 507/743 [2:10:52<1:01:06, 15.53s/batch, batch_loss=1.99e+3, batch_index=507, batch_size=256]

Validation:  68%|██████▏  | 507/743 [2:11:08<1:01:06, 15.53s/batch, batch_loss=8.37e+3, batch_index=508, batch_size=256]

Validation:  68%|██████▏  | 508/743 [2:11:08<1:01:01, 15.58s/batch, batch_loss=8.37e+3, batch_index=508, batch_size=256]

Validation:  68%|██████▏  | 508/743 [2:11:23<1:01:01, 15.58s/batch, batch_loss=8.47e+3, batch_index=509, batch_size=256]

Validation:  69%|██████▏  | 509/743 [2:11:23<1:00:03, 15.40s/batch, batch_loss=8.47e+3, batch_index=509, batch_size=256]

Validation:  69%|████████▏   | 509/743 [2:11:41<1:00:03, 15.40s/batch, batch_loss=14.7, batch_index=510, batch_size=256]

Validation:  69%|████████▏   | 510/743 [2:11:41<1:02:47, 16.17s/batch, batch_loss=14.7, batch_index=510, batch_size=256]

Validation:  69%|█████████▌    | 510/743 [2:11:55<1:02:47, 16.17s/batch, batch_loss=20, batch_index=511, batch_size=256]

Validation:  69%|█████████▋    | 511/743 [2:11:55<1:00:52, 15.74s/batch, batch_loss=20, batch_index=511, batch_size=256]

Validation:  69%|████████▎   | 511/743 [2:12:10<1:00:52, 15.74s/batch, batch_loss=17.4, batch_index=512, batch_size=256]

Validation:  69%|█████████▋    | 512/743 [2:12:10<59:53, 15.56s/batch, batch_loss=17.4, batch_index=512, batch_size=256]

Validation:  69%|█████████▋    | 512/743 [2:12:25<59:53, 15.56s/batch, batch_loss=18.1, batch_index=513, batch_size=256]

Validation:  69%|█████████▋    | 513/743 [2:12:25<58:54, 15.37s/batch, batch_loss=18.1, batch_index=513, batch_size=256]

Validation:  69%|█████████▋    | 513/743 [2:12:41<58:54, 15.37s/batch, batch_loss=13.5, batch_index=514, batch_size=256]

Validation:  69%|█████████▋    | 514/743 [2:12:41<58:52, 15.42s/batch, batch_loss=13.5, batch_index=514, batch_size=256]

Validation:  69%|█████████▋    | 514/743 [2:12:57<58:52, 15.42s/batch, batch_loss=12.7, batch_index=515, batch_size=256]

Validation:  69%|█████████▋    | 515/743 [2:12:57<59:04, 15.55s/batch, batch_loss=12.7, batch_index=515, batch_size=256]

Validation:  69%|█████████▋    | 515/743 [2:13:12<59:04, 15.55s/batch, batch_loss=13.6, batch_index=516, batch_size=256]

Validation:  69%|█████████▋    | 516/743 [2:13:12<57:57, 15.32s/batch, batch_loss=13.6, batch_index=516, batch_size=256]

Validation:  69%|███████▋   | 516/743 [2:13:27<57:57, 15.32s/batch, batch_loss=6.16e+4, batch_index=517, batch_size=256]

Validation:  70%|███████▋   | 517/743 [2:13:27<57:55, 15.38s/batch, batch_loss=6.16e+4, batch_index=517, batch_size=256]

Validation:  70%|██████████▍    | 517/743 [2:13:43<57:55, 15.38s/batch, batch_loss=506, batch_index=518, batch_size=256]

Validation:  70%|██████████▍    | 518/743 [2:13:43<58:00, 15.47s/batch, batch_loss=506, batch_index=518, batch_size=256]

Validation:  70%|█████████▊    | 518/743 [2:13:58<58:00, 15.47s/batch, batch_loss=11.6, batch_index=519, batch_size=256]

Validation:  70%|█████████▊    | 519/743 [2:13:58<57:51, 15.50s/batch, batch_loss=11.6, batch_index=519, batch_size=256]

Validation:  70%|█████████▊    | 519/743 [2:14:14<57:51, 15.50s/batch, batch_loss=17.4, batch_index=520, batch_size=256]

Validation:  70%|█████████▊    | 520/743 [2:14:14<58:03, 15.62s/batch, batch_loss=17.4, batch_index=520, batch_size=256]

Validation:  70%|█████████▊    | 520/743 [2:14:29<58:03, 15.62s/batch, batch_loss=15.3, batch_index=521, batch_size=256]

Validation:  70%|█████████▊    | 521/743 [2:14:29<56:49, 15.36s/batch, batch_loss=15.3, batch_index=521, batch_size=256]

Validation:  70%|███████████▏    | 521/743 [2:14:44<56:49, 15.36s/batch, batch_loss=16, batch_index=522, batch_size=256]

Validation:  70%|███████████▏    | 522/743 [2:14:44<56:13, 15.27s/batch, batch_loss=16, batch_index=522, batch_size=256]

Validation:  70%|██████████▌    | 522/743 [2:14:58<56:13, 15.27s/batch, batch_loss=433, batch_index=523, batch_size=256]

Validation:  70%|██████████▌    | 523/743 [2:14:58<54:45, 14.93s/batch, batch_loss=433, batch_index=523, batch_size=256]

Validation:  70%|█████████▊    | 523/743 [2:15:13<54:45, 14.93s/batch, batch_loss=16.7, batch_index=524, batch_size=256]

Validation:  71%|█████████▊    | 524/743 [2:15:13<54:38, 14.97s/batch, batch_loss=16.7, batch_index=524, batch_size=256]

Validation:  71%|█████████▊    | 524/743 [2:15:28<54:38, 14.97s/batch, batch_loss=21.6, batch_index=525, batch_size=256]

Validation:  71%|█████████▉    | 525/743 [2:15:28<54:38, 15.04s/batch, batch_loss=21.6, batch_index=525, batch_size=256]

Validation:  71%|█████████▉    | 525/743 [2:15:46<54:38, 15.04s/batch, batch_loss=11.2, batch_index=526, batch_size=256]

Validation:  71%|█████████▉    | 526/743 [2:15:46<56:41, 15.68s/batch, batch_loss=11.2, batch_index=526, batch_size=256]

Validation:  71%|███████▊   | 526/743 [2:16:01<56:41, 15.68s/batch, batch_loss=3.75e+3, batch_index=527, batch_size=256]

Validation:  71%|███████▊   | 527/743 [2:16:01<55:46, 15.49s/batch, batch_loss=3.75e+3, batch_index=527, batch_size=256]

Validation:  71%|██████████▋    | 527/743 [2:16:16<55:46, 15.49s/batch, batch_loss=512, batch_index=528, batch_size=256]

Validation:  71%|██████████▋    | 528/743 [2:16:16<55:01, 15.35s/batch, batch_loss=512, batch_index=528, batch_size=256]

Validation:  71%|███████▊   | 528/743 [2:16:31<55:01, 15.35s/batch, batch_loss=6.52e+3, batch_index=529, batch_size=256]

Validation:  71%|███████▊   | 529/743 [2:16:31<54:15, 15.21s/batch, batch_loss=6.52e+3, batch_index=529, batch_size=256]

Validation:  71%|██████████▋    | 529/743 [2:16:45<54:15, 15.21s/batch, batch_loss=213, batch_index=530, batch_size=256]

Validation:  71%|██████████▋    | 530/743 [2:16:45<52:40, 14.84s/batch, batch_loss=213, batch_index=530, batch_size=256]

Validation:  71%|█████████▉    | 530/743 [2:16:59<52:40, 14.84s/batch, batch_loss=41.1, batch_index=531, batch_size=256]

Validation:  71%|██████████    | 531/743 [2:16:59<52:20, 14.81s/batch, batch_loss=41.1, batch_index=531, batch_size=256]

Validation:  71%|██████████▋    | 531/743 [2:17:15<52:20, 14.81s/batch, batch_loss=255, batch_index=532, batch_size=256]

Validation:  72%|██████████▋    | 532/743 [2:17:15<52:41, 14.98s/batch, batch_loss=255, batch_index=532, batch_size=256]

Validation:  72%|██████████    | 532/743 [2:17:32<52:41, 14.98s/batch, batch_loss=9.78, batch_index=533, batch_size=256]

Validation:  72%|██████████    | 533/743 [2:17:32<54:36, 15.60s/batch, batch_loss=9.78, batch_index=533, batch_size=256]

Validation:  72%|██████████    | 533/743 [2:17:47<54:36, 15.60s/batch, batch_loss=12.7, batch_index=534, batch_size=256]

Validation:  72%|██████████    | 534/743 [2:17:47<53:59, 15.50s/batch, batch_loss=12.7, batch_index=534, batch_size=256]

Validation:  72%|██████████    | 534/743 [2:18:03<53:59, 15.50s/batch, batch_loss=16.8, batch_index=535, batch_size=256]

Validation:  72%|██████████    | 535/743 [2:18:03<54:05, 15.60s/batch, batch_loss=16.8, batch_index=535, batch_size=256]

Validation:  72%|██████████    | 535/743 [2:18:18<54:05, 15.60s/batch, batch_loss=20.9, batch_index=536, batch_size=256]

Validation:  72%|██████████    | 536/743 [2:18:18<53:18, 15.45s/batch, batch_loss=20.9, batch_index=536, batch_size=256]

Validation:  72%|██████████    | 536/743 [2:18:34<53:18, 15.45s/batch, batch_loss=13.9, batch_index=537, batch_size=256]

Validation:  72%|██████████    | 537/743 [2:18:34<53:09, 15.48s/batch, batch_loss=13.9, batch_index=537, batch_size=256]

Validation:  72%|██████████    | 537/743 [2:18:52<53:09, 15.48s/batch, batch_loss=17.1, batch_index=538, batch_size=256]

Validation:  72%|██████████▏   | 538/743 [2:18:52<55:28, 16.24s/batch, batch_loss=17.1, batch_index=538, batch_size=256]

Validation:  72%|██████████▊    | 538/743 [2:19:07<55:28, 16.24s/batch, batch_loss=249, batch_index=539, batch_size=256]

Validation:  73%|██████████▉    | 539/743 [2:19:07<54:37, 16.07s/batch, batch_loss=249, batch_index=539, batch_size=256]

Validation:  73%|██████████▏   | 539/743 [2:19:23<54:37, 16.07s/batch, batch_loss=17.4, batch_index=540, batch_size=256]

Validation:  73%|██████████▏   | 540/743 [2:19:23<54:28, 16.10s/batch, batch_loss=17.4, batch_index=540, batch_size=256]

Validation:  73%|██████████▏   | 540/743 [2:19:41<54:28, 16.10s/batch, batch_loss=25.8, batch_index=541, batch_size=256]

Validation:  73%|██████████▏   | 541/743 [2:19:41<55:19, 16.44s/batch, batch_loss=25.8, batch_index=541, batch_size=256]

Validation:  73%|████████   | 541/743 [2:19:57<55:19, 16.44s/batch, batch_loss=1.94e+3, batch_index=542, batch_size=256]

Validation:  73%|████████   | 542/743 [2:19:57<54:36, 16.30s/batch, batch_loss=1.94e+3, batch_index=542, batch_size=256]

Validation:  73%|██████████▏   | 542/743 [2:20:12<54:36, 16.30s/batch, batch_loss=15.6, batch_index=543, batch_size=256]

Validation:  73%|██████████▏   | 543/743 [2:20:12<53:39, 16.10s/batch, batch_loss=15.6, batch_index=543, batch_size=256]

Validation:  73%|████████▊   | 543/743 [2:20:28<53:39, 16.10s/batch, batch_loss=1.1e+4, batch_index=544, batch_size=256]

Validation:  73%|████████▊   | 544/743 [2:20:28<53:35, 16.16s/batch, batch_loss=1.1e+4, batch_index=544, batch_size=256]

Validation:  73%|████████   | 544/743 [2:20:44<53:35, 16.16s/batch, batch_loss=2.75e+3, batch_index=545, batch_size=256]

Validation:  73%|████████   | 545/743 [2:20:44<52:37, 15.94s/batch, batch_loss=2.75e+3, batch_index=545, batch_size=256]

Validation:  73%|██████████▎   | 545/743 [2:21:01<52:37, 15.94s/batch, batch_loss=8.52, batch_index=546, batch_size=256]

Validation:  73%|██████████▎   | 546/743 [2:21:01<53:02, 16.15s/batch, batch_loss=8.52, batch_index=546, batch_size=256]

Validation:  73%|███████████    | 546/743 [2:21:17<53:02, 16.15s/batch, batch_loss=258, batch_index=547, batch_size=256]

Validation:  74%|███████████    | 547/743 [2:21:17<52:53, 16.19s/batch, batch_loss=258, batch_index=547, batch_size=256]

Validation:  74%|██████████▎   | 547/743 [2:21:33<52:53, 16.19s/batch, batch_loss=26.1, batch_index=548, batch_size=256]

Validation:  74%|██████████▎   | 548/743 [2:21:33<52:17, 16.09s/batch, batch_loss=26.1, batch_index=548, batch_size=256]

Validation:  74%|████████   | 548/743 [2:21:49<52:17, 16.09s/batch, batch_loss=4.11e+3, batch_index=549, batch_size=256]

Validation:  74%|████████▏  | 549/743 [2:21:49<52:14, 16.16s/batch, batch_loss=4.11e+3, batch_index=549, batch_size=256]

Validation:  74%|████████▏  | 549/743 [2:22:05<52:14, 16.16s/batch, batch_loss=1.28e+4, batch_index=550, batch_size=256]

Validation:  74%|████████▏  | 550/743 [2:22:05<51:43, 16.08s/batch, batch_loss=1.28e+4, batch_index=550, batch_size=256]

Validation:  74%|██████████▎   | 550/743 [2:22:21<51:43, 16.08s/batch, batch_loss=17.8, batch_index=551, batch_size=256]

Validation:  74%|██████████▍   | 551/743 [2:22:21<51:47, 16.19s/batch, batch_loss=17.8, batch_index=551, batch_size=256]

Validation:  74%|████████▏  | 551/743 [2:22:37<51:47, 16.19s/batch, batch_loss=6.78e+3, batch_index=552, batch_size=256]

Validation:  74%|████████▏  | 552/743 [2:22:37<51:10, 16.07s/batch, batch_loss=6.78e+3, batch_index=552, batch_size=256]

Validation:  74%|██████████▍   | 552/743 [2:22:53<51:10, 16.07s/batch, batch_loss=24.2, batch_index=553, batch_size=256]

Validation:  74%|██████████▍   | 553/743 [2:22:53<50:34, 15.97s/batch, batch_loss=24.2, batch_index=553, batch_size=256]

Validation:  74%|██████████▍   | 553/743 [2:23:08<50:34, 15.97s/batch, batch_loss=21.7, batch_index=554, batch_size=256]

Validation:  75%|██████████▍   | 554/743 [2:23:08<49:34, 15.74s/batch, batch_loss=21.7, batch_index=554, batch_size=256]

Validation:  75%|████████▏  | 554/743 [2:23:23<49:34, 15.74s/batch, batch_loss=2.47e+3, batch_index=555, batch_size=256]

Validation:  75%|████████▏  | 555/743 [2:23:23<48:56, 15.62s/batch, batch_loss=2.47e+3, batch_index=555, batch_size=256]

Validation:  75%|██████████▍   | 555/743 [2:23:38<48:56, 15.62s/batch, batch_loss=31.4, batch_index=556, batch_size=256]

Validation:  75%|██████████▍   | 556/743 [2:23:38<47:57, 15.39s/batch, batch_loss=31.4, batch_index=556, batch_size=256]

Validation:  75%|██████████▍   | 556/743 [2:23:54<47:57, 15.39s/batch, batch_loss=9.46, batch_index=557, batch_size=256]

Validation:  75%|██████████▍   | 557/743 [2:23:54<48:02, 15.49s/batch, batch_loss=9.46, batch_index=557, batch_size=256]

Validation:  75%|████████▏  | 557/743 [2:24:11<48:02, 15.49s/batch, batch_loss=1.52e+4, batch_index=558, batch_size=256]

Validation:  75%|████████▎  | 558/743 [2:24:11<49:08, 15.94s/batch, batch_loss=1.52e+4, batch_index=558, batch_size=256]

Validation:  75%|████████▎  | 558/743 [2:24:25<49:08, 15.94s/batch, batch_loss=3.59e+3, batch_index=559, batch_size=256]

Validation:  75%|████████▎  | 559/743 [2:24:25<47:32, 15.50s/batch, batch_loss=3.59e+3, batch_index=559, batch_size=256]

Validation:  75%|████████▎  | 559/743 [2:24:40<47:32, 15.50s/batch, batch_loss=2.93e+3, batch_index=560, batch_size=256]

Validation:  75%|████████▎  | 560/743 [2:24:40<46:05, 15.11s/batch, batch_loss=2.93e+3, batch_index=560, batch_size=256]

Validation:  75%|██████████▌   | 560/743 [2:24:54<46:05, 15.11s/batch, batch_loss=13.1, batch_index=561, batch_size=256]

Validation:  76%|██████████▌   | 561/743 [2:24:54<45:24, 14.97s/batch, batch_loss=13.1, batch_index=561, batch_size=256]

Validation:  76%|██████████▌   | 561/743 [2:25:09<45:24, 14.97s/batch, batch_loss=17.7, batch_index=562, batch_size=256]

Validation:  76%|██████████▌   | 562/743 [2:25:09<44:54, 14.89s/batch, batch_loss=17.7, batch_index=562, batch_size=256]

Validation:  76%|██████████▌   | 562/743 [2:25:23<44:54, 14.89s/batch, batch_loss=18.6, batch_index=563, batch_size=256]

Validation:  76%|██████████▌   | 563/743 [2:25:23<44:07, 14.71s/batch, batch_loss=18.6, batch_index=563, batch_size=256]

Validation:  76%|████████▎  | 563/743 [2:25:39<44:07, 14.71s/batch, batch_loss=1.09e+3, batch_index=564, batch_size=256]

Validation:  76%|████████▎  | 564/743 [2:25:39<44:23, 14.88s/batch, batch_loss=1.09e+3, batch_index=564, batch_size=256]

Validation:  76%|████████▎  | 564/743 [2:25:55<44:23, 14.88s/batch, batch_loss=3.68e+3, batch_index=565, batch_size=256]

Validation:  76%|████████▎  | 565/743 [2:25:55<45:54, 15.47s/batch, batch_loss=3.68e+3, batch_index=565, batch_size=256]

Validation:  76%|██████████▋   | 565/743 [2:26:10<45:54, 15.47s/batch, batch_loss=14.2, batch_index=566, batch_size=256]

Validation:  76%|██████████▋   | 566/743 [2:26:10<45:07, 15.29s/batch, batch_loss=14.2, batch_index=566, batch_size=256]

Validation:  76%|██████████▋   | 566/743 [2:26:24<45:07, 15.29s/batch, batch_loss=16.2, batch_index=567, batch_size=256]

Validation:  76%|██████████▋   | 567/743 [2:26:24<43:40, 14.89s/batch, batch_loss=16.2, batch_index=567, batch_size=256]

Validation:  76%|██████████▋   | 567/743 [2:26:39<43:40, 14.89s/batch, batch_loss=13.7, batch_index=568, batch_size=256]

Validation:  76%|██████████▋   | 568/743 [2:26:39<43:35, 14.94s/batch, batch_loss=13.7, batch_index=568, batch_size=256]

Validation:  76%|██████████▋   | 568/743 [2:26:55<43:35, 14.94s/batch, batch_loss=18.5, batch_index=569, batch_size=256]

Validation:  77%|██████████▋   | 569/743 [2:26:55<44:09, 15.23s/batch, batch_loss=18.5, batch_index=569, batch_size=256]

Validation:  77%|██████████▋   | 569/743 [2:27:11<44:09, 15.23s/batch, batch_loss=18.7, batch_index=570, batch_size=256]

Validation:  77%|██████████▋   | 570/743 [2:27:11<44:09, 15.31s/batch, batch_loss=18.7, batch_index=570, batch_size=256]

Validation:  77%|██████████▋   | 570/743 [2:27:27<44:09, 15.31s/batch, batch_loss=11.9, batch_index=571, batch_size=256]

Validation:  77%|██████████▊   | 571/743 [2:27:27<44:36, 15.56s/batch, batch_loss=11.9, batch_index=571, batch_size=256]

Validation:  77%|██████████▊   | 571/743 [2:27:43<44:36, 15.56s/batch, batch_loss=21.7, batch_index=572, batch_size=256]

Validation:  77%|██████████▊   | 572/743 [2:27:43<44:28, 15.61s/batch, batch_loss=21.7, batch_index=572, batch_size=256]

Validation:  77%|██████████▊   | 572/743 [2:28:01<44:28, 15.61s/batch, batch_loss=15.1, batch_index=573, batch_size=256]

Validation:  77%|██████████▊   | 573/743 [2:28:01<46:40, 16.47s/batch, batch_loss=15.1, batch_index=573, batch_size=256]

Validation:  77%|██████████▊   | 573/743 [2:28:17<46:40, 16.47s/batch, batch_loss=17.7, batch_index=574, batch_size=256]

Validation:  77%|██████████▊   | 574/743 [2:28:17<45:52, 16.29s/batch, batch_loss=17.7, batch_index=574, batch_size=256]

Validation:  77%|████████████▎   | 574/743 [2:28:31<45:52, 16.29s/batch, batch_loss=16, batch_index=575, batch_size=256]

Validation:  77%|████████████▍   | 575/743 [2:28:31<43:45, 15.63s/batch, batch_loss=16, batch_index=575, batch_size=256]

Validation:  77%|████████████▍   | 575/743 [2:28:46<43:45, 15.63s/batch, batch_loss=23, batch_index=576, batch_size=256]

Validation:  78%|████████████▍   | 576/743 [2:28:46<43:19, 15.56s/batch, batch_loss=23, batch_index=576, batch_size=256]

Validation:  78%|██████████▊   | 576/743 [2:29:03<43:19, 15.56s/batch, batch_loss=19.8, batch_index=577, batch_size=256]

Validation:  78%|██████████▊   | 577/743 [2:29:03<43:35, 15.75s/batch, batch_loss=19.8, batch_index=577, batch_size=256]

Validation:  78%|██████████▊   | 577/743 [2:29:18<43:35, 15.75s/batch, batch_loss=24.6, batch_index=578, batch_size=256]

Validation:  78%|██████████▉   | 578/743 [2:29:18<42:55, 15.61s/batch, batch_loss=24.6, batch_index=578, batch_size=256]

Validation:  78%|███████████▋   | 578/743 [2:29:33<42:55, 15.61s/batch, batch_loss=315, batch_index=579, batch_size=256]

Validation:  78%|███████████▋   | 579/743 [2:29:33<41:50, 15.31s/batch, batch_loss=315, batch_index=579, batch_size=256]

Validation:  78%|██████████▉   | 579/743 [2:29:48<41:50, 15.31s/batch, batch_loss=8.05, batch_index=580, batch_size=256]

Validation:  78%|██████████▉   | 580/743 [2:29:48<41:51, 15.41s/batch, batch_loss=8.05, batch_index=580, batch_size=256]

Validation:  78%|██████████▉   | 580/743 [2:30:04<41:51, 15.41s/batch, batch_loss=9.86, batch_index=581, batch_size=256]

Validation:  78%|██████████▉   | 581/743 [2:30:04<41:45, 15.47s/batch, batch_loss=9.86, batch_index=581, batch_size=256]

Validation:  78%|██████████▉   | 581/743 [2:30:19<41:45, 15.47s/batch, batch_loss=17.3, batch_index=582, batch_size=256]

Validation:  78%|██████████▉   | 582/743 [2:30:19<41:42, 15.54s/batch, batch_loss=17.3, batch_index=582, batch_size=256]

Validation:  78%|████████▌  | 582/743 [2:30:35<41:42, 15.54s/batch, batch_loss=2.41e+3, batch_index=583, batch_size=256]

Validation:  78%|████████▋  | 583/743 [2:30:35<41:32, 15.58s/batch, batch_loss=2.41e+3, batch_index=583, batch_size=256]

Validation:  78%|██████████▉   | 583/743 [2:30:50<41:32, 15.58s/batch, batch_loss=3.72, batch_index=584, batch_size=256]

Validation:  79%|███████████   | 584/743 [2:30:50<40:53, 15.43s/batch, batch_loss=3.72, batch_index=584, batch_size=256]

Validation:  79%|███████████   | 584/743 [2:31:05<40:53, 15.43s/batch, batch_loss=19.3, batch_index=585, batch_size=256]

Validation:  79%|███████████   | 585/743 [2:31:05<40:27, 15.37s/batch, batch_loss=19.3, batch_index=585, batch_size=256]

Validation:  79%|███████████▊   | 585/743 [2:31:20<40:27, 15.37s/batch, batch_loss=557, batch_index=586, batch_size=256]

Validation:  79%|███████████▊   | 586/743 [2:31:20<39:52, 15.24s/batch, batch_loss=557, batch_index=586, batch_size=256]

Validation:  79%|███████████   | 586/743 [2:31:37<39:52, 15.24s/batch, batch_loss=10.2, batch_index=587, batch_size=256]

Validation:  79%|███████████   | 587/743 [2:31:37<40:57, 15.75s/batch, batch_loss=10.2, batch_index=587, batch_size=256]

Validation:  79%|███████████▊   | 587/743 [2:31:52<40:57, 15.75s/batch, batch_loss=410, batch_index=588, batch_size=256]

Validation:  79%|███████████▊   | 588/743 [2:31:52<39:58, 15.47s/batch, batch_loss=410, batch_index=588, batch_size=256]

Validation:  79%|████████▋  | 588/743 [2:32:07<39:58, 15.47s/batch, batch_loss=2.51e+4, batch_index=589, batch_size=256]

Validation:  79%|████████▋  | 589/743 [2:32:07<39:12, 15.27s/batch, batch_loss=2.51e+4, batch_index=589, batch_size=256]

Validation:  79%|███████████   | 589/743 [2:32:22<39:12, 15.27s/batch, batch_loss=17.7, batch_index=590, batch_size=256]

Validation:  79%|███████████   | 590/743 [2:32:22<39:00, 15.30s/batch, batch_loss=17.7, batch_index=590, batch_size=256]

Validation:  79%|███████████   | 590/743 [2:32:48<39:00, 15.30s/batch, batch_loss=17.3, batch_index=591, batch_size=256]

Validation:  80%|███████████▏  | 591/743 [2:32:48<46:28, 18.35s/batch, batch_loss=17.3, batch_index=591, batch_size=256]

Validation:  80%|███████████▏  | 591/743 [2:33:11<46:28, 18.35s/batch, batch_loss=14.3, batch_index=592, batch_size=256]

Validation:  80%|███████████▏  | 592/743 [2:33:11<50:04, 19.90s/batch, batch_loss=14.3, batch_index=592, batch_size=256]

Validation:  80%|████████▊  | 592/743 [2:33:28<50:04, 19.90s/batch, batch_loss=2.39e+4, batch_index=593, batch_size=256]

Validation:  80%|████████▊  | 593/743 [2:33:28<47:35, 19.04s/batch, batch_loss=2.39e+4, batch_index=593, batch_size=256]

Validation:  80%|███████████▏  | 593/743 [2:33:45<47:35, 19.04s/batch, batch_loss=11.2, batch_index=594, batch_size=256]

Validation:  80%|███████████▏  | 594/743 [2:33:45<45:28, 18.31s/batch, batch_loss=11.2, batch_index=594, batch_size=256]

Validation:  80%|███████████▏  | 594/743 [2:34:00<45:28, 18.31s/batch, batch_loss=6.35, batch_index=595, batch_size=256]

Validation:  80%|███████████▏  | 595/743 [2:34:00<43:07, 17.48s/batch, batch_loss=6.35, batch_index=595, batch_size=256]

Validation:  80%|███████████▏  | 595/743 [2:34:16<43:07, 17.48s/batch, batch_loss=8.73, batch_index=596, batch_size=256]

Validation:  80%|███████████▏  | 596/743 [2:34:16<41:04, 16.76s/batch, batch_loss=8.73, batch_index=596, batch_size=256]

Validation:  80%|█████████▋  | 596/743 [2:34:31<41:04, 16.76s/batch, batch_loss=1.8e+3, batch_index=597, batch_size=256]

Validation:  80%|█████████▋  | 597/743 [2:34:31<39:53, 16.39s/batch, batch_loss=1.8e+3, batch_index=597, batch_size=256]

Validation:  80%|███████████▏  | 597/743 [2:34:47<39:53, 16.39s/batch, batch_loss=14.8, batch_index=598, batch_size=256]

Validation:  80%|███████████▎  | 598/743 [2:34:47<39:10, 16.21s/batch, batch_loss=14.8, batch_index=598, batch_size=256]

Validation:  80%|███████████▎  | 598/743 [2:35:02<39:10, 16.21s/batch, batch_loss=16.1, batch_index=599, batch_size=256]

Validation:  81%|███████████▎  | 599/743 [2:35:02<37:55, 15.80s/batch, batch_loss=16.1, batch_index=599, batch_size=256]

Validation:  81%|████████████▉   | 599/743 [2:35:18<37:55, 15.80s/batch, batch_loss=23, batch_index=600, batch_size=256]

Validation:  81%|████████████▉   | 600/743 [2:35:18<37:57, 15.93s/batch, batch_loss=23, batch_index=600, batch_size=256]

Validation:  81%|███████████▎  | 600/743 [2:35:43<37:57, 15.93s/batch, batch_loss=14.3, batch_index=601, batch_size=256]

Validation:  81%|███████████▎  | 601/743 [2:35:43<44:00, 18.59s/batch, batch_loss=14.3, batch_index=601, batch_size=256]

Validation:  81%|███████████▎  | 601/743 [2:36:00<44:00, 18.59s/batch, batch_loss=20.2, batch_index=602, batch_size=256]

Validation:  81%|███████████▎  | 602/743 [2:36:00<42:30, 18.09s/batch, batch_loss=20.2, batch_index=602, batch_size=256]

Validation:  81%|████████▉  | 602/743 [2:36:15<42:30, 18.09s/batch, batch_loss=1.15e+4, batch_index=603, batch_size=256]

Validation:  81%|████████▉  | 603/743 [2:36:15<40:14, 17.24s/batch, batch_loss=1.15e+4, batch_index=603, batch_size=256]

Validation:  81%|███████████▎  | 603/743 [2:36:31<40:14, 17.24s/batch, batch_loss=19.1, batch_index=604, batch_size=256]

Validation:  81%|███████████▍  | 604/743 [2:36:31<39:03, 16.86s/batch, batch_loss=19.1, batch_index=604, batch_size=256]

Validation:  81%|███████████▍  | 604/743 [2:36:47<39:03, 16.86s/batch, batch_loss=25.2, batch_index=605, batch_size=256]

Validation:  81%|███████████▍  | 605/743 [2:36:47<37:58, 16.51s/batch, batch_loss=25.2, batch_index=605, batch_size=256]

Validation:  81%|████████████▏  | 605/743 [2:37:04<37:58, 16.51s/batch, batch_loss=252, batch_index=606, batch_size=256]

Validation:  82%|████████████▏  | 606/743 [2:37:04<38:03, 16.67s/batch, batch_loss=252, batch_index=606, batch_size=256]

Validation:  82%|███████████▍  | 606/743 [2:37:20<38:03, 16.67s/batch, batch_loss=28.9, batch_index=607, batch_size=256]

Validation:  82%|███████████▍  | 607/743 [2:37:20<37:33, 16.57s/batch, batch_loss=28.9, batch_index=607, batch_size=256]

Validation:  82%|█████████████   | 607/743 [2:37:36<37:33, 16.57s/batch, batch_loss=22, batch_index=608, batch_size=256]

Validation:  82%|█████████████   | 608/743 [2:37:36<36:39, 16.29s/batch, batch_loss=22, batch_index=608, batch_size=256]

Validation:  82%|███████████▍  | 608/743 [2:37:54<36:39, 16.29s/batch, batch_loss=15.9, batch_index=609, batch_size=256]

Validation:  82%|███████████▍  | 609/743 [2:37:54<37:34, 16.83s/batch, batch_loss=15.9, batch_index=609, batch_size=256]

Validation:  82%|███████████▍  | 609/743 [2:38:09<37:34, 16.83s/batch, batch_loss=17.2, batch_index=610, batch_size=256]

Validation:  82%|███████████▍  | 610/743 [2:38:09<36:33, 16.50s/batch, batch_loss=17.2, batch_index=610, batch_size=256]

Validation:  82%|███████████▍  | 610/743 [2:38:25<36:33, 16.50s/batch, batch_loss=22.6, batch_index=611, batch_size=256]

Validation:  82%|███████████▌  | 611/743 [2:38:25<35:24, 16.09s/batch, batch_loss=22.6, batch_index=611, batch_size=256]

Validation:  82%|███████████▌  | 611/743 [2:38:40<35:24, 16.09s/batch, batch_loss=13.1, batch_index=612, batch_size=256]

Validation:  82%|███████████▌  | 612/743 [2:38:40<34:30, 15.80s/batch, batch_loss=13.1, batch_index=612, batch_size=256]

Validation:  82%|███████████▌  | 612/743 [2:38:55<34:30, 15.80s/batch, batch_loss=14.3, batch_index=613, batch_size=256]

Validation:  83%|███████████▌  | 613/743 [2:38:55<34:01, 15.70s/batch, batch_loss=14.3, batch_index=613, batch_size=256]

Validation:  83%|█████████  | 613/743 [2:39:10<34:01, 15.70s/batch, batch_loss=5.64e+3, batch_index=614, batch_size=256]

Validation:  83%|█████████  | 614/743 [2:39:10<33:23, 15.53s/batch, batch_loss=5.64e+3, batch_index=614, batch_size=256]

Validation:  83%|███████████▌  | 614/743 [2:39:26<33:23, 15.53s/batch, batch_loss=15.5, batch_index=615, batch_size=256]

Validation:  83%|███████████▌  | 615/743 [2:39:26<33:11, 15.56s/batch, batch_loss=15.5, batch_index=615, batch_size=256]

Validation:  83%|███████████▌  | 615/743 [2:39:41<33:11, 15.56s/batch, batch_loss=14.6, batch_index=616, batch_size=256]

Validation:  83%|███████████▌  | 616/743 [2:39:41<32:27, 15.33s/batch, batch_loss=14.6, batch_index=616, batch_size=256]

Validation:  83%|███████████▌  | 616/743 [2:39:55<32:27, 15.33s/batch, batch_loss=7.35, batch_index=617, batch_size=256]

Validation:  83%|███████████▋  | 617/743 [2:39:55<31:48, 15.14s/batch, batch_loss=7.35, batch_index=617, batch_size=256]

Validation:  83%|███████████▋  | 617/743 [2:40:10<31:48, 15.14s/batch, batch_loss=11.1, batch_index=618, batch_size=256]

Validation:  83%|███████████▋  | 618/743 [2:40:10<30:59, 14.87s/batch, batch_loss=11.1, batch_index=618, batch_size=256]

Validation:  83%|████████████▍  | 618/743 [2:40:24<30:59, 14.87s/batch, batch_loss=341, batch_index=619, batch_size=256]

Validation:  83%|████████████▍  | 619/743 [2:40:24<30:38, 14.83s/batch, batch_loss=341, batch_index=619, batch_size=256]

Validation:  83%|███████████▋  | 619/743 [2:40:39<30:38, 14.83s/batch, batch_loss=14.8, batch_index=620, batch_size=256]

Validation:  83%|███████████▋  | 620/743 [2:40:39<30:23, 14.83s/batch, batch_loss=14.8, batch_index=620, batch_size=256]

Validation:  83%|███████████▋  | 620/743 [2:40:55<30:23, 14.83s/batch, batch_loss=9.27, batch_index=621, batch_size=256]

Validation:  84%|███████████▋  | 621/743 [2:40:55<30:30, 15.00s/batch, batch_loss=9.27, batch_index=621, batch_size=256]

Validation:  84%|███████████▋  | 621/743 [2:41:11<30:30, 15.00s/batch, batch_loss=15.3, batch_index=622, batch_size=256]

Validation:  84%|███████████▋  | 622/743 [2:41:11<30:49, 15.28s/batch, batch_loss=15.3, batch_index=622, batch_size=256]

Validation:  84%|████████████▌  | 622/743 [2:41:26<30:49, 15.28s/batch, batch_loss=194, batch_index=623, batch_size=256]

Validation:  84%|████████████▌  | 623/743 [2:41:26<30:31, 15.26s/batch, batch_loss=194, batch_index=623, batch_size=256]

Validation:  84%|███████████▋  | 623/743 [2:41:40<30:31, 15.26s/batch, batch_loss=14.6, batch_index=624, batch_size=256]

Validation:  84%|███████████▊  | 624/743 [2:41:40<29:32, 14.90s/batch, batch_loss=14.6, batch_index=624, batch_size=256]

Validation:  84%|█████████▏ | 624/743 [2:41:55<29:32, 14.90s/batch, batch_loss=2.39e+3, batch_index=625, batch_size=256]

Validation:  84%|█████████▎ | 625/743 [2:41:55<29:17, 14.90s/batch, batch_loss=2.39e+3, batch_index=625, batch_size=256]

Validation:  84%|███████████▊  | 625/743 [2:42:09<29:17, 14.90s/batch, batch_loss=20.7, batch_index=626, batch_size=256]

Validation:  84%|███████████▊  | 626/743 [2:42:09<28:54, 14.83s/batch, batch_loss=20.7, batch_index=626, batch_size=256]

Validation:  84%|███████████▊  | 626/743 [2:42:23<28:54, 14.83s/batch, batch_loss=17.7, batch_index=627, batch_size=256]

Validation:  84%|███████████▊  | 627/743 [2:42:23<28:06, 14.54s/batch, batch_loss=17.7, batch_index=627, batch_size=256]

Validation:  84%|███████████▊  | 627/743 [2:42:39<28:06, 14.54s/batch, batch_loss=17.9, batch_index=628, batch_size=256]

Validation:  85%|███████████▊  | 628/743 [2:42:39<28:25, 14.83s/batch, batch_loss=17.9, batch_index=628, batch_size=256]

Validation:  85%|███████████▊  | 628/743 [2:42:53<28:25, 14.83s/batch, batch_loss=13.2, batch_index=629, batch_size=256]

Validation:  85%|███████████▊  | 629/743 [2:42:53<27:54, 14.69s/batch, batch_loss=13.2, batch_index=629, batch_size=256]

Validation:  85%|███████████▊  | 629/743 [2:43:08<27:54, 14.69s/batch, batch_loss=17.4, batch_index=630, batch_size=256]

Validation:  85%|███████████▊  | 630/743 [2:43:08<27:43, 14.72s/batch, batch_loss=17.4, batch_index=630, batch_size=256]

Validation:  85%|████████████▋  | 630/743 [2:43:24<27:43, 14.72s/batch, batch_loss=242, batch_index=631, batch_size=256]

Validation:  85%|████████████▋  | 631/743 [2:43:24<27:59, 15.00s/batch, batch_loss=242, batch_index=631, batch_size=256]

Validation:  85%|███████████▉  | 631/743 [2:43:40<27:59, 15.00s/batch, batch_loss=20.4, batch_index=632, batch_size=256]

Validation:  85%|███████████▉  | 632/743 [2:43:40<28:17, 15.29s/batch, batch_loss=20.4, batch_index=632, batch_size=256]

Validation:  85%|███████████▉  | 632/743 [2:43:55<28:17, 15.29s/batch, batch_loss=15.1, batch_index=633, batch_size=256]

Validation:  85%|███████████▉  | 633/743 [2:43:55<28:22, 15.48s/batch, batch_loss=15.1, batch_index=633, batch_size=256]

Validation:  85%|███████████▉  | 633/743 [2:44:10<28:22, 15.48s/batch, batch_loss=11.6, batch_index=634, batch_size=256]

Validation:  85%|███████████▉  | 634/743 [2:44:10<27:43, 15.27s/batch, batch_loss=11.6, batch_index=634, batch_size=256]

Validation:  85%|███████████▉  | 634/743 [2:44:26<27:43, 15.27s/batch, batch_loss=8.32, batch_index=635, batch_size=256]

Validation:  85%|███████████▉  | 635/743 [2:44:26<27:35, 15.33s/batch, batch_loss=8.32, batch_index=635, batch_size=256]

Validation:  85%|████████████▊  | 635/743 [2:44:41<27:35, 15.33s/batch, batch_loss=803, batch_index=636, batch_size=256]

Validation:  86%|████████████▊  | 636/743 [2:44:41<27:21, 15.34s/batch, batch_loss=803, batch_index=636, batch_size=256]

Validation:  86%|████████████▊  | 636/743 [2:44:56<27:21, 15.34s/batch, batch_loss=712, batch_index=637, batch_size=256]

Validation:  86%|████████████▊  | 637/743 [2:44:56<26:48, 15.18s/batch, batch_loss=712, batch_index=637, batch_size=256]

Validation:  86%|████████████  | 637/743 [2:45:13<26:48, 15.18s/batch, batch_loss=17.7, batch_index=638, batch_size=256]

Validation:  86%|████████████  | 638/743 [2:45:13<27:35, 15.77s/batch, batch_loss=17.7, batch_index=638, batch_size=256]

Validation:  86%|█████████▍ | 638/743 [2:45:30<27:35, 15.77s/batch, batch_loss=1.21e+4, batch_index=639, batch_size=256]

Validation:  86%|█████████▍ | 639/743 [2:45:30<27:46, 16.03s/batch, batch_loss=1.21e+4, batch_index=639, batch_size=256]

Validation:  86%|████████████  | 639/743 [2:45:47<27:46, 16.03s/batch, batch_loss=21.8, batch_index=640, batch_size=256]

Validation:  86%|████████████  | 640/743 [2:45:47<28:17, 16.48s/batch, batch_loss=21.8, batch_index=640, batch_size=256]

Validation:  86%|████████████  | 640/743 [2:46:04<28:17, 16.48s/batch, batch_loss=32.8, batch_index=641, batch_size=256]

Validation:  86%|████████████  | 641/743 [2:46:04<28:16, 16.63s/batch, batch_loss=32.8, batch_index=641, batch_size=256]

Validation:  86%|████████████  | 641/743 [2:46:20<28:16, 16.63s/batch, batch_loss=29.1, batch_index=642, batch_size=256]

Validation:  86%|████████████  | 642/743 [2:46:20<27:32, 16.36s/batch, batch_loss=29.1, batch_index=642, batch_size=256]

Validation:  86%|█████████▌ | 642/743 [2:46:36<27:32, 16.36s/batch, batch_loss=1.04e+3, batch_index=643, batch_size=256]

Validation:  87%|█████████▌ | 643/743 [2:46:36<27:22, 16.43s/batch, batch_loss=1.04e+3, batch_index=643, batch_size=256]

Validation:  87%|████████████  | 643/743 [2:46:53<27:22, 16.43s/batch, batch_loss=18.7, batch_index=644, batch_size=256]

Validation:  87%|████████████▏ | 644/743 [2:46:53<26:59, 16.36s/batch, batch_loss=18.7, batch_index=644, batch_size=256]

Validation:  87%|████████████▏ | 644/743 [2:47:08<26:59, 16.36s/batch, batch_loss=18.1, batch_index=645, batch_size=256]

Validation:  87%|████████████▏ | 645/743 [2:47:08<26:21, 16.14s/batch, batch_loss=18.1, batch_index=645, batch_size=256]

Validation:  87%|█████████▌ | 645/743 [2:47:23<26:21, 16.14s/batch, batch_loss=6.25e+3, batch_index=646, batch_size=256]

Validation:  87%|█████████▌ | 646/743 [2:47:23<25:17, 15.64s/batch, batch_loss=6.25e+3, batch_index=646, batch_size=256]

Validation:  87%|████████████▏ | 646/743 [2:47:39<25:17, 15.64s/batch, batch_loss=20.1, batch_index=647, batch_size=256]

Validation:  87%|████████████▏ | 647/743 [2:47:39<25:19, 15.83s/batch, batch_loss=20.1, batch_index=647, batch_size=256]

Validation:  87%|█████████████  | 647/743 [2:47:54<25:19, 15.83s/batch, batch_loss=8.2, batch_index=648, batch_size=256]

Validation:  87%|█████████████  | 648/743 [2:47:54<24:46, 15.65s/batch, batch_loss=8.2, batch_index=648, batch_size=256]

Validation:  87%|████████████▏ | 648/743 [2:48:09<24:46, 15.65s/batch, batch_loss=11.9, batch_index=649, batch_size=256]

Validation:  87%|████████████▏ | 649/743 [2:48:09<24:15, 15.49s/batch, batch_loss=11.9, batch_index=649, batch_size=256]

Validation:  87%|████████████▏ | 649/743 [2:48:24<24:15, 15.49s/batch, batch_loss=15.8, batch_index=650, batch_size=256]

Validation:  87%|████████████▏ | 650/743 [2:48:24<23:46, 15.33s/batch, batch_loss=15.8, batch_index=650, batch_size=256]

Validation:  87%|████████████▏ | 650/743 [2:48:40<23:46, 15.33s/batch, batch_loss=24.5, batch_index=651, batch_size=256]

Validation:  88%|████████████▎ | 651/743 [2:48:40<23:30, 15.33s/batch, batch_loss=24.5, batch_index=651, batch_size=256]

Validation:  88%|██████████████  | 651/743 [2:48:55<23:30, 15.33s/batch, batch_loss=25, batch_index=652, batch_size=256]

Validation:  88%|██████████████  | 652/743 [2:48:55<23:18, 15.36s/batch, batch_loss=25, batch_index=652, batch_size=256]

Validation:  88%|████████████▎ | 652/743 [2:49:10<23:18, 15.36s/batch, batch_loss=16.3, batch_index=653, batch_size=256]

Validation:  88%|████████████▎ | 653/743 [2:49:10<22:39, 15.10s/batch, batch_loss=16.3, batch_index=653, batch_size=256]

Validation:  88%|██████████████  | 653/743 [2:49:25<22:39, 15.10s/batch, batch_loss=21, batch_index=654, batch_size=256]

Validation:  88%|██████████████  | 654/743 [2:49:25<22:24, 15.11s/batch, batch_loss=21, batch_index=654, batch_size=256]

Validation:  88%|████████████▎ | 654/743 [2:49:39<22:24, 15.11s/batch, batch_loss=28.5, batch_index=655, batch_size=256]

Validation:  88%|████████████▎ | 655/743 [2:49:39<21:47, 14.86s/batch, batch_loss=28.5, batch_index=655, batch_size=256]

Validation:  88%|████████████▎ | 655/743 [2:49:54<21:47, 14.86s/batch, batch_loss=20.6, batch_index=656, batch_size=256]

Validation:  88%|████████████▎ | 656/743 [2:49:54<21:22, 14.74s/batch, batch_loss=20.6, batch_index=656, batch_size=256]

Validation:  88%|████████████▎ | 656/743 [2:50:09<21:22, 14.74s/batch, batch_loss=15.6, batch_index=657, batch_size=256]

Validation:  88%|████████████▍ | 657/743 [2:50:09<21:22, 14.91s/batch, batch_loss=15.6, batch_index=657, batch_size=256]

Validation:  88%|████████████▍ | 657/743 [2:50:24<21:22, 14.91s/batch, batch_loss=17.1, batch_index=658, batch_size=256]

Validation:  89%|████████████▍ | 658/743 [2:50:24<21:10, 14.95s/batch, batch_loss=17.1, batch_index=658, batch_size=256]

Validation:  89%|████████████▍ | 658/743 [2:50:39<21:10, 14.95s/batch, batch_loss=22.9, batch_index=659, batch_size=256]

Validation:  89%|████████████▍ | 659/743 [2:50:39<21:00, 15.01s/batch, batch_loss=22.9, batch_index=659, batch_size=256]

Validation:  89%|████████████▍ | 659/743 [2:50:54<21:00, 15.01s/batch, batch_loss=21.4, batch_index=660, batch_size=256]

Validation:  89%|████████████▍ | 660/743 [2:50:54<20:54, 15.11s/batch, batch_loss=21.4, batch_index=660, batch_size=256]

Validation:  89%|████████████▍ | 660/743 [2:51:11<20:54, 15.11s/batch, batch_loss=18.5, batch_index=661, batch_size=256]

Validation:  89%|████████████▍ | 661/743 [2:51:11<21:15, 15.55s/batch, batch_loss=18.5, batch_index=661, batch_size=256]

Validation:  89%|████████████▍ | 661/743 [2:51:28<21:15, 15.55s/batch, batch_loss=8.91, batch_index=662, batch_size=256]

Validation:  89%|████████████▍ | 662/743 [2:51:28<21:29, 15.92s/batch, batch_loss=8.91, batch_index=662, batch_size=256]

Validation:  89%|█████████▊ | 662/743 [2:51:44<21:29, 15.92s/batch, batch_loss=3.58e+3, batch_index=663, batch_size=256]

Validation:  89%|█████████▊ | 663/743 [2:51:44<21:21, 16.02s/batch, batch_loss=3.58e+3, batch_index=663, batch_size=256]

Validation:  89%|████████████▍ | 663/743 [2:52:00<21:21, 16.02s/batch, batch_loss=13.6, batch_index=664, batch_size=256]

Validation:  89%|████████████▌ | 664/743 [2:52:00<21:03, 15.99s/batch, batch_loss=13.6, batch_index=664, batch_size=256]

Validation:  89%|██████████████▎ | 664/743 [2:52:16<21:03, 15.99s/batch, batch_loss=20, batch_index=665, batch_size=256]

Validation:  90%|██████████████▎ | 665/743 [2:52:16<20:53, 16.08s/batch, batch_loss=20, batch_index=665, batch_size=256]

Validation:  90%|████████████▌ | 665/743 [2:52:32<20:53, 16.08s/batch, batch_loss=15.1, batch_index=666, batch_size=256]

Validation:  90%|████████████▌ | 666/743 [2:52:32<20:33, 16.02s/batch, batch_loss=15.1, batch_index=666, batch_size=256]

Validation:  90%|█████████▊ | 666/743 [2:52:47<20:33, 16.02s/batch, batch_loss=2.09e+4, batch_index=667, batch_size=256]

Validation:  90%|█████████▊ | 667/743 [2:52:47<19:56, 15.75s/batch, batch_loss=2.09e+4, batch_index=667, batch_size=256]

Validation:  90%|████████████▌ | 667/743 [2:53:03<19:56, 15.75s/batch, batch_loss=19.7, batch_index=668, batch_size=256]

Validation:  90%|████████████▌ | 668/743 [2:53:03<19:47, 15.83s/batch, batch_loss=19.7, batch_index=668, batch_size=256]

Validation:  90%|████████████▌ | 668/743 [2:53:19<19:47, 15.83s/batch, batch_loss=23.3, batch_index=669, batch_size=256]

Validation:  90%|████████████▌ | 669/743 [2:53:19<19:31, 15.83s/batch, batch_loss=23.3, batch_index=669, batch_size=256]

Validation:  90%|████████████▌ | 669/743 [2:53:36<19:31, 15.83s/batch, batch_loss=21.8, batch_index=670, batch_size=256]

Validation:  90%|████████████▌ | 670/743 [2:53:36<19:43, 16.21s/batch, batch_loss=21.8, batch_index=670, batch_size=256]

Validation:  90%|█████████▉ | 670/743 [2:53:52<19:43, 16.21s/batch, batch_loss=3.11e+3, batch_index=671, batch_size=256]

Validation:  90%|█████████▉ | 671/743 [2:53:52<19:14, 16.03s/batch, batch_loss=3.11e+3, batch_index=671, batch_size=256]

Validation:  90%|████████████▋ | 671/743 [2:54:07<19:14, 16.03s/batch, batch_loss=21.5, batch_index=672, batch_size=256]

Validation:  90%|████████████▋ | 672/743 [2:54:07<18:41, 15.79s/batch, batch_loss=21.5, batch_index=672, batch_size=256]

Validation:  90%|████████████▋ | 672/743 [2:54:23<18:41, 15.79s/batch, batch_loss=17.1, batch_index=673, batch_size=256]

Validation:  91%|████████████▋ | 673/743 [2:54:23<18:33, 15.91s/batch, batch_loss=17.1, batch_index=673, batch_size=256]

Validation:  91%|████████████▋ | 673/743 [2:54:39<18:33, 15.91s/batch, batch_loss=14.7, batch_index=674, batch_size=256]

Validation:  91%|████████████▋ | 674/743 [2:54:39<18:19, 15.93s/batch, batch_loss=14.7, batch_index=674, batch_size=256]

Validation:  91%|████████████▋ | 674/743 [2:54:55<18:19, 15.93s/batch, batch_loss=22.5, batch_index=675, batch_size=256]

Validation:  91%|████████████▋ | 675/743 [2:54:55<17:57, 15.85s/batch, batch_loss=22.5, batch_index=675, batch_size=256]

Validation:  91%|████████████▋ | 675/743 [2:55:10<17:57, 15.85s/batch, batch_loss=19.4, batch_index=676, batch_size=256]

Validation:  91%|████████████▋ | 676/743 [2:55:10<17:37, 15.79s/batch, batch_loss=19.4, batch_index=676, batch_size=256]

Validation:  91%|████████████▋ | 676/743 [2:55:26<17:37, 15.79s/batch, batch_loss=22.5, batch_index=677, batch_size=256]

Validation:  91%|████████████▊ | 677/743 [2:55:26<17:08, 15.59s/batch, batch_loss=22.5, batch_index=677, batch_size=256]

Validation:  91%|██████████████▌ | 677/743 [2:55:41<17:08, 15.59s/batch, batch_loss=18, batch_index=678, batch_size=256]

Validation:  91%|██████████████▌ | 678/743 [2:55:41<16:56, 15.64s/batch, batch_loss=18, batch_index=678, batch_size=256]

Validation:  91%|████████████▊ | 678/743 [2:55:56<16:56, 15.64s/batch, batch_loss=15.1, batch_index=679, batch_size=256]

Validation:  91%|████████████▊ | 679/743 [2:55:56<16:30, 15.48s/batch, batch_loss=15.1, batch_index=679, batch_size=256]

Validation:  91%|████████████▊ | 679/743 [2:56:12<16:30, 15.48s/batch, batch_loss=18.9, batch_index=680, batch_size=256]

Validation:  92%|████████████▊ | 680/743 [2:56:12<16:10, 15.40s/batch, batch_loss=18.9, batch_index=680, batch_size=256]

Validation:  92%|████████████▊ | 680/743 [2:56:27<16:10, 15.40s/batch, batch_loss=21.4, batch_index=681, batch_size=256]

Validation:  92%|████████████▊ | 681/743 [2:56:27<15:48, 15.30s/batch, batch_loss=21.4, batch_index=681, batch_size=256]

Validation:  92%|████████████▊ | 681/743 [2:56:42<15:48, 15.30s/batch, batch_loss=25.7, batch_index=682, batch_size=256]

Validation:  92%|████████████▊ | 682/743 [2:56:42<15:31, 15.28s/batch, batch_loss=25.7, batch_index=682, batch_size=256]

Validation:  92%|████████████▊ | 682/743 [2:56:57<15:31, 15.28s/batch, batch_loss=18.1, batch_index=683, batch_size=256]

Validation:  92%|████████████▊ | 683/743 [2:56:57<15:21, 15.36s/batch, batch_loss=18.1, batch_index=683, batch_size=256]

Validation:  92%|████████████▊ | 683/743 [2:57:12<15:21, 15.36s/batch, batch_loss=17.8, batch_index=684, batch_size=256]

Validation:  92%|████████████▉ | 684/743 [2:57:12<14:57, 15.21s/batch, batch_loss=17.8, batch_index=684, batch_size=256]

Validation:  92%|████████████▉ | 684/743 [2:57:27<14:57, 15.21s/batch, batch_loss=14.8, batch_index=685, batch_size=256]

Validation:  92%|████████████▉ | 685/743 [2:57:27<14:39, 15.16s/batch, batch_loss=14.8, batch_index=685, batch_size=256]

Validation:  92%|██████████▏| 685/743 [2:57:43<14:39, 15.16s/batch, batch_loss=1.67e+3, batch_index=686, batch_size=256]

Validation:  92%|██████████▏| 686/743 [2:57:43<14:36, 15.37s/batch, batch_loss=1.67e+3, batch_index=686, batch_size=256]

Validation:  92%|████████████▉ | 686/743 [2:57:58<14:36, 15.37s/batch, batch_loss=21.7, batch_index=687, batch_size=256]

Validation:  92%|████████████▉ | 687/743 [2:57:58<14:16, 15.29s/batch, batch_loss=21.7, batch_index=687, batch_size=256]

Validation:  92%|████████████▉ | 687/743 [2:58:14<14:16, 15.29s/batch, batch_loss=14.3, batch_index=688, batch_size=256]

Validation:  93%|████████████▉ | 688/743 [2:58:14<14:01, 15.31s/batch, batch_loss=14.3, batch_index=688, batch_size=256]

Validation:  93%|████████████▉ | 688/743 [2:58:29<14:01, 15.31s/batch, batch_loss=14.4, batch_index=689, batch_size=256]

Validation:  93%|████████████▉ | 689/743 [2:58:29<13:38, 15.16s/batch, batch_loss=14.4, batch_index=689, batch_size=256]

Validation:  93%|████████████▉ | 689/743 [2:58:44<13:38, 15.16s/batch, batch_loss=19.4, batch_index=690, batch_size=256]

Validation:  93%|█████████████ | 690/743 [2:58:44<13:31, 15.32s/batch, batch_loss=19.4, batch_index=690, batch_size=256]

Validation:  93%|█████████████ | 690/743 [2:59:00<13:31, 15.32s/batch, batch_loss=12.9, batch_index=691, batch_size=256]

Validation:  93%|█████████████ | 691/743 [2:59:00<13:31, 15.61s/batch, batch_loss=12.9, batch_index=691, batch_size=256]

Validation:  93%|█████████████ | 691/743 [2:59:17<13:31, 15.61s/batch, batch_loss=17.9, batch_index=692, batch_size=256]

Validation:  93%|█████████████ | 692/743 [2:59:17<13:25, 15.79s/batch, batch_loss=17.9, batch_index=692, batch_size=256]

Validation:  93%|█████████████ | 692/743 [2:59:35<13:25, 15.79s/batch, batch_loss=20.8, batch_index=693, batch_size=256]

Validation:  93%|█████████████ | 693/743 [2:59:35<13:42, 16.45s/batch, batch_loss=20.8, batch_index=693, batch_size=256]

Validation:  93%|█████████████ | 693/743 [2:59:50<13:42, 16.45s/batch, batch_loss=23.6, batch_index=694, batch_size=256]

Validation:  93%|█████████████ | 694/743 [2:59:50<13:09, 16.10s/batch, batch_loss=23.6, batch_index=694, batch_size=256]

Validation:  93%|██████████▎| 694/743 [3:00:06<13:09, 16.10s/batch, batch_loss=3.13e+3, batch_index=695, batch_size=256]

Validation:  94%|██████████▎| 695/743 [3:00:06<12:52, 16.10s/batch, batch_loss=3.13e+3, batch_index=695, batch_size=256]

Validation:  94%|█████████████ | 695/743 [3:00:21<12:52, 16.10s/batch, batch_loss=8.55, batch_index=696, batch_size=256]

Validation:  94%|█████████████ | 696/743 [3:00:21<12:19, 15.74s/batch, batch_loss=8.55, batch_index=696, batch_size=256]

Validation:  94%|█████████████ | 696/743 [3:00:36<12:19, 15.74s/batch, batch_loss=33.1, batch_index=697, batch_size=256]

Validation:  94%|█████████████▏| 697/743 [3:00:36<11:57, 15.60s/batch, batch_loss=33.1, batch_index=697, batch_size=256]

Validation:  94%|██████████████ | 697/743 [3:00:52<11:57, 15.60s/batch, batch_loss=758, batch_index=698, batch_size=256]

Validation:  94%|██████████████ | 698/743 [3:00:52<11:46, 15.71s/batch, batch_loss=758, batch_index=698, batch_size=256]

Validation:  94%|█████████████▏| 698/743 [3:01:07<11:46, 15.71s/batch, batch_loss=7.27, batch_index=699, batch_size=256]

Validation:  94%|█████████████▏| 699/743 [3:01:07<11:22, 15.50s/batch, batch_loss=7.27, batch_index=699, batch_size=256]

Validation:  94%|██████████████ | 699/743 [3:01:23<11:22, 15.50s/batch, batch_loss=938, batch_index=700, batch_size=256]

Validation:  94%|██████████████▏| 700/743 [3:01:23<11:12, 15.64s/batch, batch_loss=938, batch_index=700, batch_size=256]

Validation:  94%|█████████████▏| 700/743 [3:01:40<11:12, 15.64s/batch, batch_loss=7.38, batch_index=701, batch_size=256]

Validation:  94%|█████████████▏| 701/743 [3:01:40<11:08, 15.91s/batch, batch_loss=7.38, batch_index=701, batch_size=256]

Validation:  94%|█████████████▏| 701/743 [3:01:55<11:08, 15.91s/batch, batch_loss=8.86, batch_index=702, batch_size=256]

Validation:  94%|█████████████▏| 702/743 [3:01:55<10:39, 15.60s/batch, batch_loss=8.86, batch_index=702, batch_size=256]

Validation:  94%|██████████████▏| 702/743 [3:02:09<10:39, 15.60s/batch, batch_loss=176, batch_index=703, batch_size=256]

Validation:  95%|██████████████▏| 703/743 [3:02:09<10:12, 15.31s/batch, batch_loss=176, batch_index=703, batch_size=256]

Validation:  95%|██████████████▏| 703/743 [3:02:25<10:12, 15.31s/batch, batch_loss=473, batch_index=704, batch_size=256]

Validation:  95%|██████████████▏| 704/743 [3:02:25<10:07, 15.58s/batch, batch_loss=473, batch_index=704, batch_size=256]

Validation:  95%|█████████████▎| 704/743 [3:02:40<10:07, 15.58s/batch, batch_loss=11.7, batch_index=705, batch_size=256]

Validation:  95%|█████████████▎| 705/743 [3:02:40<09:35, 15.15s/batch, batch_loss=11.7, batch_index=705, batch_size=256]

Validation:  95%|█████████████▎| 705/743 [3:02:53<09:35, 15.15s/batch, batch_loss=18.4, batch_index=706, batch_size=256]

Validation:  95%|█████████████▎| 706/743 [3:02:53<09:03, 14.69s/batch, batch_loss=18.4, batch_index=706, batch_size=256]

Validation:  95%|██████████████▎| 706/743 [3:03:08<09:03, 14.69s/batch, batch_loss=414, batch_index=707, batch_size=256]

Validation:  95%|██████████████▎| 707/743 [3:03:08<08:44, 14.57s/batch, batch_loss=414, batch_index=707, batch_size=256]

Validation:  95%|█████████████▎| 707/743 [3:03:23<08:44, 14.57s/batch, batch_loss=16.7, batch_index=708, batch_size=256]

Validation:  95%|█████████████▎| 708/743 [3:03:23<08:34, 14.70s/batch, batch_loss=16.7, batch_index=708, batch_size=256]

Validation:  95%|█████████████▎| 708/743 [3:03:38<08:34, 14.70s/batch, batch_loss=25.2, batch_index=709, batch_size=256]

Validation:  95%|█████████████▎| 709/743 [3:03:38<08:28, 14.97s/batch, batch_loss=25.2, batch_index=709, batch_size=256]

Validation:  95%|█████████████▎| 709/743 [3:03:53<08:28, 14.97s/batch, batch_loss=17.6, batch_index=710, batch_size=256]

Validation:  96%|█████████████▍| 710/743 [3:03:53<08:12, 14.92s/batch, batch_loss=17.6, batch_index=710, batch_size=256]

Validation:  96%|█████████████▍| 710/743 [3:04:08<08:12, 14.92s/batch, batch_loss=16.3, batch_index=711, batch_size=256]

Validation:  96%|█████████████▍| 711/743 [3:04:08<08:01, 15.04s/batch, batch_loss=16.3, batch_index=711, batch_size=256]

Validation:  96%|█████████████▍| 711/743 [3:04:24<08:01, 15.04s/batch, batch_loss=21.2, batch_index=712, batch_size=256]

Validation:  96%|█████████████▍| 712/743 [3:04:24<07:51, 15.21s/batch, batch_loss=21.2, batch_index=712, batch_size=256]

Validation:  96%|█████████████▍| 712/743 [3:04:40<07:51, 15.21s/batch, batch_loss=19.1, batch_index=713, batch_size=256]

Validation:  96%|█████████████▍| 713/743 [3:04:40<07:45, 15.51s/batch, batch_loss=19.1, batch_index=713, batch_size=256]

Validation:  96%|█████████████▍| 713/743 [3:04:56<07:45, 15.51s/batch, batch_loss=6.24, batch_index=714, batch_size=256]

Validation:  96%|█████████████▍| 714/743 [3:04:56<07:29, 15.50s/batch, batch_loss=6.24, batch_index=714, batch_size=256]

Validation:  96%|█████████████▍| 714/743 [3:05:10<07:29, 15.50s/batch, batch_loss=11.5, batch_index=715, batch_size=256]

Validation:  96%|█████████████▍| 715/743 [3:05:10<07:08, 15.32s/batch, batch_loss=11.5, batch_index=715, batch_size=256]

Validation:  96%|█████████████▍| 715/743 [3:05:26<07:08, 15.32s/batch, batch_loss=20.6, batch_index=716, batch_size=256]

Validation:  96%|█████████████▍| 716/743 [3:05:26<06:54, 15.34s/batch, batch_loss=20.6, batch_index=716, batch_size=256]

Validation:  96%|██████████████▍| 716/743 [3:05:42<06:54, 15.34s/batch, batch_loss=386, batch_index=717, batch_size=256]

Validation:  97%|██████████████▍| 717/743 [3:05:42<06:45, 15.60s/batch, batch_loss=386, batch_index=717, batch_size=256]

Validation:  97%|███████████████▍| 717/743 [3:05:57<06:45, 15.60s/batch, batch_loss=19, batch_index=718, batch_size=256]

Validation:  97%|███████████████▍| 718/743 [3:05:57<06:24, 15.37s/batch, batch_loss=19, batch_index=718, batch_size=256]

Validation:  97%|███████████████▍| 718/743 [3:06:12<06:24, 15.37s/batch, batch_loss=17, batch_index=719, batch_size=256]

Validation:  97%|███████████████▍| 719/743 [3:06:12<06:06, 15.28s/batch, batch_loss=17, batch_index=719, batch_size=256]

Validation:  97%|█████████████▌| 719/743 [3:06:27<06:06, 15.28s/batch, batch_loss=14.9, batch_index=720, batch_size=256]

Validation:  97%|█████████████▌| 720/743 [3:06:27<05:48, 15.16s/batch, batch_loss=14.9, batch_index=720, batch_size=256]

Validation:  97%|█████████████▌| 720/743 [3:06:41<05:48, 15.16s/batch, batch_loss=11.6, batch_index=721, batch_size=256]

Validation:  97%|█████████████▌| 721/743 [3:06:41<05:28, 14.92s/batch, batch_loss=11.6, batch_index=721, batch_size=256]

Validation:  97%|█████████████▌| 721/743 [3:06:56<05:28, 14.92s/batch, batch_loss=23.2, batch_index=722, batch_size=256]

Validation:  97%|█████████████▌| 722/743 [3:06:56<05:11, 14.85s/batch, batch_loss=23.2, batch_index=722, batch_size=256]

Validation:  97%|██████████▋| 722/743 [3:07:11<05:11, 14.85s/batch, batch_loss=5.29e+3, batch_index=723, batch_size=256]

Validation:  97%|██████████▋| 723/743 [3:07:11<05:00, 15.05s/batch, batch_loss=5.29e+3, batch_index=723, batch_size=256]

Validation:  97%|█████████████▌| 723/743 [3:07:28<05:00, 15.05s/batch, batch_loss=18.6, batch_index=724, batch_size=256]

Validation:  97%|█████████████▋| 724/743 [3:07:28<04:54, 15.50s/batch, batch_loss=18.6, batch_index=724, batch_size=256]

Validation:  97%|█████████████▋| 724/743 [3:07:44<04:54, 15.50s/batch, batch_loss=15.4, batch_index=725, batch_size=256]

Validation:  98%|█████████████▋| 725/743 [3:07:44<04:43, 15.73s/batch, batch_loss=15.4, batch_index=725, batch_size=256]

Validation:  98%|█████████████▋| 725/743 [3:08:00<04:43, 15.73s/batch, batch_loss=21.9, batch_index=726, batch_size=256]

Validation:  98%|█████████████▋| 726/743 [3:08:00<04:27, 15.74s/batch, batch_loss=21.9, batch_index=726, batch_size=256]

Validation:  98%|██████████▋| 726/743 [3:08:15<04:27, 15.74s/batch, batch_loss=2.41e+4, batch_index=727, batch_size=256]

Validation:  98%|██████████▊| 727/743 [3:08:15<04:10, 15.67s/batch, batch_loss=2.41e+4, batch_index=727, batch_size=256]

Validation:  98%|█████████████▋| 727/743 [3:08:31<04:10, 15.67s/batch, batch_loss=25.7, batch_index=728, batch_size=256]

Validation:  98%|█████████████▋| 728/743 [3:08:31<03:53, 15.57s/batch, batch_loss=25.7, batch_index=728, batch_size=256]

Validation:  98%|█████████████▋| 728/743 [3:08:46<03:53, 15.57s/batch, batch_loss=33.2, batch_index=729, batch_size=256]

Validation:  98%|█████████████▋| 729/743 [3:08:46<03:35, 15.39s/batch, batch_loss=33.2, batch_index=729, batch_size=256]

Validation:  98%|█████████████▋| 729/743 [3:09:01<03:35, 15.39s/batch, batch_loss=19.4, batch_index=730, batch_size=256]

Validation:  98%|█████████████▊| 730/743 [3:09:01<03:18, 15.30s/batch, batch_loss=19.4, batch_index=730, batch_size=256]

Validation:  98%|█████████████▊| 730/743 [3:09:17<03:18, 15.30s/batch, batch_loss=15.5, batch_index=731, batch_size=256]

Validation:  98%|█████████████▊| 731/743 [3:09:17<03:05, 15.49s/batch, batch_loss=15.5, batch_index=731, batch_size=256]

Validation:  98%|█████████████▊| 731/743 [3:09:34<03:05, 15.49s/batch, batch_loss=10.8, batch_index=732, batch_size=256]

Validation:  99%|█████████████▊| 732/743 [3:09:34<02:55, 15.99s/batch, batch_loss=10.8, batch_index=732, batch_size=256]

Validation:  99%|█████████████▊| 732/743 [3:09:49<02:55, 15.99s/batch, batch_loss=23.9, batch_index=733, batch_size=256]

Validation:  99%|█████████████▊| 733/743 [3:09:49<02:38, 15.84s/batch, batch_loss=23.9, batch_index=733, batch_size=256]

Validation:  99%|█████████████▊| 733/743 [3:10:05<02:38, 15.84s/batch, batch_loss=4.01, batch_index=734, batch_size=256]

Validation:  99%|█████████████▊| 734/743 [3:10:05<02:21, 15.76s/batch, batch_loss=4.01, batch_index=734, batch_size=256]

Validation:  99%|█████████████▊| 734/743 [3:10:20<02:21, 15.76s/batch, batch_loss=7.29, batch_index=735, batch_size=256]

Validation:  99%|█████████████▊| 735/743 [3:10:20<02:04, 15.58s/batch, batch_loss=7.29, batch_index=735, batch_size=256]

Validation:  99%|█████████████▊| 735/743 [3:10:36<02:04, 15.58s/batch, batch_loss=13.4, batch_index=736, batch_size=256]

Validation:  99%|█████████████▊| 736/743 [3:10:36<01:49, 15.59s/batch, batch_loss=13.4, batch_index=736, batch_size=256]

Validation:  99%|█████████████▊| 736/743 [3:10:49<01:49, 15.59s/batch, batch_loss=12.3, batch_index=737, batch_size=256]

Validation:  99%|█████████████▉| 737/743 [3:10:49<01:29, 14.85s/batch, batch_loss=12.3, batch_index=737, batch_size=256]

Validation:  99%|█████████████▉| 737/743 [3:11:02<01:29, 14.85s/batch, batch_loss=12.3, batch_index=738, batch_size=256]

Validation:  99%|█████████████▉| 738/743 [3:11:02<01:10, 14.18s/batch, batch_loss=12.3, batch_index=738, batch_size=256]

Validation:  99%|█████████████▉| 738/743 [3:11:15<01:10, 14.18s/batch, batch_loss=12.3, batch_index=739, batch_size=256]

Validation:  99%|█████████████▉| 739/743 [3:11:15<00:55, 13.84s/batch, batch_loss=12.3, batch_index=739, batch_size=256]

Validation:  99%|█████████████▉| 739/743 [3:11:27<00:55, 13.84s/batch, batch_loss=12.3, batch_index=740, batch_size=256]

Validation: 100%|█████████████▉| 740/743 [3:11:27<00:40, 13.52s/batch, batch_loss=12.3, batch_index=740, batch_size=256]

Validation: 100%|█████████████▉| 740/743 [3:11:41<00:40, 13.52s/batch, batch_loss=12.3, batch_index=741, batch_size=256]

Validation: 100%|█████████████▉| 741/743 [3:11:41<00:27, 13.56s/batch, batch_loss=12.3, batch_index=741, batch_size=256]

Validation: 100%|█████████████▉| 741/743 [3:11:54<00:27, 13.56s/batch, batch_loss=12.3, batch_index=742, batch_size=256]

Validation: 100%|█████████████▉| 742/743 [3:11:54<00:13, 13.43s/batch, batch_loss=12.3, batch_index=742, batch_size=256]

Validation: 100%|█████████████▉| 742/743 [3:12:07<00:13, 13.43s/batch, batch_loss=12.7, batch_index=743, batch_size=238]

Validation: 100%|██████████████| 743/743 [3:12:07<00:00, 13.23s/batch, batch_loss=12.7, batch_index=743, batch_size=238]

Validation: 100%|██████████████| 743/743 [3:12:07<00:00, 15.51s/batch, batch_loss=12.7, batch_index=743, batch_size=238]




Val Loss: 1297.3059


Epoch 2/10:   0%|                                                                            | 0/991 [00:00<?, ?batch/s]

Epoch 2/10:   0%|                            | 0/991 [00:16<?, ?batch/s, batch_loss=19.2, batch_index=1, batch_size=256]

Epoch 2/10:   0%|                  | 1/991 [00:16<4:24:44, 16.05s/batch, batch_loss=19.2, batch_index=1, batch_size=256]

Epoch 2/10:   0%|                  | 1/991 [00:31<4:24:44, 16.05s/batch, batch_loss=19.5, batch_index=2, batch_size=256]

Epoch 2/10:   0%|                  | 2/991 [00:31<4:18:25, 15.68s/batch, batch_loss=19.5, batch_index=2, batch_size=256]

Epoch 2/10:   0%|                  | 2/991 [00:47<4:18:25, 15.68s/batch, batch_loss=12.3, batch_index=3, batch_size=256]

Epoch 2/10:   0%|                  | 3/991 [00:47<4:19:09, 15.74s/batch, batch_loss=12.3, batch_index=3, batch_size=256]

Epoch 2/10:   0%|                  | 3/991 [01:02<4:19:09, 15.74s/batch, batch_loss=7.55, batch_index=4, batch_size=256]

Epoch 2/10:   0%|                  | 4/991 [01:02<4:12:55, 15.37s/batch, batch_loss=7.55, batch_index=4, batch_size=256]

Epoch 2/10:   0%|                  | 4/991 [01:17<4:12:55, 15.37s/batch, batch_loss=21.3, batch_index=5, batch_size=256]

Epoch 2/10:   1%|                  | 5/991 [01:17<4:14:51, 15.51s/batch, batch_loss=21.3, batch_index=5, batch_size=256]

Epoch 2/10:   1%|                  | 5/991 [01:33<4:14:51, 15.51s/batch, batch_loss=24.6, batch_index=6, batch_size=256]

Epoch 2/10:   1%|                  | 6/991 [01:33<4:15:51, 15.59s/batch, batch_loss=24.6, batch_index=6, batch_size=256]

Epoch 2/10:   1%|                  | 6/991 [01:48<4:15:51, 15.59s/batch, batch_loss=17.8, batch_index=7, batch_size=256]

Epoch 2/10:   1%|▏                 | 7/991 [01:48<4:14:36, 15.52s/batch, batch_loss=17.8, batch_index=7, batch_size=256]

Epoch 2/10:   1%|▏                  | 7/991 [02:04<4:14:36, 15.52s/batch, batch_loss=602, batch_index=8, batch_size=256]

Epoch 2/10:   1%|▏                  | 8/991 [02:04<4:13:55, 15.50s/batch, batch_loss=602, batch_index=8, batch_size=256]

Epoch 2/10:   1%|▏                 | 8/991 [02:19<4:13:55, 15.50s/batch, batch_loss=16.6, batch_index=9, batch_size=256]

Epoch 2/10:   1%|▏                 | 9/991 [02:19<4:13:12, 15.47s/batch, batch_loss=16.6, batch_index=9, batch_size=256]

Epoch 2/10:   1%|▏                | 9/991 [02:35<4:13:12, 15.47s/batch, batch_loss=14.5, batch_index=10, batch_size=256]

Epoch 2/10:   1%|▏               | 10/991 [02:35<4:12:05, 15.42s/batch, batch_loss=14.5, batch_index=10, batch_size=256]

Epoch 2/10:   1%|▏               | 10/991 [02:50<4:12:05, 15.42s/batch, batch_loss=12.5, batch_index=11, batch_size=256]

Epoch 2/10:   1%|▏               | 11/991 [02:50<4:13:38, 15.53s/batch, batch_loss=12.5, batch_index=11, batch_size=256]

Epoch 2/10:   1%|▏               | 11/991 [03:07<4:13:38, 15.53s/batch, batch_loss=2e+3, batch_index=12, batch_size=256]

Epoch 2/10:   1%|▏               | 12/991 [03:07<4:18:26, 15.84s/batch, batch_loss=2e+3, batch_index=12, batch_size=256]

Epoch 2/10:   1%|▏                 | 12/991 [03:23<4:18:26, 15.84s/batch, batch_loss=20, batch_index=13, batch_size=256]

Epoch 2/10:   1%|▏                 | 13/991 [03:23<4:18:17, 15.85s/batch, batch_loss=20, batch_index=13, batch_size=256]

Epoch 2/10:   1%|▏               | 13/991 [03:38<4:18:17, 15.85s/batch, batch_loss=12.1, batch_index=14, batch_size=256]

Epoch 2/10:   1%|▏               | 14/991 [03:38<4:16:16, 15.74s/batch, batch_loss=12.1, batch_index=14, batch_size=256]

Epoch 2/10:   1%|▎                 | 14/991 [03:55<4:16:16, 15.74s/batch, batch_loss=11, batch_index=15, batch_size=256]

Epoch 2/10:   2%|▎                 | 15/991 [03:55<4:18:14, 15.88s/batch, batch_loss=11, batch_index=15, batch_size=256]

Epoch 2/10:   2%|▏               | 15/991 [04:10<4:18:14, 15.88s/batch, batch_loss=14.2, batch_index=16, batch_size=256]

Epoch 2/10:   2%|▎               | 16/991 [04:10<4:14:19, 15.65s/batch, batch_loss=14.2, batch_index=16, batch_size=256]

Epoch 2/10:   2%|▎               | 16/991 [04:24<4:14:19, 15.65s/batch, batch_loss=15.4, batch_index=17, batch_size=256]

Epoch 2/10:   2%|▎               | 17/991 [04:24<4:09:31, 15.37s/batch, batch_loss=15.4, batch_index=17, batch_size=256]

Epoch 2/10:   2%|▎                 | 17/991 [04:40<4:09:31, 15.37s/batch, batch_loss=10, batch_index=18, batch_size=256]

Epoch 2/10:   2%|▎                 | 18/991 [04:40<4:10:02, 15.42s/batch, batch_loss=10, batch_index=18, batch_size=256]

Epoch 2/10:   2%|▏            | 18/991 [04:54<4:10:02, 15.42s/batch, batch_loss=8.96e+3, batch_index=19, batch_size=256]

Epoch 2/10:   2%|▏            | 19/991 [04:54<4:05:43, 15.17s/batch, batch_loss=8.96e+3, batch_index=19, batch_size=256]

Epoch 2/10:   2%|▎               | 19/991 [05:09<4:05:43, 15.17s/batch, batch_loss=9.98, batch_index=20, batch_size=256]

Epoch 2/10:   2%|▎               | 20/991 [05:09<4:04:17, 15.10s/batch, batch_loss=9.98, batch_index=20, batch_size=256]

Epoch 2/10:   2%|▎               | 20/991 [05:25<4:04:17, 15.10s/batch, batch_loss=17.1, batch_index=21, batch_size=256]

Epoch 2/10:   2%|▎               | 21/991 [05:25<4:04:07, 15.10s/batch, batch_loss=17.1, batch_index=21, batch_size=256]

Epoch 2/10:   2%|▎            | 21/991 [05:40<4:04:07, 15.10s/batch, batch_loss=1.08e+3, batch_index=22, batch_size=256]

Epoch 2/10:   2%|▎            | 22/991 [05:40<4:03:28, 15.08s/batch, batch_loss=1.08e+3, batch_index=22, batch_size=256]

Epoch 2/10:   2%|▎               | 22/991 [05:55<4:03:28, 15.08s/batch, batch_loss=7.19, batch_index=23, batch_size=256]

Epoch 2/10:   2%|▎               | 23/991 [05:55<4:05:18, 15.20s/batch, batch_loss=7.19, batch_index=23, batch_size=256]

Epoch 2/10:   2%|▎               | 23/991 [06:11<4:05:18, 15.20s/batch, batch_loss=11.6, batch_index=24, batch_size=256]

Epoch 2/10:   2%|▍               | 24/991 [06:11<4:07:49, 15.38s/batch, batch_loss=11.6, batch_index=24, batch_size=256]

Epoch 2/10:   2%|▍               | 24/991 [06:27<4:07:49, 15.38s/batch, batch_loss=12.6, batch_index=25, batch_size=256]

Epoch 2/10:   3%|▍               | 25/991 [06:27<4:10:26, 15.56s/batch, batch_loss=12.6, batch_index=25, batch_size=256]

Epoch 2/10:   3%|▍               | 25/991 [06:42<4:10:26, 15.56s/batch, batch_loss=17.9, batch_index=26, batch_size=256]

Epoch 2/10:   3%|▍               | 26/991 [06:42<4:08:26, 15.45s/batch, batch_loss=17.9, batch_index=26, batch_size=256]

Epoch 2/10:   3%|▍               | 26/991 [06:58<4:08:26, 15.45s/batch, batch_loss=15.2, batch_index=27, batch_size=256]

Epoch 2/10:   3%|▍               | 27/991 [06:58<4:09:24, 15.52s/batch, batch_loss=15.2, batch_index=27, batch_size=256]

Epoch 2/10:   3%|▎            | 27/991 [07:13<4:09:24, 15.52s/batch, batch_loss=1.13e+3, batch_index=28, batch_size=256]

Epoch 2/10:   3%|▎            | 28/991 [07:13<4:08:14, 15.47s/batch, batch_loss=1.13e+3, batch_index=28, batch_size=256]

Epoch 2/10:   3%|▍               | 28/991 [07:28<4:08:14, 15.47s/batch, batch_loss=10.4, batch_index=29, batch_size=256]

Epoch 2/10:   3%|▍               | 29/991 [07:28<4:05:54, 15.34s/batch, batch_loss=10.4, batch_index=29, batch_size=256]

Epoch 2/10:   3%|▍               | 29/991 [07:43<4:05:54, 15.34s/batch, batch_loss=10.4, batch_index=30, batch_size=256]

Epoch 2/10:   3%|▍               | 30/991 [07:43<4:04:11, 15.25s/batch, batch_loss=10.4, batch_index=30, batch_size=256]

Epoch 2/10:   3%|▍               | 30/991 [07:57<4:04:11, 15.25s/batch, batch_loss=10.3, batch_index=31, batch_size=256]

Epoch 2/10:   3%|▌               | 31/991 [07:57<3:58:21, 14.90s/batch, batch_loss=10.3, batch_index=31, batch_size=256]

Epoch 2/10:   3%|▍             | 31/991 [08:12<3:58:21, 14.90s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 2/10:   3%|▍             | 32/991 [08:12<3:59:26, 14.98s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 2/10:   3%|▌               | 32/991 [08:28<3:59:26, 14.98s/batch, batch_loss=14.4, batch_index=33, batch_size=256]

Epoch 2/10:   3%|▌               | 33/991 [08:28<4:02:40, 15.20s/batch, batch_loss=14.4, batch_index=33, batch_size=256]

Epoch 2/10:   3%|▌                | 33/991 [08:43<4:02:40, 15.20s/batch, batch_loss=8.1, batch_index=34, batch_size=256]

Epoch 2/10:   3%|▌                | 34/991 [08:43<4:00:10, 15.06s/batch, batch_loss=8.1, batch_index=34, batch_size=256]

Epoch 2/10:   3%|▌               | 34/991 [08:57<4:00:10, 15.06s/batch, batch_loss=11.4, batch_index=35, batch_size=256]

Epoch 2/10:   4%|▌               | 35/991 [08:57<3:58:10, 14.95s/batch, batch_loss=11.4, batch_index=35, batch_size=256]

Epoch 2/10:   4%|▌                | 35/991 [09:12<3:58:10, 14.95s/batch, batch_loss=9.7, batch_index=36, batch_size=256]

Epoch 2/10:   4%|▌                | 36/991 [09:12<3:56:43, 14.87s/batch, batch_loss=9.7, batch_index=36, batch_size=256]

Epoch 2/10:   4%|▌               | 36/991 [09:28<3:56:43, 14.87s/batch, batch_loss=10.3, batch_index=37, batch_size=256]

Epoch 2/10:   4%|▌               | 37/991 [09:28<3:58:54, 15.03s/batch, batch_loss=10.3, batch_index=37, batch_size=256]

Epoch 2/10:   4%|▌               | 37/991 [09:43<3:58:54, 15.03s/batch, batch_loss=6.66, batch_index=38, batch_size=256]

Epoch 2/10:   4%|▌               | 38/991 [09:43<3:59:06, 15.05s/batch, batch_loss=6.66, batch_index=38, batch_size=256]

Epoch 2/10:   4%|▍            | 38/991 [09:57<3:59:06, 15.05s/batch, batch_loss=1.72e+3, batch_index=39, batch_size=256]

Epoch 2/10:   4%|▌            | 39/991 [09:57<3:57:38, 14.98s/batch, batch_loss=1.72e+3, batch_index=39, batch_size=256]

Epoch 2/10:   4%|▋               | 39/991 [10:13<3:57:38, 14.98s/batch, batch_loss=13.8, batch_index=40, batch_size=256]

Epoch 2/10:   4%|▋               | 40/991 [10:13<3:59:35, 15.12s/batch, batch_loss=13.8, batch_index=40, batch_size=256]

Epoch 2/10:   4%|▌            | 40/991 [10:28<3:59:35, 15.12s/batch, batch_loss=6.21e+3, batch_index=41, batch_size=256]

Epoch 2/10:   4%|▌            | 41/991 [10:28<4:01:26, 15.25s/batch, batch_loss=6.21e+3, batch_index=41, batch_size=256]

Epoch 2/10:   4%|▋               | 41/991 [10:43<4:01:26, 15.25s/batch, batch_loss=14.4, batch_index=42, batch_size=256]

Epoch 2/10:   4%|▋               | 42/991 [10:43<3:59:54, 15.17s/batch, batch_loss=14.4, batch_index=42, batch_size=256]

Epoch 2/10:   4%|▋               | 42/991 [10:59<3:59:54, 15.17s/batch, batch_loss=9.01, batch_index=43, batch_size=256]

Epoch 2/10:   4%|▋               | 43/991 [10:59<3:59:13, 15.14s/batch, batch_loss=9.01, batch_index=43, batch_size=256]

Epoch 2/10:   4%|▋               | 43/991 [11:13<3:59:13, 15.14s/batch, batch_loss=14.5, batch_index=44, batch_size=256]

Epoch 2/10:   4%|▋               | 44/991 [11:13<3:58:00, 15.08s/batch, batch_loss=14.5, batch_index=44, batch_size=256]

Epoch 2/10:   4%|▋               | 44/991 [11:29<3:58:00, 15.08s/batch, batch_loss=15.2, batch_index=45, batch_size=256]

Epoch 2/10:   5%|▋               | 45/991 [11:29<4:00:53, 15.28s/batch, batch_loss=15.2, batch_index=45, batch_size=256]

Epoch 2/10:   5%|▋               | 45/991 [11:45<4:00:53, 15.28s/batch, batch_loss=13.1, batch_index=46, batch_size=256]

Epoch 2/10:   5%|▋               | 46/991 [11:45<4:01:26, 15.33s/batch, batch_loss=13.1, batch_index=46, batch_size=256]

Epoch 2/10:   5%|▋               | 46/991 [12:00<4:01:26, 15.33s/batch, batch_loss=5.82, batch_index=47, batch_size=256]

Epoch 2/10:   5%|▊               | 47/991 [12:00<4:01:22, 15.34s/batch, batch_loss=5.82, batch_index=47, batch_size=256]

Epoch 2/10:   5%|▊               | 47/991 [12:15<4:01:22, 15.34s/batch, batch_loss=13.6, batch_index=48, batch_size=256]

Epoch 2/10:   5%|▊               | 48/991 [12:15<4:00:28, 15.30s/batch, batch_loss=13.6, batch_index=48, batch_size=256]

Epoch 2/10:   5%|▊               | 48/991 [12:31<4:00:28, 15.30s/batch, batch_loss=12.6, batch_index=49, batch_size=256]

Epoch 2/10:   5%|▊               | 49/991 [12:31<4:01:22, 15.37s/batch, batch_loss=12.6, batch_index=49, batch_size=256]

Epoch 2/10:   5%|▊               | 49/991 [12:45<4:01:22, 15.37s/batch, batch_loss=13.8, batch_index=50, batch_size=256]

Epoch 2/10:   5%|▊               | 50/991 [12:45<3:57:40, 15.15s/batch, batch_loss=13.8, batch_index=50, batch_size=256]

Epoch 2/10:   5%|▊               | 50/991 [12:59<3:57:40, 15.15s/batch, batch_loss=8.64, batch_index=51, batch_size=256]

Epoch 2/10:   5%|▊               | 51/991 [12:59<3:51:48, 14.80s/batch, batch_loss=8.64, batch_index=51, batch_size=256]

Epoch 2/10:   5%|▊               | 51/991 [13:14<3:51:48, 14.80s/batch, batch_loss=13.1, batch_index=52, batch_size=256]

Epoch 2/10:   5%|▊               | 52/991 [13:14<3:49:14, 14.65s/batch, batch_loss=13.1, batch_index=52, batch_size=256]

Epoch 2/10:   5%|▊               | 52/991 [13:29<3:49:14, 14.65s/batch, batch_loss=14.5, batch_index=53, batch_size=256]

Epoch 2/10:   5%|▊               | 53/991 [13:29<3:50:54, 14.77s/batch, batch_loss=14.5, batch_index=53, batch_size=256]

Epoch 2/10:   5%|▊               | 53/991 [13:44<3:50:54, 14.77s/batch, batch_loss=8.02, batch_index=54, batch_size=256]

Epoch 2/10:   5%|▊               | 54/991 [13:44<3:54:11, 15.00s/batch, batch_loss=8.02, batch_index=54, batch_size=256]

Epoch 2/10:   5%|▊               | 54/991 [13:58<3:54:11, 15.00s/batch, batch_loss=10.2, batch_index=55, batch_size=256]

Epoch 2/10:   6%|▉               | 55/991 [13:58<3:50:08, 14.75s/batch, batch_loss=10.2, batch_index=55, batch_size=256]

Epoch 2/10:   6%|▉               | 55/991 [14:14<3:50:08, 14.75s/batch, batch_loss=12.2, batch_index=56, batch_size=256]

Epoch 2/10:   6%|▉               | 56/991 [14:14<3:54:13, 15.03s/batch, batch_loss=12.2, batch_index=56, batch_size=256]

Epoch 2/10:   6%|▉               | 56/991 [14:29<3:54:13, 15.03s/batch, batch_loss=7.34, batch_index=57, batch_size=256]

Epoch 2/10:   6%|▉               | 57/991 [14:29<3:51:46, 14.89s/batch, batch_loss=7.34, batch_index=57, batch_size=256]

Epoch 2/10:   6%|▉               | 57/991 [14:44<3:51:46, 14.89s/batch, batch_loss=14.4, batch_index=58, batch_size=256]

Epoch 2/10:   6%|▉               | 58/991 [14:44<3:54:38, 15.09s/batch, batch_loss=14.4, batch_index=58, batch_size=256]

Epoch 2/10:   6%|▉               | 58/991 [14:59<3:54:38, 15.09s/batch, batch_loss=9.13, batch_index=59, batch_size=256]

Epoch 2/10:   6%|▉               | 59/991 [14:59<3:54:28, 15.09s/batch, batch_loss=9.13, batch_index=59, batch_size=256]

Epoch 2/10:   6%|▉               | 59/991 [15:14<3:54:28, 15.09s/batch, batch_loss=16.8, batch_index=60, batch_size=256]

Epoch 2/10:   6%|▉               | 60/991 [15:14<3:51:38, 14.93s/batch, batch_loss=16.8, batch_index=60, batch_size=256]

Epoch 2/10:   6%|▉               | 60/991 [15:29<3:51:38, 14.93s/batch, batch_loss=10.6, batch_index=61, batch_size=256]

Epoch 2/10:   6%|▉               | 61/991 [15:29<3:52:46, 15.02s/batch, batch_loss=10.6, batch_index=61, batch_size=256]

Epoch 2/10:   6%|▉               | 61/991 [15:44<3:52:46, 15.02s/batch, batch_loss=10.9, batch_index=62, batch_size=256]

Epoch 2/10:   6%|█               | 62/991 [15:44<3:52:13, 15.00s/batch, batch_loss=10.9, batch_index=62, batch_size=256]

Epoch 2/10:   6%|█                | 62/991 [15:59<3:52:13, 15.00s/batch, batch_loss=417, batch_index=63, batch_size=256]

Epoch 2/10:   6%|█                | 63/991 [15:59<3:50:14, 14.89s/batch, batch_loss=417, batch_index=63, batch_size=256]

Epoch 2/10:   6%|█                | 63/991 [16:13<3:50:14, 14.89s/batch, batch_loss=799, batch_index=64, batch_size=256]

Epoch 2/10:   6%|█                | 64/991 [16:13<3:49:00, 14.82s/batch, batch_loss=799, batch_index=64, batch_size=256]

Epoch 2/10:   6%|▊            | 64/991 [16:28<3:49:00, 14.82s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 2/10:   7%|▊            | 65/991 [16:28<3:49:49, 14.89s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 2/10:   7%|█               | 65/991 [16:43<3:49:49, 14.89s/batch, batch_loss=4.21, batch_index=66, batch_size=256]

Epoch 2/10:   7%|█               | 66/991 [16:43<3:48:09, 14.80s/batch, batch_loss=4.21, batch_index=66, batch_size=256]

Epoch 2/10:   7%|█               | 66/991 [16:58<3:48:09, 14.80s/batch, batch_loss=10.8, batch_index=67, batch_size=256]

Epoch 2/10:   7%|█               | 67/991 [16:58<3:49:59, 14.93s/batch, batch_loss=10.8, batch_index=67, batch_size=256]

Epoch 2/10:   7%|█▏               | 67/991 [17:13<3:49:59, 14.93s/batch, batch_loss=8.3, batch_index=68, batch_size=256]

Epoch 2/10:   7%|█▏               | 68/991 [17:13<3:50:49, 15.00s/batch, batch_loss=8.3, batch_index=68, batch_size=256]

Epoch 2/10:   7%|█               | 68/991 [17:28<3:50:49, 15.00s/batch, batch_loss=19.8, batch_index=69, batch_size=256]

Epoch 2/10:   7%|█               | 69/991 [17:28<3:49:09, 14.91s/batch, batch_loss=19.8, batch_index=69, batch_size=256]

Epoch 2/10:   7%|█▏               | 69/991 [17:43<3:49:09, 14.91s/batch, batch_loss=9.2, batch_index=70, batch_size=256]

Epoch 2/10:   7%|█▏               | 70/991 [17:43<3:48:15, 14.87s/batch, batch_loss=9.2, batch_index=70, batch_size=256]

Epoch 2/10:   7%|█▏              | 70/991 [17:58<3:48:15, 14.87s/batch, batch_loss=9.44, batch_index=71, batch_size=256]

Epoch 2/10:   7%|█▏              | 71/991 [17:58<3:49:28, 14.97s/batch, batch_loss=9.44, batch_index=71, batch_size=256]

Epoch 2/10:   7%|█▏              | 71/991 [18:12<3:49:28, 14.97s/batch, batch_loss=13.3, batch_index=72, batch_size=256]

Epoch 2/10:   7%|█▏              | 72/991 [18:12<3:46:06, 14.76s/batch, batch_loss=13.3, batch_index=72, batch_size=256]

Epoch 2/10:   7%|█▏              | 72/991 [18:27<3:46:06, 14.76s/batch, batch_loss=24.6, batch_index=73, batch_size=256]

Epoch 2/10:   7%|█▏              | 73/991 [18:27<3:46:28, 14.80s/batch, batch_loss=24.6, batch_index=73, batch_size=256]

Epoch 2/10:   7%|▉            | 73/991 [18:42<3:46:28, 14.80s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 2/10:   7%|▉            | 74/991 [18:42<3:44:34, 14.69s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 2/10:   7%|█▏              | 74/991 [18:56<3:44:34, 14.69s/batch, batch_loss=15.3, batch_index=75, batch_size=256]

Epoch 2/10:   8%|█▏              | 75/991 [18:56<3:41:53, 14.53s/batch, batch_loss=15.3, batch_index=75, batch_size=256]

Epoch 2/10:   8%|█▎                | 75/991 [19:11<3:41:53, 14.53s/batch, batch_loss=11, batch_index=76, batch_size=256]

Epoch 2/10:   8%|█▍                | 76/991 [19:11<3:42:21, 14.58s/batch, batch_loss=11, batch_index=76, batch_size=256]

Epoch 2/10:   8%|█▏              | 76/991 [19:26<3:42:21, 14.58s/batch, batch_loss=11.2, batch_index=77, batch_size=256]

Epoch 2/10:   8%|█▏              | 77/991 [19:26<3:47:46, 14.95s/batch, batch_loss=11.2, batch_index=77, batch_size=256]

Epoch 2/10:   8%|█▏              | 77/991 [19:41<3:47:46, 14.95s/batch, batch_loss=13.8, batch_index=78, batch_size=256]

Epoch 2/10:   8%|█▎              | 78/991 [19:41<3:48:10, 14.99s/batch, batch_loss=13.8, batch_index=78, batch_size=256]

Epoch 2/10:   8%|█▎              | 78/991 [19:56<3:48:10, 14.99s/batch, batch_loss=11.1, batch_index=79, batch_size=256]

Epoch 2/10:   8%|█▎              | 79/991 [19:56<3:45:34, 14.84s/batch, batch_loss=11.1, batch_index=79, batch_size=256]

Epoch 2/10:   8%|█▎              | 79/991 [20:12<3:45:34, 14.84s/batch, batch_loss=8.98, batch_index=80, batch_size=256]

Epoch 2/10:   8%|█▎              | 80/991 [20:12<3:49:12, 15.10s/batch, batch_loss=8.98, batch_index=80, batch_size=256]

Epoch 2/10:   8%|█▎              | 80/991 [20:28<3:49:12, 15.10s/batch, batch_loss=13.6, batch_index=81, batch_size=256]

Epoch 2/10:   8%|█▎              | 81/991 [20:28<3:54:52, 15.49s/batch, batch_loss=13.6, batch_index=81, batch_size=256]

Epoch 2/10:   8%|█▎              | 81/991 [20:41<3:54:52, 15.49s/batch, batch_loss=11.7, batch_index=82, batch_size=256]

Epoch 2/10:   8%|█▎              | 82/991 [20:41<3:44:17, 14.81s/batch, batch_loss=11.7, batch_index=82, batch_size=256]

Epoch 2/10:   8%|█▌                 | 82/991 [20:55<3:44:17, 14.81s/batch, batch_loss=8, batch_index=83, batch_size=256]

Epoch 2/10:   8%|█▌                 | 83/991 [20:55<3:40:57, 14.60s/batch, batch_loss=8, batch_index=83, batch_size=256]

Epoch 2/10:   8%|█▎              | 83/991 [21:10<3:40:57, 14.60s/batch, batch_loss=11.1, batch_index=84, batch_size=256]

Epoch 2/10:   8%|█▎              | 84/991 [21:10<3:40:07, 14.56s/batch, batch_loss=11.1, batch_index=84, batch_size=256]

Epoch 2/10:   8%|█▎              | 84/991 [21:25<3:40:07, 14.56s/batch, batch_loss=8.14, batch_index=85, batch_size=256]

Epoch 2/10:   9%|█▎              | 85/991 [21:25<3:42:13, 14.72s/batch, batch_loss=8.14, batch_index=85, batch_size=256]

Epoch 2/10:   9%|█▎              | 85/991 [21:40<3:42:13, 14.72s/batch, batch_loss=10.5, batch_index=86, batch_size=256]

Epoch 2/10:   9%|█▍              | 86/991 [21:40<3:43:59, 14.85s/batch, batch_loss=10.5, batch_index=86, batch_size=256]

Epoch 2/10:   9%|█▌                | 86/991 [21:55<3:43:59, 14.85s/batch, batch_loss=11, batch_index=87, batch_size=256]

Epoch 2/10:   9%|█▌                | 87/991 [21:55<3:43:41, 14.85s/batch, batch_loss=11, batch_index=87, batch_size=256]

Epoch 2/10:   9%|█▍              | 87/991 [22:09<3:43:41, 14.85s/batch, batch_loss=9.11, batch_index=88, batch_size=256]

Epoch 2/10:   9%|█▍              | 88/991 [22:09<3:42:11, 14.76s/batch, batch_loss=9.11, batch_index=88, batch_size=256]

Epoch 2/10:   9%|█▍              | 88/991 [22:25<3:42:11, 14.76s/batch, batch_loss=6.38, batch_index=89, batch_size=256]

Epoch 2/10:   9%|█▍              | 89/991 [22:25<3:46:57, 15.10s/batch, batch_loss=6.38, batch_index=89, batch_size=256]

Epoch 2/10:   9%|█▌               | 89/991 [22:41<3:46:57, 15.10s/batch, batch_loss=249, batch_index=90, batch_size=256]

Epoch 2/10:   9%|█▌               | 90/991 [22:41<3:51:07, 15.39s/batch, batch_loss=249, batch_index=90, batch_size=256]

Epoch 2/10:   9%|█▏           | 90/991 [22:56<3:51:07, 15.39s/batch, batch_loss=1.54e+3, batch_index=91, batch_size=256]

Epoch 2/10:   9%|█▏           | 91/991 [22:56<3:48:57, 15.26s/batch, batch_loss=1.54e+3, batch_index=91, batch_size=256]

Epoch 2/10:   9%|█▍              | 91/991 [23:11<3:48:57, 15.26s/batch, batch_loss=15.5, batch_index=92, batch_size=256]

Epoch 2/10:   9%|█▍              | 92/991 [23:11<3:46:20, 15.11s/batch, batch_loss=15.5, batch_index=92, batch_size=256]

Epoch 2/10:   9%|█▍              | 92/991 [23:26<3:46:20, 15.11s/batch, batch_loss=19.5, batch_index=93, batch_size=256]

Epoch 2/10:   9%|█▌              | 93/991 [23:26<3:44:28, 15.00s/batch, batch_loss=19.5, batch_index=93, batch_size=256]

Epoch 2/10:   9%|█▌              | 93/991 [23:40<3:44:28, 15.00s/batch, batch_loss=18.4, batch_index=94, batch_size=256]

Epoch 2/10:   9%|█▌              | 94/991 [23:40<3:40:27, 14.75s/batch, batch_loss=18.4, batch_index=94, batch_size=256]

Epoch 2/10:   9%|█▌              | 94/991 [23:54<3:40:27, 14.75s/batch, batch_loss=18.6, batch_index=95, batch_size=256]

Epoch 2/10:  10%|█▌              | 95/991 [23:54<3:38:07, 14.61s/batch, batch_loss=18.6, batch_index=95, batch_size=256]

Epoch 2/10:  10%|█▌              | 95/991 [24:11<3:38:07, 14.61s/batch, batch_loss=18.5, batch_index=96, batch_size=256]

Epoch 2/10:  10%|█▌              | 96/991 [24:11<3:48:26, 15.31s/batch, batch_loss=18.5, batch_index=96, batch_size=256]

Epoch 2/10:  10%|█▌              | 96/991 [24:26<3:48:26, 15.31s/batch, batch_loss=17.9, batch_index=97, batch_size=256]

Epoch 2/10:  10%|█▌              | 97/991 [24:26<3:46:55, 15.23s/batch, batch_loss=17.9, batch_index=97, batch_size=256]

Epoch 2/10:  10%|█▌              | 97/991 [24:41<3:46:55, 15.23s/batch, batch_loss=19.5, batch_index=98, batch_size=256]

Epoch 2/10:  10%|█▌              | 98/991 [24:41<3:44:14, 15.07s/batch, batch_loss=19.5, batch_index=98, batch_size=256]

Epoch 2/10:  10%|█▌              | 98/991 [24:56<3:44:14, 15.07s/batch, batch_loss=15.5, batch_index=99, batch_size=256]

Epoch 2/10:  10%|█▌              | 99/991 [24:56<3:44:03, 15.07s/batch, batch_loss=15.5, batch_index=99, batch_size=256]

Epoch 2/10:  10%|█▍             | 99/991 [25:11<3:44:03, 15.07s/batch, batch_loss=15.7, batch_index=100, batch_size=256]

Epoch 2/10:  10%|█▍            | 100/991 [25:11<3:44:58, 15.15s/batch, batch_loss=15.7, batch_index=100, batch_size=256]

Epoch 2/10:  10%|█▍            | 100/991 [25:26<3:44:58, 15.15s/batch, batch_loss=12.4, batch_index=101, batch_size=256]

Epoch 2/10:  10%|█▍            | 101/991 [25:26<3:44:04, 15.11s/batch, batch_loss=12.4, batch_index=101, batch_size=256]

Epoch 2/10:  10%|█▍            | 101/991 [25:42<3:44:04, 15.11s/batch, batch_loss=20.2, batch_index=102, batch_size=256]

Epoch 2/10:  10%|█▍            | 102/991 [25:42<3:45:31, 15.22s/batch, batch_loss=20.2, batch_index=102, batch_size=256]

Epoch 2/10:  10%|█▌             | 102/991 [25:56<3:45:31, 15.22s/batch, batch_loss=913, batch_index=103, batch_size=256]

Epoch 2/10:  10%|█▌             | 103/991 [25:56<3:42:16, 15.02s/batch, batch_loss=913, batch_index=103, batch_size=256]

Epoch 2/10:  10%|█▍            | 103/991 [26:14<3:42:16, 15.02s/batch, batch_loss=14.8, batch_index=104, batch_size=256]

Epoch 2/10:  10%|█▍            | 104/991 [26:14<3:51:28, 15.66s/batch, batch_loss=14.8, batch_index=104, batch_size=256]

Epoch 2/10:  10%|█▍            | 104/991 [26:28<3:51:28, 15.66s/batch, batch_loss=10.3, batch_index=105, batch_size=256]

Epoch 2/10:  11%|█▍            | 105/991 [26:28<3:47:14, 15.39s/batch, batch_loss=10.3, batch_index=105, batch_size=256]

Epoch 2/10:  11%|█▍            | 105/991 [26:43<3:47:14, 15.39s/batch, batch_loss=10.3, batch_index=106, batch_size=256]

Epoch 2/10:  11%|█▍            | 106/991 [26:43<3:42:30, 15.09s/batch, batch_loss=10.3, batch_index=106, batch_size=256]

Epoch 2/10:  11%|█▍            | 106/991 [26:58<3:42:30, 15.09s/batch, batch_loss=19.2, batch_index=107, batch_size=256]

Epoch 2/10:  11%|█▌            | 107/991 [26:58<3:40:56, 15.00s/batch, batch_loss=19.2, batch_index=107, batch_size=256]

Epoch 2/10:  11%|█▌            | 107/991 [27:13<3:40:56, 15.00s/batch, batch_loss=25.4, batch_index=108, batch_size=256]

Epoch 2/10:  11%|█▌            | 108/991 [27:13<3:42:58, 15.15s/batch, batch_loss=25.4, batch_index=108, batch_size=256]

Epoch 2/10:  11%|█▌            | 108/991 [27:29<3:42:58, 15.15s/batch, batch_loss=15.4, batch_index=109, batch_size=256]

Epoch 2/10:  11%|█▌            | 109/991 [27:29<3:45:11, 15.32s/batch, batch_loss=15.4, batch_index=109, batch_size=256]

Epoch 2/10:  11%|█▌            | 109/991 [27:44<3:45:11, 15.32s/batch, batch_loss=14.4, batch_index=110, batch_size=256]

Epoch 2/10:  11%|█▌            | 110/991 [27:44<3:43:51, 15.25s/batch, batch_loss=14.4, batch_index=110, batch_size=256]

Epoch 2/10:  11%|█▌            | 110/991 [28:02<3:43:51, 15.25s/batch, batch_loss=19.2, batch_index=111, batch_size=256]

Epoch 2/10:  11%|█▌            | 111/991 [28:02<3:55:31, 16.06s/batch, batch_loss=19.2, batch_index=111, batch_size=256]

Epoch 2/10:  11%|█▌            | 111/991 [28:17<3:55:31, 16.06s/batch, batch_loss=24.6, batch_index=112, batch_size=256]

Epoch 2/10:  11%|█▌            | 112/991 [28:17<3:50:54, 15.76s/batch, batch_loss=24.6, batch_index=112, batch_size=256]

Epoch 2/10:  11%|█▌            | 112/991 [28:33<3:50:54, 15.76s/batch, batch_loss=11.2, batch_index=113, batch_size=256]

Epoch 2/10:  11%|█▌            | 113/991 [28:33<3:50:25, 15.75s/batch, batch_loss=11.2, batch_index=113, batch_size=256]

Epoch 2/10:  11%|█▌            | 113/991 [28:49<3:50:25, 15.75s/batch, batch_loss=17.9, batch_index=114, batch_size=256]

Epoch 2/10:  12%|█▌            | 114/991 [28:49<3:51:05, 15.81s/batch, batch_loss=17.9, batch_index=114, batch_size=256]

Epoch 2/10:  12%|█▌            | 114/991 [29:04<3:51:05, 15.81s/batch, batch_loss=21.9, batch_index=115, batch_size=256]

Epoch 2/10:  12%|█▌            | 115/991 [29:04<3:48:42, 15.66s/batch, batch_loss=21.9, batch_index=115, batch_size=256]

Epoch 2/10:  12%|█▌            | 115/991 [29:19<3:48:42, 15.66s/batch, batch_loss=11.3, batch_index=116, batch_size=256]

Epoch 2/10:  12%|█▋            | 116/991 [29:19<3:47:46, 15.62s/batch, batch_loss=11.3, batch_index=116, batch_size=256]

Epoch 2/10:  12%|█▋            | 116/991 [29:35<3:47:46, 15.62s/batch, batch_loss=18.2, batch_index=117, batch_size=256]

Epoch 2/10:  12%|█▋            | 117/991 [29:35<3:47:03, 15.59s/batch, batch_loss=18.2, batch_index=117, batch_size=256]

Epoch 2/10:  12%|█▋            | 117/991 [29:51<3:47:03, 15.59s/batch, batch_loss=18.1, batch_index=118, batch_size=256]

Epoch 2/10:  12%|█▋            | 118/991 [29:51<3:49:08, 15.75s/batch, batch_loss=18.1, batch_index=118, batch_size=256]

Epoch 2/10:  12%|█▋            | 118/991 [30:06<3:49:08, 15.75s/batch, batch_loss=26.5, batch_index=119, batch_size=256]

Epoch 2/10:  12%|█▋            | 119/991 [30:06<3:47:04, 15.62s/batch, batch_loss=26.5, batch_index=119, batch_size=256]

Epoch 2/10:  12%|█▋            | 119/991 [30:22<3:47:04, 15.62s/batch, batch_loss=17.6, batch_index=120, batch_size=256]

Epoch 2/10:  12%|█▋            | 120/991 [30:22<3:47:25, 15.67s/batch, batch_loss=17.6, batch_index=120, batch_size=256]

Epoch 2/10:  12%|█▋            | 120/991 [30:37<3:47:25, 15.67s/batch, batch_loss=23.7, batch_index=121, batch_size=256]

Epoch 2/10:  12%|█▋            | 121/991 [30:37<3:41:42, 15.29s/batch, batch_loss=23.7, batch_index=121, batch_size=256]

Epoch 2/10:  12%|█▋            | 121/991 [30:51<3:41:42, 15.29s/batch, batch_loss=9.59, batch_index=122, batch_size=256]

Epoch 2/10:  12%|█▋            | 122/991 [30:51<3:38:14, 15.07s/batch, batch_loss=9.59, batch_index=122, batch_size=256]

Epoch 2/10:  12%|█▋            | 122/991 [31:07<3:38:14, 15.07s/batch, batch_loss=15.8, batch_index=123, batch_size=256]

Epoch 2/10:  12%|█▋            | 123/991 [31:07<3:40:50, 15.27s/batch, batch_loss=15.8, batch_index=123, batch_size=256]

Epoch 2/10:  12%|█▎         | 123/991 [31:22<3:40:50, 15.27s/batch, batch_loss=3.48e+3, batch_index=124, batch_size=256]

Epoch 2/10:  13%|█▍         | 124/991 [31:22<3:41:31, 15.33s/batch, batch_loss=3.48e+3, batch_index=124, batch_size=256]

Epoch 2/10:  13%|█▊            | 124/991 [31:37<3:41:31, 15.33s/batch, batch_loss=8.26, batch_index=125, batch_size=256]

Epoch 2/10:  13%|█▊            | 125/991 [31:37<3:39:12, 15.19s/batch, batch_loss=8.26, batch_index=125, batch_size=256]

Epoch 2/10:  13%|█▊            | 125/991 [31:53<3:39:12, 15.19s/batch, batch_loss=13.9, batch_index=126, batch_size=256]

Epoch 2/10:  13%|█▊            | 126/991 [31:53<3:40:25, 15.29s/batch, batch_loss=13.9, batch_index=126, batch_size=256]

Epoch 2/10:  13%|█▍         | 126/991 [32:10<3:40:25, 15.29s/batch, batch_loss=1.89e+3, batch_index=127, batch_size=256]

Epoch 2/10:  13%|█▍         | 127/991 [32:10<3:49:50, 15.96s/batch, batch_loss=1.89e+3, batch_index=127, batch_size=256]

Epoch 2/10:  13%|█▍         | 127/991 [32:25<3:49:50, 15.96s/batch, batch_loss=1.57e+3, batch_index=128, batch_size=256]

Epoch 2/10:  13%|█▍         | 128/991 [32:25<3:44:00, 15.57s/batch, batch_loss=1.57e+3, batch_index=128, batch_size=256]

Epoch 2/10:  13%|█▉             | 128/991 [32:40<3:44:00, 15.57s/batch, batch_loss=226, batch_index=129, batch_size=256]

Epoch 2/10:  13%|█▉             | 129/991 [32:40<3:40:51, 15.37s/batch, batch_loss=226, batch_index=129, batch_size=256]

Epoch 2/10:  13%|█▉             | 129/991 [32:55<3:40:51, 15.37s/batch, batch_loss=994, batch_index=130, batch_size=256]

Epoch 2/10:  13%|█▉             | 130/991 [32:55<3:39:44, 15.31s/batch, batch_loss=994, batch_index=130, batch_size=256]

Epoch 2/10:  13%|█▍         | 130/991 [33:10<3:39:44, 15.31s/batch, batch_loss=8.33e+3, batch_index=131, batch_size=256]

Epoch 2/10:  13%|█▍         | 131/991 [33:10<3:37:01, 15.14s/batch, batch_loss=8.33e+3, batch_index=131, batch_size=256]

Epoch 2/10:  13%|█▊            | 131/991 [33:24<3:37:01, 15.14s/batch, batch_loss=18.3, batch_index=132, batch_size=256]

Epoch 2/10:  13%|█▊            | 132/991 [33:24<3:34:16, 14.97s/batch, batch_loss=18.3, batch_index=132, batch_size=256]

Epoch 2/10:  13%|█▊            | 132/991 [33:39<3:34:16, 14.97s/batch, batch_loss=8.99, batch_index=133, batch_size=256]

Epoch 2/10:  13%|█▉            | 133/991 [33:39<3:31:48, 14.81s/batch, batch_loss=8.99, batch_index=133, batch_size=256]

Epoch 2/10:  13%|█▉            | 133/991 [33:53<3:31:48, 14.81s/batch, batch_loss=12.3, batch_index=134, batch_size=256]

Epoch 2/10:  14%|█▉            | 134/991 [33:53<3:29:02, 14.64s/batch, batch_loss=12.3, batch_index=134, batch_size=256]

Epoch 2/10:  14%|█▉            | 134/991 [34:08<3:29:02, 14.64s/batch, batch_loss=16.7, batch_index=135, batch_size=256]

Epoch 2/10:  14%|█▉            | 135/991 [34:08<3:31:17, 14.81s/batch, batch_loss=16.7, batch_index=135, batch_size=256]

Epoch 2/10:  14%|█▉            | 135/991 [34:23<3:31:17, 14.81s/batch, batch_loss=8.24, batch_index=136, batch_size=256]

Epoch 2/10:  14%|█▉            | 136/991 [34:23<3:33:06, 14.95s/batch, batch_loss=8.24, batch_index=136, batch_size=256]

Epoch 2/10:  14%|█▉            | 136/991 [34:39<3:33:06, 14.95s/batch, batch_loss=15.8, batch_index=137, batch_size=256]

Epoch 2/10:  14%|█▉            | 137/991 [34:39<3:35:26, 15.14s/batch, batch_loss=15.8, batch_index=137, batch_size=256]

Epoch 2/10:  14%|█▉            | 137/991 [34:54<3:35:26, 15.14s/batch, batch_loss=16.2, batch_index=138, batch_size=256]

Epoch 2/10:  14%|█▉            | 138/991 [34:54<3:35:42, 15.17s/batch, batch_loss=16.2, batch_index=138, batch_size=256]

Epoch 2/10:  14%|█▉            | 138/991 [35:10<3:35:42, 15.17s/batch, batch_loss=8.17, batch_index=139, batch_size=256]

Epoch 2/10:  14%|█▉            | 139/991 [35:10<3:36:37, 15.26s/batch, batch_loss=8.17, batch_index=139, batch_size=256]

Epoch 2/10:  14%|█▉            | 139/991 [35:24<3:36:37, 15.26s/batch, batch_loss=11.1, batch_index=140, batch_size=256]

Epoch 2/10:  14%|█▉            | 140/991 [35:24<3:32:49, 15.01s/batch, batch_loss=11.1, batch_index=140, batch_size=256]

Epoch 2/10:  14%|█▉            | 140/991 [35:40<3:32:49, 15.01s/batch, batch_loss=5.65, batch_index=141, batch_size=256]

Epoch 2/10:  14%|█▉            | 141/991 [35:40<3:37:56, 15.38s/batch, batch_loss=5.65, batch_index=141, batch_size=256]

Epoch 2/10:  14%|██▏            | 141/991 [35:56<3:37:56, 15.38s/batch, batch_loss=6.3, batch_index=142, batch_size=256]

Epoch 2/10:  14%|██▏            | 142/991 [35:56<3:37:02, 15.34s/batch, batch_loss=6.3, batch_index=142, batch_size=256]

Epoch 2/10:  14%|██            | 142/991 [36:13<3:37:02, 15.34s/batch, batch_loss=14.5, batch_index=143, batch_size=256]

Epoch 2/10:  14%|██            | 143/991 [36:13<3:47:27, 16.09s/batch, batch_loss=14.5, batch_index=143, batch_size=256]

Epoch 2/10:  14%|██            | 143/991 [36:29<3:47:27, 16.09s/batch, batch_loss=14.5, batch_index=144, batch_size=256]

Epoch 2/10:  15%|██            | 144/991 [36:29<3:42:56, 15.79s/batch, batch_loss=14.5, batch_index=144, batch_size=256]

Epoch 2/10:  15%|██            | 144/991 [36:44<3:42:56, 15.79s/batch, batch_loss=18.7, batch_index=145, batch_size=256]

Epoch 2/10:  15%|██            | 145/991 [36:44<3:39:23, 15.56s/batch, batch_loss=18.7, batch_index=145, batch_size=256]

Epoch 2/10:  15%|██            | 145/991 [36:59<3:39:23, 15.56s/batch, batch_loss=15.6, batch_index=146, batch_size=256]

Epoch 2/10:  15%|██            | 146/991 [36:59<3:38:37, 15.52s/batch, batch_loss=15.6, batch_index=146, batch_size=256]

Epoch 2/10:  15%|██            | 146/991 [37:15<3:38:37, 15.52s/batch, batch_loss=8.95, batch_index=147, batch_size=256]

Epoch 2/10:  15%|██            | 147/991 [37:15<3:39:45, 15.62s/batch, batch_loss=8.95, batch_index=147, batch_size=256]

Epoch 2/10:  15%|██            | 147/991 [37:33<3:39:45, 15.62s/batch, batch_loss=38.5, batch_index=148, batch_size=256]

Epoch 2/10:  15%|██            | 148/991 [37:33<3:49:46, 16.35s/batch, batch_loss=38.5, batch_index=148, batch_size=256]

Epoch 2/10:  15%|██            | 148/991 [37:48<3:49:46, 16.35s/batch, batch_loss=10.7, batch_index=149, batch_size=256]

Epoch 2/10:  15%|██            | 149/991 [37:48<3:44:15, 15.98s/batch, batch_loss=10.7, batch_index=149, batch_size=256]

Epoch 2/10:  15%|██            | 149/991 [38:03<3:44:15, 15.98s/batch, batch_loss=10.1, batch_index=150, batch_size=256]

Epoch 2/10:  15%|██            | 150/991 [38:03<3:38:23, 15.58s/batch, batch_loss=10.1, batch_index=150, batch_size=256]

Epoch 2/10:  15%|██▍             | 150/991 [38:18<3:38:23, 15.58s/batch, batch_loss=19, batch_index=151, batch_size=256]

Epoch 2/10:  15%|██▍             | 151/991 [38:18<3:38:47, 15.63s/batch, batch_loss=19, batch_index=151, batch_size=256]

Epoch 2/10:  15%|██▏           | 151/991 [38:34<3:38:47, 15.63s/batch, batch_loss=15.6, batch_index=152, batch_size=256]

Epoch 2/10:  15%|██▏           | 152/991 [38:34<3:40:09, 15.74s/batch, batch_loss=15.6, batch_index=152, batch_size=256]

Epoch 2/10:  15%|██▏           | 152/991 [38:50<3:40:09, 15.74s/batch, batch_loss=20.8, batch_index=153, batch_size=256]

Epoch 2/10:  15%|██▏           | 153/991 [38:50<3:40:43, 15.80s/batch, batch_loss=20.8, batch_index=153, batch_size=256]

Epoch 2/10:  15%|██▏           | 153/991 [39:06<3:40:43, 15.80s/batch, batch_loss=20.9, batch_index=154, batch_size=256]

Epoch 2/10:  16%|██▏           | 154/991 [39:06<3:41:17, 15.86s/batch, batch_loss=20.9, batch_index=154, batch_size=256]

Epoch 2/10:  16%|██▏           | 154/991 [39:21<3:41:17, 15.86s/batch, batch_loss=22.2, batch_index=155, batch_size=256]

Epoch 2/10:  16%|██▏           | 155/991 [39:21<3:37:07, 15.58s/batch, batch_loss=22.2, batch_index=155, batch_size=256]

Epoch 2/10:  16%|██▏           | 155/991 [39:36<3:37:07, 15.58s/batch, batch_loss=8.76, batch_index=156, batch_size=256]

Epoch 2/10:  16%|██▏           | 156/991 [39:36<3:34:05, 15.38s/batch, batch_loss=8.76, batch_index=156, batch_size=256]

Epoch 2/10:  16%|██▏           | 156/991 [39:51<3:34:05, 15.38s/batch, batch_loss=24.3, batch_index=157, batch_size=256]

Epoch 2/10:  16%|██▏           | 157/991 [39:51<3:30:52, 15.17s/batch, batch_loss=24.3, batch_index=157, batch_size=256]

Epoch 2/10:  16%|██▏           | 157/991 [40:06<3:30:52, 15.17s/batch, batch_loss=7.84, batch_index=158, batch_size=256]

Epoch 2/10:  16%|██▏           | 158/991 [40:06<3:30:08, 15.14s/batch, batch_loss=7.84, batch_index=158, batch_size=256]

Epoch 2/10:  16%|██▏           | 158/991 [40:21<3:30:08, 15.14s/batch, batch_loss=5.79, batch_index=159, batch_size=256]

Epoch 2/10:  16%|██▏           | 159/991 [40:21<3:27:59, 15.00s/batch, batch_loss=5.79, batch_index=159, batch_size=256]

Epoch 2/10:  16%|██▏           | 159/991 [40:36<3:27:59, 15.00s/batch, batch_loss=13.2, batch_index=160, batch_size=256]

Epoch 2/10:  16%|██▎           | 160/991 [40:36<3:30:38, 15.21s/batch, batch_loss=13.2, batch_index=160, batch_size=256]

Epoch 2/10:  16%|██▍            | 160/991 [40:51<3:30:38, 15.21s/batch, batch_loss=460, batch_index=161, batch_size=256]

Epoch 2/10:  16%|██▍            | 161/991 [40:51<3:27:44, 15.02s/batch, batch_loss=460, batch_index=161, batch_size=256]

Epoch 2/10:  16%|██▎           | 161/991 [41:06<3:27:44, 15.02s/batch, batch_loss=15.3, batch_index=162, batch_size=256]

Epoch 2/10:  16%|██▎           | 162/991 [41:06<3:27:20, 15.01s/batch, batch_loss=15.3, batch_index=162, batch_size=256]

Epoch 2/10:  16%|██▎           | 162/991 [41:21<3:27:20, 15.01s/batch, batch_loss=9.31, batch_index=163, batch_size=256]

Epoch 2/10:  16%|██▎           | 163/991 [41:21<3:26:31, 14.97s/batch, batch_loss=9.31, batch_index=163, batch_size=256]

Epoch 2/10:  16%|██▎           | 163/991 [41:36<3:26:31, 14.97s/batch, batch_loss=12.3, batch_index=164, batch_size=256]

Epoch 2/10:  17%|██▎           | 164/991 [41:36<3:26:47, 15.00s/batch, batch_loss=12.3, batch_index=164, batch_size=256]

Epoch 2/10:  17%|██▋             | 164/991 [41:50<3:26:47, 15.00s/batch, batch_loss=11, batch_index=165, batch_size=256]

Epoch 2/10:  17%|██▋             | 165/991 [41:50<3:24:53, 14.88s/batch, batch_loss=11, batch_index=165, batch_size=256]

Epoch 2/10:  17%|██▎           | 165/991 [42:05<3:24:53, 14.88s/batch, batch_loss=9.93, batch_index=166, batch_size=256]

Epoch 2/10:  17%|██▎           | 166/991 [42:05<3:22:55, 14.76s/batch, batch_loss=9.93, batch_index=166, batch_size=256]

Epoch 2/10:  17%|██▎           | 166/991 [42:21<3:22:55, 14.76s/batch, batch_loss=19.8, batch_index=167, batch_size=256]

Epoch 2/10:  17%|██▎           | 167/991 [42:21<3:27:10, 15.08s/batch, batch_loss=19.8, batch_index=167, batch_size=256]

Epoch 2/10:  17%|██▎           | 167/991 [42:36<3:27:10, 15.08s/batch, batch_loss=13.3, batch_index=168, batch_size=256]

Epoch 2/10:  17%|██▎           | 168/991 [42:36<3:26:29, 15.05s/batch, batch_loss=13.3, batch_index=168, batch_size=256]

Epoch 2/10:  17%|██▎           | 168/991 [42:51<3:26:29, 15.05s/batch, batch_loss=13.7, batch_index=169, batch_size=256]

Epoch 2/10:  17%|██▍           | 169/991 [42:51<3:26:47, 15.09s/batch, batch_loss=13.7, batch_index=169, batch_size=256]

Epoch 2/10:  17%|██▍           | 169/991 [43:07<3:26:47, 15.09s/batch, batch_loss=9.01, batch_index=170, batch_size=256]

Epoch 2/10:  17%|██▍           | 170/991 [43:07<3:29:32, 15.31s/batch, batch_loss=9.01, batch_index=170, batch_size=256]

Epoch 2/10:  17%|██▍           | 170/991 [43:22<3:29:32, 15.31s/batch, batch_loss=6.21, batch_index=171, batch_size=256]

Epoch 2/10:  17%|██▍           | 171/991 [43:22<3:28:16, 15.24s/batch, batch_loss=6.21, batch_index=171, batch_size=256]

Epoch 2/10:  17%|██▍           | 171/991 [43:38<3:28:16, 15.24s/batch, batch_loss=9.52, batch_index=172, batch_size=256]

Epoch 2/10:  17%|██▍           | 172/991 [43:38<3:31:16, 15.48s/batch, batch_loss=9.52, batch_index=172, batch_size=256]

Epoch 2/10:  17%|██▌            | 172/991 [43:53<3:31:16, 15.48s/batch, batch_loss=6.1, batch_index=173, batch_size=256]

Epoch 2/10:  17%|██▌            | 173/991 [43:53<3:31:22, 15.50s/batch, batch_loss=6.1, batch_index=173, batch_size=256]

Epoch 2/10:  17%|█▉         | 173/991 [44:10<3:31:22, 15.50s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 2/10:  18%|█▉         | 174/991 [44:10<3:34:21, 15.74s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 2/10:  18%|██▍           | 174/991 [44:26<3:34:21, 15.74s/batch, batch_loss=19.7, batch_index=175, batch_size=256]

Epoch 2/10:  18%|██▍           | 175/991 [44:26<3:37:33, 16.00s/batch, batch_loss=19.7, batch_index=175, batch_size=256]

Epoch 2/10:  18%|██▍           | 175/991 [44:42<3:37:33, 16.00s/batch, batch_loss=24.3, batch_index=176, batch_size=256]

Epoch 2/10:  18%|██▍           | 176/991 [44:42<3:36:42, 15.95s/batch, batch_loss=24.3, batch_index=176, batch_size=256]

Epoch 2/10:  18%|██▍           | 176/991 [44:58<3:36:42, 15.95s/batch, batch_loss=25.6, batch_index=177, batch_size=256]

Epoch 2/10:  18%|██▌           | 177/991 [44:58<3:34:55, 15.84s/batch, batch_loss=25.6, batch_index=177, batch_size=256]

Epoch 2/10:  18%|██▌           | 177/991 [45:13<3:34:55, 15.84s/batch, batch_loss=23.2, batch_index=178, batch_size=256]

Epoch 2/10:  18%|██▌           | 178/991 [45:13<3:33:11, 15.73s/batch, batch_loss=23.2, batch_index=178, batch_size=256]

Epoch 2/10:  18%|██▌           | 178/991 [45:29<3:33:11, 15.73s/batch, batch_loss=12.7, batch_index=179, batch_size=256]

Epoch 2/10:  18%|██▌           | 179/991 [45:29<3:34:17, 15.83s/batch, batch_loss=12.7, batch_index=179, batch_size=256]

Epoch 2/10:  18%|███              | 179/991 [45:45<3:34:17, 15.83s/batch, batch_loss=7, batch_index=180, batch_size=256]

Epoch 2/10:  18%|███              | 180/991 [45:45<3:34:19, 15.86s/batch, batch_loss=7, batch_index=180, batch_size=256]

Epoch 2/10:  18%|█▉         | 180/991 [46:01<3:34:19, 15.86s/batch, batch_loss=2.52e+4, batch_index=181, batch_size=256]

Epoch 2/10:  18%|██         | 181/991 [46:01<3:33:10, 15.79s/batch, batch_loss=2.52e+4, batch_index=181, batch_size=256]

Epoch 2/10:  18%|██▌           | 181/991 [46:16<3:33:10, 15.79s/batch, batch_loss=12.2, batch_index=182, batch_size=256]

Epoch 2/10:  18%|██▌           | 182/991 [46:16<3:31:08, 15.66s/batch, batch_loss=12.2, batch_index=182, batch_size=256]

Epoch 2/10:  18%|██▌           | 182/991 [46:32<3:31:08, 15.66s/batch, batch_loss=18.5, batch_index=183, batch_size=256]

Epoch 2/10:  18%|██▌           | 183/991 [46:32<3:32:13, 15.76s/batch, batch_loss=18.5, batch_index=183, batch_size=256]

Epoch 2/10:  18%|██▌           | 183/991 [46:47<3:32:13, 15.76s/batch, batch_loss=19.1, batch_index=184, batch_size=256]

Epoch 2/10:  19%|██▌           | 184/991 [46:47<3:29:19, 15.56s/batch, batch_loss=19.1, batch_index=184, batch_size=256]

Epoch 2/10:  19%|██▉             | 184/991 [47:03<3:29:19, 15.56s/batch, batch_loss=13, batch_index=185, batch_size=256]

Epoch 2/10:  19%|██▉             | 185/991 [47:03<3:28:26, 15.52s/batch, batch_loss=13, batch_index=185, batch_size=256]

Epoch 2/10:  19%|██▌           | 185/991 [47:18<3:28:26, 15.52s/batch, batch_loss=19.1, batch_index=186, batch_size=256]

Epoch 2/10:  19%|██▋           | 186/991 [47:18<3:27:43, 15.48s/batch, batch_loss=19.1, batch_index=186, batch_size=256]

Epoch 2/10:  19%|██▋           | 186/991 [47:35<3:27:43, 15.48s/batch, batch_loss=15.3, batch_index=187, batch_size=256]

Epoch 2/10:  19%|██▋           | 187/991 [47:35<3:32:03, 15.83s/batch, batch_loss=15.3, batch_index=187, batch_size=256]

Epoch 2/10:  19%|██▋           | 187/991 [47:49<3:32:03, 15.83s/batch, batch_loss=17.7, batch_index=188, batch_size=256]

Epoch 2/10:  19%|██▋           | 188/991 [47:49<3:26:39, 15.44s/batch, batch_loss=17.7, batch_index=188, batch_size=256]

Epoch 2/10:  19%|██▋           | 188/991 [48:05<3:26:39, 15.44s/batch, batch_loss=18.3, batch_index=189, batch_size=256]

Epoch 2/10:  19%|██▋           | 189/991 [48:05<3:26:32, 15.45s/batch, batch_loss=18.3, batch_index=189, batch_size=256]

Epoch 2/10:  19%|███             | 189/991 [48:20<3:26:32, 15.45s/batch, batch_loss=22, batch_index=190, batch_size=256]

Epoch 2/10:  19%|███             | 190/991 [48:20<3:25:08, 15.37s/batch, batch_loss=22, batch_index=190, batch_size=256]

Epoch 2/10:  19%|██▋           | 190/991 [48:34<3:25:08, 15.37s/batch, batch_loss=16.7, batch_index=191, batch_size=256]

Epoch 2/10:  19%|██▋           | 191/991 [48:34<3:20:50, 15.06s/batch, batch_loss=16.7, batch_index=191, batch_size=256]

Epoch 2/10:  19%|███             | 191/991 [48:49<3:20:50, 15.06s/batch, batch_loss=13, batch_index=192, batch_size=256]

Epoch 2/10:  19%|███             | 192/991 [48:49<3:19:16, 14.96s/batch, batch_loss=13, batch_index=192, batch_size=256]

Epoch 2/10:  19%|██▋           | 192/991 [49:03<3:19:16, 14.96s/batch, batch_loss=20.5, batch_index=193, batch_size=256]

Epoch 2/10:  19%|██▋           | 193/991 [49:03<3:16:24, 14.77s/batch, batch_loss=20.5, batch_index=193, batch_size=256]

Epoch 2/10:  19%|██▋           | 193/991 [49:21<3:16:24, 14.77s/batch, batch_loss=10.5, batch_index=194, batch_size=256]

Epoch 2/10:  20%|██▋           | 194/991 [49:21<3:29:12, 15.75s/batch, batch_loss=10.5, batch_index=194, batch_size=256]

Epoch 2/10:  20%|██▋           | 194/991 [49:37<3:29:12, 15.75s/batch, batch_loss=3.23, batch_index=195, batch_size=256]

Epoch 2/10:  20%|██▊           | 195/991 [49:37<3:28:26, 15.71s/batch, batch_loss=3.23, batch_index=195, batch_size=256]

Epoch 2/10:  20%|██▊           | 195/991 [49:53<3:28:26, 15.71s/batch, batch_loss=5.56, batch_index=196, batch_size=256]

Epoch 2/10:  20%|██▊           | 196/991 [49:53<3:27:59, 15.70s/batch, batch_loss=5.56, batch_index=196, batch_size=256]

Epoch 2/10:  20%|██▊           | 196/991 [50:08<3:27:59, 15.70s/batch, batch_loss=13.3, batch_index=197, batch_size=256]

Epoch 2/10:  20%|██▊           | 197/991 [50:08<3:25:43, 15.55s/batch, batch_loss=13.3, batch_index=197, batch_size=256]

Epoch 2/10:  20%|███▏            | 197/991 [50:25<3:25:43, 15.55s/batch, batch_loss=10, batch_index=198, batch_size=256]

Epoch 2/10:  20%|███▏            | 198/991 [50:25<3:33:30, 16.15s/batch, batch_loss=10, batch_index=198, batch_size=256]

Epoch 2/10:  20%|██▊           | 198/991 [50:39<3:33:30, 16.15s/batch, batch_loss=17.9, batch_index=199, batch_size=256]

Epoch 2/10:  20%|██▊           | 199/991 [50:39<3:24:00, 15.46s/batch, batch_loss=17.9, batch_index=199, batch_size=256]

Epoch 2/10:  20%|███            | 199/991 [50:54<3:24:00, 15.46s/batch, batch_loss=8.3, batch_index=200, batch_size=256]

Epoch 2/10:  20%|███            | 200/991 [50:54<3:21:42, 15.30s/batch, batch_loss=8.3, batch_index=200, batch_size=256]

Epoch 2/10:  20%|██▊           | 200/991 [51:11<3:21:42, 15.30s/batch, batch_loss=14.2, batch_index=201, batch_size=256]

Epoch 2/10:  20%|██▊           | 201/991 [51:11<3:29:13, 15.89s/batch, batch_loss=14.2, batch_index=201, batch_size=256]

Epoch 2/10:  20%|██▊           | 201/991 [51:32<3:29:13, 15.89s/batch, batch_loss=11.9, batch_index=202, batch_size=256]

Epoch 2/10:  20%|██▊           | 202/991 [51:32<3:47:01, 17.26s/batch, batch_loss=11.9, batch_index=202, batch_size=256]

Epoch 2/10:  20%|██▊           | 202/991 [51:50<3:47:01, 17.26s/batch, batch_loss=16.3, batch_index=203, batch_size=256]

Epoch 2/10:  20%|██▊           | 203/991 [51:50<3:50:05, 17.52s/batch, batch_loss=16.3, batch_index=203, batch_size=256]

Epoch 2/10:  20%|██▊           | 203/991 [52:07<3:50:05, 17.52s/batch, batch_loss=20.4, batch_index=204, batch_size=256]

Epoch 2/10:  21%|██▉           | 204/991 [52:07<3:47:08, 17.32s/batch, batch_loss=20.4, batch_index=204, batch_size=256]

Epoch 2/10:  21%|██▉           | 204/991 [52:24<3:47:08, 17.32s/batch, batch_loss=18.3, batch_index=205, batch_size=256]

Epoch 2/10:  21%|██▉           | 205/991 [52:24<3:47:46, 17.39s/batch, batch_loss=18.3, batch_index=205, batch_size=256]

Epoch 2/10:  21%|██▉           | 205/991 [52:41<3:47:46, 17.39s/batch, batch_loss=7.27, batch_index=206, batch_size=256]

Epoch 2/10:  21%|██▉           | 206/991 [52:41<3:44:16, 17.14s/batch, batch_loss=7.27, batch_index=206, batch_size=256]

Epoch 2/10:  21%|██▉           | 206/991 [52:59<3:44:16, 17.14s/batch, batch_loss=9.55, batch_index=207, batch_size=256]

Epoch 2/10:  21%|██▉           | 207/991 [52:59<3:46:57, 17.37s/batch, batch_loss=9.55, batch_index=207, batch_size=256]

Epoch 2/10:  21%|██▉           | 207/991 [53:15<3:46:57, 17.37s/batch, batch_loss=11.6, batch_index=208, batch_size=256]

Epoch 2/10:  21%|██▉           | 208/991 [53:15<3:41:13, 16.95s/batch, batch_loss=11.6, batch_index=208, batch_size=256]

Epoch 2/10:  21%|██▉           | 208/991 [53:34<3:41:13, 16.95s/batch, batch_loss=9.87, batch_index=209, batch_size=256]

Epoch 2/10:  21%|██▉           | 209/991 [53:34<3:50:12, 17.66s/batch, batch_loss=9.87, batch_index=209, batch_size=256]

Epoch 2/10:  21%|██▉           | 209/991 [53:51<3:50:12, 17.66s/batch, batch_loss=20.1, batch_index=210, batch_size=256]

Epoch 2/10:  21%|██▉           | 210/991 [53:51<3:46:40, 17.41s/batch, batch_loss=20.1, batch_index=210, batch_size=256]

Epoch 2/10:  21%|██▉           | 210/991 [54:07<3:46:40, 17.41s/batch, batch_loss=12.2, batch_index=211, batch_size=256]

Epoch 2/10:  21%|██▉           | 211/991 [54:07<3:42:17, 17.10s/batch, batch_loss=12.2, batch_index=211, batch_size=256]

Epoch 2/10:  21%|███▍            | 211/991 [54:23<3:42:17, 17.10s/batch, batch_loss=16, batch_index=212, batch_size=256]

Epoch 2/10:  21%|███▍            | 212/991 [54:23<3:37:22, 16.74s/batch, batch_loss=16, batch_index=212, batch_size=256]

Epoch 2/10:  21%|██▉           | 212/991 [54:38<3:37:22, 16.74s/batch, batch_loss=3.42, batch_index=213, batch_size=256]

Epoch 2/10:  21%|███           | 213/991 [54:38<3:30:41, 16.25s/batch, batch_loss=3.42, batch_index=213, batch_size=256]

Epoch 2/10:  21%|███▍            | 213/991 [54:55<3:30:41, 16.25s/batch, batch_loss=14, batch_index=214, batch_size=256]

Epoch 2/10:  22%|███▍            | 214/991 [54:55<3:30:14, 16.24s/batch, batch_loss=14, batch_index=214, batch_size=256]

Epoch 2/10:  22%|███           | 214/991 [55:10<3:30:14, 16.24s/batch, batch_loss=15.3, batch_index=215, batch_size=256]

Epoch 2/10:  22%|███           | 215/991 [55:10<3:27:51, 16.07s/batch, batch_loss=15.3, batch_index=215, batch_size=256]

Epoch 2/10:  22%|███           | 215/991 [55:25<3:27:51, 16.07s/batch, batch_loss=10.3, batch_index=216, batch_size=256]

Epoch 2/10:  22%|███           | 216/991 [55:25<3:23:43, 15.77s/batch, batch_loss=10.3, batch_index=216, batch_size=256]

Epoch 2/10:  22%|███           | 216/991 [55:40<3:23:43, 15.77s/batch, batch_loss=12.9, batch_index=217, batch_size=256]

Epoch 2/10:  22%|███           | 217/991 [55:40<3:17:41, 15.32s/batch, batch_loss=12.9, batch_index=217, batch_size=256]

Epoch 2/10:  22%|███           | 217/991 [55:54<3:17:41, 15.32s/batch, batch_loss=18.9, batch_index=218, batch_size=256]

Epoch 2/10:  22%|███           | 218/991 [55:54<3:15:10, 15.15s/batch, batch_loss=18.9, batch_index=218, batch_size=256]

Epoch 2/10:  22%|███           | 218/991 [56:12<3:15:10, 15.15s/batch, batch_loss=19.7, batch_index=219, batch_size=256]

Epoch 2/10:  22%|███           | 219/991 [56:12<3:23:13, 15.80s/batch, batch_loss=19.7, batch_index=219, batch_size=256]

Epoch 2/10:  22%|███           | 219/991 [56:28<3:23:13, 15.80s/batch, batch_loss=24.6, batch_index=220, batch_size=256]

Epoch 2/10:  22%|███           | 220/991 [56:28<3:24:57, 15.95s/batch, batch_loss=24.6, batch_index=220, batch_size=256]

Epoch 2/10:  22%|███           | 220/991 [56:43<3:24:57, 15.95s/batch, batch_loss=21.3, batch_index=221, batch_size=256]

Epoch 2/10:  22%|███           | 221/991 [56:43<3:21:20, 15.69s/batch, batch_loss=21.3, batch_index=221, batch_size=256]

Epoch 2/10:  22%|███           | 221/991 [56:58<3:21:20, 15.69s/batch, batch_loss=17.7, batch_index=222, batch_size=256]

Epoch 2/10:  22%|███▏          | 222/991 [56:58<3:19:15, 15.55s/batch, batch_loss=17.7, batch_index=222, batch_size=256]

Epoch 2/10:  22%|███▏          | 222/991 [57:13<3:19:15, 15.55s/batch, batch_loss=23.8, batch_index=223, batch_size=256]

Epoch 2/10:  23%|███▏          | 223/991 [57:13<3:16:07, 15.32s/batch, batch_loss=23.8, batch_index=223, batch_size=256]

Epoch 2/10:  23%|███▏          | 223/991 [57:28<3:16:07, 15.32s/batch, batch_loss=16.8, batch_index=224, batch_size=256]

Epoch 2/10:  23%|███▏          | 224/991 [57:28<3:14:27, 15.21s/batch, batch_loss=16.8, batch_index=224, batch_size=256]

Epoch 2/10:  23%|███▏          | 224/991 [57:43<3:14:27, 15.21s/batch, batch_loss=11.1, batch_index=225, batch_size=256]

Epoch 2/10:  23%|███▏          | 225/991 [57:43<3:13:32, 15.16s/batch, batch_loss=11.1, batch_index=225, batch_size=256]

Epoch 2/10:  23%|███▏          | 225/991 [57:58<3:13:32, 15.16s/batch, batch_loss=22.9, batch_index=226, batch_size=256]

Epoch 2/10:  23%|███▏          | 226/991 [57:58<3:11:08, 14.99s/batch, batch_loss=22.9, batch_index=226, batch_size=256]

Epoch 2/10:  23%|██▌        | 226/991 [58:16<3:11:08, 14.99s/batch, batch_loss=2.41e+3, batch_index=227, batch_size=256]

Epoch 2/10:  23%|██▌        | 227/991 [58:16<3:22:51, 15.93s/batch, batch_loss=2.41e+3, batch_index=227, batch_size=256]

Epoch 2/10:  23%|██▌        | 227/991 [58:32<3:22:51, 15.93s/batch, batch_loss=3.61e+3, batch_index=228, batch_size=256]

Epoch 2/10:  23%|██▌        | 228/991 [58:32<3:22:43, 15.94s/batch, batch_loss=3.61e+3, batch_index=228, batch_size=256]

Epoch 2/10:  23%|███▏          | 228/991 [58:46<3:22:43, 15.94s/batch, batch_loss=14.1, batch_index=229, batch_size=256]

Epoch 2/10:  23%|███▏          | 229/991 [58:46<3:17:41, 15.57s/batch, batch_loss=14.1, batch_index=229, batch_size=256]

Epoch 2/10:  23%|███▏          | 229/991 [59:02<3:17:41, 15.57s/batch, batch_loss=10.7, batch_index=230, batch_size=256]

Epoch 2/10:  23%|███▏          | 230/991 [59:02<3:17:00, 15.53s/batch, batch_loss=10.7, batch_index=230, batch_size=256]

Epoch 2/10:  23%|███▏          | 230/991 [59:18<3:17:00, 15.53s/batch, batch_loss=12.6, batch_index=231, batch_size=256]

Epoch 2/10:  23%|███▎          | 231/991 [59:18<3:18:44, 15.69s/batch, batch_loss=12.6, batch_index=231, batch_size=256]

Epoch 2/10:  23%|███▎          | 231/991 [59:34<3:18:44, 15.69s/batch, batch_loss=9.07, batch_index=232, batch_size=256]

Epoch 2/10:  23%|███▎          | 232/991 [59:34<3:19:12, 15.75s/batch, batch_loss=9.07, batch_index=232, batch_size=256]

Epoch 2/10:  23%|███▎          | 232/991 [59:49<3:19:12, 15.75s/batch, batch_loss=8.98, batch_index=233, batch_size=256]

Epoch 2/10:  24%|███▎          | 233/991 [59:49<3:18:09, 15.69s/batch, batch_loss=8.98, batch_index=233, batch_size=256]

Epoch 2/10:  24%|██▊         | 233/991 [1:00:06<3:18:09, 15.69s/batch, batch_loss=14.7, batch_index=234, batch_size=256]

Epoch 2/10:  24%|██▊         | 234/991 [1:00:06<3:22:27, 16.05s/batch, batch_loss=14.7, batch_index=234, batch_size=256]

Epoch 2/10:  24%|██▊         | 234/991 [1:00:21<3:22:27, 16.05s/batch, batch_loss=15.4, batch_index=235, batch_size=256]

Epoch 2/10:  24%|██▊         | 235/991 [1:00:21<3:18:30, 15.75s/batch, batch_loss=15.4, batch_index=235, batch_size=256]

Epoch 2/10:  24%|██▊         | 235/991 [1:00:37<3:18:30, 15.75s/batch, batch_loss=25.5, batch_index=236, batch_size=256]

Epoch 2/10:  24%|██▊         | 236/991 [1:00:37<3:18:55, 15.81s/batch, batch_loss=25.5, batch_index=236, batch_size=256]

Epoch 2/10:  24%|███▎          | 236/991 [1:00:53<3:18:55, 15.81s/batch, batch_loss=28, batch_index=237, batch_size=256]

Epoch 2/10:  24%|███▎          | 237/991 [1:00:53<3:16:46, 15.66s/batch, batch_loss=28, batch_index=237, batch_size=256]

Epoch 2/10:  24%|██▊         | 237/991 [1:01:09<3:16:46, 15.66s/batch, batch_loss=18.1, batch_index=238, batch_size=256]

Epoch 2/10:  24%|██▉         | 238/991 [1:01:09<3:17:55, 15.77s/batch, batch_loss=18.1, batch_index=238, batch_size=256]

Epoch 2/10:  24%|██▉         | 238/991 [1:01:24<3:17:55, 15.77s/batch, batch_loss=5.68, batch_index=239, batch_size=256]

Epoch 2/10:  24%|██▉         | 239/991 [1:01:24<3:15:39, 15.61s/batch, batch_loss=5.68, batch_index=239, batch_size=256]

Epoch 2/10:  24%|███▏         | 239/991 [1:01:39<3:15:39, 15.61s/batch, batch_loss=8.4, batch_index=240, batch_size=256]

Epoch 2/10:  24%|███▏         | 240/991 [1:01:39<3:15:19, 15.60s/batch, batch_loss=8.4, batch_index=240, batch_size=256]

Epoch 2/10:  24%|██▉         | 240/991 [1:01:55<3:15:19, 15.60s/batch, batch_loss=11.2, batch_index=241, batch_size=256]

Epoch 2/10:  24%|██▉         | 241/991 [1:01:55<3:15:51, 15.67s/batch, batch_loss=11.2, batch_index=241, batch_size=256]

Epoch 2/10:  24%|██▉         | 241/991 [1:02:11<3:15:51, 15.67s/batch, batch_loss=23.4, batch_index=242, batch_size=256]

Epoch 2/10:  24%|██▉         | 242/991 [1:02:11<3:14:38, 15.59s/batch, batch_loss=23.4, batch_index=242, batch_size=256]

Epoch 2/10:  24%|███▏         | 242/991 [1:02:27<3:14:38, 15.59s/batch, batch_loss=269, batch_index=243, batch_size=256]

Epoch 2/10:  25%|███▏         | 243/991 [1:02:27<3:15:49, 15.71s/batch, batch_loss=269, batch_index=243, batch_size=256]

Epoch 2/10:  25%|██▉         | 243/991 [1:02:43<3:15:49, 15.71s/batch, batch_loss=18.9, batch_index=244, batch_size=256]

Epoch 2/10:  25%|██▉         | 244/991 [1:02:43<3:17:04, 15.83s/batch, batch_loss=18.9, batch_index=244, batch_size=256]

Epoch 2/10:  25%|██▉         | 244/991 [1:02:58<3:17:04, 15.83s/batch, batch_loss=6.53, batch_index=245, batch_size=256]

Epoch 2/10:  25%|██▉         | 245/991 [1:02:58<3:12:52, 15.51s/batch, batch_loss=6.53, batch_index=245, batch_size=256]

Epoch 2/10:  25%|██▉         | 245/991 [1:03:12<3:12:52, 15.51s/batch, batch_loss=5.65, batch_index=246, batch_size=256]

Epoch 2/10:  25%|██▉         | 246/991 [1:03:12<3:10:23, 15.33s/batch, batch_loss=5.65, batch_index=246, batch_size=256]

Epoch 2/10:  25%|██▉         | 246/991 [1:03:28<3:10:23, 15.33s/batch, batch_loss=15.4, batch_index=247, batch_size=256]

Epoch 2/10:  25%|██▉         | 247/991 [1:03:28<3:09:01, 15.24s/batch, batch_loss=15.4, batch_index=247, batch_size=256]

Epoch 2/10:  25%|██▉         | 247/991 [1:03:41<3:09:01, 15.24s/batch, batch_loss=5.48, batch_index=248, batch_size=256]

Epoch 2/10:  25%|███         | 248/991 [1:03:41<3:03:46, 14.84s/batch, batch_loss=5.48, batch_index=248, batch_size=256]

Epoch 2/10:  25%|███         | 248/991 [1:03:59<3:03:46, 14.84s/batch, batch_loss=13.7, batch_index=249, batch_size=256]

Epoch 2/10:  25%|███         | 249/991 [1:03:59<3:12:15, 15.55s/batch, batch_loss=13.7, batch_index=249, batch_size=256]

Epoch 2/10:  25%|███         | 249/991 [1:04:13<3:12:15, 15.55s/batch, batch_loss=9.91, batch_index=250, batch_size=256]

Epoch 2/10:  25%|███         | 250/991 [1:04:13<3:06:16, 15.08s/batch, batch_loss=9.91, batch_index=250, batch_size=256]

Epoch 2/10:  25%|███         | 250/991 [1:04:28<3:06:16, 15.08s/batch, batch_loss=7.62, batch_index=251, batch_size=256]

Epoch 2/10:  25%|███         | 251/991 [1:04:28<3:07:24, 15.19s/batch, batch_loss=7.62, batch_index=251, batch_size=256]

Epoch 2/10:  25%|███         | 251/991 [1:04:43<3:07:24, 15.19s/batch, batch_loss=15.3, batch_index=252, batch_size=256]

Epoch 2/10:  25%|███         | 252/991 [1:04:43<3:07:32, 15.23s/batch, batch_loss=15.3, batch_index=252, batch_size=256]

Epoch 2/10:  25%|███         | 252/991 [1:04:58<3:07:32, 15.23s/batch, batch_loss=7.64, batch_index=253, batch_size=256]

Epoch 2/10:  26%|███         | 253/991 [1:04:58<3:05:20, 15.07s/batch, batch_loss=7.64, batch_index=253, batch_size=256]

Epoch 2/10:  26%|███         | 253/991 [1:05:13<3:05:20, 15.07s/batch, batch_loss=20.3, batch_index=254, batch_size=256]

Epoch 2/10:  26%|███         | 254/991 [1:05:13<3:02:49, 14.88s/batch, batch_loss=20.3, batch_index=254, batch_size=256]

Epoch 2/10:  26%|███         | 254/991 [1:05:29<3:02:49, 14.88s/batch, batch_loss=15.4, batch_index=255, batch_size=256]

Epoch 2/10:  26%|███         | 255/991 [1:05:29<3:09:53, 15.48s/batch, batch_loss=15.4, batch_index=255, batch_size=256]

Epoch 2/10:  26%|███▎         | 255/991 [1:05:43<3:09:53, 15.48s/batch, batch_loss=695, batch_index=256, batch_size=256]

Epoch 2/10:  26%|███▎         | 256/991 [1:05:43<3:03:38, 14.99s/batch, batch_loss=695, batch_index=256, batch_size=256]

Epoch 2/10:  26%|███         | 256/991 [1:05:57<3:03:38, 14.99s/batch, batch_loss=19.4, batch_index=257, batch_size=256]

Epoch 2/10:  26%|███         | 257/991 [1:05:57<3:00:30, 14.76s/batch, batch_loss=19.4, batch_index=257, batch_size=256]

Epoch 2/10:  26%|███▎         | 257/991 [1:06:11<3:00:30, 14.76s/batch, batch_loss=205, batch_index=258, batch_size=256]

Epoch 2/10:  26%|███▍         | 258/991 [1:06:11<2:57:36, 14.54s/batch, batch_loss=205, batch_index=258, batch_size=256]

Epoch 2/10:  26%|███         | 258/991 [1:06:26<2:57:36, 14.54s/batch, batch_loss=16.1, batch_index=259, batch_size=256]

Epoch 2/10:  26%|███▏        | 259/991 [1:06:26<2:57:04, 14.51s/batch, batch_loss=16.1, batch_index=259, batch_size=256]

Epoch 2/10:  26%|███▏        | 259/991 [1:06:41<2:57:04, 14.51s/batch, batch_loss=21.7, batch_index=260, batch_size=256]

Epoch 2/10:  26%|███▏        | 260/991 [1:06:41<2:58:41, 14.67s/batch, batch_loss=21.7, batch_index=260, batch_size=256]

Epoch 2/10:  26%|███▏        | 260/991 [1:06:56<2:58:41, 14.67s/batch, batch_loss=13.9, batch_index=261, batch_size=256]

Epoch 2/10:  26%|███▏        | 261/991 [1:06:56<3:01:23, 14.91s/batch, batch_loss=13.9, batch_index=261, batch_size=256]

Epoch 2/10:  26%|███▏        | 261/991 [1:07:13<3:01:23, 14.91s/batch, batch_loss=12.2, batch_index=262, batch_size=256]

Epoch 2/10:  26%|███▏        | 262/991 [1:07:13<3:05:55, 15.30s/batch, batch_loss=12.2, batch_index=262, batch_size=256]

Epoch 2/10:  26%|███▏        | 262/991 [1:07:29<3:05:55, 15.30s/batch, batch_loss=12.3, batch_index=263, batch_size=256]

Epoch 2/10:  27%|███▏        | 263/991 [1:07:29<3:08:27, 15.53s/batch, batch_loss=12.3, batch_index=263, batch_size=256]

Epoch 2/10:  27%|███▏        | 263/991 [1:07:44<3:08:27, 15.53s/batch, batch_loss=13.8, batch_index=264, batch_size=256]

Epoch 2/10:  27%|███▏        | 264/991 [1:07:44<3:07:44, 15.49s/batch, batch_loss=13.8, batch_index=264, batch_size=256]

Epoch 2/10:  27%|███▏        | 264/991 [1:07:59<3:07:44, 15.49s/batch, batch_loss=16.3, batch_index=265, batch_size=256]

Epoch 2/10:  27%|███▏        | 265/991 [1:07:59<3:04:48, 15.27s/batch, batch_loss=16.3, batch_index=265, batch_size=256]

Epoch 2/10:  27%|███▏        | 265/991 [1:08:15<3:04:48, 15.27s/batch, batch_loss=15.7, batch_index=266, batch_size=256]

Epoch 2/10:  27%|███▏        | 266/991 [1:08:15<3:07:07, 15.49s/batch, batch_loss=15.7, batch_index=266, batch_size=256]

Epoch 2/10:  27%|███▏        | 266/991 [1:08:30<3:07:07, 15.49s/batch, batch_loss=11.7, batch_index=267, batch_size=256]

Epoch 2/10:  27%|███▏        | 267/991 [1:08:30<3:05:39, 15.39s/batch, batch_loss=11.7, batch_index=267, batch_size=256]

Epoch 2/10:  27%|███▏        | 267/991 [1:08:45<3:05:39, 15.39s/batch, batch_loss=8.21, batch_index=268, batch_size=256]

Epoch 2/10:  27%|███▏        | 268/991 [1:08:45<3:05:33, 15.40s/batch, batch_loss=8.21, batch_index=268, batch_size=256]

Epoch 2/10:  27%|███▏        | 268/991 [1:09:02<3:05:33, 15.40s/batch, batch_loss=13.9, batch_index=269, batch_size=256]

Epoch 2/10:  27%|███▎        | 269/991 [1:09:02<3:08:19, 15.65s/batch, batch_loss=13.9, batch_index=269, batch_size=256]

Epoch 2/10:  27%|███▎        | 269/991 [1:09:18<3:08:19, 15.65s/batch, batch_loss=1.54, batch_index=270, batch_size=256]

Epoch 2/10:  27%|███▎        | 270/991 [1:09:18<3:08:50, 15.72s/batch, batch_loss=1.54, batch_index=270, batch_size=256]

Epoch 2/10:  27%|███▎        | 270/991 [1:09:33<3:08:50, 15.72s/batch, batch_loss=10.8, batch_index=271, batch_size=256]

Epoch 2/10:  27%|███▎        | 271/991 [1:09:33<3:08:32, 15.71s/batch, batch_loss=10.8, batch_index=271, batch_size=256]

Epoch 2/10:  27%|███▎        | 271/991 [1:09:49<3:08:32, 15.71s/batch, batch_loss=11.9, batch_index=272, batch_size=256]

Epoch 2/10:  27%|███▎        | 272/991 [1:09:49<3:09:55, 15.85s/batch, batch_loss=11.9, batch_index=272, batch_size=256]

Epoch 2/10:  27%|███▎        | 272/991 [1:10:05<3:09:55, 15.85s/batch, batch_loss=19.4, batch_index=273, batch_size=256]

Epoch 2/10:  28%|███▎        | 273/991 [1:10:05<3:07:30, 15.67s/batch, batch_loss=19.4, batch_index=273, batch_size=256]

Epoch 2/10:  28%|███▎        | 273/991 [1:10:20<3:07:30, 15.67s/batch, batch_loss=11.2, batch_index=274, batch_size=256]

Epoch 2/10:  28%|███▎        | 274/991 [1:10:20<3:07:29, 15.69s/batch, batch_loss=11.2, batch_index=274, batch_size=256]

Epoch 2/10:  28%|██▍      | 274/991 [1:10:37<3:07:29, 15.69s/batch, batch_loss=3.31e+3, batch_index=275, batch_size=256]

Epoch 2/10:  28%|██▍      | 275/991 [1:10:37<3:12:13, 16.11s/batch, batch_loss=3.31e+3, batch_index=275, batch_size=256]

Epoch 2/10:  28%|███▎        | 275/991 [1:10:53<3:12:13, 16.11s/batch, batch_loss=14.7, batch_index=276, batch_size=256]

Epoch 2/10:  28%|███▎        | 276/991 [1:10:53<3:10:02, 15.95s/batch, batch_loss=14.7, batch_index=276, batch_size=256]

Epoch 2/10:  28%|██▌      | 276/991 [1:11:08<3:10:02, 15.95s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 2/10:  28%|██▌      | 277/991 [1:11:08<3:06:16, 15.65s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 2/10:  28%|███▎        | 277/991 [1:11:23<3:06:16, 15.65s/batch, batch_loss=12.7, batch_index=278, batch_size=256]

Epoch 2/10:  28%|███▎        | 278/991 [1:11:23<3:04:09, 15.50s/batch, batch_loss=12.7, batch_index=278, batch_size=256]

Epoch 2/10:  28%|███▎        | 278/991 [1:11:39<3:04:09, 15.50s/batch, batch_loss=18.1, batch_index=279, batch_size=256]

Epoch 2/10:  28%|███▍        | 279/991 [1:11:39<3:03:29, 15.46s/batch, batch_loss=18.1, batch_index=279, batch_size=256]

Epoch 2/10:  28%|███▍        | 279/991 [1:11:56<3:03:29, 15.46s/batch, batch_loss=14.3, batch_index=280, batch_size=256]

Epoch 2/10:  28%|███▍        | 280/991 [1:11:56<3:08:32, 15.91s/batch, batch_loss=14.3, batch_index=280, batch_size=256]

Epoch 2/10:  28%|███▍        | 280/991 [1:12:11<3:08:32, 15.91s/batch, batch_loss=14.2, batch_index=281, batch_size=256]

Epoch 2/10:  28%|███▍        | 281/991 [1:12:11<3:07:04, 15.81s/batch, batch_loss=14.2, batch_index=281, batch_size=256]

Epoch 2/10:  28%|███▍        | 281/991 [1:12:26<3:07:04, 15.81s/batch, batch_loss=6.22, batch_index=282, batch_size=256]

Epoch 2/10:  28%|███▍        | 282/991 [1:12:26<3:01:58, 15.40s/batch, batch_loss=6.22, batch_index=282, batch_size=256]

Epoch 2/10:  28%|███▍        | 282/991 [1:12:40<3:01:58, 15.40s/batch, batch_loss=16.4, batch_index=283, batch_size=256]

Epoch 2/10:  29%|███▍        | 283/991 [1:12:40<3:00:11, 15.27s/batch, batch_loss=16.4, batch_index=283, batch_size=256]

Epoch 2/10:  29%|███▍        | 283/991 [1:12:56<3:00:11, 15.27s/batch, batch_loss=20.8, batch_index=284, batch_size=256]

Epoch 2/10:  29%|███▍        | 284/991 [1:12:56<3:01:44, 15.42s/batch, batch_loss=20.8, batch_index=284, batch_size=256]

Epoch 2/10:  29%|███▍        | 284/991 [1:13:11<3:01:44, 15.42s/batch, batch_loss=13.3, batch_index=285, batch_size=256]

Epoch 2/10:  29%|███▍        | 285/991 [1:13:11<2:59:55, 15.29s/batch, batch_loss=13.3, batch_index=285, batch_size=256]

Epoch 2/10:  29%|███▍        | 285/991 [1:13:26<2:59:55, 15.29s/batch, batch_loss=8.06, batch_index=286, batch_size=256]

Epoch 2/10:  29%|███▍        | 286/991 [1:13:26<2:56:38, 15.03s/batch, batch_loss=8.06, batch_index=286, batch_size=256]

Epoch 2/10:  29%|███▍        | 286/991 [1:13:40<2:56:38, 15.03s/batch, batch_loss=6.86, batch_index=287, batch_size=256]

Epoch 2/10:  29%|███▍        | 287/991 [1:13:40<2:53:22, 14.78s/batch, batch_loss=6.86, batch_index=287, batch_size=256]

Epoch 2/10:  29%|██▌      | 287/991 [1:13:55<2:53:22, 14.78s/batch, batch_loss=2.58e+3, batch_index=288, batch_size=256]

Epoch 2/10:  29%|██▌      | 288/991 [1:13:55<2:53:14, 14.79s/batch, batch_loss=2.58e+3, batch_index=288, batch_size=256]

Epoch 2/10:  29%|██▌      | 288/991 [1:14:10<2:53:14, 14.79s/batch, batch_loss=1.25e+3, batch_index=289, batch_size=256]

Epoch 2/10:  29%|██▌      | 289/991 [1:14:10<2:53:27, 14.83s/batch, batch_loss=1.25e+3, batch_index=289, batch_size=256]

Epoch 2/10:  29%|███▍        | 289/991 [1:14:24<2:53:27, 14.83s/batch, batch_loss=12.5, batch_index=290, batch_size=256]

Epoch 2/10:  29%|███▌        | 290/991 [1:14:24<2:51:56, 14.72s/batch, batch_loss=12.5, batch_index=290, batch_size=256]

Epoch 2/10:  29%|███▌        | 290/991 [1:14:39<2:51:56, 14.72s/batch, batch_loss=5.32, batch_index=291, batch_size=256]

Epoch 2/10:  29%|███▌        | 291/991 [1:14:39<2:52:43, 14.80s/batch, batch_loss=5.32, batch_index=291, batch_size=256]

Epoch 2/10:  29%|███▌        | 291/991 [1:14:54<2:52:43, 14.80s/batch, batch_loss=11.7, batch_index=292, batch_size=256]

Epoch 2/10:  29%|███▌        | 292/991 [1:14:54<2:51:47, 14.75s/batch, batch_loss=11.7, batch_index=292, batch_size=256]

Epoch 2/10:  29%|████▏         | 292/991 [1:15:08<2:51:47, 14.75s/batch, batch_loss=16, batch_index=293, batch_size=256]

Epoch 2/10:  30%|████▏         | 293/991 [1:15:08<2:51:25, 14.74s/batch, batch_loss=16, batch_index=293, batch_size=256]

Epoch 2/10:  30%|███▌        | 293/991 [1:15:23<2:51:25, 14.74s/batch, batch_loss=14.7, batch_index=294, batch_size=256]

Epoch 2/10:  30%|███▌        | 294/991 [1:15:23<2:50:37, 14.69s/batch, batch_loss=14.7, batch_index=294, batch_size=256]

Epoch 2/10:  30%|████▏         | 294/991 [1:15:38<2:50:37, 14.69s/batch, batch_loss=10, batch_index=295, batch_size=256]

Epoch 2/10:  30%|████▏         | 295/991 [1:15:38<2:50:22, 14.69s/batch, batch_loss=10, batch_index=295, batch_size=256]

Epoch 2/10:  30%|███▌        | 295/991 [1:15:52<2:50:22, 14.69s/batch, batch_loss=17.5, batch_index=296, batch_size=256]

Epoch 2/10:  30%|███▌        | 296/991 [1:15:52<2:49:12, 14.61s/batch, batch_loss=17.5, batch_index=296, batch_size=256]

Epoch 2/10:  30%|███▌        | 296/991 [1:16:08<2:49:12, 14.61s/batch, batch_loss=12.9, batch_index=297, batch_size=256]

Epoch 2/10:  30%|███▌        | 297/991 [1:16:08<2:52:29, 14.91s/batch, batch_loss=12.9, batch_index=297, batch_size=256]

Epoch 2/10:  30%|██▋      | 297/991 [1:16:23<2:52:29, 14.91s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 2/10:  30%|██▋      | 298/991 [1:16:23<2:52:34, 14.94s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 2/10:  30%|███▌        | 298/991 [1:16:38<2:52:34, 14.94s/batch, batch_loss=15.4, batch_index=299, batch_size=256]

Epoch 2/10:  30%|███▌        | 299/991 [1:16:38<2:53:44, 15.06s/batch, batch_loss=15.4, batch_index=299, batch_size=256]

Epoch 2/10:  30%|███▌        | 299/991 [1:16:53<2:53:44, 15.06s/batch, batch_loss=7.51, batch_index=300, batch_size=256]

Epoch 2/10:  30%|███▋        | 300/991 [1:16:53<2:52:29, 14.98s/batch, batch_loss=7.51, batch_index=300, batch_size=256]

Epoch 2/10:  30%|███▉         | 300/991 [1:17:07<2:52:29, 14.98s/batch, batch_loss=8.7, batch_index=301, batch_size=256]

Epoch 2/10:  30%|███▉         | 301/991 [1:17:07<2:48:18, 14.63s/batch, batch_loss=8.7, batch_index=301, batch_size=256]

Epoch 2/10:  30%|███▋        | 301/991 [1:17:21<2:48:18, 14.63s/batch, batch_loss=12.4, batch_index=302, batch_size=256]

Epoch 2/10:  30%|███▋        | 302/991 [1:17:21<2:46:24, 14.49s/batch, batch_loss=12.4, batch_index=302, batch_size=256]

Epoch 2/10:  30%|███▉         | 302/991 [1:17:36<2:46:24, 14.49s/batch, batch_loss=9.2, batch_index=303, batch_size=256]

Epoch 2/10:  31%|███▉         | 303/991 [1:17:36<2:48:05, 14.66s/batch, batch_loss=9.2, batch_index=303, batch_size=256]

Epoch 2/10:  31%|███▉         | 303/991 [1:17:51<2:48:05, 14.66s/batch, batch_loss=3.2, batch_index=304, batch_size=256]

Epoch 2/10:  31%|███▉         | 304/991 [1:17:51<2:48:36, 14.73s/batch, batch_loss=3.2, batch_index=304, batch_size=256]

Epoch 2/10:  31%|███▋        | 304/991 [1:18:06<2:48:36, 14.73s/batch, batch_loss=15.7, batch_index=305, batch_size=256]

Epoch 2/10:  31%|███▋        | 305/991 [1:18:06<2:51:15, 14.98s/batch, batch_loss=15.7, batch_index=305, batch_size=256]

Epoch 2/10:  31%|███▋        | 305/991 [1:18:20<2:51:15, 14.98s/batch, batch_loss=11.4, batch_index=306, batch_size=256]

Epoch 2/10:  31%|███▋        | 306/991 [1:18:20<2:47:32, 14.68s/batch, batch_loss=11.4, batch_index=306, batch_size=256]

Epoch 2/10:  31%|██▊      | 306/991 [1:18:34<2:47:32, 14.68s/batch, batch_loss=6.28e+3, batch_index=307, batch_size=256]

Epoch 2/10:  31%|██▊      | 307/991 [1:18:34<2:45:34, 14.52s/batch, batch_loss=6.28e+3, batch_index=307, batch_size=256]

Epoch 2/10:  31%|███▋        | 307/991 [1:18:48<2:45:34, 14.52s/batch, batch_loss=13.1, batch_index=308, batch_size=256]

Epoch 2/10:  31%|███▋        | 308/991 [1:18:48<2:43:10, 14.33s/batch, batch_loss=13.1, batch_index=308, batch_size=256]

Epoch 2/10:  31%|███▋        | 308/991 [1:19:03<2:43:10, 14.33s/batch, batch_loss=30.8, batch_index=309, batch_size=256]

Epoch 2/10:  31%|███▋        | 309/991 [1:19:03<2:44:11, 14.45s/batch, batch_loss=30.8, batch_index=309, batch_size=256]

Epoch 2/10:  31%|███▋        | 309/991 [1:19:21<2:44:11, 14.45s/batch, batch_loss=29.3, batch_index=310, batch_size=256]

Epoch 2/10:  31%|███▊        | 310/991 [1:19:21<2:56:11, 15.52s/batch, batch_loss=29.3, batch_index=310, batch_size=256]

Epoch 2/10:  31%|███▊        | 310/991 [1:19:37<2:56:11, 15.52s/batch, batch_loss=19.4, batch_index=311, batch_size=256]

Epoch 2/10:  31%|███▊        | 311/991 [1:19:37<2:56:29, 15.57s/batch, batch_loss=19.4, batch_index=311, batch_size=256]

Epoch 2/10:  31%|███▊        | 311/991 [1:19:52<2:56:29, 15.57s/batch, batch_loss=12.6, batch_index=312, batch_size=256]

Epoch 2/10:  31%|███▊        | 312/991 [1:19:52<2:55:04, 15.47s/batch, batch_loss=12.6, batch_index=312, batch_size=256]

Epoch 2/10:  31%|██▊      | 312/991 [1:20:07<2:55:04, 15.47s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 2/10:  32%|██▊      | 313/991 [1:20:07<2:54:41, 15.46s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 2/10:  32%|███▊        | 313/991 [1:20:23<2:54:41, 15.46s/batch, batch_loss=10.6, batch_index=314, batch_size=256]

Epoch 2/10:  32%|███▊        | 314/991 [1:20:23<2:53:57, 15.42s/batch, batch_loss=10.6, batch_index=314, batch_size=256]

Epoch 2/10:  32%|███▊        | 314/991 [1:20:37<2:53:57, 15.42s/batch, batch_loss=16.1, batch_index=315, batch_size=256]

Epoch 2/10:  32%|███▊        | 315/991 [1:20:37<2:49:30, 15.04s/batch, batch_loss=16.1, batch_index=315, batch_size=256]

Epoch 2/10:  32%|███▊        | 315/991 [1:20:50<2:49:30, 15.04s/batch, batch_loss=21.6, batch_index=316, batch_size=256]

Epoch 2/10:  32%|███▊        | 316/991 [1:20:50<2:43:50, 14.56s/batch, batch_loss=21.6, batch_index=316, batch_size=256]

Epoch 2/10:  32%|███▊        | 316/991 [1:21:04<2:43:50, 14.56s/batch, batch_loss=21.9, batch_index=317, batch_size=256]

Epoch 2/10:  32%|███▊        | 317/991 [1:21:04<2:41:42, 14.40s/batch, batch_loss=21.9, batch_index=317, batch_size=256]

Epoch 2/10:  32%|███▊        | 317/991 [1:21:20<2:41:42, 14.40s/batch, batch_loss=21.1, batch_index=318, batch_size=256]

Epoch 2/10:  32%|███▊        | 318/991 [1:21:20<2:46:21, 14.83s/batch, batch_loss=21.1, batch_index=318, batch_size=256]

Epoch 2/10:  32%|███▊        | 318/991 [1:21:36<2:46:21, 14.83s/batch, batch_loss=18.4, batch_index=319, batch_size=256]

Epoch 2/10:  32%|███▊        | 319/991 [1:21:36<2:48:13, 15.02s/batch, batch_loss=18.4, batch_index=319, batch_size=256]

Epoch 2/10:  32%|███▊        | 319/991 [1:21:52<2:48:13, 15.02s/batch, batch_loss=15.2, batch_index=320, batch_size=256]

Epoch 2/10:  32%|███▊        | 320/991 [1:21:52<2:52:36, 15.43s/batch, batch_loss=15.2, batch_index=320, batch_size=256]

Epoch 2/10:  32%|███▊        | 320/991 [1:22:08<2:52:36, 15.43s/batch, batch_loss=27.6, batch_index=321, batch_size=256]

Epoch 2/10:  32%|███▉        | 321/991 [1:22:08<2:53:51, 15.57s/batch, batch_loss=27.6, batch_index=321, batch_size=256]

Epoch 2/10:  32%|███▉        | 321/991 [1:22:25<2:53:51, 15.57s/batch, batch_loss=7.27, batch_index=322, batch_size=256]

Epoch 2/10:  32%|███▉        | 322/991 [1:22:25<2:59:15, 16.08s/batch, batch_loss=7.27, batch_index=322, batch_size=256]

Epoch 2/10:  32%|███▉        | 322/991 [1:22:41<2:59:15, 16.08s/batch, batch_loss=8.93, batch_index=323, batch_size=256]

Epoch 2/10:  33%|███▉        | 323/991 [1:22:41<2:56:25, 15.85s/batch, batch_loss=8.93, batch_index=323, batch_size=256]

Epoch 2/10:  33%|███▉        | 323/991 [1:22:57<2:56:25, 15.85s/batch, batch_loss=21.1, batch_index=324, batch_size=256]

Epoch 2/10:  33%|███▉        | 324/991 [1:22:57<2:57:51, 16.00s/batch, batch_loss=21.1, batch_index=324, batch_size=256]

Epoch 2/10:  33%|████▌         | 324/991 [1:23:11<2:57:51, 16.00s/batch, batch_loss=11, batch_index=325, batch_size=256]

Epoch 2/10:  33%|████▌         | 325/991 [1:23:11<2:52:41, 15.56s/batch, batch_loss=11, batch_index=325, batch_size=256]

Epoch 2/10:  33%|███▉        | 325/991 [1:23:26<2:52:41, 15.56s/batch, batch_loss=24.4, batch_index=326, batch_size=256]

Epoch 2/10:  33%|███▉        | 326/991 [1:23:26<2:50:28, 15.38s/batch, batch_loss=24.4, batch_index=326, batch_size=256]

Epoch 2/10:  33%|██▉      | 326/991 [1:23:44<2:50:28, 15.38s/batch, batch_loss=3.05e+3, batch_index=327, batch_size=256]

Epoch 2/10:  33%|██▉      | 327/991 [1:23:44<2:56:49, 15.98s/batch, batch_loss=3.05e+3, batch_index=327, batch_size=256]

Epoch 2/10:  33%|███▉        | 327/991 [1:23:59<2:56:49, 15.98s/batch, batch_loss=10.6, batch_index=328, batch_size=256]

Epoch 2/10:  33%|███▉        | 328/991 [1:23:59<2:53:24, 15.69s/batch, batch_loss=10.6, batch_index=328, batch_size=256]

Epoch 2/10:  33%|███▉        | 328/991 [1:24:13<2:53:24, 15.69s/batch, batch_loss=18.9, batch_index=329, batch_size=256]

Epoch 2/10:  33%|███▉        | 329/991 [1:24:13<2:48:09, 15.24s/batch, batch_loss=18.9, batch_index=329, batch_size=256]

Epoch 2/10:  33%|███▉        | 329/991 [1:24:27<2:48:09, 15.24s/batch, batch_loss=14.1, batch_index=330, batch_size=256]

Epoch 2/10:  33%|███▉        | 330/991 [1:24:27<2:44:35, 14.94s/batch, batch_loss=14.1, batch_index=330, batch_size=256]

Epoch 2/10:  33%|███▉        | 330/991 [1:24:42<2:44:35, 14.94s/batch, batch_loss=13.7, batch_index=331, batch_size=256]

Epoch 2/10:  33%|████        | 331/991 [1:24:42<2:43:01, 14.82s/batch, batch_loss=13.7, batch_index=331, batch_size=256]

Epoch 2/10:  33%|████        | 331/991 [1:24:56<2:43:01, 14.82s/batch, batch_loss=17.1, batch_index=332, batch_size=256]

Epoch 2/10:  34%|████        | 332/991 [1:24:56<2:41:32, 14.71s/batch, batch_loss=17.1, batch_index=332, batch_size=256]

Epoch 2/10:  34%|████        | 332/991 [1:25:12<2:41:32, 14.71s/batch, batch_loss=13.7, batch_index=333, batch_size=256]

Epoch 2/10:  34%|████        | 333/991 [1:25:12<2:43:13, 14.88s/batch, batch_loss=13.7, batch_index=333, batch_size=256]

Epoch 2/10:  34%|████        | 333/991 [1:25:26<2:43:13, 14.88s/batch, batch_loss=18.3, batch_index=334, batch_size=256]

Epoch 2/10:  34%|████        | 334/991 [1:25:26<2:41:26, 14.74s/batch, batch_loss=18.3, batch_index=334, batch_size=256]

Epoch 2/10:  34%|████        | 334/991 [1:25:44<2:41:26, 14.74s/batch, batch_loss=4.86, batch_index=335, batch_size=256]

Epoch 2/10:  34%|████        | 335/991 [1:25:44<2:51:54, 15.72s/batch, batch_loss=4.86, batch_index=335, batch_size=256]

Epoch 2/10:  34%|███      | 335/991 [1:25:58<2:51:54, 15.72s/batch, batch_loss=8.49e+3, batch_index=336, batch_size=256]

Epoch 2/10:  34%|███      | 336/991 [1:25:58<2:47:33, 15.35s/batch, batch_loss=8.49e+3, batch_index=336, batch_size=256]

Epoch 2/10:  34%|███      | 336/991 [1:26:13<2:47:33, 15.35s/batch, batch_loss=2.35e+3, batch_index=337, batch_size=256]

Epoch 2/10:  34%|███      | 337/991 [1:26:13<2:45:06, 15.15s/batch, batch_loss=2.35e+3, batch_index=337, batch_size=256]

Epoch 2/10:  34%|████        | 337/991 [1:26:28<2:45:06, 15.15s/batch, batch_loss=8.54, batch_index=338, batch_size=256]

Epoch 2/10:  34%|████        | 338/991 [1:26:28<2:43:12, 15.00s/batch, batch_loss=8.54, batch_index=338, batch_size=256]

Epoch 2/10:  34%|████▊         | 338/991 [1:26:43<2:43:12, 15.00s/batch, batch_loss=27, batch_index=339, batch_size=256]

Epoch 2/10:  34%|████▊         | 339/991 [1:26:43<2:42:25, 14.95s/batch, batch_loss=27, batch_index=339, batch_size=256]

Epoch 2/10:  34%|████        | 339/991 [1:26:57<2:42:25, 14.95s/batch, batch_loss=9.59, batch_index=340, batch_size=256]

Epoch 2/10:  34%|████        | 340/991 [1:26:57<2:41:36, 14.89s/batch, batch_loss=9.59, batch_index=340, batch_size=256]

Epoch 2/10:  34%|████        | 340/991 [1:27:12<2:41:36, 14.89s/batch, batch_loss=9.44, batch_index=341, batch_size=256]

Epoch 2/10:  34%|████▏       | 341/991 [1:27:12<2:39:11, 14.69s/batch, batch_loss=9.44, batch_index=341, batch_size=256]

Epoch 2/10:  34%|███▊       | 341/991 [1:27:26<2:39:11, 14.69s/batch, batch_loss=0.786, batch_index=342, batch_size=256]

Epoch 2/10:  35%|███▊       | 342/991 [1:27:26<2:38:27, 14.65s/batch, batch_loss=0.786, batch_index=342, batch_size=256]

Epoch 2/10:  35%|████▍        | 342/991 [1:27:43<2:38:27, 14.65s/batch, batch_loss=7.1, batch_index=343, batch_size=256]

Epoch 2/10:  35%|████▍        | 343/991 [1:27:43<2:45:33, 15.33s/batch, batch_loss=7.1, batch_index=343, batch_size=256]

Epoch 2/10:  35%|████▏       | 343/991 [1:27:58<2:45:33, 15.33s/batch, batch_loss=17.2, batch_index=344, batch_size=256]

Epoch 2/10:  35%|████▏       | 344/991 [1:27:58<2:44:24, 15.25s/batch, batch_loss=17.2, batch_index=344, batch_size=256]

Epoch 2/10:  35%|████▌        | 344/991 [1:28:13<2:44:24, 15.25s/batch, batch_loss=114, batch_index=345, batch_size=256]

Epoch 2/10:  35%|████▌        | 345/991 [1:28:13<2:43:47, 15.21s/batch, batch_loss=114, batch_index=345, batch_size=256]

Epoch 2/10:  35%|████▏       | 345/991 [1:28:29<2:43:47, 15.21s/batch, batch_loss=13.7, batch_index=346, batch_size=256]

Epoch 2/10:  35%|████▏       | 346/991 [1:28:29<2:44:09, 15.27s/batch, batch_loss=13.7, batch_index=346, batch_size=256]

Epoch 2/10:  35%|████▏       | 346/991 [1:28:44<2:44:09, 15.27s/batch, batch_loss=12.7, batch_index=347, batch_size=256]

Epoch 2/10:  35%|████▏       | 347/991 [1:28:44<2:44:14, 15.30s/batch, batch_loss=12.7, batch_index=347, batch_size=256]

Epoch 2/10:  35%|████▏       | 347/991 [1:28:58<2:44:14, 15.30s/batch, batch_loss=12.8, batch_index=348, batch_size=256]

Epoch 2/10:  35%|████▏       | 348/991 [1:28:58<2:38:47, 14.82s/batch, batch_loss=12.8, batch_index=348, batch_size=256]

Epoch 2/10:  35%|████▏       | 348/991 [1:29:12<2:38:47, 14.82s/batch, batch_loss=9.86, batch_index=349, batch_size=256]

Epoch 2/10:  35%|████▏       | 349/991 [1:29:12<2:36:07, 14.59s/batch, batch_loss=9.86, batch_index=349, batch_size=256]

Epoch 2/10:  35%|████▏       | 349/991 [1:29:26<2:36:07, 14.59s/batch, batch_loss=12.7, batch_index=350, batch_size=256]

Epoch 2/10:  35%|████▏       | 350/991 [1:29:26<2:36:12, 14.62s/batch, batch_loss=12.7, batch_index=350, batch_size=256]

Epoch 2/10:  35%|████▏       | 350/991 [1:29:41<2:36:12, 14.62s/batch, batch_loss=8.21, batch_index=351, batch_size=256]

Epoch 2/10:  35%|████▎       | 351/991 [1:29:41<2:35:46, 14.60s/batch, batch_loss=8.21, batch_index=351, batch_size=256]

Epoch 2/10:  35%|████▎       | 351/991 [1:29:56<2:35:46, 14.60s/batch, batch_loss=14.8, batch_index=352, batch_size=256]

Epoch 2/10:  36%|████▎       | 352/991 [1:29:56<2:38:23, 14.87s/batch, batch_loss=14.8, batch_index=352, batch_size=256]

Epoch 2/10:  36%|████▎       | 352/991 [1:30:12<2:38:23, 14.87s/batch, batch_loss=15.1, batch_index=353, batch_size=256]

Epoch 2/10:  36%|████▎       | 353/991 [1:30:12<2:39:09, 14.97s/batch, batch_loss=15.1, batch_index=353, batch_size=256]

Epoch 2/10:  36%|████▎       | 353/991 [1:30:27<2:39:09, 14.97s/batch, batch_loss=22.5, batch_index=354, batch_size=256]

Epoch 2/10:  36%|████▎       | 354/991 [1:30:27<2:40:55, 15.16s/batch, batch_loss=22.5, batch_index=354, batch_size=256]

Epoch 2/10:  36%|████▎       | 354/991 [1:30:42<2:40:55, 15.16s/batch, batch_loss=9.27, batch_index=355, batch_size=256]

Epoch 2/10:  36%|████▎       | 355/991 [1:30:42<2:39:43, 15.07s/batch, batch_loss=9.27, batch_index=355, batch_size=256]

Epoch 2/10:  36%|████▎       | 355/991 [1:30:57<2:39:43, 15.07s/batch, batch_loss=17.9, batch_index=356, batch_size=256]

Epoch 2/10:  36%|████▎       | 356/991 [1:30:57<2:38:32, 14.98s/batch, batch_loss=17.9, batch_index=356, batch_size=256]

Epoch 2/10:  36%|████▎       | 356/991 [1:31:12<2:38:32, 14.98s/batch, batch_loss=14.1, batch_index=357, batch_size=256]

Epoch 2/10:  36%|████▎       | 357/991 [1:31:12<2:39:50, 15.13s/batch, batch_loss=14.1, batch_index=357, batch_size=256]

Epoch 2/10:  36%|████▎       | 357/991 [1:31:28<2:39:50, 15.13s/batch, batch_loss=13.6, batch_index=358, batch_size=256]

Epoch 2/10:  36%|████▎       | 358/991 [1:31:28<2:40:45, 15.24s/batch, batch_loss=13.6, batch_index=358, batch_size=256]

Epoch 2/10:  36%|████▎       | 358/991 [1:31:46<2:40:45, 15.24s/batch, batch_loss=4.93, batch_index=359, batch_size=256]

Epoch 2/10:  36%|████▎       | 359/991 [1:31:46<2:48:58, 16.04s/batch, batch_loss=4.93, batch_index=359, batch_size=256]

Epoch 2/10:  36%|████▋        | 359/991 [1:32:01<2:48:58, 16.04s/batch, batch_loss=9.7, batch_index=360, batch_size=256]

Epoch 2/10:  36%|████▋        | 360/991 [1:32:01<2:46:09, 15.80s/batch, batch_loss=9.7, batch_index=360, batch_size=256]

Epoch 2/10:  36%|████▎       | 360/991 [1:32:16<2:46:09, 15.80s/batch, batch_loss=26.8, batch_index=361, batch_size=256]

Epoch 2/10:  36%|████▎       | 361/991 [1:32:16<2:42:30, 15.48s/batch, batch_loss=26.8, batch_index=361, batch_size=256]

Epoch 2/10:  36%|█████         | 361/991 [1:32:32<2:42:30, 15.48s/batch, batch_loss=19, batch_index=362, batch_size=256]

Epoch 2/10:  37%|█████         | 362/991 [1:32:32<2:43:26, 15.59s/batch, batch_loss=19, batch_index=362, batch_size=256]

Epoch 2/10:  37%|████▍       | 362/991 [1:32:47<2:43:26, 15.59s/batch, batch_loss=11.3, batch_index=363, batch_size=256]

Epoch 2/10:  37%|████▍       | 363/991 [1:32:47<2:41:37, 15.44s/batch, batch_loss=11.3, batch_index=363, batch_size=256]

Epoch 2/10:  37%|████▍       | 363/991 [1:33:01<2:41:37, 15.44s/batch, batch_loss=14.2, batch_index=364, batch_size=256]

Epoch 2/10:  37%|████▍       | 364/991 [1:33:01<2:38:03, 15.13s/batch, batch_loss=14.2, batch_index=364, batch_size=256]

Epoch 2/10:  37%|████▍       | 364/991 [1:33:16<2:38:03, 15.13s/batch, batch_loss=10.2, batch_index=365, batch_size=256]

Epoch 2/10:  37%|████▍       | 365/991 [1:33:16<2:38:19, 15.18s/batch, batch_loss=10.2, batch_index=365, batch_size=256]

Epoch 2/10:  37%|████▍       | 365/991 [1:33:32<2:38:19, 15.18s/batch, batch_loss=12.9, batch_index=366, batch_size=256]

Epoch 2/10:  37%|████▍       | 366/991 [1:33:32<2:39:07, 15.28s/batch, batch_loss=12.9, batch_index=366, batch_size=256]

Epoch 2/10:  37%|████▍       | 366/991 [1:33:47<2:39:07, 15.28s/batch, batch_loss=13.6, batch_index=367, batch_size=256]

Epoch 2/10:  37%|████▍       | 367/991 [1:33:47<2:39:06, 15.30s/batch, batch_loss=13.6, batch_index=367, batch_size=256]

Epoch 2/10:  37%|████▍       | 367/991 [1:34:03<2:39:06, 15.30s/batch, batch_loss=13.3, batch_index=368, batch_size=256]

Epoch 2/10:  37%|████▍       | 368/991 [1:34:03<2:39:36, 15.37s/batch, batch_loss=13.3, batch_index=368, batch_size=256]

Epoch 2/10:  37%|████▍       | 368/991 [1:34:19<2:39:36, 15.37s/batch, batch_loss=13.6, batch_index=369, batch_size=256]

Epoch 2/10:  37%|████▍       | 369/991 [1:34:19<2:42:17, 15.66s/batch, batch_loss=13.6, batch_index=369, batch_size=256]

Epoch 2/10:  37%|███▎     | 369/991 [1:34:35<2:42:17, 15.66s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 2/10:  37%|███▎     | 370/991 [1:34:35<2:41:35, 15.61s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 2/10:  37%|█████▏        | 370/991 [1:34:50<2:41:35, 15.61s/batch, batch_loss=22, batch_index=371, batch_size=256]

Epoch 2/10:  37%|█████▏        | 371/991 [1:34:50<2:41:00, 15.58s/batch, batch_loss=22, batch_index=371, batch_size=256]

Epoch 2/10:  37%|████▍       | 371/991 [1:35:05<2:41:00, 15.58s/batch, batch_loss=13.8, batch_index=372, batch_size=256]

Epoch 2/10:  38%|████▌       | 372/991 [1:35:05<2:39:15, 15.44s/batch, batch_loss=13.8, batch_index=372, batch_size=256]

Epoch 2/10:  38%|█████▎        | 372/991 [1:35:20<2:39:15, 15.44s/batch, batch_loss=26, batch_index=373, batch_size=256]

Epoch 2/10:  38%|█████▎        | 373/991 [1:35:20<2:37:29, 15.29s/batch, batch_loss=26, batch_index=373, batch_size=256]

Epoch 2/10:  38%|████▉        | 373/991 [1:35:35<2:37:29, 15.29s/batch, batch_loss=472, batch_index=374, batch_size=256]

Epoch 2/10:  38%|████▉        | 374/991 [1:35:35<2:34:41, 15.04s/batch, batch_loss=472, batch_index=374, batch_size=256]

Epoch 2/10:  38%|███▍     | 374/991 [1:35:51<2:34:41, 15.04s/batch, batch_loss=1.43e+3, batch_index=375, batch_size=256]

Epoch 2/10:  38%|███▍     | 375/991 [1:35:51<2:38:19, 15.42s/batch, batch_loss=1.43e+3, batch_index=375, batch_size=256]

Epoch 2/10:  38%|███▍     | 375/991 [1:36:06<2:38:19, 15.42s/batch, batch_loss=1.22e+3, batch_index=376, batch_size=256]

Epoch 2/10:  38%|███▍     | 376/991 [1:36:06<2:36:59, 15.32s/batch, batch_loss=1.22e+3, batch_index=376, batch_size=256]

Epoch 2/10:  38%|████▌       | 376/991 [1:36:21<2:36:59, 15.32s/batch, batch_loss=20.6, batch_index=377, batch_size=256]

Epoch 2/10:  38%|████▌       | 377/991 [1:36:21<2:34:16, 15.07s/batch, batch_loss=20.6, batch_index=377, batch_size=256]

Epoch 2/10:  38%|███▍     | 377/991 [1:36:35<2:34:16, 15.07s/batch, batch_loss=1.18e+3, batch_index=378, batch_size=256]

Epoch 2/10:  38%|███▍     | 378/991 [1:36:35<2:31:56, 14.87s/batch, batch_loss=1.18e+3, batch_index=378, batch_size=256]

Epoch 2/10:  38%|████▌       | 378/991 [1:36:49<2:31:56, 14.87s/batch, batch_loss=11.5, batch_index=379, batch_size=256]

Epoch 2/10:  38%|████▌       | 379/991 [1:36:49<2:29:51, 14.69s/batch, batch_loss=11.5, batch_index=379, batch_size=256]

Epoch 2/10:  38%|████▌       | 379/991 [1:37:04<2:29:51, 14.69s/batch, batch_loss=13.2, batch_index=380, batch_size=256]

Epoch 2/10:  38%|████▌       | 380/991 [1:37:04<2:29:01, 14.63s/batch, batch_loss=13.2, batch_index=380, batch_size=256]

Epoch 2/10:  38%|████▌       | 380/991 [1:37:20<2:29:01, 14.63s/batch, batch_loss=19.6, batch_index=381, batch_size=256]

Epoch 2/10:  38%|████▌       | 381/991 [1:37:20<2:33:33, 15.10s/batch, batch_loss=19.6, batch_index=381, batch_size=256]

Epoch 2/10:  38%|████▌       | 381/991 [1:37:34<2:33:33, 15.10s/batch, batch_loss=11.1, batch_index=382, batch_size=256]

Epoch 2/10:  39%|████▋       | 382/991 [1:37:34<2:30:41, 14.85s/batch, batch_loss=11.1, batch_index=382, batch_size=256]

Epoch 2/10:  39%|████▋       | 382/991 [1:37:49<2:30:41, 14.85s/batch, batch_loss=10.5, batch_index=383, batch_size=256]

Epoch 2/10:  39%|████▋       | 383/991 [1:37:49<2:30:27, 14.85s/batch, batch_loss=10.5, batch_index=383, batch_size=256]

Epoch 2/10:  39%|████▋       | 383/991 [1:38:04<2:30:27, 14.85s/batch, batch_loss=24.5, batch_index=384, batch_size=256]

Epoch 2/10:  39%|████▋       | 384/991 [1:38:04<2:30:18, 14.86s/batch, batch_loss=24.5, batch_index=384, batch_size=256]

Epoch 2/10:  39%|████▋       | 384/991 [1:38:18<2:30:18, 14.86s/batch, batch_loss=9.22, batch_index=385, batch_size=256]

Epoch 2/10:  39%|████▋       | 385/991 [1:38:18<2:28:58, 14.75s/batch, batch_loss=9.22, batch_index=385, batch_size=256]

Epoch 2/10:  39%|████▋       | 385/991 [1:38:33<2:28:58, 14.75s/batch, batch_loss=19.7, batch_index=386, batch_size=256]

Epoch 2/10:  39%|████▋       | 386/991 [1:38:33<2:27:26, 14.62s/batch, batch_loss=19.7, batch_index=386, batch_size=256]

Epoch 2/10:  39%|████▋       | 386/991 [1:38:48<2:27:26, 14.62s/batch, batch_loss=24.4, batch_index=387, batch_size=256]

Epoch 2/10:  39%|████▋       | 387/991 [1:38:48<2:28:36, 14.76s/batch, batch_loss=24.4, batch_index=387, batch_size=256]

Epoch 2/10:  39%|█████        | 387/991 [1:39:02<2:28:36, 14.76s/batch, batch_loss=796, batch_index=388, batch_size=256]

Epoch 2/10:  39%|█████        | 388/991 [1:39:02<2:27:29, 14.68s/batch, batch_loss=796, batch_index=388, batch_size=256]

Epoch 2/10:  39%|████▋       | 388/991 [1:39:18<2:27:29, 14.68s/batch, batch_loss=14.8, batch_index=389, batch_size=256]

Epoch 2/10:  39%|████▋       | 389/991 [1:39:18<2:29:03, 14.86s/batch, batch_loss=14.8, batch_index=389, batch_size=256]

Epoch 2/10:  39%|█████        | 389/991 [1:39:34<2:29:03, 14.86s/batch, batch_loss=866, batch_index=390, batch_size=256]

Epoch 2/10:  39%|█████        | 390/991 [1:39:34<2:32:26, 15.22s/batch, batch_loss=866, batch_index=390, batch_size=256]

Epoch 2/10:  39%|████▋       | 390/991 [1:39:50<2:32:26, 15.22s/batch, batch_loss=19.7, batch_index=391, batch_size=256]

Epoch 2/10:  39%|████▋       | 391/991 [1:39:50<2:35:01, 15.50s/batch, batch_loss=19.7, batch_index=391, batch_size=256]

Epoch 2/10:  39%|████▋       | 391/991 [1:40:05<2:35:01, 15.50s/batch, batch_loss=15.3, batch_index=392, batch_size=256]

Epoch 2/10:  40%|████▋       | 392/991 [1:40:05<2:34:46, 15.50s/batch, batch_loss=15.3, batch_index=392, batch_size=256]

Epoch 2/10:  40%|████▋       | 392/991 [1:40:21<2:34:46, 15.50s/batch, batch_loss=20.7, batch_index=393, batch_size=256]

Epoch 2/10:  40%|████▊       | 393/991 [1:40:21<2:34:14, 15.48s/batch, batch_loss=20.7, batch_index=393, batch_size=256]

Epoch 2/10:  40%|█████▏       | 393/991 [1:40:39<2:34:14, 15.48s/batch, batch_loss=614, batch_index=394, batch_size=256]

Epoch 2/10:  40%|█████▏       | 394/991 [1:40:39<2:42:37, 16.35s/batch, batch_loss=614, batch_index=394, batch_size=256]

Epoch 2/10:  40%|████▊       | 394/991 [1:40:54<2:42:37, 16.35s/batch, batch_loss=18.8, batch_index=395, batch_size=256]

Epoch 2/10:  40%|████▊       | 395/991 [1:40:54<2:37:22, 15.84s/batch, batch_loss=18.8, batch_index=395, batch_size=256]

Epoch 2/10:  40%|████▊       | 395/991 [1:41:09<2:37:22, 15.84s/batch, batch_loss=12.1, batch_index=396, batch_size=256]

Epoch 2/10:  40%|████▊       | 396/991 [1:41:09<2:35:01, 15.63s/batch, batch_loss=12.1, batch_index=396, batch_size=256]

Epoch 2/10:  40%|████▊       | 396/991 [1:41:25<2:35:01, 15.63s/batch, batch_loss=16.1, batch_index=397, batch_size=256]

Epoch 2/10:  40%|████▊       | 397/991 [1:41:25<2:34:52, 15.64s/batch, batch_loss=16.1, batch_index=397, batch_size=256]

Epoch 2/10:  40%|████▊       | 397/991 [1:41:40<2:34:52, 15.64s/batch, batch_loss=14.8, batch_index=398, batch_size=256]

Epoch 2/10:  40%|████▊       | 398/991 [1:41:40<2:32:57, 15.48s/batch, batch_loss=14.8, batch_index=398, batch_size=256]

Epoch 2/10:  40%|████▊       | 398/991 [1:41:56<2:32:57, 15.48s/batch, batch_loss=23.8, batch_index=399, batch_size=256]

Epoch 2/10:  40%|████▊       | 399/991 [1:41:56<2:34:16, 15.64s/batch, batch_loss=23.8, batch_index=399, batch_size=256]

Epoch 2/10:  40%|████▊       | 399/991 [1:42:12<2:34:16, 15.64s/batch, batch_loss=10.8, batch_index=400, batch_size=256]

Epoch 2/10:  40%|████▊       | 400/991 [1:42:12<2:34:56, 15.73s/batch, batch_loss=10.8, batch_index=400, batch_size=256]

Epoch 2/10:  40%|█████▏       | 400/991 [1:42:27<2:34:56, 15.73s/batch, batch_loss=8.9, batch_index=401, batch_size=256]

Epoch 2/10:  40%|█████▎       | 401/991 [1:42:27<2:33:53, 15.65s/batch, batch_loss=8.9, batch_index=401, batch_size=256]

Epoch 2/10:  40%|████▊       | 401/991 [1:42:42<2:33:53, 15.65s/batch, batch_loss=16.6, batch_index=402, batch_size=256]

Epoch 2/10:  41%|████▊       | 402/991 [1:42:42<2:31:25, 15.43s/batch, batch_loss=16.6, batch_index=402, batch_size=256]

Epoch 2/10:  41%|████▊       | 402/991 [1:42:58<2:31:25, 15.43s/batch, batch_loss=15.8, batch_index=403, batch_size=256]

Epoch 2/10:  41%|████▉       | 403/991 [1:42:58<2:32:10, 15.53s/batch, batch_loss=15.8, batch_index=403, batch_size=256]

Epoch 2/10:  41%|████▉       | 403/991 [1:43:13<2:32:10, 15.53s/batch, batch_loss=10.9, batch_index=404, batch_size=256]

Epoch 2/10:  41%|████▉       | 404/991 [1:43:13<2:32:13, 15.56s/batch, batch_loss=10.9, batch_index=404, batch_size=256]

Epoch 2/10:  41%|████▉       | 404/991 [1:43:29<2:32:13, 15.56s/batch, batch_loss=13.6, batch_index=405, batch_size=256]

Epoch 2/10:  41%|████▉       | 405/991 [1:43:29<2:32:37, 15.63s/batch, batch_loss=13.6, batch_index=405, batch_size=256]

Epoch 2/10:  41%|████▉       | 405/991 [1:43:45<2:32:37, 15.63s/batch, batch_loss=5.58, batch_index=406, batch_size=256]

Epoch 2/10:  41%|████▉       | 406/991 [1:43:45<2:32:11, 15.61s/batch, batch_loss=5.58, batch_index=406, batch_size=256]

Epoch 2/10:  41%|████▉       | 406/991 [1:44:00<2:32:11, 15.61s/batch, batch_loss=21.9, batch_index=407, batch_size=256]

Epoch 2/10:  41%|████▉       | 407/991 [1:44:00<2:29:38, 15.37s/batch, batch_loss=21.9, batch_index=407, batch_size=256]

Epoch 2/10:  41%|████▉       | 407/991 [1:44:15<2:29:38, 15.37s/batch, batch_loss=7.02, batch_index=408, batch_size=256]

Epoch 2/10:  41%|████▉       | 408/991 [1:44:15<2:29:13, 15.36s/batch, batch_loss=7.02, batch_index=408, batch_size=256]

Epoch 2/10:  41%|████▉       | 408/991 [1:44:30<2:29:13, 15.36s/batch, batch_loss=21.4, batch_index=409, batch_size=256]

Epoch 2/10:  41%|████▉       | 409/991 [1:44:30<2:28:03, 15.26s/batch, batch_loss=21.4, batch_index=409, batch_size=256]

Epoch 2/10:  41%|████▉       | 409/991 [1:44:44<2:28:03, 15.26s/batch, batch_loss=21.6, batch_index=410, batch_size=256]

Epoch 2/10:  41%|████▉       | 410/991 [1:44:44<2:25:11, 14.99s/batch, batch_loss=21.6, batch_index=410, batch_size=256]

Epoch 2/10:  41%|████▉       | 410/991 [1:44:59<2:25:11, 14.99s/batch, batch_loss=12.6, batch_index=411, batch_size=256]

Epoch 2/10:  41%|████▉       | 411/991 [1:44:59<2:25:09, 15.02s/batch, batch_loss=12.6, batch_index=411, batch_size=256]

Epoch 2/10:  41%|████▉       | 411/991 [1:45:14<2:25:09, 15.02s/batch, batch_loss=13.5, batch_index=412, batch_size=256]

Epoch 2/10:  42%|████▉       | 412/991 [1:45:14<2:23:19, 14.85s/batch, batch_loss=13.5, batch_index=412, batch_size=256]

Epoch 2/10:  42%|████▉       | 412/991 [1:45:28<2:23:19, 14.85s/batch, batch_loss=14.9, batch_index=413, batch_size=256]

Epoch 2/10:  42%|█████       | 413/991 [1:45:28<2:20:34, 14.59s/batch, batch_loss=14.9, batch_index=413, batch_size=256]

Epoch 2/10:  42%|█████       | 413/991 [1:45:42<2:20:34, 14.59s/batch, batch_loss=13.7, batch_index=414, batch_size=256]

Epoch 2/10:  42%|█████       | 414/991 [1:45:42<2:19:43, 14.53s/batch, batch_loss=13.7, batch_index=414, batch_size=256]

Epoch 2/10:  42%|█████       | 414/991 [1:45:57<2:19:43, 14.53s/batch, batch_loss=8.28, batch_index=415, batch_size=256]

Epoch 2/10:  42%|█████       | 415/991 [1:45:57<2:21:03, 14.69s/batch, batch_loss=8.28, batch_index=415, batch_size=256]

Epoch 2/10:  42%|█████       | 415/991 [1:46:12<2:21:03, 14.69s/batch, batch_loss=10.9, batch_index=416, batch_size=256]

Epoch 2/10:  42%|█████       | 416/991 [1:46:12<2:22:15, 14.84s/batch, batch_loss=10.9, batch_index=416, batch_size=256]

Epoch 2/10:  42%|█████       | 416/991 [1:46:27<2:22:15, 14.84s/batch, batch_loss=8.56, batch_index=417, batch_size=256]

Epoch 2/10:  42%|█████       | 417/991 [1:46:27<2:20:41, 14.71s/batch, batch_loss=8.56, batch_index=417, batch_size=256]

Epoch 2/10:  42%|█████       | 417/991 [1:46:42<2:20:41, 14.71s/batch, batch_loss=13.3, batch_index=418, batch_size=256]

Epoch 2/10:  42%|█████       | 418/991 [1:46:42<2:21:01, 14.77s/batch, batch_loss=13.3, batch_index=418, batch_size=256]

Epoch 2/10:  42%|████▏     | 418/991 [1:46:57<2:21:01, 14.77s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 2/10:  42%|████▏     | 419/991 [1:46:57<2:20:48, 14.77s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 2/10:  42%|█████       | 419/991 [1:47:11<2:20:48, 14.77s/batch, batch_loss=16.8, batch_index=420, batch_size=256]

Epoch 2/10:  42%|█████       | 420/991 [1:47:11<2:20:23, 14.75s/batch, batch_loss=16.8, batch_index=420, batch_size=256]

Epoch 2/10:  42%|█████       | 420/991 [1:47:27<2:20:23, 14.75s/batch, batch_loss=12.9, batch_index=421, batch_size=256]

Epoch 2/10:  42%|█████       | 421/991 [1:47:27<2:21:44, 14.92s/batch, batch_loss=12.9, batch_index=421, batch_size=256]

Epoch 2/10:  42%|█████▌       | 421/991 [1:47:42<2:21:44, 14.92s/batch, batch_loss=8.9, batch_index=422, batch_size=256]

Epoch 2/10:  43%|█████▌       | 422/991 [1:47:42<2:24:26, 15.23s/batch, batch_loss=8.9, batch_index=422, batch_size=256]

Epoch 2/10:  43%|█████▉        | 422/991 [1:47:58<2:24:26, 15.23s/batch, batch_loss=11, batch_index=423, batch_size=256]

Epoch 2/10:  43%|█████▉        | 423/991 [1:47:58<2:25:45, 15.40s/batch, batch_loss=11, batch_index=423, batch_size=256]

Epoch 2/10:  43%|█████       | 423/991 [1:48:15<2:25:45, 15.40s/batch, batch_loss=10.9, batch_index=424, batch_size=256]

Epoch 2/10:  43%|█████▏      | 424/991 [1:48:15<2:28:03, 15.67s/batch, batch_loss=10.9, batch_index=424, batch_size=256]

Epoch 2/10:  43%|██████▍        | 424/991 [1:48:30<2:28:03, 15.67s/batch, batch_loss=7, batch_index=425, batch_size=256]

Epoch 2/10:  43%|██████▍        | 425/991 [1:48:30<2:27:43, 15.66s/batch, batch_loss=7, batch_index=425, batch_size=256]

Epoch 2/10:  43%|█████▏      | 425/991 [1:48:45<2:27:43, 15.66s/batch, batch_loss=2.43, batch_index=426, batch_size=256]

Epoch 2/10:  43%|█████▏      | 426/991 [1:48:45<2:26:07, 15.52s/batch, batch_loss=2.43, batch_index=426, batch_size=256]

Epoch 2/10:  43%|█████▏      | 426/991 [1:49:00<2:26:07, 15.52s/batch, batch_loss=11.3, batch_index=427, batch_size=256]

Epoch 2/10:  43%|█████▏      | 427/991 [1:49:00<2:23:23, 15.25s/batch, batch_loss=11.3, batch_index=427, batch_size=256]

Epoch 2/10:  43%|█████▏      | 427/991 [1:49:15<2:23:23, 15.25s/batch, batch_loss=20.4, batch_index=428, batch_size=256]

Epoch 2/10:  43%|█████▏      | 428/991 [1:49:15<2:21:47, 15.11s/batch, batch_loss=20.4, batch_index=428, batch_size=256]

Epoch 2/10:  43%|█████▏      | 428/991 [1:49:33<2:21:47, 15.11s/batch, batch_loss=22.7, batch_index=429, batch_size=256]

Epoch 2/10:  43%|█████▏      | 429/991 [1:49:33<2:29:49, 16.00s/batch, batch_loss=22.7, batch_index=429, batch_size=256]

Epoch 2/10:  43%|███▉     | 429/991 [1:49:48<2:29:49, 16.00s/batch, batch_loss=9.32e+3, batch_index=430, batch_size=256]

Epoch 2/10:  43%|███▉     | 430/991 [1:49:48<2:26:37, 15.68s/batch, batch_loss=9.32e+3, batch_index=430, batch_size=256]

Epoch 2/10:  43%|█████▏      | 430/991 [1:50:03<2:26:37, 15.68s/batch, batch_loss=25.5, batch_index=431, batch_size=256]

Epoch 2/10:  43%|█████▏      | 431/991 [1:50:03<2:25:55, 15.63s/batch, batch_loss=25.5, batch_index=431, batch_size=256]

Epoch 2/10:  43%|█████▏      | 431/991 [1:50:21<2:25:55, 15.63s/batch, batch_loss=20.4, batch_index=432, batch_size=256]

Epoch 2/10:  44%|█████▏      | 432/991 [1:50:21<2:31:05, 16.22s/batch, batch_loss=20.4, batch_index=432, batch_size=256]

Epoch 2/10:  44%|█████▏      | 432/991 [1:50:35<2:31:05, 16.22s/batch, batch_loss=9.84, batch_index=433, batch_size=256]

Epoch 2/10:  44%|█████▏      | 433/991 [1:50:35<2:24:58, 15.59s/batch, batch_loss=9.84, batch_index=433, batch_size=256]

Epoch 2/10:  44%|█████▏      | 433/991 [1:50:49<2:24:58, 15.59s/batch, batch_loss=18.9, batch_index=434, batch_size=256]

Epoch 2/10:  44%|█████▎      | 434/991 [1:50:49<2:21:23, 15.23s/batch, batch_loss=18.9, batch_index=434, batch_size=256]

Epoch 2/10:  44%|█████▎      | 434/991 [1:51:04<2:21:23, 15.23s/batch, batch_loss=13.8, batch_index=435, batch_size=256]

Epoch 2/10:  44%|█████▎      | 435/991 [1:51:04<2:19:14, 15.03s/batch, batch_loss=13.8, batch_index=435, batch_size=256]

Epoch 2/10:  44%|█████▎      | 435/991 [1:51:19<2:19:14, 15.03s/batch, batch_loss=12.9, batch_index=436, batch_size=256]

Epoch 2/10:  44%|█████▎      | 436/991 [1:51:19<2:19:24, 15.07s/batch, batch_loss=12.9, batch_index=436, batch_size=256]

Epoch 2/10:  44%|█████▎      | 436/991 [1:51:34<2:19:24, 15.07s/batch, batch_loss=18.9, batch_index=437, batch_size=256]

Epoch 2/10:  44%|█████▎      | 437/991 [1:51:34<2:18:15, 14.97s/batch, batch_loss=18.9, batch_index=437, batch_size=256]

Epoch 2/10:  44%|█████▎      | 437/991 [1:51:49<2:18:15, 14.97s/batch, batch_loss=23.5, batch_index=438, batch_size=256]

Epoch 2/10:  44%|█████▎      | 438/991 [1:51:49<2:18:21, 15.01s/batch, batch_loss=23.5, batch_index=438, batch_size=256]

Epoch 2/10:  44%|█████▎      | 438/991 [1:52:07<2:18:21, 15.01s/batch, batch_loss=14.3, batch_index=439, batch_size=256]

Epoch 2/10:  44%|█████▎      | 439/991 [1:52:07<2:26:28, 15.92s/batch, batch_loss=14.3, batch_index=439, batch_size=256]

Epoch 2/10:  44%|█████▎      | 439/991 [1:52:22<2:26:28, 15.92s/batch, batch_loss=24.5, batch_index=440, batch_size=256]

Epoch 2/10:  44%|█████▎      | 440/991 [1:52:22<2:22:47, 15.55s/batch, batch_loss=24.5, batch_index=440, batch_size=256]

Epoch 2/10:  44%|█████▎      | 440/991 [1:52:36<2:22:47, 15.55s/batch, batch_loss=22.3, batch_index=441, batch_size=256]

Epoch 2/10:  45%|█████▎      | 441/991 [1:52:36<2:19:10, 15.18s/batch, batch_loss=22.3, batch_index=441, batch_size=256]

Epoch 2/10:  45%|█████▎      | 441/991 [1:52:50<2:19:10, 15.18s/batch, batch_loss=14.8, batch_index=442, batch_size=256]

Epoch 2/10:  45%|█████▎      | 442/991 [1:52:50<2:16:21, 14.90s/batch, batch_loss=14.8, batch_index=442, batch_size=256]

Epoch 2/10:  45%|█████▎      | 442/991 [1:53:05<2:16:21, 14.90s/batch, batch_loss=21.8, batch_index=443, batch_size=256]

Epoch 2/10:  45%|█████▎      | 443/991 [1:53:05<2:15:59, 14.89s/batch, batch_loss=21.8, batch_index=443, batch_size=256]

Epoch 2/10:  45%|█████▎      | 443/991 [1:53:23<2:15:59, 14.89s/batch, batch_loss=14.2, batch_index=444, batch_size=256]

Epoch 2/10:  45%|█████▍      | 444/991 [1:53:23<2:22:52, 15.67s/batch, batch_loss=14.2, batch_index=444, batch_size=256]

Epoch 2/10:  45%|█████▍      | 444/991 [1:53:38<2:22:52, 15.67s/batch, batch_loss=17.9, batch_index=445, batch_size=256]

Epoch 2/10:  45%|█████▍      | 445/991 [1:53:38<2:21:19, 15.53s/batch, batch_loss=17.9, batch_index=445, batch_size=256]

Epoch 2/10:  45%|█████▍      | 445/991 [1:53:53<2:21:19, 15.53s/batch, batch_loss=28.6, batch_index=446, batch_size=256]

Epoch 2/10:  45%|█████▍      | 446/991 [1:53:53<2:19:47, 15.39s/batch, batch_loss=28.6, batch_index=446, batch_size=256]

Epoch 2/10:  45%|██████▎       | 446/991 [1:54:08<2:19:47, 15.39s/batch, batch_loss=14, batch_index=447, batch_size=256]

Epoch 2/10:  45%|██████▎       | 447/991 [1:54:08<2:17:57, 15.22s/batch, batch_loss=14, batch_index=447, batch_size=256]

Epoch 2/10:  45%|█████▍      | 447/991 [1:54:23<2:17:57, 15.22s/batch, batch_loss=18.6, batch_index=448, batch_size=256]

Epoch 2/10:  45%|█████▍      | 448/991 [1:54:23<2:17:44, 15.22s/batch, batch_loss=18.6, batch_index=448, batch_size=256]

Epoch 2/10:  45%|██████▎       | 448/991 [1:54:38<2:17:44, 15.22s/batch, batch_loss=17, batch_index=449, batch_size=256]

Epoch 2/10:  45%|██████▎       | 449/991 [1:54:38<2:16:12, 15.08s/batch, batch_loss=17, batch_index=449, batch_size=256]

Epoch 2/10:  45%|█████▍      | 449/991 [1:54:53<2:16:12, 15.08s/batch, batch_loss=24.7, batch_index=450, batch_size=256]

Epoch 2/10:  45%|█████▍      | 450/991 [1:54:53<2:16:22, 15.13s/batch, batch_loss=24.7, batch_index=450, batch_size=256]

Epoch 2/10:  45%|█████▍      | 450/991 [1:55:09<2:16:22, 15.13s/batch, batch_loss=19.8, batch_index=451, batch_size=256]

Epoch 2/10:  46%|█████▍      | 451/991 [1:55:09<2:18:20, 15.37s/batch, batch_loss=19.8, batch_index=451, batch_size=256]

Epoch 2/10:  46%|█████▍      | 451/991 [1:55:24<2:18:20, 15.37s/batch, batch_loss=18.5, batch_index=452, batch_size=256]

Epoch 2/10:  46%|█████▍      | 452/991 [1:55:24<2:17:06, 15.26s/batch, batch_loss=18.5, batch_index=452, batch_size=256]

Epoch 2/10:  46%|█████▍      | 452/991 [1:55:42<2:17:06, 15.26s/batch, batch_loss=20.2, batch_index=453, batch_size=256]

Epoch 2/10:  46%|█████▍      | 453/991 [1:55:42<2:24:24, 16.11s/batch, batch_loss=20.2, batch_index=453, batch_size=256]

Epoch 2/10:  46%|████     | 453/991 [1:55:57<2:24:24, 16.11s/batch, batch_loss=7.24e+3, batch_index=454, batch_size=256]

Epoch 2/10:  46%|████     | 454/991 [1:55:57<2:21:42, 15.83s/batch, batch_loss=7.24e+3, batch_index=454, batch_size=256]

Epoch 2/10:  46%|█████▍      | 454/991 [1:56:12<2:21:42, 15.83s/batch, batch_loss=26.6, batch_index=455, batch_size=256]

Epoch 2/10:  46%|█████▌      | 455/991 [1:56:12<2:19:15, 15.59s/batch, batch_loss=26.6, batch_index=455, batch_size=256]

Epoch 2/10:  46%|█████▌      | 455/991 [1:56:28<2:19:15, 15.59s/batch, batch_loss=26.5, batch_index=456, batch_size=256]

Epoch 2/10:  46%|█████▌      | 456/991 [1:56:28<2:19:11, 15.61s/batch, batch_loss=26.5, batch_index=456, batch_size=256]

Epoch 2/10:  46%|█████▌      | 456/991 [1:56:43<2:19:11, 15.61s/batch, batch_loss=13.6, batch_index=457, batch_size=256]

Epoch 2/10:  46%|█████▌      | 457/991 [1:56:43<2:18:27, 15.56s/batch, batch_loss=13.6, batch_index=457, batch_size=256]

Epoch 2/10:  46%|█████▌      | 457/991 [1:56:58<2:18:27, 15.56s/batch, batch_loss=17.1, batch_index=458, batch_size=256]

Epoch 2/10:  46%|█████▌      | 458/991 [1:56:58<2:16:37, 15.38s/batch, batch_loss=17.1, batch_index=458, batch_size=256]

Epoch 2/10:  46%|█████▌      | 458/991 [1:57:13<2:16:37, 15.38s/batch, batch_loss=27.8, batch_index=459, batch_size=256]

Epoch 2/10:  46%|█████▌      | 459/991 [1:57:13<2:14:35, 15.18s/batch, batch_loss=27.8, batch_index=459, batch_size=256]

Epoch 2/10:  46%|█████▌      | 459/991 [1:57:30<2:14:35, 15.18s/batch, batch_loss=24.9, batch_index=460, batch_size=256]

Epoch 2/10:  46%|█████▌      | 460/991 [1:57:30<2:20:29, 15.88s/batch, batch_loss=24.9, batch_index=460, batch_size=256]

Epoch 2/10:  46%|█████▌      | 460/991 [1:57:47<2:20:29, 15.88s/batch, batch_loss=54.7, batch_index=461, batch_size=256]

Epoch 2/10:  47%|█████▌      | 461/991 [1:57:47<2:21:52, 16.06s/batch, batch_loss=54.7, batch_index=461, batch_size=256]

Epoch 2/10:  47%|██████▌       | 461/991 [1:58:03<2:21:52, 16.06s/batch, batch_loss=15, batch_index=462, batch_size=256]

Epoch 2/10:  47%|██████▌       | 462/991 [1:58:03<2:21:12, 16.02s/batch, batch_loss=15, batch_index=462, batch_size=256]

Epoch 2/10:  47%|████▏    | 462/991 [1:58:18<2:21:12, 16.02s/batch, batch_loss=6.21e+4, batch_index=463, batch_size=256]

Epoch 2/10:  47%|████▏    | 463/991 [1:58:18<2:19:15, 15.83s/batch, batch_loss=6.21e+4, batch_index=463, batch_size=256]

Epoch 2/10:  47%|█████▌      | 463/991 [1:58:34<2:19:15, 15.83s/batch, batch_loss=17.1, batch_index=464, batch_size=256]

Epoch 2/10:  47%|█████▌      | 464/991 [1:58:34<2:19:19, 15.86s/batch, batch_loss=17.1, batch_index=464, batch_size=256]

Epoch 2/10:  47%|█████▌      | 464/991 [1:58:50<2:19:19, 15.86s/batch, batch_loss=15.1, batch_index=465, batch_size=256]

Epoch 2/10:  47%|█████▋      | 465/991 [1:58:50<2:19:45, 15.94s/batch, batch_loss=15.1, batch_index=465, batch_size=256]

Epoch 2/10:  47%|██████▌       | 465/991 [1:59:05<2:19:45, 15.94s/batch, batch_loss=16, batch_index=466, batch_size=256]

Epoch 2/10:  47%|██████▌       | 466/991 [1:59:05<2:16:32, 15.61s/batch, batch_loss=16, batch_index=466, batch_size=256]

Epoch 2/10:  47%|█████▋      | 466/991 [1:59:19<2:16:32, 15.61s/batch, batch_loss=12.4, batch_index=467, batch_size=256]

Epoch 2/10:  47%|█████▋      | 467/991 [1:59:19<2:12:49, 15.21s/batch, batch_loss=12.4, batch_index=467, batch_size=256]

Epoch 2/10:  47%|██████▌       | 467/991 [1:59:33<2:12:49, 15.21s/batch, batch_loss=17, batch_index=468, batch_size=256]

Epoch 2/10:  47%|██████▌       | 468/991 [1:59:33<2:08:58, 14.80s/batch, batch_loss=17, batch_index=468, batch_size=256]

Epoch 2/10:  47%|█████▋      | 468/991 [1:59:51<2:08:58, 14.80s/batch, batch_loss=13.6, batch_index=469, batch_size=256]

Epoch 2/10:  47%|█████▋      | 469/991 [1:59:51<2:15:36, 15.59s/batch, batch_loss=13.6, batch_index=469, batch_size=256]

Epoch 2/10:  47%|█████▋      | 469/991 [2:00:05<2:15:36, 15.59s/batch, batch_loss=15.8, batch_index=470, batch_size=256]

Epoch 2/10:  47%|█████▋      | 470/991 [2:00:05<2:12:19, 15.24s/batch, batch_loss=15.8, batch_index=470, batch_size=256]

Epoch 2/10:  47%|█████▋      | 470/991 [2:00:19<2:12:19, 15.24s/batch, batch_loss=23.7, batch_index=471, batch_size=256]

Epoch 2/10:  48%|█████▋      | 471/991 [2:00:19<2:09:39, 14.96s/batch, batch_loss=23.7, batch_index=471, batch_size=256]

Epoch 2/10:  48%|█████▋      | 471/991 [2:00:34<2:09:39, 14.96s/batch, batch_loss=21.9, batch_index=472, batch_size=256]

Epoch 2/10:  48%|█████▋      | 472/991 [2:00:34<2:08:21, 14.84s/batch, batch_loss=21.9, batch_index=472, batch_size=256]

Epoch 2/10:  48%|█████▋      | 472/991 [2:00:49<2:08:21, 14.84s/batch, batch_loss=15.4, batch_index=473, batch_size=256]

Epoch 2/10:  48%|█████▋      | 473/991 [2:00:49<2:08:12, 14.85s/batch, batch_loss=15.4, batch_index=473, batch_size=256]

Epoch 2/10:  48%|█████▋      | 473/991 [2:01:04<2:08:12, 14.85s/batch, batch_loss=15.4, batch_index=474, batch_size=256]

Epoch 2/10:  48%|█████▋      | 474/991 [2:01:04<2:07:56, 14.85s/batch, batch_loss=15.4, batch_index=474, batch_size=256]

Epoch 2/10:  48%|████▊     | 474/991 [2:01:18<2:07:56, 14.85s/batch, batch_loss=2.4e+3, batch_index=475, batch_size=256]

Epoch 2/10:  48%|████▊     | 475/991 [2:01:18<2:06:29, 14.71s/batch, batch_loss=2.4e+3, batch_index=475, batch_size=256]

Epoch 2/10:  48%|█████▊      | 475/991 [2:01:33<2:06:29, 14.71s/batch, batch_loss=17.7, batch_index=476, batch_size=256]

Epoch 2/10:  48%|█████▊      | 476/991 [2:01:33<2:05:41, 14.64s/batch, batch_loss=17.7, batch_index=476, batch_size=256]

Epoch 2/10:  48%|█████▊      | 476/991 [2:01:47<2:05:41, 14.64s/batch, batch_loss=19.8, batch_index=477, batch_size=256]

Epoch 2/10:  48%|█████▊      | 477/991 [2:01:47<2:05:56, 14.70s/batch, batch_loss=19.8, batch_index=477, batch_size=256]

Epoch 2/10:  48%|█████▊      | 477/991 [2:02:02<2:05:56, 14.70s/batch, batch_loss=16.5, batch_index=478, batch_size=256]

Epoch 2/10:  48%|█████▊      | 478/991 [2:02:02<2:06:14, 14.77s/batch, batch_loss=16.5, batch_index=478, batch_size=256]

Epoch 2/10:  48%|█████▊      | 478/991 [2:02:17<2:06:14, 14.77s/batch, batch_loss=21.4, batch_index=479, batch_size=256]

Epoch 2/10:  48%|█████▊      | 479/991 [2:02:17<2:06:44, 14.85s/batch, batch_loss=21.4, batch_index=479, batch_size=256]

Epoch 2/10:  48%|█████▊      | 479/991 [2:02:33<2:06:44, 14.85s/batch, batch_loss=18.6, batch_index=480, batch_size=256]

Epoch 2/10:  48%|█████▊      | 480/991 [2:02:33<2:08:07, 15.04s/batch, batch_loss=18.6, batch_index=480, batch_size=256]

Epoch 2/10:  48%|█████▊      | 480/991 [2:02:48<2:08:07, 15.04s/batch, batch_loss=28.2, batch_index=481, batch_size=256]

Epoch 2/10:  49%|█████▊      | 481/991 [2:02:48<2:06:58, 14.94s/batch, batch_loss=28.2, batch_index=481, batch_size=256]

Epoch 2/10:  49%|█████▊      | 481/991 [2:03:02<2:06:58, 14.94s/batch, batch_loss=20.2, batch_index=482, batch_size=256]

Epoch 2/10:  49%|█████▊      | 482/991 [2:03:02<2:05:54, 14.84s/batch, batch_loss=20.2, batch_index=482, batch_size=256]

Epoch 2/10:  49%|█████▊      | 482/991 [2:03:17<2:05:54, 14.84s/batch, batch_loss=16.2, batch_index=483, batch_size=256]

Epoch 2/10:  49%|█████▊      | 483/991 [2:03:17<2:05:41, 14.85s/batch, batch_loss=16.2, batch_index=483, batch_size=256]

Epoch 2/10:  49%|█████▊      | 483/991 [2:03:35<2:05:41, 14.85s/batch, batch_loss=22.3, batch_index=484, batch_size=256]

Epoch 2/10:  49%|█████▊      | 484/991 [2:03:35<2:13:36, 15.81s/batch, batch_loss=22.3, batch_index=484, batch_size=256]

Epoch 2/10:  49%|█████▊      | 484/991 [2:03:50<2:13:36, 15.81s/batch, batch_loss=9.82, batch_index=485, batch_size=256]

Epoch 2/10:  49%|█████▊      | 485/991 [2:03:50<2:11:19, 15.57s/batch, batch_loss=9.82, batch_index=485, batch_size=256]

Epoch 2/10:  49%|█████▊      | 485/991 [2:04:05<2:11:19, 15.57s/batch, batch_loss=28.4, batch_index=486, batch_size=256]

Epoch 2/10:  49%|█████▉      | 486/991 [2:04:05<2:10:14, 15.47s/batch, batch_loss=28.4, batch_index=486, batch_size=256]

Epoch 2/10:  49%|█████▉      | 486/991 [2:04:21<2:10:14, 15.47s/batch, batch_loss=12.4, batch_index=487, batch_size=256]

Epoch 2/10:  49%|█████▉      | 487/991 [2:04:21<2:10:15, 15.51s/batch, batch_loss=12.4, batch_index=487, batch_size=256]

Epoch 2/10:  49%|█████▉      | 487/991 [2:04:37<2:10:15, 15.51s/batch, batch_loss=8.52, batch_index=488, batch_size=256]

Epoch 2/10:  49%|█████▉      | 488/991 [2:04:37<2:11:52, 15.73s/batch, batch_loss=8.52, batch_index=488, batch_size=256]

Epoch 2/10:  49%|█████▉      | 488/991 [2:04:53<2:11:52, 15.73s/batch, batch_loss=10.6, batch_index=489, batch_size=256]

Epoch 2/10:  49%|█████▉      | 489/991 [2:04:53<2:11:07, 15.67s/batch, batch_loss=10.6, batch_index=489, batch_size=256]

Epoch 2/10:  49%|█████▉      | 489/991 [2:05:07<2:11:07, 15.67s/batch, batch_loss=9.75, batch_index=490, batch_size=256]

Epoch 2/10:  49%|█████▉      | 490/991 [2:05:07<2:08:06, 15.34s/batch, batch_loss=9.75, batch_index=490, batch_size=256]

Epoch 2/10:  49%|█████▉      | 490/991 [2:05:26<2:08:06, 15.34s/batch, batch_loss=20.6, batch_index=491, batch_size=256]

Epoch 2/10:  50%|█████▉      | 491/991 [2:05:26<2:15:09, 16.22s/batch, batch_loss=20.6, batch_index=491, batch_size=256]

Epoch 2/10:  50%|█████▉      | 491/991 [2:05:41<2:15:09, 16.22s/batch, batch_loss=19.7, batch_index=492, batch_size=256]

Epoch 2/10:  50%|█████▉      | 492/991 [2:05:41<2:12:29, 15.93s/batch, batch_loss=19.7, batch_index=492, batch_size=256]

Epoch 2/10:  50%|█████▉      | 492/991 [2:05:56<2:12:29, 15.93s/batch, batch_loss=22.5, batch_index=493, batch_size=256]

Epoch 2/10:  50%|█████▉      | 493/991 [2:05:56<2:09:41, 15.63s/batch, batch_loss=22.5, batch_index=493, batch_size=256]

Epoch 2/10:  50%|█████▉      | 493/991 [2:06:10<2:09:41, 15.63s/batch, batch_loss=10.2, batch_index=494, batch_size=256]

Epoch 2/10:  50%|█████▉      | 494/991 [2:06:10<2:05:59, 15.21s/batch, batch_loss=10.2, batch_index=494, batch_size=256]

Epoch 2/10:  50%|████▍    | 494/991 [2:06:24<2:05:59, 15.21s/batch, batch_loss=8.55e+4, batch_index=495, batch_size=256]

Epoch 2/10:  50%|████▍    | 495/991 [2:06:24<2:03:32, 14.95s/batch, batch_loss=8.55e+4, batch_index=495, batch_size=256]

Epoch 2/10:  50%|█████▉      | 495/991 [2:06:38<2:03:32, 14.95s/batch, batch_loss=15.3, batch_index=496, batch_size=256]

Epoch 2/10:  50%|██████      | 496/991 [2:06:38<2:00:53, 14.65s/batch, batch_loss=15.3, batch_index=496, batch_size=256]

Epoch 2/10:  50%|██████▌      | 496/991 [2:06:53<2:00:53, 14.65s/batch, batch_loss=175, batch_index=497, batch_size=256]

Epoch 2/10:  50%|██████▌      | 497/991 [2:06:53<2:00:33, 14.64s/batch, batch_loss=175, batch_index=497, batch_size=256]

Epoch 2/10:  50%|███████       | 497/991 [2:07:06<2:00:33, 14.64s/batch, batch_loss=11, batch_index=498, batch_size=256]

Epoch 2/10:  50%|███████       | 498/991 [2:07:06<1:57:31, 14.30s/batch, batch_loss=11, batch_index=498, batch_size=256]

Epoch 2/10:  50%|██████▌      | 498/991 [2:07:22<1:57:31, 14.30s/batch, batch_loss=405, batch_index=499, batch_size=256]

Epoch 2/10:  50%|██████▌      | 499/991 [2:07:22<1:59:14, 14.54s/batch, batch_loss=405, batch_index=499, batch_size=256]

Epoch 2/10:  50%|██████      | 499/991 [2:07:36<1:59:14, 14.54s/batch, batch_loss=16.5, batch_index=500, batch_size=256]

Epoch 2/10:  50%|██████      | 500/991 [2:07:36<1:59:27, 14.60s/batch, batch_loss=16.5, batch_index=500, batch_size=256]

Epoch 2/10:  50%|██████      | 500/991 [2:07:52<1:59:27, 14.60s/batch, batch_loss=7.49, batch_index=501, batch_size=256]

Epoch 2/10:  51%|██████      | 501/991 [2:07:52<2:01:08, 14.83s/batch, batch_loss=7.49, batch_index=501, batch_size=256]

Epoch 2/10:  51%|██████      | 501/991 [2:08:07<2:01:08, 14.83s/batch, batch_loss=10.4, batch_index=502, batch_size=256]

Epoch 2/10:  51%|██████      | 502/991 [2:08:07<2:02:26, 15.02s/batch, batch_loss=10.4, batch_index=502, batch_size=256]

Epoch 2/10:  51%|██████      | 502/991 [2:08:21<2:02:26, 15.02s/batch, batch_loss=16.5, batch_index=503, batch_size=256]

Epoch 2/10:  51%|██████      | 503/991 [2:08:21<2:00:18, 14.79s/batch, batch_loss=16.5, batch_index=503, batch_size=256]

Epoch 2/10:  51%|██████      | 503/991 [2:08:36<2:00:18, 14.79s/batch, batch_loss=11.2, batch_index=504, batch_size=256]

Epoch 2/10:  51%|██████      | 504/991 [2:08:36<1:59:59, 14.78s/batch, batch_loss=11.2, batch_index=504, batch_size=256]

Epoch 2/10:  51%|██████      | 504/991 [2:08:50<1:59:59, 14.78s/batch, batch_loss=7.38, batch_index=505, batch_size=256]

Epoch 2/10:  51%|██████      | 505/991 [2:08:50<1:57:43, 14.53s/batch, batch_loss=7.38, batch_index=505, batch_size=256]

Epoch 2/10:  51%|██████      | 505/991 [2:09:04<1:57:43, 14.53s/batch, batch_loss=11.3, batch_index=506, batch_size=256]

Epoch 2/10:  51%|██████▏     | 506/991 [2:09:04<1:57:10, 14.50s/batch, batch_loss=11.3, batch_index=506, batch_size=256]

Epoch 2/10:  51%|██████▏     | 506/991 [2:09:20<1:57:10, 14.50s/batch, batch_loss=10.9, batch_index=507, batch_size=256]

Epoch 2/10:  51%|██████▏     | 507/991 [2:09:20<1:58:13, 14.66s/batch, batch_loss=10.9, batch_index=507, batch_size=256]

Epoch 2/10:  51%|██████▏     | 507/991 [2:09:34<1:58:13, 14.66s/batch, batch_loss=14.8, batch_index=508, batch_size=256]

Epoch 2/10:  51%|██████▏     | 508/991 [2:09:34<1:57:23, 14.58s/batch, batch_loss=14.8, batch_index=508, batch_size=256]

Epoch 2/10:  51%|██████▏     | 508/991 [2:09:49<1:57:23, 14.58s/batch, batch_loss=15.3, batch_index=509, batch_size=256]

Epoch 2/10:  51%|██████▏     | 509/991 [2:09:49<1:57:35, 14.64s/batch, batch_loss=15.3, batch_index=509, batch_size=256]

Epoch 2/10:  51%|██████▏     | 509/991 [2:10:04<1:57:35, 14.64s/batch, batch_loss=14.8, batch_index=510, batch_size=256]

Epoch 2/10:  51%|██████▏     | 510/991 [2:10:04<1:58:45, 14.81s/batch, batch_loss=14.8, batch_index=510, batch_size=256]

Epoch 2/10:  51%|██████▏     | 510/991 [2:10:19<1:58:45, 14.81s/batch, batch_loss=12.5, batch_index=511, batch_size=256]

Epoch 2/10:  52%|██████▏     | 511/991 [2:10:19<1:58:55, 14.87s/batch, batch_loss=12.5, batch_index=511, batch_size=256]

Epoch 2/10:  52%|██████▏     | 511/991 [2:10:34<1:58:55, 14.87s/batch, batch_loss=8.97, batch_index=512, batch_size=256]

Epoch 2/10:  52%|██████▏     | 512/991 [2:10:34<1:58:14, 14.81s/batch, batch_loss=8.97, batch_index=512, batch_size=256]

Epoch 2/10:  52%|██████▏     | 512/991 [2:10:49<1:58:14, 14.81s/batch, batch_loss=9.86, batch_index=513, batch_size=256]

Epoch 2/10:  52%|██████▏     | 513/991 [2:10:49<1:59:36, 15.01s/batch, batch_loss=9.86, batch_index=513, batch_size=256]

Epoch 2/10:  52%|██████▏     | 513/991 [2:11:04<1:59:36, 15.01s/batch, batch_loss=13.6, batch_index=514, batch_size=256]

Epoch 2/10:  52%|██████▏     | 514/991 [2:11:04<2:00:01, 15.10s/batch, batch_loss=13.6, batch_index=514, batch_size=256]

Epoch 2/10:  52%|██████▏     | 514/991 [2:11:22<2:00:01, 15.10s/batch, batch_loss=9.88, batch_index=515, batch_size=256]

Epoch 2/10:  52%|██████▏     | 515/991 [2:11:22<2:04:44, 15.72s/batch, batch_loss=9.88, batch_index=515, batch_size=256]

Epoch 2/10:  52%|██████▏     | 515/991 [2:11:38<2:04:44, 15.72s/batch, batch_loss=13.6, batch_index=516, batch_size=256]

Epoch 2/10:  52%|██████▏     | 516/991 [2:11:38<2:06:30, 15.98s/batch, batch_loss=13.6, batch_index=516, batch_size=256]

Epoch 2/10:  52%|██████▏     | 516/991 [2:11:55<2:06:30, 15.98s/batch, batch_loss=12.6, batch_index=517, batch_size=256]

Epoch 2/10:  52%|██████▎     | 517/991 [2:11:55<2:08:17, 16.24s/batch, batch_loss=12.6, batch_index=517, batch_size=256]

Epoch 2/10:  52%|██████▎     | 517/991 [2:12:10<2:08:17, 16.24s/batch, batch_loss=22.6, batch_index=518, batch_size=256]

Epoch 2/10:  52%|██████▎     | 518/991 [2:12:10<2:05:15, 15.89s/batch, batch_loss=22.6, batch_index=518, batch_size=256]

Epoch 2/10:  52%|██████▎     | 518/991 [2:12:26<2:05:15, 15.89s/batch, batch_loss=12.2, batch_index=519, batch_size=256]

Epoch 2/10:  52%|██████▎     | 519/991 [2:12:26<2:04:45, 15.86s/batch, batch_loss=12.2, batch_index=519, batch_size=256]

Epoch 2/10:  52%|██████▎     | 519/991 [2:12:41<2:04:45, 15.86s/batch, batch_loss=11.3, batch_index=520, batch_size=256]

Epoch 2/10:  52%|██████▎     | 520/991 [2:12:41<2:03:45, 15.76s/batch, batch_loss=11.3, batch_index=520, batch_size=256]

Epoch 2/10:  52%|██████▎     | 520/991 [2:12:57<2:03:45, 15.76s/batch, batch_loss=7.31, batch_index=521, batch_size=256]

Epoch 2/10:  53%|██████▎     | 521/991 [2:12:57<2:03:37, 15.78s/batch, batch_loss=7.31, batch_index=521, batch_size=256]

Epoch 2/10:  53%|██████▎     | 521/991 [2:13:13<2:03:37, 15.78s/batch, batch_loss=10.2, batch_index=522, batch_size=256]

Epoch 2/10:  53%|██████▎     | 522/991 [2:13:13<2:03:05, 15.75s/batch, batch_loss=10.2, batch_index=522, batch_size=256]

Epoch 2/10:  53%|██████▊      | 522/991 [2:13:29<2:03:05, 15.75s/batch, batch_loss=3.1, batch_index=523, batch_size=256]

Epoch 2/10:  53%|██████▊      | 523/991 [2:13:29<2:03:31, 15.84s/batch, batch_loss=3.1, batch_index=523, batch_size=256]

Epoch 2/10:  53%|██████▎     | 523/991 [2:13:45<2:03:31, 15.84s/batch, batch_loss=8.66, batch_index=524, batch_size=256]

Epoch 2/10:  53%|██████▎     | 524/991 [2:13:45<2:03:20, 15.85s/batch, batch_loss=8.66, batch_index=524, batch_size=256]

Epoch 2/10:  53%|██████▎     | 524/991 [2:14:01<2:03:20, 15.85s/batch, batch_loss=7.15, batch_index=525, batch_size=256]

Epoch 2/10:  53%|██████▎     | 525/991 [2:14:01<2:03:41, 15.93s/batch, batch_loss=7.15, batch_index=525, batch_size=256]

Epoch 2/10:  53%|██████▎     | 525/991 [2:14:17<2:03:41, 15.93s/batch, batch_loss=6.99, batch_index=526, batch_size=256]

Epoch 2/10:  53%|██████▎     | 526/991 [2:14:17<2:03:00, 15.87s/batch, batch_loss=6.99, batch_index=526, batch_size=256]

Epoch 2/10:  53%|██████▎     | 526/991 [2:14:31<2:03:00, 15.87s/batch, batch_loss=13.9, batch_index=527, batch_size=256]

Epoch 2/10:  53%|██████▍     | 527/991 [2:14:31<2:00:15, 15.55s/batch, batch_loss=13.9, batch_index=527, batch_size=256]

Epoch 2/10:  53%|██████▍     | 527/991 [2:14:46<2:00:15, 15.55s/batch, batch_loss=12.6, batch_index=528, batch_size=256]

Epoch 2/10:  53%|██████▍     | 528/991 [2:14:46<1:56:53, 15.15s/batch, batch_loss=12.6, batch_index=528, batch_size=256]

Epoch 2/10:  53%|██████▍     | 528/991 [2:15:00<1:56:53, 15.15s/batch, batch_loss=9.31, batch_index=529, batch_size=256]

Epoch 2/10:  53%|██████▍     | 529/991 [2:15:00<1:55:07, 14.95s/batch, batch_loss=9.31, batch_index=529, batch_size=256]

Epoch 2/10:  53%|██████▍     | 529/991 [2:15:14<1:55:07, 14.95s/batch, batch_loss=16.8, batch_index=530, batch_size=256]

Epoch 2/10:  53%|██████▍     | 530/991 [2:15:14<1:52:18, 14.62s/batch, batch_loss=16.8, batch_index=530, batch_size=256]

Epoch 2/10:  53%|██████▍     | 530/991 [2:15:28<1:52:18, 14.62s/batch, batch_loss=12.8, batch_index=531, batch_size=256]

Epoch 2/10:  54%|██████▍     | 531/991 [2:15:28<1:50:10, 14.37s/batch, batch_loss=12.8, batch_index=531, batch_size=256]

Epoch 2/10:  54%|███████▌      | 531/991 [2:15:45<1:50:10, 14.37s/batch, batch_loss=12, batch_index=532, batch_size=256]

Epoch 2/10:  54%|███████▌      | 532/991 [2:15:45<1:56:21, 15.21s/batch, batch_loss=12, batch_index=532, batch_size=256]

Epoch 2/10:  54%|██████▍     | 532/991 [2:16:00<1:56:21, 15.21s/batch, batch_loss=12.8, batch_index=533, batch_size=256]

Epoch 2/10:  54%|██████▍     | 533/991 [2:16:00<1:56:43, 15.29s/batch, batch_loss=12.8, batch_index=533, batch_size=256]

Epoch 2/10:  54%|███████▌      | 533/991 [2:16:16<1:56:43, 15.29s/batch, batch_loss=13, batch_index=534, batch_size=256]

Epoch 2/10:  54%|███████▌      | 534/991 [2:16:16<1:56:59, 15.36s/batch, batch_loss=13, batch_index=534, batch_size=256]

Epoch 2/10:  54%|██████▍     | 534/991 [2:16:30<1:56:59, 15.36s/batch, batch_loss=17.5, batch_index=535, batch_size=256]

Epoch 2/10:  54%|██████▍     | 535/991 [2:16:30<1:54:27, 15.06s/batch, batch_loss=17.5, batch_index=535, batch_size=256]

Epoch 2/10:  54%|██████▍     | 535/991 [2:16:44<1:54:27, 15.06s/batch, batch_loss=12.8, batch_index=536, batch_size=256]

Epoch 2/10:  54%|██████▍     | 536/991 [2:16:44<1:51:31, 14.71s/batch, batch_loss=12.8, batch_index=536, batch_size=256]

Epoch 2/10:  54%|██████▍     | 536/991 [2:16:59<1:51:31, 14.71s/batch, batch_loss=8.97, batch_index=537, batch_size=256]

Epoch 2/10:  54%|██████▌     | 537/991 [2:16:59<1:50:26, 14.60s/batch, batch_loss=8.97, batch_index=537, batch_size=256]

Epoch 2/10:  54%|████▉    | 537/991 [2:17:13<1:50:26, 14.60s/batch, batch_loss=1.79e+3, batch_index=538, batch_size=256]

Epoch 2/10:  54%|████▉    | 538/991 [2:17:13<1:49:18, 14.48s/batch, batch_loss=1.79e+3, batch_index=538, batch_size=256]

Epoch 2/10:  54%|██████▌     | 538/991 [2:17:27<1:49:18, 14.48s/batch, batch_loss=30.6, batch_index=539, batch_size=256]

Epoch 2/10:  54%|██████▌     | 539/991 [2:17:27<1:49:20, 14.51s/batch, batch_loss=30.6, batch_index=539, batch_size=256]

Epoch 2/10:  54%|██████▌     | 539/991 [2:17:42<1:49:20, 14.51s/batch, batch_loss=27.5, batch_index=540, batch_size=256]

Epoch 2/10:  54%|██████▌     | 540/991 [2:17:42<1:49:37, 14.58s/batch, batch_loss=27.5, batch_index=540, batch_size=256]

Epoch 2/10:  54%|█████▍    | 540/991 [2:17:56<1:49:37, 14.58s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 2/10:  55%|█████▍    | 541/991 [2:17:56<1:48:59, 14.53s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 2/10:  55%|████▉    | 541/991 [2:18:11<1:48:59, 14.53s/batch, batch_loss=2.85e+3, batch_index=542, batch_size=256]

Epoch 2/10:  55%|████▉    | 542/991 [2:18:11<1:49:46, 14.67s/batch, batch_loss=2.85e+3, batch_index=542, batch_size=256]

Epoch 2/10:  55%|██████▌     | 542/991 [2:18:26<1:49:46, 14.67s/batch, batch_loss=25.2, batch_index=543, batch_size=256]

Epoch 2/10:  55%|██████▌     | 543/991 [2:18:26<1:49:04, 14.61s/batch, batch_loss=25.2, batch_index=543, batch_size=256]

Epoch 2/10:  55%|██████▌     | 543/991 [2:18:39<1:49:04, 14.61s/batch, batch_loss=21.1, batch_index=544, batch_size=256]

Epoch 2/10:  55%|██████▌     | 544/991 [2:18:39<1:46:18, 14.27s/batch, batch_loss=21.1, batch_index=544, batch_size=256]

Epoch 2/10:  55%|██████▌     | 544/991 [2:18:54<1:46:18, 14.27s/batch, batch_loss=15.8, batch_index=545, batch_size=256]

Epoch 2/10:  55%|██████▌     | 545/991 [2:18:54<1:46:54, 14.38s/batch, batch_loss=15.8, batch_index=545, batch_size=256]

Epoch 2/10:  55%|███████▏     | 545/991 [2:19:09<1:46:54, 14.38s/batch, batch_loss=299, batch_index=546, batch_size=256]

Epoch 2/10:  55%|███████▏     | 546/991 [2:19:09<1:48:36, 14.64s/batch, batch_loss=299, batch_index=546, batch_size=256]

Epoch 2/10:  55%|██████▌     | 546/991 [2:19:27<1:48:36, 14.64s/batch, batch_loss=16.2, batch_index=547, batch_size=256]

Epoch 2/10:  55%|██████▌     | 547/991 [2:19:27<1:55:44, 15.64s/batch, batch_loss=16.2, batch_index=547, batch_size=256]

Epoch 2/10:  55%|██████▌     | 547/991 [2:19:42<1:55:44, 15.64s/batch, batch_loss=16.6, batch_index=548, batch_size=256]

Epoch 2/10:  55%|██████▋     | 548/991 [2:19:42<1:53:40, 15.40s/batch, batch_loss=16.6, batch_index=548, batch_size=256]

Epoch 2/10:  55%|██████▋     | 548/991 [2:19:58<1:53:40, 15.40s/batch, batch_loss=8.83, batch_index=549, batch_size=256]

Epoch 2/10:  55%|██████▋     | 549/991 [2:19:58<1:53:47, 15.45s/batch, batch_loss=8.83, batch_index=549, batch_size=256]

Epoch 2/10:  55%|██████▋     | 549/991 [2:20:13<1:53:47, 15.45s/batch, batch_loss=19.8, batch_index=550, batch_size=256]

Epoch 2/10:  55%|██████▋     | 550/991 [2:20:13<1:54:15, 15.54s/batch, batch_loss=19.8, batch_index=550, batch_size=256]

Epoch 2/10:  55%|██████▋     | 550/991 [2:20:28<1:54:15, 15.54s/batch, batch_loss=17.7, batch_index=551, batch_size=256]

Epoch 2/10:  56%|██████▋     | 551/991 [2:20:28<1:52:17, 15.31s/batch, batch_loss=17.7, batch_index=551, batch_size=256]

Epoch 2/10:  56%|██████▋     | 551/991 [2:20:42<1:52:17, 15.31s/batch, batch_loss=13.3, batch_index=552, batch_size=256]

Epoch 2/10:  56%|██████▋     | 552/991 [2:20:42<1:49:43, 15.00s/batch, batch_loss=13.3, batch_index=552, batch_size=256]

Epoch 2/10:  56%|███████▊      | 552/991 [2:20:56<1:49:43, 15.00s/batch, batch_loss=16, batch_index=553, batch_size=256]

Epoch 2/10:  56%|███████▊      | 553/991 [2:20:56<1:46:59, 14.66s/batch, batch_loss=16, batch_index=553, batch_size=256]

Epoch 2/10:  56%|█████    | 553/991 [2:21:12<1:46:59, 14.66s/batch, batch_loss=5.73e+3, batch_index=554, batch_size=256]

Epoch 2/10:  56%|█████    | 554/991 [2:21:12<1:49:35, 15.05s/batch, batch_loss=5.73e+3, batch_index=554, batch_size=256]

Epoch 2/10:  56%|█████    | 554/991 [2:21:29<1:49:35, 15.05s/batch, batch_loss=2.57e+3, batch_index=555, batch_size=256]

Epoch 2/10:  56%|█████    | 555/991 [2:21:29<1:53:39, 15.64s/batch, batch_loss=2.57e+3, batch_index=555, batch_size=256]

Epoch 2/10:  56%|██████▋     | 555/991 [2:21:45<1:53:39, 15.64s/batch, batch_loss=16.4, batch_index=556, batch_size=256]

Epoch 2/10:  56%|██████▋     | 556/991 [2:21:45<1:53:15, 15.62s/batch, batch_loss=16.4, batch_index=556, batch_size=256]

Epoch 2/10:  56%|█████    | 556/991 [2:22:01<1:53:15, 15.62s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 2/10:  56%|█████    | 557/991 [2:22:01<1:53:29, 15.69s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 2/10:  56%|██████▋     | 557/991 [2:22:16<1:53:29, 15.69s/batch, batch_loss=9.35, batch_index=558, batch_size=256]

Epoch 2/10:  56%|██████▊     | 558/991 [2:22:16<1:51:47, 15.49s/batch, batch_loss=9.35, batch_index=558, batch_size=256]

Epoch 2/10:  56%|██████▊     | 558/991 [2:22:32<1:51:47, 15.49s/batch, batch_loss=18.5, batch_index=559, batch_size=256]

Epoch 2/10:  56%|██████▊     | 559/991 [2:22:32<1:53:17, 15.73s/batch, batch_loss=18.5, batch_index=559, batch_size=256]

Epoch 2/10:  56%|██████▊     | 559/991 [2:22:48<1:53:17, 15.73s/batch, batch_loss=7.16, batch_index=560, batch_size=256]

Epoch 2/10:  57%|██████▊     | 560/991 [2:22:48<1:52:59, 15.73s/batch, batch_loss=7.16, batch_index=560, batch_size=256]

Epoch 2/10:  57%|██████▊     | 560/991 [2:23:03<1:52:59, 15.73s/batch, batch_loss=7.87, batch_index=561, batch_size=256]

Epoch 2/10:  57%|██████▊     | 561/991 [2:23:03<1:51:01, 15.49s/batch, batch_loss=7.87, batch_index=561, batch_size=256]

Epoch 2/10:  57%|██████▊     | 561/991 [2:23:18<1:51:01, 15.49s/batch, batch_loss=16.4, batch_index=562, batch_size=256]

Epoch 2/10:  57%|██████▊     | 562/991 [2:23:18<1:49:26, 15.31s/batch, batch_loss=16.4, batch_index=562, batch_size=256]

Epoch 2/10:  57%|██████▊     | 562/991 [2:23:33<1:49:26, 15.31s/batch, batch_loss=7.15, batch_index=563, batch_size=256]

Epoch 2/10:  57%|██████▊     | 563/991 [2:23:33<1:50:03, 15.43s/batch, batch_loss=7.15, batch_index=563, batch_size=256]

Epoch 2/10:  57%|██████▊     | 563/991 [2:23:50<1:50:03, 15.43s/batch, batch_loss=12.6, batch_index=564, batch_size=256]

Epoch 2/10:  57%|██████▊     | 564/991 [2:23:50<1:53:15, 15.91s/batch, batch_loss=12.6, batch_index=564, batch_size=256]

Epoch 2/10:  57%|███████▍     | 564/991 [2:24:05<1:53:15, 15.91s/batch, batch_loss=495, batch_index=565, batch_size=256]

Epoch 2/10:  57%|███████▍     | 565/991 [2:24:05<1:49:12, 15.38s/batch, batch_loss=495, batch_index=565, batch_size=256]

Epoch 2/10:  57%|██████▊     | 565/991 [2:24:19<1:49:12, 15.38s/batch, batch_loss=10.7, batch_index=566, batch_size=256]

Epoch 2/10:  57%|██████▊     | 566/991 [2:24:19<1:47:25, 15.17s/batch, batch_loss=10.7, batch_index=566, batch_size=256]

Epoch 2/10:  57%|██████▊     | 566/991 [2:24:34<1:47:25, 15.17s/batch, batch_loss=21.7, batch_index=567, batch_size=256]

Epoch 2/10:  57%|██████▊     | 567/991 [2:24:34<1:45:44, 14.96s/batch, batch_loss=21.7, batch_index=567, batch_size=256]

Epoch 2/10:  57%|███████▍     | 567/991 [2:24:49<1:45:44, 14.96s/batch, batch_loss=297, batch_index=568, batch_size=256]

Epoch 2/10:  57%|███████▍     | 568/991 [2:24:49<1:45:39, 14.99s/batch, batch_loss=297, batch_index=568, batch_size=256]

Epoch 2/10:  57%|██████▉     | 568/991 [2:25:03<1:45:39, 14.99s/batch, batch_loss=29.4, batch_index=569, batch_size=256]

Epoch 2/10:  57%|██████▉     | 569/991 [2:25:03<1:44:32, 14.86s/batch, batch_loss=29.4, batch_index=569, batch_size=256]

Epoch 2/10:  57%|█████▏   | 569/991 [2:25:17<1:44:32, 14.86s/batch, batch_loss=8.48e+3, batch_index=570, batch_size=256]

Epoch 2/10:  58%|█████▏   | 570/991 [2:25:17<1:42:03, 14.55s/batch, batch_loss=8.48e+3, batch_index=570, batch_size=256]

Epoch 2/10:  58%|██████▉     | 570/991 [2:25:32<1:42:03, 14.55s/batch, batch_loss=9.84, batch_index=571, batch_size=256]

Epoch 2/10:  58%|██████▉     | 571/991 [2:25:32<1:42:29, 14.64s/batch, batch_loss=9.84, batch_index=571, batch_size=256]

Epoch 2/10:  58%|████████      | 571/991 [2:25:48<1:42:29, 14.64s/batch, batch_loss=12, batch_index=572, batch_size=256]

Epoch 2/10:  58%|████████      | 572/991 [2:25:48<1:45:54, 15.17s/batch, batch_loss=12, batch_index=572, batch_size=256]

Epoch 2/10:  58%|██████▉     | 572/991 [2:26:02<1:45:54, 15.17s/batch, batch_loss=7.54, batch_index=573, batch_size=256]

Epoch 2/10:  58%|██████▉     | 573/991 [2:26:02<1:43:03, 14.79s/batch, batch_loss=7.54, batch_index=573, batch_size=256]

Epoch 2/10:  58%|██████▉     | 573/991 [2:26:17<1:43:03, 14.79s/batch, batch_loss=11.6, batch_index=574, batch_size=256]

Epoch 2/10:  58%|██████▉     | 574/991 [2:26:17<1:43:37, 14.91s/batch, batch_loss=11.6, batch_index=574, batch_size=256]

Epoch 2/10:  58%|██████▉     | 574/991 [2:26:32<1:43:37, 14.91s/batch, batch_loss=17.7, batch_index=575, batch_size=256]

Epoch 2/10:  58%|██████▉     | 575/991 [2:26:32<1:42:15, 14.75s/batch, batch_loss=17.7, batch_index=575, batch_size=256]

Epoch 2/10:  58%|██████▉     | 575/991 [2:26:47<1:42:15, 14.75s/batch, batch_loss=25.4, batch_index=576, batch_size=256]

Epoch 2/10:  58%|██████▉     | 576/991 [2:26:47<1:42:58, 14.89s/batch, batch_loss=25.4, batch_index=576, batch_size=256]

Epoch 2/10:  58%|██████▉     | 576/991 [2:27:02<1:42:58, 14.89s/batch, batch_loss=11.2, batch_index=577, batch_size=256]

Epoch 2/10:  58%|██████▉     | 577/991 [2:27:02<1:43:00, 14.93s/batch, batch_loss=11.2, batch_index=577, batch_size=256]

Epoch 2/10:  58%|██████▉     | 577/991 [2:27:17<1:43:00, 14.93s/batch, batch_loss=9.39, batch_index=578, batch_size=256]

Epoch 2/10:  58%|██████▉     | 578/991 [2:27:17<1:42:48, 14.94s/batch, batch_loss=9.39, batch_index=578, batch_size=256]

Epoch 2/10:  58%|██████▉     | 578/991 [2:27:32<1:42:48, 14.94s/batch, batch_loss=11.9, batch_index=579, batch_size=256]

Epoch 2/10:  58%|███████     | 579/991 [2:27:32<1:42:39, 14.95s/batch, batch_loss=11.9, batch_index=579, batch_size=256]

Epoch 2/10:  58%|███████     | 579/991 [2:27:49<1:42:39, 14.95s/batch, batch_loss=16.1, batch_index=580, batch_size=256]

Epoch 2/10:  59%|███████     | 580/991 [2:27:49<1:45:58, 15.47s/batch, batch_loss=16.1, batch_index=580, batch_size=256]

Epoch 2/10:  59%|███████     | 580/991 [2:28:03<1:45:58, 15.47s/batch, batch_loss=5.99, batch_index=581, batch_size=256]

Epoch 2/10:  59%|███████     | 581/991 [2:28:03<1:44:11, 15.25s/batch, batch_loss=5.99, batch_index=581, batch_size=256]

Epoch 2/10:  59%|██████▍    | 581/991 [2:28:18<1:44:11, 15.25s/batch, batch_loss=0.476, batch_index=582, batch_size=256]

Epoch 2/10:  59%|██████▍    | 582/991 [2:28:18<1:42:14, 15.00s/batch, batch_loss=0.476, batch_index=582, batch_size=256]

Epoch 2/10:  59%|█████▊    | 582/991 [2:28:32<1:42:14, 15.00s/batch, batch_loss=6.6e+3, batch_index=583, batch_size=256]

Epoch 2/10:  59%|█████▉    | 583/991 [2:28:32<1:40:58, 14.85s/batch, batch_loss=6.6e+3, batch_index=583, batch_size=256]

Epoch 2/10:  59%|███████     | 583/991 [2:28:47<1:40:58, 14.85s/batch, batch_loss=12.2, batch_index=584, batch_size=256]

Epoch 2/10:  59%|███████     | 584/991 [2:28:47<1:40:26, 14.81s/batch, batch_loss=12.2, batch_index=584, batch_size=256]

Epoch 2/10:  59%|███████     | 584/991 [2:29:02<1:40:26, 14.81s/batch, batch_loss=7.37, batch_index=585, batch_size=256]

Epoch 2/10:  59%|███████     | 585/991 [2:29:02<1:39:59, 14.78s/batch, batch_loss=7.37, batch_index=585, batch_size=256]

Epoch 2/10:  59%|███████     | 585/991 [2:29:17<1:39:59, 14.78s/batch, batch_loss=24.1, batch_index=586, batch_size=256]

Epoch 2/10:  59%|███████     | 586/991 [2:29:17<1:40:55, 14.95s/batch, batch_loss=24.1, batch_index=586, batch_size=256]

Epoch 2/10:  59%|███████     | 586/991 [2:29:33<1:40:55, 14.95s/batch, batch_loss=23.7, batch_index=587, batch_size=256]

Epoch 2/10:  59%|███████     | 587/991 [2:29:33<1:41:40, 15.10s/batch, batch_loss=23.7, batch_index=587, batch_size=256]

Epoch 2/10:  59%|███████     | 587/991 [2:29:48<1:41:40, 15.10s/batch, batch_loss=14.2, batch_index=588, batch_size=256]

Epoch 2/10:  59%|███████     | 588/991 [2:29:48<1:41:41, 15.14s/batch, batch_loss=14.2, batch_index=588, batch_size=256]

Epoch 2/10:  59%|███████     | 588/991 [2:30:03<1:41:41, 15.14s/batch, batch_loss=8.75, batch_index=589, batch_size=256]

Epoch 2/10:  59%|███████▏    | 589/991 [2:30:03<1:42:16, 15.26s/batch, batch_loss=8.75, batch_index=589, batch_size=256]

Epoch 2/10:  59%|███████▏    | 589/991 [2:30:18<1:42:16, 15.26s/batch, batch_loss=15.6, batch_index=590, batch_size=256]

Epoch 2/10:  60%|███████▏    | 590/991 [2:30:18<1:41:29, 15.18s/batch, batch_loss=15.6, batch_index=590, batch_size=256]

Epoch 2/10:  60%|███████▏    | 590/991 [2:30:34<1:41:29, 15.18s/batch, batch_loss=15.3, batch_index=591, batch_size=256]

Epoch 2/10:  60%|███████▏    | 591/991 [2:30:34<1:41:16, 15.19s/batch, batch_loss=15.3, batch_index=591, batch_size=256]

Epoch 2/10:  60%|███████▏    | 591/991 [2:30:49<1:41:16, 15.19s/batch, batch_loss=8.05, batch_index=592, batch_size=256]

Epoch 2/10:  60%|███████▏    | 592/991 [2:30:49<1:41:08, 15.21s/batch, batch_loss=8.05, batch_index=592, batch_size=256]

Epoch 2/10:  60%|███████▏    | 592/991 [2:31:04<1:41:08, 15.21s/batch, batch_loss=11.6, batch_index=593, batch_size=256]

Epoch 2/10:  60%|███████▏    | 593/991 [2:31:04<1:41:05, 15.24s/batch, batch_loss=11.6, batch_index=593, batch_size=256]

Epoch 2/10:  60%|███████▏    | 593/991 [2:31:19<1:41:05, 15.24s/batch, batch_loss=10.7, batch_index=594, batch_size=256]

Epoch 2/10:  60%|███████▏    | 594/991 [2:31:19<1:39:24, 15.02s/batch, batch_loss=10.7, batch_index=594, batch_size=256]

Epoch 2/10:  60%|███████▏    | 594/991 [2:31:33<1:39:24, 15.02s/batch, batch_loss=6.31, batch_index=595, batch_size=256]

Epoch 2/10:  60%|███████▏    | 595/991 [2:31:33<1:37:52, 14.83s/batch, batch_loss=6.31, batch_index=595, batch_size=256]

Epoch 2/10:  60%|███████▏    | 595/991 [2:31:51<1:37:52, 14.83s/batch, batch_loss=7.61, batch_index=596, batch_size=256]

Epoch 2/10:  60%|███████▏    | 596/991 [2:31:51<1:43:10, 15.67s/batch, batch_loss=7.61, batch_index=596, batch_size=256]

Epoch 2/10:  60%|███████▏    | 596/991 [2:32:04<1:43:10, 15.67s/batch, batch_loss=25.3, batch_index=597, batch_size=256]

Epoch 2/10:  60%|███████▏    | 597/991 [2:32:04<1:38:41, 15.03s/batch, batch_loss=25.3, batch_index=597, batch_size=256]

Epoch 2/10:  60%|███████▏    | 597/991 [2:32:18<1:38:41, 15.03s/batch, batch_loss=8.87, batch_index=598, batch_size=256]

Epoch 2/10:  60%|███████▏    | 598/991 [2:32:18<1:37:02, 14.82s/batch, batch_loss=8.87, batch_index=598, batch_size=256]

Epoch 2/10:  60%|███████▏    | 598/991 [2:32:34<1:37:02, 14.82s/batch, batch_loss=17.3, batch_index=599, batch_size=256]

Epoch 2/10:  60%|███████▎    | 599/991 [2:32:34<1:37:32, 14.93s/batch, batch_loss=17.3, batch_index=599, batch_size=256]

Epoch 2/10:  60%|███████▎    | 599/991 [2:32:48<1:37:32, 14.93s/batch, batch_loss=11.6, batch_index=600, batch_size=256]

Epoch 2/10:  61%|███████▎    | 600/991 [2:32:48<1:36:29, 14.81s/batch, batch_loss=11.6, batch_index=600, batch_size=256]

Epoch 2/10:  61%|████████▍     | 600/991 [2:33:03<1:36:29, 14.81s/batch, batch_loss=16, batch_index=601, batch_size=256]

Epoch 2/10:  61%|████████▍     | 601/991 [2:33:03<1:36:31, 14.85s/batch, batch_loss=16, batch_index=601, batch_size=256]

Epoch 2/10:  61%|████████▍     | 601/991 [2:33:18<1:36:31, 14.85s/batch, batch_loss=11, batch_index=602, batch_size=256]

Epoch 2/10:  61%|████████▌     | 602/991 [2:33:18<1:36:39, 14.91s/batch, batch_loss=11, batch_index=602, batch_size=256]

Epoch 2/10:  61%|███████▎    | 602/991 [2:33:33<1:36:39, 14.91s/batch, batch_loss=6.36, batch_index=603, batch_size=256]

Epoch 2/10:  61%|███████▎    | 603/991 [2:33:33<1:36:04, 14.86s/batch, batch_loss=6.36, batch_index=603, batch_size=256]

Epoch 2/10:  61%|█████▍   | 603/991 [2:33:47<1:36:04, 14.86s/batch, batch_loss=1.01e+4, batch_index=604, batch_size=256]

Epoch 2/10:  61%|█████▍   | 604/991 [2:33:47<1:34:23, 14.63s/batch, batch_loss=1.01e+4, batch_index=604, batch_size=256]

Epoch 2/10:  61%|███████▎    | 604/991 [2:34:02<1:34:23, 14.63s/batch, batch_loss=10.1, batch_index=605, batch_size=256]

Epoch 2/10:  61%|███████▎    | 605/991 [2:34:02<1:34:14, 14.65s/batch, batch_loss=10.1, batch_index=605, batch_size=256]

Epoch 2/10:  61%|███████▎    | 605/991 [2:34:17<1:34:14, 14.65s/batch, batch_loss=9.28, batch_index=606, batch_size=256]

Epoch 2/10:  61%|███████▎    | 606/991 [2:34:17<1:34:30, 14.73s/batch, batch_loss=9.28, batch_index=606, batch_size=256]

Epoch 2/10:  61%|███████▎    | 606/991 [2:34:31<1:34:30, 14.73s/batch, batch_loss=11.5, batch_index=607, batch_size=256]

Epoch 2/10:  61%|███████▎    | 607/991 [2:34:31<1:33:43, 14.64s/batch, batch_loss=11.5, batch_index=607, batch_size=256]

Epoch 2/10:  61%|███████▎    | 607/991 [2:34:44<1:33:43, 14.64s/batch, batch_loss=12.8, batch_index=608, batch_size=256]

Epoch 2/10:  61%|███████▎    | 608/991 [2:34:44<1:30:51, 14.23s/batch, batch_loss=12.8, batch_index=608, batch_size=256]

Epoch 2/10:  61%|████████▌     | 608/991 [2:35:00<1:30:51, 14.23s/batch, batch_loss=15, batch_index=609, batch_size=256]

Epoch 2/10:  61%|████████▌     | 609/991 [2:35:00<1:34:10, 14.79s/batch, batch_loss=15, batch_index=609, batch_size=256]

Epoch 2/10:  61%|███████▎    | 609/991 [2:35:16<1:34:10, 14.79s/batch, batch_loss=15.9, batch_index=610, batch_size=256]

Epoch 2/10:  62%|███████▍    | 610/991 [2:35:16<1:36:11, 15.15s/batch, batch_loss=15.9, batch_index=610, batch_size=256]

Epoch 2/10:  62%|███████▍    | 610/991 [2:35:32<1:36:11, 15.15s/batch, batch_loss=25.1, batch_index=611, batch_size=256]

Epoch 2/10:  62%|███████▍    | 611/991 [2:35:32<1:36:35, 15.25s/batch, batch_loss=25.1, batch_index=611, batch_size=256]

Epoch 2/10:  62%|███████▍    | 611/991 [2:35:51<1:36:35, 15.25s/batch, batch_loss=7.31, batch_index=612, batch_size=256]

Epoch 2/10:  62%|███████▍    | 612/991 [2:35:51<1:42:39, 16.25s/batch, batch_loss=7.31, batch_index=612, batch_size=256]

Epoch 2/10:  62%|████████▋     | 612/991 [2:36:07<1:42:39, 16.25s/batch, batch_loss=14, batch_index=613, batch_size=256]

Epoch 2/10:  62%|████████▋     | 613/991 [2:36:07<1:41:58, 16.19s/batch, batch_loss=14, batch_index=613, batch_size=256]

Epoch 2/10:  62%|█████▌   | 613/991 [2:36:23<1:41:58, 16.19s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 2/10:  62%|█████▌   | 614/991 [2:36:23<1:41:35, 16.17s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 2/10:  62%|████████     | 614/991 [2:36:39<1:41:35, 16.17s/batch, batch_loss=988, batch_index=615, batch_size=256]

Epoch 2/10:  62%|████████     | 615/991 [2:36:39<1:42:02, 16.28s/batch, batch_loss=988, batch_index=615, batch_size=256]

Epoch 2/10:  62%|███████▍    | 615/991 [2:36:54<1:42:02, 16.28s/batch, batch_loss=8.18, batch_index=616, batch_size=256]

Epoch 2/10:  62%|███████▍    | 616/991 [2:36:54<1:39:34, 15.93s/batch, batch_loss=8.18, batch_index=616, batch_size=256]

Epoch 2/10:  62%|███████▍    | 616/991 [2:37:10<1:39:34, 15.93s/batch, batch_loss=18.7, batch_index=617, batch_size=256]

Epoch 2/10:  62%|███████▍    | 617/991 [2:37:10<1:38:53, 15.87s/batch, batch_loss=18.7, batch_index=617, batch_size=256]

Epoch 2/10:  62%|███████▍    | 617/991 [2:37:28<1:38:53, 15.87s/batch, batch_loss=12.3, batch_index=618, batch_size=256]

Epoch 2/10:  62%|███████▍    | 618/991 [2:37:28<1:42:35, 16.50s/batch, batch_loss=12.3, batch_index=618, batch_size=256]

Epoch 2/10:  62%|███████▍    | 618/991 [2:37:44<1:42:35, 16.50s/batch, batch_loss=18.5, batch_index=619, batch_size=256]

Epoch 2/10:  62%|███████▍    | 619/991 [2:37:44<1:41:30, 16.37s/batch, batch_loss=18.5, batch_index=619, batch_size=256]

Epoch 2/10:  62%|███████▍    | 619/991 [2:38:00<1:41:30, 16.37s/batch, batch_loss=12.7, batch_index=620, batch_size=256]

Epoch 2/10:  63%|███████▌    | 620/991 [2:38:00<1:40:32, 16.26s/batch, batch_loss=12.7, batch_index=620, batch_size=256]

Epoch 2/10:  63%|███████▌    | 620/991 [2:38:14<1:40:32, 16.26s/batch, batch_loss=9.42, batch_index=621, batch_size=256]

Epoch 2/10:  63%|███████▌    | 621/991 [2:38:14<1:36:28, 15.64s/batch, batch_loss=9.42, batch_index=621, batch_size=256]

Epoch 2/10:  63%|██████▎   | 621/991 [2:38:30<1:36:28, 15.64s/batch, batch_loss=5.5e+3, batch_index=622, batch_size=256]

Epoch 2/10:  63%|██████▎   | 622/991 [2:38:30<1:36:21, 15.67s/batch, batch_loss=5.5e+3, batch_index=622, batch_size=256]

Epoch 2/10:  63%|███████▌    | 622/991 [2:38:48<1:36:21, 15.67s/batch, batch_loss=21.3, batch_index=623, batch_size=256]

Epoch 2/10:  63%|███████▌    | 623/991 [2:38:48<1:39:48, 16.27s/batch, batch_loss=21.3, batch_index=623, batch_size=256]

Epoch 2/10:  63%|██████▎   | 623/991 [2:39:03<1:39:48, 16.27s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 2/10:  63%|██████▎   | 624/991 [2:39:03<1:36:50, 15.83s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 2/10:  63%|███████▌    | 624/991 [2:39:17<1:36:50, 15.83s/batch, batch_loss=8.02, batch_index=625, batch_size=256]

Epoch 2/10:  63%|███████▌    | 625/991 [2:39:17<1:34:53, 15.56s/batch, batch_loss=8.02, batch_index=625, batch_size=256]

Epoch 2/10:  63%|███████▌    | 625/991 [2:39:33<1:34:53, 15.56s/batch, batch_loss=6.24, batch_index=626, batch_size=256]

Epoch 2/10:  63%|███████▌    | 626/991 [2:39:33<1:34:33, 15.54s/batch, batch_loss=6.24, batch_index=626, batch_size=256]

Epoch 2/10:  63%|█████▋   | 626/991 [2:39:47<1:34:33, 15.54s/batch, batch_loss=4.31e+3, batch_index=627, batch_size=256]

Epoch 2/10:  63%|█████▋   | 627/991 [2:39:47<1:31:22, 15.06s/batch, batch_loss=4.31e+3, batch_index=627, batch_size=256]

Epoch 2/10:  63%|█████▋   | 627/991 [2:40:00<1:31:22, 15.06s/batch, batch_loss=1.05e+3, batch_index=628, batch_size=256]

Epoch 2/10:  63%|█████▋   | 628/991 [2:40:00<1:28:20, 14.60s/batch, batch_loss=1.05e+3, batch_index=628, batch_size=256]

Epoch 2/10:  63%|███████▌    | 628/991 [2:40:15<1:28:20, 14.60s/batch, batch_loss=13.1, batch_index=629, batch_size=256]

Epoch 2/10:  63%|███████▌    | 629/991 [2:40:15<1:27:17, 14.47s/batch, batch_loss=13.1, batch_index=629, batch_size=256]

Epoch 2/10:  63%|███████▌    | 629/991 [2:40:28<1:27:17, 14.47s/batch, batch_loss=19.9, batch_index=630, batch_size=256]

Epoch 2/10:  64%|███████▋    | 630/991 [2:40:28<1:25:18, 14.18s/batch, batch_loss=19.9, batch_index=630, batch_size=256]

Epoch 2/10:  64%|████████▉     | 630/991 [2:40:42<1:25:18, 14.18s/batch, batch_loss=18, batch_index=631, batch_size=256]

Epoch 2/10:  64%|████████▉     | 631/991 [2:40:42<1:25:08, 14.19s/batch, batch_loss=18, batch_index=631, batch_size=256]

Epoch 2/10:  64%|███████▋    | 631/991 [2:40:56<1:25:08, 14.19s/batch, batch_loss=3.09, batch_index=632, batch_size=256]

Epoch 2/10:  64%|███████▋    | 632/991 [2:40:56<1:24:11, 14.07s/batch, batch_loss=3.09, batch_index=632, batch_size=256]

Epoch 2/10:  64%|███████▋    | 632/991 [2:41:11<1:24:11, 14.07s/batch, batch_loss=19.9, batch_index=633, batch_size=256]

Epoch 2/10:  64%|███████▋    | 633/991 [2:41:11<1:25:34, 14.34s/batch, batch_loss=19.9, batch_index=633, batch_size=256]

Epoch 2/10:  64%|███████▋    | 633/991 [2:41:25<1:25:34, 14.34s/batch, batch_loss=24.9, batch_index=634, batch_size=256]

Epoch 2/10:  64%|███████▋    | 634/991 [2:41:25<1:24:51, 14.26s/batch, batch_loss=24.9, batch_index=634, batch_size=256]

Epoch 2/10:  64%|████████▉     | 634/991 [2:41:39<1:24:51, 14.26s/batch, batch_loss=20, batch_index=635, batch_size=256]

Epoch 2/10:  64%|████████▉     | 635/991 [2:41:39<1:24:24, 14.23s/batch, batch_loss=20, batch_index=635, batch_size=256]

Epoch 2/10:  64%|███████▋    | 635/991 [2:41:54<1:24:24, 14.23s/batch, batch_loss=15.6, batch_index=636, batch_size=256]

Epoch 2/10:  64%|███████▋    | 636/991 [2:41:54<1:24:25, 14.27s/batch, batch_loss=15.6, batch_index=636, batch_size=256]

Epoch 2/10:  64%|███████▋    | 636/991 [2:42:08<1:24:25, 14.27s/batch, batch_loss=18.4, batch_index=637, batch_size=256]

Epoch 2/10:  64%|███████▋    | 637/991 [2:42:08<1:24:45, 14.37s/batch, batch_loss=18.4, batch_index=637, batch_size=256]

Epoch 2/10:  64%|███████▋    | 637/991 [2:42:23<1:24:45, 14.37s/batch, batch_loss=18.4, batch_index=638, batch_size=256]

Epoch 2/10:  64%|███████▋    | 638/991 [2:42:23<1:25:53, 14.60s/batch, batch_loss=18.4, batch_index=638, batch_size=256]

Epoch 2/10:  64%|███████▋    | 638/991 [2:42:38<1:25:53, 14.60s/batch, batch_loss=13.1, batch_index=639, batch_size=256]

Epoch 2/10:  64%|███████▋    | 639/991 [2:42:38<1:25:09, 14.52s/batch, batch_loss=13.1, batch_index=639, batch_size=256]

Epoch 2/10:  64%|████████▍    | 639/991 [2:42:52<1:25:09, 14.52s/batch, batch_loss=675, batch_index=640, batch_size=256]

Epoch 2/10:  65%|████████▍    | 640/991 [2:42:52<1:24:48, 14.50s/batch, batch_loss=675, batch_index=640, batch_size=256]

Epoch 2/10:  65%|███████▋    | 640/991 [2:43:08<1:24:48, 14.50s/batch, batch_loss=15.2, batch_index=641, batch_size=256]

Epoch 2/10:  65%|███████▊    | 641/991 [2:43:08<1:26:08, 14.77s/batch, batch_loss=15.2, batch_index=641, batch_size=256]

Epoch 2/10:  65%|███████▊    | 641/991 [2:43:23<1:26:08, 14.77s/batch, batch_loss=9.64, batch_index=642, batch_size=256]

Epoch 2/10:  65%|███████▊    | 642/991 [2:43:23<1:26:46, 14.92s/batch, batch_loss=9.64, batch_index=642, batch_size=256]

Epoch 2/10:  65%|█████▊   | 642/991 [2:43:38<1:26:46, 14.92s/batch, batch_loss=2.12e+4, batch_index=643, batch_size=256]

Epoch 2/10:  65%|█████▊   | 643/991 [2:43:38<1:27:08, 15.02s/batch, batch_loss=2.12e+4, batch_index=643, batch_size=256]

Epoch 2/10:  65%|█████▊   | 643/991 [2:43:53<1:27:08, 15.02s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 2/10:  65%|█████▊   | 644/991 [2:43:53<1:26:38, 14.98s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 2/10:  65%|█████▊   | 644/991 [2:44:09<1:26:38, 14.98s/batch, batch_loss=2.19e+3, batch_index=645, batch_size=256]

Epoch 2/10:  65%|█████▊   | 645/991 [2:44:09<1:28:16, 15.31s/batch, batch_loss=2.19e+3, batch_index=645, batch_size=256]

Epoch 2/10:  65%|███████▊    | 645/991 [2:44:24<1:28:16, 15.31s/batch, batch_loss=12.2, batch_index=646, batch_size=256]

Epoch 2/10:  65%|███████▊    | 646/991 [2:44:24<1:27:35, 15.23s/batch, batch_loss=12.2, batch_index=646, batch_size=256]

Epoch 2/10:  65%|███████▊    | 646/991 [2:44:39<1:27:35, 15.23s/batch, batch_loss=12.4, batch_index=647, batch_size=256]

Epoch 2/10:  65%|███████▊    | 647/991 [2:44:39<1:27:11, 15.21s/batch, batch_loss=12.4, batch_index=647, batch_size=256]

Epoch 2/10:  65%|███████▊    | 647/991 [2:44:54<1:27:11, 15.21s/batch, batch_loss=14.5, batch_index=648, batch_size=256]

Epoch 2/10:  65%|███████▊    | 648/991 [2:44:54<1:26:14, 15.09s/batch, batch_loss=14.5, batch_index=648, batch_size=256]

Epoch 2/10:  65%|███████▊    | 648/991 [2:45:10<1:26:14, 15.09s/batch, batch_loss=15.3, batch_index=649, batch_size=256]

Epoch 2/10:  65%|███████▊    | 649/991 [2:45:10<1:26:41, 15.21s/batch, batch_loss=15.3, batch_index=649, batch_size=256]

Epoch 2/10:  65%|█████▉   | 649/991 [2:45:25<1:26:41, 15.21s/batch, batch_loss=1.35e+4, batch_index=650, batch_size=256]

Epoch 2/10:  66%|█████▉   | 650/991 [2:45:25<1:26:40, 15.25s/batch, batch_loss=1.35e+4, batch_index=650, batch_size=256]

Epoch 2/10:  66%|███████▊    | 650/991 [2:45:39<1:26:40, 15.25s/batch, batch_loss=10.3, batch_index=651, batch_size=256]

Epoch 2/10:  66%|███████▉    | 651/991 [2:45:39<1:24:57, 14.99s/batch, batch_loss=10.3, batch_index=651, batch_size=256]

Epoch 2/10:  66%|███████▉    | 651/991 [2:45:54<1:24:57, 14.99s/batch, batch_loss=13.7, batch_index=652, batch_size=256]

Epoch 2/10:  66%|███████▉    | 652/991 [2:45:54<1:23:49, 14.84s/batch, batch_loss=13.7, batch_index=652, batch_size=256]

Epoch 2/10:  66%|███████▉    | 652/991 [2:46:09<1:23:49, 14.84s/batch, batch_loss=19.6, batch_index=653, batch_size=256]

Epoch 2/10:  66%|███████▉    | 653/991 [2:46:09<1:23:39, 14.85s/batch, batch_loss=19.6, batch_index=653, batch_size=256]

Epoch 2/10:  66%|█████████▏    | 653/991 [2:46:24<1:23:39, 14.85s/batch, batch_loss=19, batch_index=654, batch_size=256]

Epoch 2/10:  66%|█████████▏    | 654/991 [2:46:24<1:23:34, 14.88s/batch, batch_loss=19, batch_index=654, batch_size=256]

Epoch 2/10:  66%|█████▉   | 654/991 [2:46:39<1:23:34, 14.88s/batch, batch_loss=3.84e+3, batch_index=655, batch_size=256]

Epoch 2/10:  66%|█████▉   | 655/991 [2:46:39<1:23:50, 14.97s/batch, batch_loss=3.84e+3, batch_index=655, batch_size=256]

Epoch 2/10:  66%|█████▉   | 655/991 [2:46:54<1:23:50, 14.97s/batch, batch_loss=5.19e+3, batch_index=656, batch_size=256]

Epoch 2/10:  66%|█████▉   | 656/991 [2:46:54<1:23:42, 14.99s/batch, batch_loss=5.19e+3, batch_index=656, batch_size=256]

Epoch 2/10:  66%|█████▉   | 656/991 [2:47:08<1:23:42, 14.99s/batch, batch_loss=4.22e+3, batch_index=657, batch_size=256]

Epoch 2/10:  66%|█████▉   | 657/991 [2:47:08<1:22:00, 14.73s/batch, batch_loss=4.22e+3, batch_index=657, batch_size=256]

Epoch 2/10:  66%|██████▋   | 657/991 [2:47:23<1:22:00, 14.73s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 2/10:  66%|██████▋   | 658/991 [2:47:23<1:22:42, 14.90s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 2/10:  66%|███████▉    | 658/991 [2:47:39<1:22:42, 14.90s/batch, batch_loss=4.56, batch_index=659, batch_size=256]

Epoch 2/10:  66%|███████▉    | 659/991 [2:47:39<1:23:30, 15.09s/batch, batch_loss=4.56, batch_index=659, batch_size=256]

Epoch 2/10:  66%|███████▉    | 659/991 [2:47:53<1:23:30, 15.09s/batch, batch_loss=4.63, batch_index=660, batch_size=256]

Epoch 2/10:  67%|███████▉    | 660/991 [2:47:53<1:22:05, 14.88s/batch, batch_loss=4.63, batch_index=660, batch_size=256]

Epoch 2/10:  67%|███████▉    | 660/991 [2:48:08<1:22:05, 14.88s/batch, batch_loss=12.8, batch_index=661, batch_size=256]

Epoch 2/10:  67%|████████    | 661/991 [2:48:08<1:21:54, 14.89s/batch, batch_loss=12.8, batch_index=661, batch_size=256]

Epoch 2/10:  67%|████████    | 661/991 [2:48:24<1:21:54, 14.89s/batch, batch_loss=15.6, batch_index=662, batch_size=256]

Epoch 2/10:  67%|████████    | 662/991 [2:48:24<1:22:56, 15.12s/batch, batch_loss=15.6, batch_index=662, batch_size=256]

Epoch 2/10:  67%|█████████▎    | 662/991 [2:48:39<1:22:56, 15.12s/batch, batch_loss=18, batch_index=663, batch_size=256]

Epoch 2/10:  67%|█████████▎    | 663/991 [2:48:39<1:22:16, 15.05s/batch, batch_loss=18, batch_index=663, batch_size=256]

Epoch 2/10:  67%|██████   | 663/991 [2:48:53<1:22:16, 15.05s/batch, batch_loss=3.04e+3, batch_index=664, batch_size=256]

Epoch 2/10:  67%|██████   | 664/991 [2:48:53<1:21:10, 14.89s/batch, batch_loss=3.04e+3, batch_index=664, batch_size=256]

Epoch 2/10:  67%|████████    | 664/991 [2:49:08<1:21:10, 14.89s/batch, batch_loss=13.4, batch_index=665, batch_size=256]

Epoch 2/10:  67%|████████    | 665/991 [2:49:08<1:21:30, 15.00s/batch, batch_loss=13.4, batch_index=665, batch_size=256]

Epoch 2/10:  67%|██████   | 665/991 [2:49:25<1:21:30, 15.00s/batch, batch_loss=3.06e+3, batch_index=666, batch_size=256]

Epoch 2/10:  67%|██████   | 666/991 [2:49:25<1:24:17, 15.56s/batch, batch_loss=3.06e+3, batch_index=666, batch_size=256]

Epoch 2/10:  67%|████████    | 666/991 [2:49:40<1:24:17, 15.56s/batch, batch_loss=18.9, batch_index=667, batch_size=256]

Epoch 2/10:  67%|████████    | 667/991 [2:49:40<1:23:11, 15.41s/batch, batch_loss=18.9, batch_index=667, batch_size=256]

Epoch 2/10:  67%|████████▋    | 667/991 [2:49:55<1:23:11, 15.41s/batch, batch_loss=375, batch_index=668, batch_size=256]

Epoch 2/10:  67%|████████▊    | 668/991 [2:49:55<1:21:14, 15.09s/batch, batch_loss=375, batch_index=668, batch_size=256]

Epoch 2/10:  67%|██████   | 668/991 [2:50:10<1:21:14, 15.09s/batch, batch_loss=2.96e+3, batch_index=669, batch_size=256]

Epoch 2/10:  68%|██████   | 669/991 [2:50:10<1:21:52, 15.26s/batch, batch_loss=2.96e+3, batch_index=669, batch_size=256]

Epoch 2/10:  68%|██████   | 669/991 [2:50:26<1:21:52, 15.26s/batch, batch_loss=1.02e+3, batch_index=670, batch_size=256]

Epoch 2/10:  68%|██████   | 670/991 [2:50:26<1:22:29, 15.42s/batch, batch_loss=1.02e+3, batch_index=670, batch_size=256]

Epoch 2/10:  68%|████████    | 670/991 [2:50:39<1:22:29, 15.42s/batch, batch_loss=10.6, batch_index=671, batch_size=256]

Epoch 2/10:  68%|████████▏   | 671/991 [2:50:39<1:17:52, 14.60s/batch, batch_loss=10.6, batch_index=671, batch_size=256]

Epoch 2/10:  68%|████████▏   | 671/991 [2:50:51<1:17:52, 14.60s/batch, batch_loss=14.4, batch_index=672, batch_size=256]

Epoch 2/10:  68%|████████▏   | 672/991 [2:50:51<1:14:17, 13.97s/batch, batch_loss=14.4, batch_index=672, batch_size=256]

Epoch 2/10:  68%|████████▏   | 672/991 [2:51:07<1:14:17, 13.97s/batch, batch_loss=17.3, batch_index=673, batch_size=256]

Epoch 2/10:  68%|████████▏   | 673/991 [2:51:07<1:16:01, 14.34s/batch, batch_loss=17.3, batch_index=673, batch_size=256]

Epoch 2/10:  68%|█████████▌    | 673/991 [2:51:22<1:16:01, 14.34s/batch, batch_loss=16, batch_index=674, batch_size=256]

Epoch 2/10:  68%|█████████▌    | 674/991 [2:51:22<1:17:40, 14.70s/batch, batch_loss=16, batch_index=674, batch_size=256]

Epoch 2/10:  68%|████████▏   | 674/991 [2:51:37<1:17:40, 14.70s/batch, batch_loss=4.29, batch_index=675, batch_size=256]

Epoch 2/10:  68%|████████▏   | 675/991 [2:51:37<1:18:18, 14.87s/batch, batch_loss=4.29, batch_index=675, batch_size=256]

Epoch 2/10:  68%|████████▏   | 675/991 [2:51:56<1:18:18, 14.87s/batch, batch_loss=10.4, batch_index=676, batch_size=256]

Epoch 2/10:  68%|████████▏   | 676/991 [2:51:56<1:23:29, 15.90s/batch, batch_loss=10.4, batch_index=676, batch_size=256]

Epoch 2/10:  68%|████████▏   | 676/991 [2:52:11<1:23:29, 15.90s/batch, batch_loss=17.9, batch_index=677, batch_size=256]

Epoch 2/10:  68%|████████▏   | 677/991 [2:52:11<1:22:12, 15.71s/batch, batch_loss=17.9, batch_index=677, batch_size=256]

Epoch 2/10:  68%|████████▏   | 677/991 [2:52:26<1:22:12, 15.71s/batch, batch_loss=7.41, batch_index=678, batch_size=256]

Epoch 2/10:  68%|████████▏   | 678/991 [2:52:26<1:21:09, 15.56s/batch, batch_loss=7.41, batch_index=678, batch_size=256]

Epoch 2/10:  68%|██████▏  | 678/991 [2:52:42<1:21:09, 15.56s/batch, batch_loss=3.81e+3, batch_index=679, batch_size=256]

Epoch 2/10:  69%|██████▏  | 679/991 [2:52:42<1:21:20, 15.64s/batch, batch_loss=3.81e+3, batch_index=679, batch_size=256]

Epoch 2/10:  69%|██████▊   | 679/991 [2:52:58<1:21:20, 15.64s/batch, batch_loss=6.1e+3, batch_index=680, batch_size=256]

Epoch 2/10:  69%|██████▊   | 680/991 [2:52:58<1:21:25, 15.71s/batch, batch_loss=6.1e+3, batch_index=680, batch_size=256]

Epoch 2/10:  69%|██████▏  | 680/991 [2:53:14<1:21:25, 15.71s/batch, batch_loss=7.23e+4, batch_index=681, batch_size=256]

Epoch 2/10:  69%|██████▏  | 681/991 [2:53:14<1:21:34, 15.79s/batch, batch_loss=7.23e+4, batch_index=681, batch_size=256]

Epoch 2/10:  69%|████████▏   | 681/991 [2:53:32<1:21:34, 15.79s/batch, batch_loss=12.6, batch_index=682, batch_size=256]

Epoch 2/10:  69%|████████▎   | 682/991 [2:53:32<1:24:20, 16.38s/batch, batch_loss=12.6, batch_index=682, batch_size=256]

Epoch 2/10:  69%|████████▉    | 682/991 [2:53:47<1:24:20, 16.38s/batch, batch_loss=377, batch_index=683, batch_size=256]

Epoch 2/10:  69%|████████▉    | 683/991 [2:53:47<1:22:20, 16.04s/batch, batch_loss=377, batch_index=683, batch_size=256]

Epoch 2/10:  69%|████████▎   | 683/991 [2:54:01<1:22:20, 16.04s/batch, batch_loss=5.26, batch_index=684, batch_size=256]

Epoch 2/10:  69%|████████▎   | 684/991 [2:54:01<1:19:50, 15.60s/batch, batch_loss=5.26, batch_index=684, batch_size=256]

Epoch 2/10:  69%|████████▎   | 684/991 [2:54:16<1:19:50, 15.60s/batch, batch_loss=12.3, batch_index=685, batch_size=256]

Epoch 2/10:  69%|████████▎   | 685/991 [2:54:16<1:18:40, 15.43s/batch, batch_loss=12.3, batch_index=685, batch_size=256]

Epoch 2/10:  69%|████████▎   | 685/991 [2:54:32<1:18:40, 15.43s/batch, batch_loss=13.8, batch_index=686, batch_size=256]

Epoch 2/10:  69%|████████▎   | 686/991 [2:54:32<1:18:47, 15.50s/batch, batch_loss=13.8, batch_index=686, batch_size=256]

Epoch 2/10:  69%|████████▉    | 686/991 [2:54:48<1:18:47, 15.50s/batch, batch_loss=535, batch_index=687, batch_size=256]

Epoch 2/10:  69%|█████████    | 687/991 [2:54:48<1:19:16, 15.65s/batch, batch_loss=535, batch_index=687, batch_size=256]

Epoch 2/10:  69%|████████▎   | 687/991 [2:55:04<1:19:16, 15.65s/batch, batch_loss=4.72, batch_index=688, batch_size=256]

Epoch 2/10:  69%|████████▎   | 688/991 [2:55:04<1:18:46, 15.60s/batch, batch_loss=4.72, batch_index=688, batch_size=256]

Epoch 2/10:  69%|████████▎   | 688/991 [2:55:19<1:18:46, 15.60s/batch, batch_loss=5.31, batch_index=689, batch_size=256]

Epoch 2/10:  70%|████████▎   | 689/991 [2:55:19<1:18:09, 15.53s/batch, batch_loss=5.31, batch_index=689, batch_size=256]

Epoch 2/10:  70%|████████▎   | 689/991 [2:55:36<1:18:09, 15.53s/batch, batch_loss=11.8, batch_index=690, batch_size=256]

Epoch 2/10:  70%|████████▎   | 690/991 [2:55:36<1:20:22, 16.02s/batch, batch_loss=11.8, batch_index=690, batch_size=256]

Epoch 2/10:  70%|████████▎   | 690/991 [2:55:51<1:20:22, 16.02s/batch, batch_loss=15.6, batch_index=691, batch_size=256]

Epoch 2/10:  70%|████████▎   | 691/991 [2:55:51<1:18:59, 15.80s/batch, batch_loss=15.6, batch_index=691, batch_size=256]

Epoch 2/10:  70%|████████▎   | 691/991 [2:56:07<1:18:59, 15.80s/batch, batch_loss=5.95, batch_index=692, batch_size=256]

Epoch 2/10:  70%|████████▍   | 692/991 [2:56:07<1:18:18, 15.71s/batch, batch_loss=5.95, batch_index=692, batch_size=256]

Epoch 2/10:  70%|██████▎  | 692/991 [2:56:22<1:18:18, 15.71s/batch, batch_loss=4.65e+3, batch_index=693, batch_size=256]

Epoch 2/10:  70%|██████▎  | 693/991 [2:56:22<1:17:42, 15.65s/batch, batch_loss=4.65e+3, batch_index=693, batch_size=256]

Epoch 2/10:  70%|█████████    | 693/991 [2:56:37<1:17:42, 15.65s/batch, batch_loss=450, batch_index=694, batch_size=256]

Epoch 2/10:  70%|█████████    | 694/991 [2:56:37<1:16:30, 15.46s/batch, batch_loss=450, batch_index=694, batch_size=256]

Epoch 2/10:  70%|█████████    | 694/991 [2:56:52<1:16:30, 15.46s/batch, batch_loss=778, batch_index=695, batch_size=256]

Epoch 2/10:  70%|█████████    | 695/991 [2:56:52<1:15:05, 15.22s/batch, batch_loss=778, batch_index=695, batch_size=256]

Epoch 2/10:  70%|████████▍   | 695/991 [2:57:06<1:15:05, 15.22s/batch, batch_loss=8.78, batch_index=696, batch_size=256]

Epoch 2/10:  70%|████████▍   | 696/991 [2:57:06<1:12:35, 14.76s/batch, batch_loss=8.78, batch_index=696, batch_size=256]

Epoch 2/10:  70%|███████   | 696/991 [2:57:21<1:12:35, 14.76s/batch, batch_loss=6.8e+3, batch_index=697, batch_size=256]

Epoch 2/10:  70%|███████   | 697/991 [2:57:21<1:13:16, 14.96s/batch, batch_loss=6.8e+3, batch_index=697, batch_size=256]

Epoch 2/10:  70%|████████▍   | 697/991 [2:57:36<1:13:16, 14.96s/batch, batch_loss=12.9, batch_index=698, batch_size=256]

Epoch 2/10:  70%|████████▍   | 698/991 [2:57:36<1:12:21, 14.82s/batch, batch_loss=12.9, batch_index=698, batch_size=256]

Epoch 2/10:  70%|████████▍   | 698/991 [2:57:50<1:12:21, 14.82s/batch, batch_loss=7.92, batch_index=699, batch_size=256]

Epoch 2/10:  71%|████████▍   | 699/991 [2:57:50<1:11:41, 14.73s/batch, batch_loss=7.92, batch_index=699, batch_size=256]

Epoch 2/10:  71%|████████▍   | 699/991 [2:58:05<1:11:41, 14.73s/batch, batch_loss=9.87, batch_index=700, batch_size=256]

Epoch 2/10:  71%|████████▍   | 700/991 [2:58:05<1:11:24, 14.72s/batch, batch_loss=9.87, batch_index=700, batch_size=256]

Epoch 2/10:  71%|█████████▏   | 700/991 [2:58:21<1:11:24, 14.72s/batch, batch_loss=210, batch_index=701, batch_size=256]

Epoch 2/10:  71%|█████████▏   | 701/991 [2:58:21<1:12:37, 15.03s/batch, batch_loss=210, batch_index=701, batch_size=256]

Epoch 2/10:  71%|████████▍   | 701/991 [2:58:35<1:12:37, 15.03s/batch, batch_loss=20.2, batch_index=702, batch_size=256]

Epoch 2/10:  71%|████████▌   | 702/991 [2:58:35<1:11:38, 14.87s/batch, batch_loss=20.2, batch_index=702, batch_size=256]

Epoch 2/10:  71%|█████████▏   | 702/991 [2:58:50<1:11:38, 14.87s/batch, batch_loss=275, batch_index=703, batch_size=256]

Epoch 2/10:  71%|█████████▏   | 703/991 [2:58:50<1:10:50, 14.76s/batch, batch_loss=275, batch_index=703, batch_size=256]

Epoch 2/10:  71%|████████▌   | 703/991 [2:59:05<1:10:50, 14.76s/batch, batch_loss=8.19, batch_index=704, batch_size=256]

Epoch 2/10:  71%|████████▌   | 704/991 [2:59:05<1:11:09, 14.88s/batch, batch_loss=8.19, batch_index=704, batch_size=256]

Epoch 2/10:  71%|████████▌   | 704/991 [2:59:19<1:11:09, 14.88s/batch, batch_loss=11.9, batch_index=705, batch_size=256]

Epoch 2/10:  71%|████████▌   | 705/991 [2:59:19<1:09:56, 14.67s/batch, batch_loss=11.9, batch_index=705, batch_size=256]

Epoch 2/10:  71%|████████▌   | 705/991 [2:59:36<1:09:56, 14.67s/batch, batch_loss=17.2, batch_index=706, batch_size=256]

Epoch 2/10:  71%|████████▌   | 706/991 [2:59:36<1:13:10, 15.41s/batch, batch_loss=17.2, batch_index=706, batch_size=256]

Epoch 2/10:  71%|████████▌   | 706/991 [2:59:51<1:13:10, 15.41s/batch, batch_loss=16.5, batch_index=707, batch_size=256]

Epoch 2/10:  71%|████████▌   | 707/991 [2:59:51<1:11:54, 15.19s/batch, batch_loss=16.5, batch_index=707, batch_size=256]

Epoch 2/10:  71%|█████████▎   | 707/991 [3:00:06<1:11:54, 15.19s/batch, batch_loss=9.2, batch_index=708, batch_size=256]

Epoch 2/10:  71%|█████████▎   | 708/991 [3:00:06<1:12:16, 15.32s/batch, batch_loss=9.2, batch_index=708, batch_size=256]

Epoch 2/10:  71%|████████▌   | 708/991 [3:00:22<1:12:16, 15.32s/batch, batch_loss=7.47, batch_index=709, batch_size=256]

Epoch 2/10:  72%|████████▌   | 709/991 [3:00:22<1:13:03, 15.54s/batch, batch_loss=7.47, batch_index=709, batch_size=256]

Epoch 2/10:  72%|████████▌   | 709/991 [3:00:39<1:13:03, 15.54s/batch, batch_loss=28.9, batch_index=710, batch_size=256]

Epoch 2/10:  72%|████████▌   | 710/991 [3:00:39<1:13:42, 15.74s/batch, batch_loss=28.9, batch_index=710, batch_size=256]

Epoch 2/10:  72%|████████▌   | 710/991 [3:00:54<1:13:42, 15.74s/batch, batch_loss=97.2, batch_index=711, batch_size=256]

Epoch 2/10:  72%|████████▌   | 711/991 [3:00:54<1:13:19, 15.71s/batch, batch_loss=97.2, batch_index=711, batch_size=256]

Epoch 2/10:  72%|████████▌   | 711/991 [3:01:10<1:13:19, 15.71s/batch, batch_loss=12.4, batch_index=712, batch_size=256]

Epoch 2/10:  72%|████████▌   | 712/991 [3:01:10<1:12:59, 15.70s/batch, batch_loss=12.4, batch_index=712, batch_size=256]

Epoch 2/10:  72%|████████▌   | 712/991 [3:01:25<1:12:59, 15.70s/batch, batch_loss=76.5, batch_index=713, batch_size=256]

Epoch 2/10:  72%|████████▋   | 713/991 [3:01:25<1:11:36, 15.45s/batch, batch_loss=76.5, batch_index=713, batch_size=256]

Epoch 2/10:  72%|████████▋   | 713/991 [3:01:41<1:11:36, 15.45s/batch, batch_loss=25.3, batch_index=714, batch_size=256]

Epoch 2/10:  72%|████████▋   | 714/991 [3:01:41<1:12:26, 15.69s/batch, batch_loss=25.3, batch_index=714, batch_size=256]

Epoch 2/10:  72%|████████▋   | 714/991 [3:01:57<1:12:26, 15.69s/batch, batch_loss=18.4, batch_index=715, batch_size=256]

Epoch 2/10:  72%|████████▋   | 715/991 [3:01:57<1:12:34, 15.78s/batch, batch_loss=18.4, batch_index=715, batch_size=256]

Epoch 2/10:  72%|████████▋   | 715/991 [3:02:13<1:12:34, 15.78s/batch, batch_loss=14.9, batch_index=716, batch_size=256]

Epoch 2/10:  72%|████████▋   | 716/991 [3:02:13<1:12:55, 15.91s/batch, batch_loss=14.9, batch_index=716, batch_size=256]

Epoch 2/10:  72%|████████▋   | 716/991 [3:02:29<1:12:55, 15.91s/batch, batch_loss=17.6, batch_index=717, batch_size=256]

Epoch 2/10:  72%|████████▋   | 717/991 [3:02:29<1:12:38, 15.91s/batch, batch_loss=17.6, batch_index=717, batch_size=256]

Epoch 2/10:  72%|████████▋   | 717/991 [3:02:45<1:12:38, 15.91s/batch, batch_loss=21.8, batch_index=718, batch_size=256]

Epoch 2/10:  72%|████████▋   | 718/991 [3:02:45<1:11:36, 15.74s/batch, batch_loss=21.8, batch_index=718, batch_size=256]

Epoch 2/10:  72%|████████▋   | 718/991 [3:03:01<1:11:36, 15.74s/batch, batch_loss=11.4, batch_index=719, batch_size=256]

Epoch 2/10:  73%|████████▋   | 719/991 [3:03:01<1:11:43, 15.82s/batch, batch_loss=11.4, batch_index=719, batch_size=256]

Epoch 2/10:  73%|████████▋   | 719/991 [3:03:16<1:11:43, 15.82s/batch, batch_loss=14.1, batch_index=720, batch_size=256]

Epoch 2/10:  73%|████████▋   | 720/991 [3:03:16<1:10:39, 15.64s/batch, batch_loss=14.1, batch_index=720, batch_size=256]

Epoch 2/10:  73%|██████████▏   | 720/991 [3:03:33<1:10:39, 15.64s/batch, batch_loss=20, batch_index=721, batch_size=256]

Epoch 2/10:  73%|██████████▏   | 721/991 [3:03:33<1:12:21, 16.08s/batch, batch_loss=20, batch_index=721, batch_size=256]

Epoch 2/10:  73%|████████▋   | 721/991 [3:03:47<1:12:21, 16.08s/batch, batch_loss=22.4, batch_index=722, batch_size=256]

Epoch 2/10:  73%|████████▋   | 722/991 [3:03:47<1:10:07, 15.64s/batch, batch_loss=22.4, batch_index=722, batch_size=256]

Epoch 2/10:  73%|██████▌  | 722/991 [3:04:02<1:10:07, 15.64s/batch, batch_loss=7.23e+3, batch_index=723, batch_size=256]

Epoch 2/10:  73%|██████▌  | 723/991 [3:04:02<1:08:57, 15.44s/batch, batch_loss=7.23e+3, batch_index=723, batch_size=256]

Epoch 2/10:  73%|████████▊   | 723/991 [3:04:17<1:08:57, 15.44s/batch, batch_loss=3.98, batch_index=724, batch_size=256]

Epoch 2/10:  73%|████████▊   | 724/991 [3:04:17<1:08:05, 15.30s/batch, batch_loss=3.98, batch_index=724, batch_size=256]

Epoch 2/10:  73%|████████▊   | 724/991 [3:04:33<1:08:05, 15.30s/batch, batch_loss=15.4, batch_index=725, batch_size=256]

Epoch 2/10:  73%|████████▊   | 725/991 [3:04:33<1:07:55, 15.32s/batch, batch_loss=15.4, batch_index=725, batch_size=256]

Epoch 2/10:  73%|██████████▏   | 725/991 [3:04:49<1:07:55, 15.32s/batch, batch_loss=12, batch_index=726, batch_size=256]

Epoch 2/10:  73%|██████████▎   | 726/991 [3:04:49<1:08:13, 15.45s/batch, batch_loss=12, batch_index=726, batch_size=256]

Epoch 2/10:  73%|███████▎  | 726/991 [3:05:05<1:08:13, 15.45s/batch, batch_loss=1.3e+4, batch_index=727, batch_size=256]

Epoch 2/10:  73%|███████▎  | 727/991 [3:05:05<1:09:06, 15.71s/batch, batch_loss=1.3e+4, batch_index=727, batch_size=256]

Epoch 2/10:  73%|████████▊   | 727/991 [3:05:22<1:09:06, 15.71s/batch, batch_loss=12.5, batch_index=728, batch_size=256]

Epoch 2/10:  73%|████████▊   | 728/991 [3:05:22<1:11:13, 16.25s/batch, batch_loss=12.5, batch_index=728, batch_size=256]

Epoch 2/10:  73%|█████████▌   | 728/991 [3:05:37<1:11:13, 16.25s/batch, batch_loss=127, batch_index=729, batch_size=256]

Epoch 2/10:  74%|█████████▌   | 729/991 [3:05:37<1:09:03, 15.82s/batch, batch_loss=127, batch_index=729, batch_size=256]

Epoch 2/10:  74%|████████▊   | 729/991 [3:05:53<1:09:03, 15.82s/batch, batch_loss=10.8, batch_index=730, batch_size=256]

Epoch 2/10:  74%|████████▊   | 730/991 [3:05:53<1:08:21, 15.72s/batch, batch_loss=10.8, batch_index=730, batch_size=256]

Epoch 2/10:  74%|█████████▌   | 730/991 [3:06:08<1:08:21, 15.72s/batch, batch_loss=107, batch_index=731, batch_size=256]

Epoch 2/10:  74%|█████████▌   | 731/991 [3:06:08<1:07:17, 15.53s/batch, batch_loss=107, batch_index=731, batch_size=256]

Epoch 2/10:  74%|██████▋  | 731/991 [3:06:22<1:07:17, 15.53s/batch, batch_loss=1.39e+4, batch_index=732, batch_size=256]

Epoch 2/10:  74%|██████▋  | 732/991 [3:06:22<1:05:20, 15.14s/batch, batch_loss=1.39e+4, batch_index=732, batch_size=256]

Epoch 2/10:  74%|██████████▎   | 732/991 [3:06:36<1:05:20, 15.14s/batch, batch_loss=17, batch_index=733, batch_size=256]

Epoch 2/10:  74%|██████████▎   | 733/991 [3:06:36<1:04:07, 14.91s/batch, batch_loss=17, batch_index=733, batch_size=256]

Epoch 2/10:  74%|███████▍  | 733/991 [3:06:51<1:04:07, 14.91s/batch, batch_loss=6.8e+3, batch_index=734, batch_size=256]

Epoch 2/10:  74%|███████▍  | 734/991 [3:06:51<1:03:52, 14.91s/batch, batch_loss=6.8e+3, batch_index=734, batch_size=256]

Epoch 2/10:  74%|████████▉   | 734/991 [3:07:07<1:03:52, 14.91s/batch, batch_loss=17.9, batch_index=735, batch_size=256]

Epoch 2/10:  74%|████████▉   | 735/991 [3:07:07<1:04:25, 15.10s/batch, batch_loss=17.9, batch_index=735, batch_size=256]

Epoch 2/10:  74%|████████▉   | 735/991 [3:07:24<1:04:25, 15.10s/batch, batch_loss=12.4, batch_index=736, batch_size=256]

Epoch 2/10:  74%|████████▉   | 736/991 [3:07:24<1:06:15, 15.59s/batch, batch_loss=12.4, batch_index=736, batch_size=256]

Epoch 2/10:  74%|████████▉   | 736/991 [3:07:40<1:06:15, 15.59s/batch, batch_loss=8.64, batch_index=737, batch_size=256]

Epoch 2/10:  74%|████████▉   | 737/991 [3:07:40<1:06:47, 15.78s/batch, batch_loss=8.64, batch_index=737, batch_size=256]

Epoch 2/10:  74%|██████▋  | 737/991 [3:07:55<1:06:47, 15.78s/batch, batch_loss=1.48e+3, batch_index=738, batch_size=256]

Epoch 2/10:  74%|██████▋  | 738/991 [3:07:55<1:06:18, 15.73s/batch, batch_loss=1.48e+3, batch_index=738, batch_size=256]

Epoch 2/10:  74%|████████▉   | 738/991 [3:08:10<1:06:18, 15.73s/batch, batch_loss=28.3, batch_index=739, batch_size=256]

Epoch 2/10:  75%|████████▉   | 739/991 [3:08:10<1:04:52, 15.45s/batch, batch_loss=28.3, batch_index=739, batch_size=256]

Epoch 2/10:  75%|████████▉   | 739/991 [3:08:26<1:04:52, 15.45s/batch, batch_loss=9.85, batch_index=740, batch_size=256]

Epoch 2/10:  75%|████████▉   | 740/991 [3:08:26<1:04:49, 15.49s/batch, batch_loss=9.85, batch_index=740, batch_size=256]

Epoch 2/10:  75%|██████▋  | 740/991 [3:08:41<1:04:49, 15.49s/batch, batch_loss=1.81e+4, batch_index=741, batch_size=256]

Epoch 2/10:  75%|██████▋  | 741/991 [3:08:41<1:03:48, 15.32s/batch, batch_loss=1.81e+4, batch_index=741, batch_size=256]

Epoch 2/10:  75%|██████▋  | 741/991 [3:08:57<1:03:48, 15.32s/batch, batch_loss=2.27e+3, batch_index=742, batch_size=256]

Epoch 2/10:  75%|██████▋  | 742/991 [3:08:57<1:04:20, 15.50s/batch, batch_loss=2.27e+3, batch_index=742, batch_size=256]

Epoch 2/10:  75%|████████▉   | 742/991 [3:09:12<1:04:20, 15.50s/batch, batch_loss=10.2, batch_index=743, batch_size=256]

Epoch 2/10:  75%|████████▉   | 743/991 [3:09:12<1:03:44, 15.42s/batch, batch_loss=10.2, batch_index=743, batch_size=256]

Epoch 2/10:  75%|████████▉   | 743/991 [3:09:27<1:03:44, 15.42s/batch, batch_loss=13.2, batch_index=744, batch_size=256]

Epoch 2/10:  75%|█████████   | 744/991 [3:09:27<1:03:23, 15.40s/batch, batch_loss=13.2, batch_index=744, batch_size=256]

Epoch 2/10:  75%|█████████   | 744/991 [3:09:41<1:03:23, 15.40s/batch, batch_loss=16.6, batch_index=745, batch_size=256]

Epoch 2/10:  75%|█████████   | 745/991 [3:09:41<1:01:45, 15.06s/batch, batch_loss=16.6, batch_index=745, batch_size=256]

Epoch 2/10:  75%|██████▊  | 745/991 [3:09:56<1:01:45, 15.06s/batch, batch_loss=1.15e+3, batch_index=746, batch_size=256]

Epoch 2/10:  75%|██████▊  | 746/991 [3:09:56<1:00:56, 14.93s/batch, batch_loss=1.15e+3, batch_index=746, batch_size=256]

Epoch 2/10:  75%|██████▊  | 746/991 [3:10:10<1:00:56, 14.93s/batch, batch_loss=3.88e+3, batch_index=747, batch_size=256]

Epoch 2/10:  75%|████████▎  | 747/991 [3:10:10<59:38, 14.67s/batch, batch_loss=3.88e+3, batch_index=747, batch_size=256]

Epoch 2/10:  75%|██████████▌   | 747/991 [3:10:26<59:38, 14.67s/batch, batch_loss=12.5, batch_index=748, batch_size=256]

Epoch 2/10:  75%|█████████   | 748/991 [3:10:26<1:00:39, 14.98s/batch, batch_loss=12.5, batch_index=748, batch_size=256]

Epoch 2/10:  75%|█████████   | 748/991 [3:10:41<1:00:39, 14.98s/batch, batch_loss=13.1, batch_index=749, batch_size=256]

Epoch 2/10:  76%|█████████   | 749/991 [3:10:41<1:00:30, 15.00s/batch, batch_loss=13.1, batch_index=749, batch_size=256]

Epoch 2/10:  76%|█████████   | 749/991 [3:10:55<1:00:30, 15.00s/batch, batch_loss=10.6, batch_index=750, batch_size=256]

Epoch 2/10:  76%|██████████▌   | 750/991 [3:10:55<59:26, 14.80s/batch, batch_loss=10.6, batch_index=750, batch_size=256]

Epoch 2/10:  76%|██████████▌   | 750/991 [3:11:10<59:26, 14.80s/batch, batch_loss=10.9, batch_index=751, batch_size=256]

Epoch 2/10:  76%|██████████▌   | 751/991 [3:11:10<59:04, 14.77s/batch, batch_loss=10.9, batch_index=751, batch_size=256]

Epoch 2/10:  76%|██████████▌   | 751/991 [3:11:24<59:04, 14.77s/batch, batch_loss=6.44, batch_index=752, batch_size=256]

Epoch 2/10:  76%|██████████▌   | 752/991 [3:11:24<57:43, 14.49s/batch, batch_loss=6.44, batch_index=752, batch_size=256]

Epoch 2/10:  76%|██████████▌   | 752/991 [3:11:39<57:43, 14.49s/batch, batch_loss=7.16, batch_index=753, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 753/991 [3:11:39<58:38, 14.78s/batch, batch_loss=7.16, batch_index=753, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 753/991 [3:11:56<58:38, 14.78s/batch, batch_loss=5.39, batch_index=754, batch_size=256]

Epoch 2/10:  76%|█████████▏  | 754/991 [3:11:56<1:01:00, 15.45s/batch, batch_loss=5.39, batch_index=754, batch_size=256]

Epoch 2/10:  76%|█████████▏  | 754/991 [3:12:11<1:01:00, 15.45s/batch, batch_loss=14.3, batch_index=755, batch_size=256]

Epoch 2/10:  76%|█████████▏  | 755/991 [3:12:11<1:00:11, 15.30s/batch, batch_loss=14.3, batch_index=755, batch_size=256]

Epoch 2/10:  76%|█████████▏  | 755/991 [3:12:26<1:00:11, 15.30s/batch, batch_loss=13.5, batch_index=756, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 756/991 [3:12:26<59:36, 15.22s/batch, batch_loss=13.5, batch_index=756, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 756/991 [3:12:41<59:36, 15.22s/batch, batch_loss=4.49, batch_index=757, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 757/991 [3:12:41<58:29, 15.00s/batch, batch_loss=4.49, batch_index=757, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 757/991 [3:12:55<58:29, 15.00s/batch, batch_loss=13.7, batch_index=758, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 758/991 [3:12:55<57:23, 14.78s/batch, batch_loss=13.7, batch_index=758, batch_size=256]

Epoch 2/10:  76%|██████████▋   | 758/991 [3:13:09<57:23, 14.78s/batch, batch_loss=13.8, batch_index=759, batch_size=256]

Epoch 2/10:  77%|██████████▋   | 759/991 [3:13:09<56:29, 14.61s/batch, batch_loss=13.8, batch_index=759, batch_size=256]

Epoch 2/10:  77%|██████████▋   | 759/991 [3:13:24<56:29, 14.61s/batch, batch_loss=15.8, batch_index=760, batch_size=256]

Epoch 2/10:  77%|██████████▋   | 760/991 [3:13:24<56:17, 14.62s/batch, batch_loss=15.8, batch_index=760, batch_size=256]

Epoch 2/10:  77%|██████████▋   | 760/991 [3:13:41<56:17, 14.62s/batch, batch_loss=17.8, batch_index=761, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 761/991 [3:13:41<58:50, 15.35s/batch, batch_loss=17.8, batch_index=761, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 761/991 [3:13:55<58:50, 15.35s/batch, batch_loss=23.8, batch_index=762, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 762/991 [3:13:55<57:42, 15.12s/batch, batch_loss=23.8, batch_index=762, batch_size=256]

Epoch 2/10:  77%|███████████▌   | 762/991 [3:14:09<57:42, 15.12s/batch, batch_loss=514, batch_index=763, batch_size=256]

Epoch 2/10:  77%|███████████▌   | 763/991 [3:14:09<56:13, 14.80s/batch, batch_loss=514, batch_index=763, batch_size=256]

Epoch 2/10:  77%|████████████▎   | 763/991 [3:14:24<56:13, 14.80s/batch, batch_loss=11, batch_index=764, batch_size=256]

Epoch 2/10:  77%|████████████▎   | 764/991 [3:14:24<55:27, 14.66s/batch, batch_loss=11, batch_index=764, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 764/991 [3:14:38<55:27, 14.66s/batch, batch_loss=2.91, batch_index=765, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 765/991 [3:14:38<54:54, 14.58s/batch, batch_loss=2.91, batch_index=765, batch_size=256]

Epoch 2/10:  77%|████████████▎   | 765/991 [3:14:53<54:54, 14.58s/batch, batch_loss=12, batch_index=766, batch_size=256]

Epoch 2/10:  77%|████████████▎   | 766/991 [3:14:53<54:47, 14.61s/batch, batch_loss=12, batch_index=766, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 766/991 [3:15:07<54:47, 14.61s/batch, batch_loss=13.9, batch_index=767, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 767/991 [3:15:07<54:04, 14.48s/batch, batch_loss=13.9, batch_index=767, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 767/991 [3:15:23<54:04, 14.48s/batch, batch_loss=3.74, batch_index=768, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 768/991 [3:15:23<55:09, 14.84s/batch, batch_loss=3.74, batch_index=768, batch_size=256]

Epoch 2/10:  77%|██████████▊   | 768/991 [3:15:40<55:09, 14.84s/batch, batch_loss=1.83, batch_index=769, batch_size=256]

Epoch 2/10:  78%|██████████▊   | 769/991 [3:15:40<57:55, 15.65s/batch, batch_loss=1.83, batch_index=769, batch_size=256]

Epoch 2/10:  78%|██████████▊   | 769/991 [3:15:55<57:55, 15.65s/batch, batch_loss=11.8, batch_index=770, batch_size=256]

Epoch 2/10:  78%|██████████▉   | 770/991 [3:15:55<56:16, 15.28s/batch, batch_loss=11.8, batch_index=770, batch_size=256]

Epoch 2/10:  78%|████████▌  | 770/991 [3:16:10<56:16, 15.28s/batch, batch_loss=2.75e+3, batch_index=771, batch_size=256]

Epoch 2/10:  78%|████████▌  | 771/991 [3:16:10<55:56, 15.26s/batch, batch_loss=2.75e+3, batch_index=771, batch_size=256]

Epoch 2/10:  78%|██████████▉   | 771/991 [3:16:24<55:56, 15.26s/batch, batch_loss=5.26, batch_index=772, batch_size=256]

Epoch 2/10:  78%|██████████▉   | 772/991 [3:16:24<54:46, 15.00s/batch, batch_loss=5.26, batch_index=772, batch_size=256]

Epoch 2/10:  78%|██████████▉   | 772/991 [3:16:39<54:46, 15.00s/batch, batch_loss=1.45, batch_index=773, batch_size=256]

Epoch 2/10:  78%|██████████▉   | 773/991 [3:16:39<53:51, 14.82s/batch, batch_loss=1.45, batch_index=773, batch_size=256]

Epoch 2/10:  78%|███████████▋   | 773/991 [3:16:54<53:51, 14.82s/batch, batch_loss=9.6, batch_index=774, batch_size=256]

Epoch 2/10:  78%|███████████▋   | 774/991 [3:16:54<53:55, 14.91s/batch, batch_loss=9.6, batch_index=774, batch_size=256]

Epoch 2/10:  78%|██████████▉   | 774/991 [3:17:10<53:55, 14.91s/batch, batch_loss=7.02, batch_index=775, batch_size=256]

Epoch 2/10:  78%|██████████▉   | 775/991 [3:17:10<54:43, 15.20s/batch, batch_loss=7.02, batch_index=775, batch_size=256]

Epoch 2/10:  78%|███████████▋   | 775/991 [3:17:24<54:43, 15.20s/batch, batch_loss=257, batch_index=776, batch_size=256]

Epoch 2/10:  78%|███████████▋   | 776/991 [3:17:24<53:36, 14.96s/batch, batch_loss=257, batch_index=776, batch_size=256]

Epoch 2/10:  78%|██████████▏  | 776/991 [3:17:40<53:36, 14.96s/batch, batch_loss=0.399, batch_index=777, batch_size=256]

Epoch 2/10:  78%|██████████▏  | 777/991 [3:17:40<54:48, 15.36s/batch, batch_loss=0.399, batch_index=777, batch_size=256]

Epoch 2/10:  78%|██████████▏  | 777/991 [3:17:56<54:48, 15.36s/batch, batch_loss=0.724, batch_index=778, batch_size=256]

Epoch 2/10:  79%|██████████▏  | 778/991 [3:17:56<54:15, 15.28s/batch, batch_loss=0.724, batch_index=778, batch_size=256]

Epoch 2/10:  79%|██████████▉   | 778/991 [3:18:11<54:15, 15.28s/batch, batch_loss=5.49, batch_index=779, batch_size=256]

Epoch 2/10:  79%|███████████   | 779/991 [3:18:11<53:55, 15.26s/batch, batch_loss=5.49, batch_index=779, batch_size=256]

Epoch 2/10:  79%|███████████   | 779/991 [3:18:26<53:55, 15.26s/batch, batch_loss=3.28, batch_index=780, batch_size=256]

Epoch 2/10:  79%|███████████   | 780/991 [3:18:26<53:18, 15.16s/batch, batch_loss=3.28, batch_index=780, batch_size=256]

Epoch 2/10:  79%|███████████   | 780/991 [3:18:41<53:18, 15.16s/batch, batch_loss=3.32, batch_index=781, batch_size=256]

Epoch 2/10:  79%|███████████   | 781/991 [3:18:41<53:04, 15.17s/batch, batch_loss=3.32, batch_index=781, batch_size=256]

Epoch 2/10:  79%|████████▋  | 781/991 [3:18:55<53:04, 15.17s/batch, batch_loss=2.51e+4, batch_index=782, batch_size=256]

Epoch 2/10:  79%|████████▋  | 782/991 [3:18:55<52:05, 14.95s/batch, batch_loss=2.51e+4, batch_index=782, batch_size=256]

Epoch 2/10:  79%|███████████   | 782/991 [3:19:11<52:05, 14.95s/batch, batch_loss=18.5, batch_index=783, batch_size=256]

Epoch 2/10:  79%|███████████   | 783/991 [3:19:11<52:42, 15.20s/batch, batch_loss=18.5, batch_index=783, batch_size=256]

Epoch 2/10:  79%|███████████   | 783/991 [3:19:29<52:42, 15.20s/batch, batch_loss=14.2, batch_index=784, batch_size=256]

Epoch 2/10:  79%|███████████   | 784/991 [3:19:29<55:19, 16.04s/batch, batch_loss=14.2, batch_index=784, batch_size=256]

Epoch 2/10:  79%|███████████   | 784/991 [3:19:46<55:19, 16.04s/batch, batch_loss=13.4, batch_index=785, batch_size=256]

Epoch 2/10:  79%|███████████   | 785/991 [3:19:46<55:41, 16.22s/batch, batch_loss=13.4, batch_index=785, batch_size=256]

Epoch 2/10:  79%|███████████   | 785/991 [3:20:04<55:41, 16.22s/batch, batch_loss=7.82, batch_index=786, batch_size=256]

Epoch 2/10:  79%|███████████   | 786/991 [3:20:04<57:05, 16.71s/batch, batch_loss=7.82, batch_index=786, batch_size=256]

Epoch 2/10:  79%|████████▋  | 786/991 [3:20:19<57:05, 16.71s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 2/10:  79%|████████▋  | 787/991 [3:20:19<55:18, 16.27s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 2/10:  79%|███████████▉   | 787/991 [3:20:33<55:18, 16.27s/batch, batch_loss=687, batch_index=788, batch_size=256]

Epoch 2/10:  80%|███████████▉   | 788/991 [3:20:33<53:09, 15.71s/batch, batch_loss=687, batch_index=788, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 788/991 [3:20:47<53:09, 15.71s/batch, batch_loss=19.6, batch_index=789, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 789/991 [3:20:47<50:53, 15.12s/batch, batch_loss=19.6, batch_index=789, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 789/991 [3:21:02<50:53, 15.12s/batch, batch_loss=12.6, batch_index=790, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 790/991 [3:21:02<50:38, 15.12s/batch, batch_loss=12.6, batch_index=790, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 790/991 [3:21:18<50:38, 15.12s/batch, batch_loss=13.1, batch_index=791, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 791/991 [3:21:18<51:26, 15.43s/batch, batch_loss=13.1, batch_index=791, batch_size=256]

Epoch 2/10:  80%|████████▊  | 791/991 [3:21:34<51:26, 15.43s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 2/10:  80%|████████▊  | 792/991 [3:21:34<51:47, 15.62s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 792/991 [3:21:48<51:47, 15.62s/batch, batch_loss=8.53, batch_index=793, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 793/991 [3:21:48<50:04, 15.17s/batch, batch_loss=8.53, batch_index=793, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 793/991 [3:22:04<50:04, 15.17s/batch, batch_loss=1.86, batch_index=794, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 794/991 [3:22:04<50:15, 15.31s/batch, batch_loss=1.86, batch_index=794, batch_size=256]

Epoch 2/10:  80%|████████████   | 794/991 [3:22:20<50:15, 15.31s/batch, batch_loss=7.5, batch_index=795, batch_size=256]

Epoch 2/10:  80%|████████████   | 795/991 [3:22:20<50:25, 15.44s/batch, batch_loss=7.5, batch_index=795, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 795/991 [3:22:36<50:25, 15.44s/batch, batch_loss=10.2, batch_index=796, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 796/991 [3:22:36<50:32, 15.55s/batch, batch_loss=10.2, batch_index=796, batch_size=256]

Epoch 2/10:  80%|███████████▏  | 796/991 [3:22:51<50:32, 15.55s/batch, batch_loss=19.3, batch_index=797, batch_size=256]

Epoch 2/10:  80%|███████████▎  | 797/991 [3:22:51<50:14, 15.54s/batch, batch_loss=19.3, batch_index=797, batch_size=256]

Epoch 2/10:  80%|████████████   | 797/991 [3:23:06<50:14, 15.54s/batch, batch_loss=336, batch_index=798, batch_size=256]

Epoch 2/10:  81%|████████████   | 798/991 [3:23:06<49:49, 15.49s/batch, batch_loss=336, batch_index=798, batch_size=256]

Epoch 2/10:  81%|█████████████▋   | 798/991 [3:23:21<49:49, 15.49s/batch, batch_loss=9, batch_index=799, batch_size=256]

Epoch 2/10:  81%|█████████████▋   | 799/991 [3:23:21<49:00, 15.31s/batch, batch_loss=9, batch_index=799, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 799/991 [3:23:36<49:00, 15.31s/batch, batch_loss=14.7, batch_index=800, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 800/991 [3:23:36<47:55, 15.05s/batch, batch_loss=14.7, batch_index=800, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 800/991 [3:23:53<47:55, 15.05s/batch, batch_loss=10.6, batch_index=801, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 801/991 [3:23:53<49:48, 15.73s/batch, batch_loss=10.6, batch_index=801, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 801/991 [3:24:08<49:48, 15.73s/batch, batch_loss=15.1, batch_index=802, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 802/991 [3:24:08<48:43, 15.47s/batch, batch_loss=15.1, batch_index=802, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 802/991 [3:24:23<48:43, 15.47s/batch, batch_loss=5.99, batch_index=803, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 803/991 [3:24:23<48:00, 15.32s/batch, batch_loss=5.99, batch_index=803, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 803/991 [3:24:38<48:00, 15.32s/batch, batch_loss=12.6, batch_index=804, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 804/991 [3:24:38<47:15, 15.17s/batch, batch_loss=12.6, batch_index=804, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 804/991 [3:24:53<47:15, 15.17s/batch, batch_loss=5.21, batch_index=805, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 805/991 [3:24:53<47:15, 15.25s/batch, batch_loss=5.21, batch_index=805, batch_size=256]

Epoch 2/10:  81%|███████████▎  | 805/991 [3:25:08<47:15, 15.25s/batch, batch_loss=10.2, batch_index=806, batch_size=256]

Epoch 2/10:  81%|███████████▍  | 806/991 [3:25:08<46:34, 15.11s/batch, batch_loss=10.2, batch_index=806, batch_size=256]

Epoch 2/10:  81%|███████████▍  | 806/991 [3:25:23<46:34, 15.11s/batch, batch_loss=8.85, batch_index=807, batch_size=256]

Epoch 2/10:  81%|███████████▍  | 807/991 [3:25:23<46:14, 15.08s/batch, batch_loss=8.85, batch_index=807, batch_size=256]

Epoch 2/10:  81%|███████████▍  | 807/991 [3:25:41<46:14, 15.08s/batch, batch_loss=16.6, batch_index=808, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 808/991 [3:25:41<48:18, 15.84s/batch, batch_loss=16.6, batch_index=808, batch_size=256]

Epoch 2/10:  82%|████████▉  | 808/991 [3:25:55<48:18, 15.84s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 2/10:  82%|████████▉  | 809/991 [3:25:55<46:48, 15.43s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 809/991 [3:26:10<46:48, 15.43s/batch, batch_loss=13.9, batch_index=810, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 810/991 [3:26:10<46:25, 15.39s/batch, batch_loss=13.9, batch_index=810, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 810/991 [3:26:26<46:25, 15.39s/batch, batch_loss=6.71, batch_index=811, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 811/991 [3:26:26<46:05, 15.37s/batch, batch_loss=6.71, batch_index=811, batch_size=256]

Epoch 2/10:  82%|████████████▎  | 811/991 [3:26:41<46:05, 15.37s/batch, batch_loss=7.1, batch_index=812, batch_size=256]

Epoch 2/10:  82%|████████████▎  | 812/991 [3:26:41<45:41, 15.31s/batch, batch_loss=7.1, batch_index=812, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 812/991 [3:26:56<45:41, 15.31s/batch, batch_loss=7.49, batch_index=813, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 813/991 [3:26:56<44:55, 15.14s/batch, batch_loss=7.49, batch_index=813, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 813/991 [3:27:11<44:55, 15.14s/batch, batch_loss=12.1, batch_index=814, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 814/991 [3:27:11<44:44, 15.17s/batch, batch_loss=12.1, batch_index=814, batch_size=256]

Epoch 2/10:  82%|███████████▍  | 814/991 [3:27:26<44:44, 15.17s/batch, batch_loss=7.14, batch_index=815, batch_size=256]

Epoch 2/10:  82%|███████████▌  | 815/991 [3:27:26<44:33, 15.19s/batch, batch_loss=7.14, batch_index=815, batch_size=256]

Epoch 2/10:  82%|███████████▌  | 815/991 [3:27:44<44:33, 15.19s/batch, batch_loss=90.8, batch_index=816, batch_size=256]

Epoch 2/10:  82%|███████████▌  | 816/991 [3:27:44<46:47, 16.04s/batch, batch_loss=90.8, batch_index=816, batch_size=256]

Epoch 2/10:  82%|████████████▎  | 816/991 [3:27:59<46:47, 16.04s/batch, batch_loss=357, batch_index=817, batch_size=256]

Epoch 2/10:  82%|████████████▎  | 817/991 [3:27:59<45:13, 15.59s/batch, batch_loss=357, batch_index=817, batch_size=256]

Epoch 2/10:  82%|████████████▎  | 817/991 [3:28:14<45:13, 15.59s/batch, batch_loss=361, batch_index=818, batch_size=256]

Epoch 2/10:  83%|████████████▍  | 818/991 [3:28:14<45:01, 15.62s/batch, batch_loss=361, batch_index=818, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 818/991 [3:28:30<45:01, 15.62s/batch, batch_loss=13.2, batch_index=819, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 819/991 [3:28:30<45:09, 15.75s/batch, batch_loss=13.2, batch_index=819, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 819/991 [3:28:46<45:09, 15.75s/batch, batch_loss=7.48, batch_index=820, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 820/991 [3:28:46<44:51, 15.74s/batch, batch_loss=7.48, batch_index=820, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 820/991 [3:29:03<44:51, 15.74s/batch, batch_loss=6.89, batch_index=821, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 821/991 [3:29:03<45:35, 16.09s/batch, batch_loss=6.89, batch_index=821, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 821/991 [3:29:19<45:35, 16.09s/batch, batch_loss=7.58, batch_index=822, batch_size=256]

Epoch 2/10:  83%|███████████▌  | 822/991 [3:29:19<45:16, 16.08s/batch, batch_loss=7.58, batch_index=822, batch_size=256]

Epoch 2/10:  83%|████████████▍  | 822/991 [3:29:34<45:16, 16.08s/batch, batch_loss=152, batch_index=823, batch_size=256]

Epoch 2/10:  83%|████████████▍  | 823/991 [3:29:34<43:55, 15.69s/batch, batch_loss=152, batch_index=823, batch_size=256]

Epoch 2/10:  83%|███████████▋  | 823/991 [3:29:49<43:55, 15.69s/batch, batch_loss=7.32, batch_index=824, batch_size=256]

Epoch 2/10:  83%|███████████▋  | 824/991 [3:29:49<43:21, 15.58s/batch, batch_loss=7.32, batch_index=824, batch_size=256]

Epoch 2/10:  83%|███████████▋  | 824/991 [3:30:05<43:21, 15.58s/batch, batch_loss=13.2, batch_index=825, batch_size=256]

Epoch 2/10:  83%|███████████▋  | 825/991 [3:30:05<43:25, 15.69s/batch, batch_loss=13.2, batch_index=825, batch_size=256]

Epoch 2/10:  83%|█████████▉  | 825/991 [3:30:21<43:25, 15.69s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 2/10:  83%|██████████  | 826/991 [3:30:21<43:01, 15.65s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 2/10:  83%|███████████▋  | 826/991 [3:30:37<43:01, 15.65s/batch, batch_loss=21.1, batch_index=827, batch_size=256]

Epoch 2/10:  83%|███████████▋  | 827/991 [3:30:37<43:27, 15.90s/batch, batch_loss=21.1, batch_index=827, batch_size=256]

Epoch 2/10:  83%|█████████████▎  | 827/991 [3:30:53<43:27, 15.90s/batch, batch_loss=18, batch_index=828, batch_size=256]

Epoch 2/10:  84%|█████████████▎  | 828/991 [3:30:53<43:00, 15.83s/batch, batch_loss=18, batch_index=828, batch_size=256]

Epoch 2/10:  84%|████████████▌  | 828/991 [3:31:08<43:00, 15.83s/batch, batch_loss=7.4, batch_index=829, batch_size=256]

Epoch 2/10:  84%|████████████▌  | 829/991 [3:31:08<42:11, 15.63s/batch, batch_loss=7.4, batch_index=829, batch_size=256]

Epoch 2/10:  84%|███████████▋  | 829/991 [3:31:23<42:11, 15.63s/batch, batch_loss=11.8, batch_index=830, batch_size=256]

Epoch 2/10:  84%|███████████▋  | 830/991 [3:31:23<41:22, 15.42s/batch, batch_loss=11.8, batch_index=830, batch_size=256]

Epoch 2/10:  84%|███████████▋  | 830/991 [3:31:40<41:22, 15.42s/batch, batch_loss=9.46, batch_index=831, batch_size=256]

Epoch 2/10:  84%|███████████▋  | 831/991 [3:31:40<42:38, 15.99s/batch, batch_loss=9.46, batch_index=831, batch_size=256]

Epoch 2/10:  84%|█████████████▍  | 831/991 [3:31:55<42:38, 15.99s/batch, batch_loss=13, batch_index=832, batch_size=256]

Epoch 2/10:  84%|█████████████▍  | 832/991 [3:31:55<41:37, 15.71s/batch, batch_loss=13, batch_index=832, batch_size=256]

Epoch 2/10:  84%|████████████▌  | 832/991 [3:32:10<41:37, 15.71s/batch, batch_loss=217, batch_index=833, batch_size=256]

Epoch 2/10:  84%|████████████▌  | 833/991 [3:32:10<40:51, 15.51s/batch, batch_loss=217, batch_index=833, batch_size=256]

Epoch 2/10:  84%|███████████▊  | 833/991 [3:32:27<40:51, 15.51s/batch, batch_loss=16.5, batch_index=834, batch_size=256]

Epoch 2/10:  84%|███████████▊  | 834/991 [3:32:27<41:10, 15.74s/batch, batch_loss=16.5, batch_index=834, batch_size=256]

Epoch 2/10:  84%|███████████▊  | 834/991 [3:32:41<41:10, 15.74s/batch, batch_loss=11.3, batch_index=835, batch_size=256]

Epoch 2/10:  84%|███████████▊  | 835/991 [3:32:41<40:03, 15.41s/batch, batch_loss=11.3, batch_index=835, batch_size=256]

Epoch 2/10:  84%|█████████▎ | 835/991 [3:32:57<40:03, 15.41s/batch, batch_loss=3.26e+3, batch_index=836, batch_size=256]

Epoch 2/10:  84%|█████████▎ | 836/991 [3:32:57<39:50, 15.42s/batch, batch_loss=3.26e+3, batch_index=836, batch_size=256]

Epoch 2/10:  84%|█████████▎ | 836/991 [3:33:12<39:50, 15.42s/batch, batch_loss=4.89e+3, batch_index=837, batch_size=256]

Epoch 2/10:  84%|█████████▎ | 837/991 [3:33:12<39:09, 15.25s/batch, batch_loss=4.89e+3, batch_index=837, batch_size=256]

Epoch 2/10:  84%|███████████▊  | 837/991 [3:33:26<39:09, 15.25s/batch, batch_loss=14.3, batch_index=838, batch_size=256]

Epoch 2/10:  85%|███████████▊  | 838/991 [3:33:26<38:15, 15.01s/batch, batch_loss=14.3, batch_index=838, batch_size=256]

Epoch 2/10:  85%|███████████▊  | 838/991 [3:33:41<38:15, 15.01s/batch, batch_loss=3.66, batch_index=839, batch_size=256]

Epoch 2/10:  85%|███████████▊  | 839/991 [3:33:41<38:17, 15.12s/batch, batch_loss=3.66, batch_index=839, batch_size=256]

Epoch 2/10:  85%|███████████▊  | 839/991 [3:33:57<38:17, 15.12s/batch, batch_loss=3.93, batch_index=840, batch_size=256]

Epoch 2/10:  85%|███████████▊  | 840/991 [3:33:57<38:10, 15.17s/batch, batch_loss=3.93, batch_index=840, batch_size=256]

Epoch 2/10:  85%|█████████████▌  | 840/991 [3:34:12<38:10, 15.17s/batch, batch_loss=13, batch_index=841, batch_size=256]

Epoch 2/10:  85%|█████████████▌  | 841/991 [3:34:12<37:41, 15.07s/batch, batch_loss=13, batch_index=841, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 841/991 [3:34:27<37:41, 15.07s/batch, batch_loss=13.9, batch_index=842, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 842/991 [3:34:27<37:27, 15.08s/batch, batch_loss=13.9, batch_index=842, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 842/991 [3:34:42<37:27, 15.08s/batch, batch_loss=8.27, batch_index=843, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 843/991 [3:34:42<37:20, 15.14s/batch, batch_loss=8.27, batch_index=843, batch_size=256]

Epoch 2/10:  85%|█████████▎ | 843/991 [3:34:56<37:20, 15.14s/batch, batch_loss=1.69e+3, batch_index=844, batch_size=256]

Epoch 2/10:  85%|█████████▎ | 844/991 [3:34:56<36:19, 14.83s/batch, batch_loss=1.69e+3, batch_index=844, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 844/991 [3:35:11<36:19, 14.83s/batch, batch_loss=15.8, batch_index=845, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 845/991 [3:35:11<35:58, 14.79s/batch, batch_loss=15.8, batch_index=845, batch_size=256]

Epoch 2/10:  85%|█████████▍ | 845/991 [3:35:25<35:58, 14.79s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 2/10:  85%|█████████▍ | 846/991 [3:35:25<35:30, 14.69s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 846/991 [3:35:42<35:30, 14.69s/batch, batch_loss=21.3, batch_index=847, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 847/991 [3:35:42<37:07, 15.47s/batch, batch_loss=21.3, batch_index=847, batch_size=256]

Epoch 2/10:  85%|███████████▉  | 847/991 [3:35:58<37:07, 15.47s/batch, batch_loss=28.3, batch_index=848, batch_size=256]

Epoch 2/10:  86%|███████████▉  | 848/991 [3:35:58<36:37, 15.36s/batch, batch_loss=28.3, batch_index=848, batch_size=256]

Epoch 2/10:  86%|█████████▍ | 848/991 [3:36:13<36:37, 15.36s/batch, batch_loss=1.01e+3, batch_index=849, batch_size=256]

Epoch 2/10:  86%|█████████▍ | 849/991 [3:36:13<36:18, 15.34s/batch, batch_loss=1.01e+3, batch_index=849, batch_size=256]

Epoch 2/10:  86%|███████████▉  | 849/991 [3:36:28<36:18, 15.34s/batch, batch_loss=7.45, batch_index=850, batch_size=256]

Epoch 2/10:  86%|████████████  | 850/991 [3:36:28<35:53, 15.27s/batch, batch_loss=7.45, batch_index=850, batch_size=256]

Epoch 2/10:  86%|████████████  | 850/991 [3:36:42<35:53, 15.27s/batch, batch_loss=15.2, batch_index=851, batch_size=256]

Epoch 2/10:  86%|████████████  | 851/991 [3:36:42<34:34, 14.82s/batch, batch_loss=15.2, batch_index=851, batch_size=256]

Epoch 2/10:  86%|████████████  | 851/991 [3:36:57<34:34, 14.82s/batch, batch_loss=14.6, batch_index=852, batch_size=256]

Epoch 2/10:  86%|████████████  | 852/991 [3:36:57<34:25, 14.86s/batch, batch_loss=14.6, batch_index=852, batch_size=256]

Epoch 2/10:  86%|█████████▍ | 852/991 [3:37:11<34:25, 14.86s/batch, batch_loss=7.65e+3, batch_index=853, batch_size=256]

Epoch 2/10:  86%|█████████▍ | 853/991 [3:37:11<33:38, 14.63s/batch, batch_loss=7.65e+3, batch_index=853, batch_size=256]

Epoch 2/10:  86%|████████████  | 853/991 [3:37:29<33:38, 14.63s/batch, batch_loss=16.3, batch_index=854, batch_size=256]

Epoch 2/10:  86%|████████████  | 854/991 [3:37:29<35:50, 15.70s/batch, batch_loss=16.3, batch_index=854, batch_size=256]

Epoch 2/10:  86%|████████████  | 854/991 [3:37:44<35:50, 15.70s/batch, batch_loss=7.12, batch_index=855, batch_size=256]

Epoch 2/10:  86%|████████████  | 855/991 [3:37:44<34:57, 15.42s/batch, batch_loss=7.12, batch_index=855, batch_size=256]

Epoch 2/10:  86%|████████████  | 855/991 [3:37:59<34:57, 15.42s/batch, batch_loss=7.99, batch_index=856, batch_size=256]

Epoch 2/10:  86%|████████████  | 856/991 [3:37:59<34:21, 15.27s/batch, batch_loss=7.99, batch_index=856, batch_size=256]

Epoch 2/10:  86%|████████████  | 856/991 [3:38:13<34:21, 15.27s/batch, batch_loss=7.94, batch_index=857, batch_size=256]

Epoch 2/10:  86%|████████████  | 857/991 [3:38:13<33:23, 14.95s/batch, batch_loss=7.94, batch_index=857, batch_size=256]

Epoch 2/10:  86%|████████████  | 857/991 [3:38:28<33:23, 14.95s/batch, batch_loss=20.6, batch_index=858, batch_size=256]

Epoch 2/10:  87%|████████████  | 858/991 [3:38:28<33:13, 14.99s/batch, batch_loss=20.6, batch_index=858, batch_size=256]

Epoch 2/10:  87%|████████████  | 858/991 [3:38:43<33:13, 14.99s/batch, batch_loss=12.3, batch_index=859, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 859/991 [3:38:43<33:03, 15.03s/batch, batch_loss=12.3, batch_index=859, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 859/991 [3:38:58<33:03, 15.03s/batch, batch_loss=18.6, batch_index=860, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 860/991 [3:38:58<32:50, 15.04s/batch, batch_loss=18.6, batch_index=860, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 860/991 [3:39:13<32:50, 15.04s/batch, batch_loss=7.92, batch_index=861, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 861/991 [3:39:13<32:35, 15.04s/batch, batch_loss=7.92, batch_index=861, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 861/991 [3:39:29<32:35, 15.04s/batch, batch_loss=16.1, batch_index=862, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 862/991 [3:39:29<32:43, 15.22s/batch, batch_loss=16.1, batch_index=862, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 862/991 [3:39:44<32:43, 15.22s/batch, batch_loss=24.3, batch_index=863, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 863/991 [3:39:44<32:39, 15.31s/batch, batch_loss=24.3, batch_index=863, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 863/991 [3:40:00<32:39, 15.31s/batch, batch_loss=8.99, batch_index=864, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 864/991 [3:40:00<32:38, 15.42s/batch, batch_loss=8.99, batch_index=864, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 864/991 [3:40:15<32:38, 15.42s/batch, batch_loss=15.2, batch_index=865, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 865/991 [3:40:15<32:04, 15.27s/batch, batch_loss=15.2, batch_index=865, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 865/991 [3:40:30<32:04, 15.27s/batch, batch_loss=19.8, batch_index=866, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 866/991 [3:40:30<31:24, 15.08s/batch, batch_loss=19.8, batch_index=866, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 866/991 [3:40:44<31:24, 15.08s/batch, batch_loss=19.5, batch_index=867, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 867/991 [3:40:44<30:56, 14.97s/batch, batch_loss=19.5, batch_index=867, batch_size=256]

Epoch 2/10:  87%|████████████▏ | 867/991 [3:40:59<30:56, 14.97s/batch, batch_loss=19.4, batch_index=868, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 868/991 [3:40:59<30:35, 14.92s/batch, batch_loss=19.4, batch_index=868, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 868/991 [3:41:14<30:35, 14.92s/batch, batch_loss=9.53, batch_index=869, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 869/991 [3:41:14<30:34, 15.03s/batch, batch_loss=9.53, batch_index=869, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 869/991 [3:41:29<30:34, 15.03s/batch, batch_loss=12.2, batch_index=870, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 870/991 [3:41:29<30:12, 14.98s/batch, batch_loss=12.2, batch_index=870, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 870/991 [3:41:45<30:12, 14.98s/batch, batch_loss=7.81, batch_index=871, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 871/991 [3:41:45<30:12, 15.10s/batch, batch_loss=7.81, batch_index=871, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 871/991 [3:41:59<30:12, 15.10s/batch, batch_loss=17.7, batch_index=872, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 872/991 [3:41:59<29:47, 15.02s/batch, batch_loss=17.7, batch_index=872, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 872/991 [3:42:15<29:47, 15.02s/batch, batch_loss=13.5, batch_index=873, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 873/991 [3:42:15<30:06, 15.31s/batch, batch_loss=13.5, batch_index=873, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 873/991 [3:42:33<30:06, 15.31s/batch, batch_loss=6.75, batch_index=874, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 874/991 [3:42:33<31:16, 16.04s/batch, batch_loss=6.75, batch_index=874, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 874/991 [3:42:49<31:16, 16.04s/batch, batch_loss=12.5, batch_index=875, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 875/991 [3:42:49<31:01, 16.05s/batch, batch_loss=12.5, batch_index=875, batch_size=256]

Epoch 2/10:  88%|████████████▎ | 875/991 [3:43:05<31:01, 16.05s/batch, batch_loss=21.5, batch_index=876, batch_size=256]

Epoch 2/10:  88%|████████████▍ | 876/991 [3:43:05<30:45, 16.05s/batch, batch_loss=21.5, batch_index=876, batch_size=256]

Epoch 2/10:  88%|████████████▍ | 876/991 [3:43:21<30:45, 16.05s/batch, batch_loss=17.3, batch_index=877, batch_size=256]

Epoch 2/10:  88%|████████████▍ | 877/991 [3:43:21<30:32, 16.08s/batch, batch_loss=17.3, batch_index=877, batch_size=256]

Epoch 2/10:  88%|██████████████▏ | 877/991 [3:43:38<30:32, 16.08s/batch, batch_loss=25, batch_index=878, batch_size=256]

Epoch 2/10:  89%|██████████████▏ | 878/991 [3:43:38<30:32, 16.21s/batch, batch_loss=25, batch_index=878, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 878/991 [3:43:54<30:32, 16.21s/batch, batch_loss=16.5, batch_index=879, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 879/991 [3:43:54<30:00, 16.08s/batch, batch_loss=16.5, batch_index=879, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 879/991 [3:44:09<30:00, 16.08s/batch, batch_loss=11.1, batch_index=880, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 880/991 [3:44:09<29:31, 15.96s/batch, batch_loss=11.1, batch_index=880, batch_size=256]

Epoch 2/10:  89%|█████████▊ | 880/991 [3:44:26<29:31, 15.96s/batch, batch_loss=5.12e+3, batch_index=881, batch_size=256]

Epoch 2/10:  89%|█████████▊ | 881/991 [3:44:26<29:31, 16.10s/batch, batch_loss=5.12e+3, batch_index=881, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 881/991 [3:44:41<29:31, 16.10s/batch, batch_loss=14.2, batch_index=882, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 882/991 [3:44:41<28:59, 15.96s/batch, batch_loss=14.2, batch_index=882, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 882/991 [3:44:58<28:59, 15.96s/batch, batch_loss=15.1, batch_index=883, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 883/991 [3:44:58<28:51, 16.03s/batch, batch_loss=15.1, batch_index=883, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 883/991 [3:45:13<28:51, 16.03s/batch, batch_loss=8.18, batch_index=884, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 884/991 [3:45:13<28:13, 15.83s/batch, batch_loss=8.18, batch_index=884, batch_size=256]

Epoch 2/10:  89%|████████████▍ | 884/991 [3:45:29<28:13, 15.83s/batch, batch_loss=11.6, batch_index=885, batch_size=256]

Epoch 2/10:  89%|████████████▌ | 885/991 [3:45:29<27:49, 15.75s/batch, batch_loss=11.6, batch_index=885, batch_size=256]

Epoch 2/10:  89%|████████████▌ | 885/991 [3:45:44<27:49, 15.75s/batch, batch_loss=15.5, batch_index=886, batch_size=256]

Epoch 2/10:  89%|████████████▌ | 886/991 [3:45:44<27:19, 15.61s/batch, batch_loss=15.5, batch_index=886, batch_size=256]

Epoch 2/10:  89%|█████████▊ | 886/991 [3:46:00<27:19, 15.61s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 2/10:  90%|█████████▊ | 887/991 [3:46:00<27:17, 15.74s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 887/991 [3:46:15<27:17, 15.74s/batch, batch_loss=16.2, batch_index=888, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 888/991 [3:46:15<26:52, 15.66s/batch, batch_loss=16.2, batch_index=888, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 888/991 [3:46:32<26:52, 15.66s/batch, batch_loss=18.6, batch_index=889, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 889/991 [3:46:32<27:03, 15.92s/batch, batch_loss=18.6, batch_index=889, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 889/991 [3:46:47<27:03, 15.92s/batch, batch_loss=11.8, batch_index=890, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 890/991 [3:46:47<26:13, 15.58s/batch, batch_loss=11.8, batch_index=890, batch_size=256]

Epoch 2/10:  90%|██████████████▎ | 890/991 [3:47:02<26:13, 15.58s/batch, batch_loss=13, batch_index=891, batch_size=256]

Epoch 2/10:  90%|██████████████▍ | 891/991 [3:47:02<25:42, 15.42s/batch, batch_loss=13, batch_index=891, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 891/991 [3:47:17<25:42, 15.42s/batch, batch_loss=15.8, batch_index=892, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 892/991 [3:47:17<25:23, 15.39s/batch, batch_loss=15.8, batch_index=892, batch_size=256]

Epoch 2/10:  90%|██████████▊ | 892/991 [3:47:33<25:23, 15.39s/batch, batch_loss=3.7e+3, batch_index=893, batch_size=256]

Epoch 2/10:  90%|██████████▊ | 893/991 [3:47:33<25:22, 15.54s/batch, batch_loss=3.7e+3, batch_index=893, batch_size=256]

Epoch 2/10:  90%|████████████▌ | 893/991 [3:47:48<25:22, 15.54s/batch, batch_loss=8.43, batch_index=894, batch_size=256]

Epoch 2/10:  90%|████████████▋ | 894/991 [3:47:48<24:54, 15.41s/batch, batch_loss=8.43, batch_index=894, batch_size=256]

Epoch 2/10:  90%|██████████████▍ | 894/991 [3:48:04<24:54, 15.41s/batch, batch_loss=14, batch_index=895, batch_size=256]

Epoch 2/10:  90%|██████████████▍ | 895/991 [3:48:04<24:41, 15.43s/batch, batch_loss=14, batch_index=895, batch_size=256]

Epoch 2/10:  90%|████████████▋ | 895/991 [3:48:19<24:41, 15.43s/batch, batch_loss=10.7, batch_index=896, batch_size=256]

Epoch 2/10:  90%|████████████▋ | 896/991 [3:48:19<24:17, 15.34s/batch, batch_loss=10.7, batch_index=896, batch_size=256]

Epoch 2/10:  90%|████████████▋ | 896/991 [3:48:34<24:17, 15.34s/batch, batch_loss=17.1, batch_index=897, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 897/991 [3:48:34<23:48, 15.20s/batch, batch_loss=17.1, batch_index=897, batch_size=256]

Epoch 2/10:  91%|██████████████▍ | 897/991 [3:48:49<23:48, 15.20s/batch, batch_loss=17, batch_index=898, batch_size=256]

Epoch 2/10:  91%|██████████████▍ | 898/991 [3:48:49<23:34, 15.21s/batch, batch_loss=17, batch_index=898, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 898/991 [3:49:05<23:34, 15.21s/batch, batch_loss=15.9, batch_index=899, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 899/991 [3:49:05<23:33, 15.37s/batch, batch_loss=15.9, batch_index=899, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 899/991 [3:49:23<23:33, 15.37s/batch, batch_loss=17.8, batch_index=900, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 900/991 [3:49:23<24:53, 16.42s/batch, batch_loss=17.8, batch_index=900, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 900/991 [3:49:40<24:53, 16.42s/batch, batch_loss=15.5, batch_index=901, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 901/991 [3:49:40<24:31, 16.35s/batch, batch_loss=15.5, batch_index=901, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 901/991 [3:49:56<24:31, 16.35s/batch, batch_loss=11.1, batch_index=902, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 902/991 [3:49:56<24:12, 16.33s/batch, batch_loss=11.1, batch_index=902, batch_size=256]

Epoch 2/10:  91%|████████████▋ | 902/991 [3:50:12<24:12, 16.33s/batch, batch_loss=6.01, batch_index=903, batch_size=256]

Epoch 2/10:  91%|████████████▊ | 903/991 [3:50:12<23:41, 16.15s/batch, batch_loss=6.01, batch_index=903, batch_size=256]

Epoch 2/10:  91%|████████████▊ | 903/991 [3:50:29<23:41, 16.15s/batch, batch_loss=7.82, batch_index=904, batch_size=256]

Epoch 2/10:  91%|████████████▊ | 904/991 [3:50:29<23:52, 16.46s/batch, batch_loss=7.82, batch_index=904, batch_size=256]

Epoch 2/10:  91%|████████████▊ | 904/991 [3:50:43<23:52, 16.46s/batch, batch_loss=24.5, batch_index=905, batch_size=256]

Epoch 2/10:  91%|████████████▊ | 905/991 [3:50:43<22:38, 15.79s/batch, batch_loss=24.5, batch_index=905, batch_size=256]

Epoch 2/10:  91%|████████████▊ | 905/991 [3:50:57<22:38, 15.79s/batch, batch_loss=17.2, batch_index=906, batch_size=256]

Epoch 2/10:  91%|████████████▊ | 906/991 [3:50:57<21:48, 15.39s/batch, batch_loss=17.2, batch_index=906, batch_size=256]

Epoch 2/10:  91%|██████████████▋ | 906/991 [3:51:13<21:48, 15.39s/batch, batch_loss=18, batch_index=907, batch_size=256]

Epoch 2/10:  92%|██████████████▋ | 907/991 [3:51:13<21:45, 15.54s/batch, batch_loss=18, batch_index=907, batch_size=256]

Epoch 2/10:  92%|████████████▊ | 907/991 [3:51:28<21:45, 15.54s/batch, batch_loss=13.2, batch_index=908, batch_size=256]

Epoch 2/10:  92%|████████████▊ | 908/991 [3:51:28<21:12, 15.33s/batch, batch_loss=13.2, batch_index=908, batch_size=256]

Epoch 2/10:  92%|█████████████▋ | 908/991 [3:51:43<21:12, 15.33s/batch, batch_loss=5.5, batch_index=909, batch_size=256]

Epoch 2/10:  92%|█████████████▊ | 909/991 [3:51:43<20:37, 15.10s/batch, batch_loss=5.5, batch_index=909, batch_size=256]

Epoch 2/10:  92%|█████████████▊ | 909/991 [3:51:59<20:37, 15.10s/batch, batch_loss=683, batch_index=910, batch_size=256]

Epoch 2/10:  92%|█████████████▊ | 910/991 [3:51:59<21:01, 15.58s/batch, batch_loss=683, batch_index=910, batch_size=256]

Epoch 2/10:  92%|██████████ | 910/991 [3:52:15<21:01, 15.58s/batch, batch_loss=1.02e+3, batch_index=911, batch_size=256]

Epoch 2/10:  92%|██████████ | 911/991 [3:52:15<20:37, 15.47s/batch, batch_loss=1.02e+3, batch_index=911, batch_size=256]

Epoch 2/10:  92%|████████████▊ | 911/991 [3:52:30<20:37, 15.47s/batch, batch_loss=22.9, batch_index=912, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 912/991 [3:52:30<20:20, 15.44s/batch, batch_loss=22.9, batch_index=912, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 912/991 [3:52:44<20:20, 15.44s/batch, batch_loss=21.6, batch_index=913, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 913/991 [3:52:44<19:36, 15.08s/batch, batch_loss=21.6, batch_index=913, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 913/991 [3:52:59<19:36, 15.08s/batch, batch_loss=19.1, batch_index=914, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 914/991 [3:52:59<19:17, 15.03s/batch, batch_loss=19.1, batch_index=914, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 914/991 [3:53:15<19:17, 15.03s/batch, batch_loss=18.1, batch_index=915, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 915/991 [3:53:15<19:12, 15.16s/batch, batch_loss=18.1, batch_index=915, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 915/991 [3:53:32<19:12, 15.16s/batch, batch_loss=14.4, batch_index=916, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 916/991 [3:53:32<19:51, 15.89s/batch, batch_loss=14.4, batch_index=916, batch_size=256]

Epoch 2/10:  92%|████████████▉ | 916/991 [3:53:47<19:51, 15.89s/batch, batch_loss=7.25, batch_index=917, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 917/991 [3:53:47<19:08, 15.52s/batch, batch_loss=7.25, batch_index=917, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 917/991 [3:54:02<19:08, 15.52s/batch, batch_loss=11.5, batch_index=918, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 918/991 [3:54:02<18:44, 15.40s/batch, batch_loss=11.5, batch_index=918, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 918/991 [3:54:18<18:44, 15.40s/batch, batch_loss=11.8, batch_index=919, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 919/991 [3:54:18<18:34, 15.48s/batch, batch_loss=11.8, batch_index=919, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 919/991 [3:54:33<18:34, 15.48s/batch, batch_loss=12.5, batch_index=920, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 920/991 [3:54:33<18:14, 15.41s/batch, batch_loss=12.5, batch_index=920, batch_size=256]

Epoch 2/10:  93%|████████████▉ | 920/991 [3:54:47<18:14, 15.41s/batch, batch_loss=16.1, batch_index=921, batch_size=256]

Epoch 2/10:  93%|█████████████ | 921/991 [3:54:47<17:36, 15.09s/batch, batch_loss=16.1, batch_index=921, batch_size=256]

Epoch 2/10:  93%|█████████████ | 921/991 [3:55:03<17:36, 15.09s/batch, batch_loss=22.6, batch_index=922, batch_size=256]

Epoch 2/10:  93%|█████████████ | 922/991 [3:55:03<17:30, 15.23s/batch, batch_loss=22.6, batch_index=922, batch_size=256]

Epoch 2/10:  93%|█████████████ | 922/991 [3:55:19<17:30, 15.23s/batch, batch_loss=6.03, batch_index=923, batch_size=256]

Epoch 2/10:  93%|█████████████ | 923/991 [3:55:19<17:35, 15.53s/batch, batch_loss=6.03, batch_index=923, batch_size=256]

Epoch 2/10:  93%|█████████████ | 923/991 [3:55:37<17:35, 15.53s/batch, batch_loss=10.5, batch_index=924, batch_size=256]

Epoch 2/10:  93%|█████████████ | 924/991 [3:55:37<18:11, 16.29s/batch, batch_loss=10.5, batch_index=924, batch_size=256]

Epoch 2/10:  93%|█████████████ | 924/991 [3:55:53<18:11, 16.29s/batch, batch_loss=10.3, batch_index=925, batch_size=256]

Epoch 2/10:  93%|█████████████ | 925/991 [3:55:53<17:40, 16.06s/batch, batch_loss=10.3, batch_index=925, batch_size=256]

Epoch 2/10:  93%|█████████████ | 925/991 [3:56:08<17:40, 16.06s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 2/10:  93%|█████████████ | 926/991 [3:56:08<17:07, 15.80s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 2/10:  93%|█████████████ | 926/991 [3:56:24<17:07, 15.80s/batch, batch_loss=6.44, batch_index=927, batch_size=256]

Epoch 2/10:  94%|█████████████ | 927/991 [3:56:24<16:50, 15.79s/batch, batch_loss=6.44, batch_index=927, batch_size=256]

Epoch 2/10:  94%|██████████████ | 927/991 [3:56:38<16:50, 15.79s/batch, batch_loss=854, batch_index=928, batch_size=256]

Epoch 2/10:  94%|██████████████ | 928/991 [3:56:38<16:15, 15.48s/batch, batch_loss=854, batch_index=928, batch_size=256]

Epoch 2/10:  94%|█████████████ | 928/991 [3:56:53<16:15, 15.48s/batch, batch_loss=9.68, batch_index=929, batch_size=256]

Epoch 2/10:  94%|█████████████ | 929/991 [3:56:53<15:45, 15.25s/batch, batch_loss=9.68, batch_index=929, batch_size=256]

Epoch 2/10:  94%|█████████████ | 929/991 [3:57:08<15:45, 15.25s/batch, batch_loss=8.33, batch_index=930, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 930/991 [3:57:08<15:24, 15.16s/batch, batch_loss=8.33, batch_index=930, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 930/991 [3:57:23<15:24, 15.16s/batch, batch_loss=11.4, batch_index=931, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 931/991 [3:57:23<15:12, 15.21s/batch, batch_loss=11.4, batch_index=931, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 931/991 [3:57:39<15:12, 15.21s/batch, batch_loss=8.61, batch_index=932, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 932/991 [3:57:39<15:02, 15.30s/batch, batch_loss=8.61, batch_index=932, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 932/991 [3:57:55<15:02, 15.30s/batch, batch_loss=10.2, batch_index=933, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 933/991 [3:57:55<14:57, 15.48s/batch, batch_loss=10.2, batch_index=933, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 933/991 [3:58:10<14:57, 15.48s/batch, batch_loss=1.68, batch_index=934, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 934/991 [3:58:10<14:43, 15.50s/batch, batch_loss=1.68, batch_index=934, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 934/991 [3:58:25<14:43, 15.50s/batch, batch_loss=1.72, batch_index=935, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 935/991 [3:58:25<14:17, 15.31s/batch, batch_loss=1.72, batch_index=935, batch_size=256]

Epoch 2/10:  94%|██████████████▏| 935/991 [3:58:40<14:17, 15.31s/batch, batch_loss=164, batch_index=936, batch_size=256]

Epoch 2/10:  94%|██████████████▏| 936/991 [3:58:40<13:52, 15.13s/batch, batch_loss=164, batch_index=936, batch_size=256]

Epoch 2/10:  94%|█████████████▏| 936/991 [3:58:56<13:52, 15.13s/batch, batch_loss=36.7, batch_index=937, batch_size=256]

Epoch 2/10:  95%|█████████████▏| 937/991 [3:58:56<13:56, 15.50s/batch, batch_loss=36.7, batch_index=937, batch_size=256]

Epoch 2/10:  95%|█████████████▏| 937/991 [3:59:11<13:56, 15.50s/batch, batch_loss=9.09, batch_index=938, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 938/991 [3:59:11<13:28, 15.25s/batch, batch_loss=9.09, batch_index=938, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 938/991 [3:59:26<13:28, 15.25s/batch, batch_loss=8.13, batch_index=939, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 939/991 [3:59:26<13:09, 15.18s/batch, batch_loss=8.13, batch_index=939, batch_size=256]

Epoch 2/10:  95%|██████████████▏| 939/991 [3:59:42<13:09, 15.18s/batch, batch_loss=430, batch_index=940, batch_size=256]

Epoch 2/10:  95%|██████████████▏| 940/991 [3:59:42<13:05, 15.41s/batch, batch_loss=430, batch_index=940, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 940/991 [3:59:57<13:05, 15.41s/batch, batch_loss=17.3, batch_index=941, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 941/991 [3:59:57<12:40, 15.21s/batch, batch_loss=17.3, batch_index=941, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 941/991 [4:00:11<12:40, 15.21s/batch, batch_loss=13.7, batch_index=942, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 942/991 [4:00:11<12:12, 14.94s/batch, batch_loss=13.7, batch_index=942, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 942/991 [4:00:25<12:12, 14.94s/batch, batch_loss=10.5, batch_index=943, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 943/991 [4:00:25<11:43, 14.66s/batch, batch_loss=10.5, batch_index=943, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 943/991 [4:00:40<11:43, 14.66s/batch, batch_loss=13.4, batch_index=944, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 944/991 [4:00:40<11:31, 14.72s/batch, batch_loss=13.4, batch_index=944, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 944/991 [4:00:55<11:31, 14.72s/batch, batch_loss=1.72, batch_index=945, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 945/991 [4:00:55<11:16, 14.72s/batch, batch_loss=1.72, batch_index=945, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 945/991 [4:01:09<11:16, 14.72s/batch, batch_loss=12.1, batch_index=946, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 946/991 [4:01:09<11:01, 14.69s/batch, batch_loss=12.1, batch_index=946, batch_size=256]

Epoch 2/10:  95%|█████████████▎| 946/991 [4:01:24<11:01, 14.69s/batch, batch_loss=12.2, batch_index=947, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 947/991 [4:01:24<10:43, 14.63s/batch, batch_loss=12.2, batch_index=947, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 947/991 [4:01:38<10:43, 14.63s/batch, batch_loss=9.66, batch_index=948, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 948/991 [4:01:38<10:29, 14.63s/batch, batch_loss=9.66, batch_index=948, batch_size=256]

Epoch 2/10:  96%|██████████████▎| 948/991 [4:01:53<10:29, 14.63s/batch, batch_loss=5.2, batch_index=949, batch_size=256]

Epoch 2/10:  96%|██████████████▎| 949/991 [4:01:53<10:21, 14.80s/batch, batch_loss=5.2, batch_index=949, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 949/991 [4:02:08<10:21, 14.80s/batch, batch_loss=7.13, batch_index=950, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 950/991 [4:02:08<10:01, 14.68s/batch, batch_loss=7.13, batch_index=950, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 950/991 [4:02:23<10:01, 14.68s/batch, batch_loss=17.4, batch_index=951, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 951/991 [4:02:23<09:50, 14.75s/batch, batch_loss=17.4, batch_index=951, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 951/991 [4:02:38<09:50, 14.75s/batch, batch_loss=18.7, batch_index=952, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 952/991 [4:02:38<09:41, 14.90s/batch, batch_loss=18.7, batch_index=952, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 952/991 [4:02:53<09:41, 14.90s/batch, batch_loss=6.27, batch_index=953, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 953/991 [4:02:53<09:21, 14.79s/batch, batch_loss=6.27, batch_index=953, batch_size=256]

Epoch 2/10:  96%|██████████████▍| 953/991 [4:03:08<09:21, 14.79s/batch, batch_loss=341, batch_index=954, batch_size=256]

Epoch 2/10:  96%|██████████████▍| 954/991 [4:03:08<09:10, 14.87s/batch, batch_loss=341, batch_index=954, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 954/991 [4:03:23<09:10, 14.87s/batch, batch_loss=12.4, batch_index=955, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 955/991 [4:03:23<08:58, 14.96s/batch, batch_loss=12.4, batch_index=955, batch_size=256]

Epoch 2/10:  96%|█████████████▍| 955/991 [4:03:40<08:58, 14.96s/batch, batch_loss=14.1, batch_index=956, batch_size=256]

Epoch 2/10:  96%|█████████████▌| 956/991 [4:03:40<09:07, 15.65s/batch, batch_loss=14.1, batch_index=956, batch_size=256]

Epoch 2/10:  96%|█████████████▌| 956/991 [4:03:54<09:07, 15.65s/batch, batch_loss=13.9, batch_index=957, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 957/991 [4:03:54<08:31, 15.05s/batch, batch_loss=13.9, batch_index=957, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 957/991 [4:04:09<08:31, 15.05s/batch, batch_loss=12.6, batch_index=958, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 958/991 [4:04:09<08:20, 15.16s/batch, batch_loss=12.6, batch_index=958, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 958/991 [4:04:24<08:20, 15.16s/batch, batch_loss=11.2, batch_index=959, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 959/991 [4:04:24<08:06, 15.20s/batch, batch_loss=11.2, batch_index=959, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 959/991 [4:04:38<08:06, 15.20s/batch, batch_loss=15.7, batch_index=960, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 960/991 [4:04:38<07:37, 14.75s/batch, batch_loss=15.7, batch_index=960, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 960/991 [4:04:54<07:37, 14.75s/batch, batch_loss=19.8, batch_index=961, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 961/991 [4:04:54<07:28, 14.95s/batch, batch_loss=19.8, batch_index=961, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 961/991 [4:05:08<07:28, 14.95s/batch, batch_loss=4.84, batch_index=962, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 962/991 [4:05:08<07:13, 14.96s/batch, batch_loss=4.84, batch_index=962, batch_size=256]

Epoch 2/10:  97%|██████████████▌| 962/991 [4:05:26<07:13, 14.96s/batch, batch_loss=6.1, batch_index=963, batch_size=256]

Epoch 2/10:  97%|██████████████▌| 963/991 [4:05:26<07:19, 15.68s/batch, batch_loss=6.1, batch_index=963, batch_size=256]

Epoch 2/10:  97%|██████████▋| 963/991 [4:05:40<07:19, 15.68s/batch, batch_loss=9.41e+3, batch_index=964, batch_size=256]

Epoch 2/10:  97%|██████████▋| 964/991 [4:05:40<06:54, 15.35s/batch, batch_loss=9.41e+3, batch_index=964, batch_size=256]

Epoch 2/10:  97%|█████████████▌| 964/991 [4:05:56<06:54, 15.35s/batch, batch_loss=19.6, batch_index=965, batch_size=256]

Epoch 2/10:  97%|█████████████▋| 965/991 [4:05:56<06:36, 15.26s/batch, batch_loss=19.6, batch_index=965, batch_size=256]

Epoch 2/10:  97%|█████████████▋| 965/991 [4:06:11<06:36, 15.26s/batch, batch_loss=14.7, batch_index=966, batch_size=256]

Epoch 2/10:  97%|█████████████▋| 966/991 [4:06:11<06:20, 15.23s/batch, batch_loss=14.7, batch_index=966, batch_size=256]

Epoch 2/10:  97%|██████████▋| 966/991 [4:06:26<06:20, 15.23s/batch, batch_loss=2.41e+4, batch_index=967, batch_size=256]

Epoch 2/10:  98%|██████████▋| 967/991 [4:06:26<06:03, 15.13s/batch, batch_loss=2.41e+4, batch_index=967, batch_size=256]

Epoch 2/10:  98%|██████████████▋| 967/991 [4:06:41<06:03, 15.13s/batch, batch_loss=410, batch_index=968, batch_size=256]

Epoch 2/10:  98%|██████████████▋| 968/991 [4:06:41<05:49, 15.19s/batch, batch_loss=410, batch_index=968, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 968/991 [4:06:56<05:49, 15.19s/batch, batch_loss=21.8, batch_index=969, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 969/991 [4:06:56<05:30, 15.02s/batch, batch_loss=21.8, batch_index=969, batch_size=256]

Epoch 2/10:  98%|██████████████▋| 969/991 [4:07:11<05:30, 15.02s/batch, batch_loss=1.2, batch_index=970, batch_size=256]

Epoch 2/10:  98%|██████████████▋| 970/991 [4:07:11<05:16, 15.06s/batch, batch_loss=1.2, batch_index=970, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 970/991 [4:07:27<05:16, 15.06s/batch, batch_loss=10.2, batch_index=971, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 971/991 [4:07:27<05:08, 15.40s/batch, batch_loss=10.2, batch_index=971, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 971/991 [4:07:46<05:08, 15.40s/batch, batch_loss=24.9, batch_index=972, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 972/991 [4:07:46<05:15, 16.59s/batch, batch_loss=24.9, batch_index=972, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 972/991 [4:08:02<05:15, 16.59s/batch, batch_loss=21.4, batch_index=973, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 973/991 [4:08:02<04:55, 16.43s/batch, batch_loss=21.4, batch_index=973, batch_size=256]

Epoch 2/10:  98%|█████████████▋| 973/991 [4:08:17<04:55, 16.43s/batch, batch_loss=14.8, batch_index=974, batch_size=256]

Epoch 2/10:  98%|█████████████▊| 974/991 [4:08:17<04:31, 16.00s/batch, batch_loss=14.8, batch_index=974, batch_size=256]

Epoch 2/10:  98%|██████████████▋| 974/991 [4:08:33<04:31, 16.00s/batch, batch_loss=7.6, batch_index=975, batch_size=256]

Epoch 2/10:  98%|██████████████▊| 975/991 [4:08:33<04:13, 15.85s/batch, batch_loss=7.6, batch_index=975, batch_size=256]

Epoch 2/10:  98%|█████████████▊| 975/991 [4:08:47<04:13, 15.85s/batch, batch_loss=26.6, batch_index=976, batch_size=256]

Epoch 2/10:  98%|█████████████▊| 976/991 [4:08:47<03:52, 15.48s/batch, batch_loss=26.6, batch_index=976, batch_size=256]

Epoch 2/10:  98%|█████████████▊| 976/991 [4:09:01<03:52, 15.48s/batch, batch_loss=1.15, batch_index=977, batch_size=256]

Epoch 2/10:  99%|█████████████▊| 977/991 [4:09:01<03:26, 14.78s/batch, batch_loss=1.15, batch_index=977, batch_size=256]

Epoch 2/10:  99%|████████████▊| 977/991 [4:09:15<03:26, 14.78s/batch, batch_loss=0.894, batch_index=978, batch_size=256]

Epoch 2/10:  99%|████████████▊| 978/991 [4:09:15<03:09, 14.57s/batch, batch_loss=0.894, batch_index=978, batch_size=256]

Epoch 2/10:  99%|████████████▊| 978/991 [4:09:28<03:09, 14.57s/batch, batch_loss=0.741, batch_index=979, batch_size=256]

Epoch 2/10:  99%|████████████▊| 979/991 [4:09:28<02:51, 14.28s/batch, batch_loss=0.741, batch_index=979, batch_size=256]

Epoch 2/10:  99%|████████████▊| 979/991 [4:09:41<02:51, 14.28s/batch, batch_loss=0.595, batch_index=980, batch_size=256]

Epoch 2/10:  99%|████████████▊| 980/991 [4:09:41<02:33, 13.95s/batch, batch_loss=0.595, batch_index=980, batch_size=256]

Epoch 2/10:  99%|████████████▊| 980/991 [4:09:56<02:33, 13.95s/batch, batch_loss=0.465, batch_index=981, batch_size=256]

Epoch 2/10:  99%|████████████▊| 981/991 [4:09:56<02:20, 14.07s/batch, batch_loss=0.465, batch_index=981, batch_size=256]

Epoch 2/10:  99%|████████████▊| 981/991 [4:10:09<02:20, 14.07s/batch, batch_loss=0.356, batch_index=982, batch_size=256]

Epoch 2/10:  99%|████████████▉| 982/991 [4:10:09<02:05, 13.93s/batch, batch_loss=0.356, batch_index=982, batch_size=256]

Epoch 2/10:  99%|████████████▉| 982/991 [4:10:23<02:05, 13.93s/batch, batch_loss=0.269, batch_index=983, batch_size=256]

Epoch 2/10:  99%|████████████▉| 983/991 [4:10:23<01:50, 13.84s/batch, batch_loss=0.269, batch_index=983, batch_size=256]

Epoch 2/10:  99%|████████████▉| 983/991 [4:10:37<01:50, 13.84s/batch, batch_loss=0.199, batch_index=984, batch_size=256]

Epoch 2/10:  99%|████████████▉| 984/991 [4:10:37<01:36, 13.80s/batch, batch_loss=0.199, batch_index=984, batch_size=256]

Epoch 2/10:  99%|████████████▉| 984/991 [4:10:50<01:36, 13.80s/batch, batch_loss=0.142, batch_index=985, batch_size=256]

Epoch 2/10:  99%|████████████▉| 985/991 [4:10:50<01:21, 13.60s/batch, batch_loss=0.142, batch_index=985, batch_size=256]

Epoch 2/10:  99%|███████████▉| 985/991 [4:11:04<01:21, 13.60s/batch, batch_loss=0.0975, batch_index=986, batch_size=256]

Epoch 2/10:  99%|███████████▉| 986/991 [4:11:04<01:08, 13.63s/batch, batch_loss=0.0975, batch_index=986, batch_size=256]

Epoch 2/10:  99%|███████████▉| 986/991 [4:11:18<01:08, 13.63s/batch, batch_loss=0.0652, batch_index=987, batch_size=256]

Epoch 2/10: 100%|███████████▉| 987/991 [4:11:18<00:55, 13.78s/batch, batch_loss=0.0652, batch_index=987, batch_size=256]

Epoch 2/10: 100%|███████████▉| 987/991 [4:11:32<00:55, 13.78s/batch, batch_loss=0.0453, batch_index=988, batch_size=256]

Epoch 2/10: 100%|███████████▉| 988/991 [4:11:32<00:41, 13.81s/batch, batch_loss=0.0453, batch_index=988, batch_size=256]

Epoch 2/10: 100%|███████████▉| 988/991 [4:11:47<00:41, 13.81s/batch, batch_loss=0.0363, batch_index=989, batch_size=256]

Epoch 2/10: 100%|███████████▉| 989/991 [4:11:47<00:28, 14.37s/batch, batch_loss=0.0363, batch_index=989, batch_size=256]

Epoch 2/10: 100%|███████████▉| 989/991 [4:12:02<00:28, 14.37s/batch, batch_loss=0.0345, batch_index=990, batch_size=256]

Epoch 2/10: 100%|███████████▉| 990/991 [4:12:02<00:14, 14.35s/batch, batch_loss=0.0345, batch_index=990, batch_size=256]

Epoch 2/10: 100%|███████████▉| 990/991 [4:12:14<00:14, 14.35s/batch, batch_loss=0.0396, batch_index=991, batch_size=220]

Epoch 2/10: 100%|████████████| 991/991 [4:12:14<00:00, 13.65s/batch, batch_loss=0.0396, batch_index=991, batch_size=220]

Epoch 2/10: 100%|████████████| 991/991 [4:12:14<00:00, 15.27s/batch, batch_loss=0.0396, batch_index=991, batch_size=220]




Epoch 2, Loss: 986.9570


Validation:   0%|                                                                            | 0/743 [00:00<?, ?batch/s]

Validation:   0%|                            | 0/743 [00:15<?, ?batch/s, batch_loss=18.1, batch_index=1, batch_size=256]

Validation:   0%|                  | 1/743 [00:15<3:07:56, 15.20s/batch, batch_loss=18.1, batch_index=1, batch_size=256]

Validation:   0%|                  | 1/743 [00:30<3:07:56, 15.20s/batch, batch_loss=19.3, batch_index=2, batch_size=256]

Validation:   0%|                  | 2/743 [00:30<3:05:34, 15.03s/batch, batch_loss=19.3, batch_index=2, batch_size=256]

Validation:   0%|                  | 2/743 [00:45<3:05:34, 15.03s/batch, batch_loss=17.9, batch_index=3, batch_size=256]

Validation:   0%|                  | 3/743 [00:45<3:06:56, 15.16s/batch, batch_loss=17.9, batch_index=3, batch_size=256]

Validation:   0%|                  | 3/743 [00:59<3:06:56, 15.16s/batch, batch_loss=13.4, batch_index=4, batch_size=256]

Validation:   1%|                  | 4/743 [00:59<3:03:50, 14.93s/batch, batch_loss=13.4, batch_index=4, batch_size=256]

Validation:   1%|                  | 4/743 [01:15<3:03:50, 14.93s/batch, batch_loss=22.3, batch_index=5, batch_size=256]

Validation:   1%|                  | 5/743 [01:15<3:05:14, 15.06s/batch, batch_loss=22.3, batch_index=5, batch_size=256]

Validation:   1%|                  | 5/743 [01:32<3:05:14, 15.06s/batch, batch_loss=21.5, batch_index=6, batch_size=256]

Validation:   1%|▏                 | 6/743 [01:32<3:13:08, 15.72s/batch, batch_loss=21.5, batch_index=6, batch_size=256]

Validation:   1%|▏                  | 6/743 [01:47<3:13:08, 15.72s/batch, batch_loss=556, batch_index=7, batch_size=256]

Validation:   1%|▏                  | 7/743 [01:47<3:09:00, 15.41s/batch, batch_loss=556, batch_index=7, batch_size=256]

Validation:   1%|▏                 | 7/743 [02:02<3:09:00, 15.41s/batch, batch_loss=17.6, batch_index=8, batch_size=256]

Validation:   1%|▏                 | 8/743 [02:02<3:07:24, 15.30s/batch, batch_loss=17.6, batch_index=8, batch_size=256]

Validation:   1%|▏                 | 8/743 [02:17<3:07:24, 15.30s/batch, batch_loss=16.8, batch_index=9, batch_size=256]

Validation:   1%|▏                 | 9/743 [02:17<3:05:57, 15.20s/batch, batch_loss=16.8, batch_index=9, batch_size=256]

Validation:   1%|▏                | 9/743 [02:32<3:05:57, 15.20s/batch, batch_loss=14.6, batch_index=10, batch_size=256]

Validation:   1%|▏               | 10/743 [02:32<3:05:23, 15.18s/batch, batch_loss=14.6, batch_index=10, batch_size=256]

Validation:   1%|▏               | 10/743 [02:47<3:05:23, 15.18s/batch, batch_loss=13.3, batch_index=11, batch_size=256]

Validation:   1%|▏               | 11/743 [02:47<3:06:15, 15.27s/batch, batch_loss=13.3, batch_index=11, batch_size=256]

Validation:   1%|▏            | 11/743 [03:02<3:06:15, 15.27s/batch, batch_loss=2.19e+3, batch_index=12, batch_size=256]

Validation:   2%|▏            | 12/743 [03:02<3:02:56, 15.02s/batch, batch_loss=2.19e+3, batch_index=12, batch_size=256]

Validation:   2%|▎               | 12/743 [03:16<3:02:56, 15.02s/batch, batch_loss=13.6, batch_index=13, batch_size=256]

Validation:   2%|▎               | 13/743 [03:16<3:01:05, 14.88s/batch, batch_loss=13.6, batch_index=13, batch_size=256]

Validation:   2%|▎               | 13/743 [03:33<3:01:05, 14.88s/batch, batch_loss=13.6, batch_index=14, batch_size=256]

Validation:   2%|▎               | 14/743 [03:33<3:07:41, 15.45s/batch, batch_loss=13.6, batch_index=14, batch_size=256]

Validation:   2%|▎                 | 14/743 [03:47<3:07:41, 15.45s/batch, batch_loss=20, batch_index=15, batch_size=256]

Validation:   2%|▎                 | 15/743 [03:47<3:03:55, 15.16s/batch, batch_loss=20, batch_index=15, batch_size=256]

Validation:   2%|▎               | 15/743 [04:02<3:03:55, 15.16s/batch, batch_loss=15.2, batch_index=16, batch_size=256]

Validation:   2%|▎               | 16/743 [04:02<3:00:26, 14.89s/batch, batch_loss=15.2, batch_index=16, batch_size=256]

Validation:   2%|▎               | 16/743 [04:17<3:00:26, 14.89s/batch, batch_loss=11.9, batch_index=17, batch_size=256]

Validation:   2%|▎               | 17/743 [04:17<2:59:47, 14.86s/batch, batch_loss=11.9, batch_index=17, batch_size=256]

Validation:   2%|▎            | 17/743 [04:31<2:59:47, 14.86s/batch, batch_loss=4.54e+3, batch_index=18, batch_size=256]

Validation:   2%|▎            | 18/743 [04:31<2:58:16, 14.75s/batch, batch_loss=4.54e+3, batch_index=18, batch_size=256]

Validation:   2%|▍               | 18/743 [04:45<2:58:16, 14.75s/batch, batch_loss=12.5, batch_index=19, batch_size=256]

Validation:   3%|▍               | 19/743 [04:45<2:55:52, 14.58s/batch, batch_loss=12.5, batch_index=19, batch_size=256]

Validation:   3%|▍               | 19/743 [05:00<2:55:52, 14.58s/batch, batch_loss=16.4, batch_index=20, batch_size=256]

Validation:   3%|▍               | 20/743 [05:00<2:56:57, 14.69s/batch, batch_loss=16.4, batch_index=20, batch_size=256]

Validation:   3%|▍                | 20/743 [05:14<2:56:57, 14.69s/batch, batch_loss=956, batch_index=21, batch_size=256]

Validation:   3%|▍                | 21/743 [05:14<2:53:39, 14.43s/batch, batch_loss=956, batch_index=21, batch_size=256]

Validation:   3%|▍               | 21/743 [05:28<2:53:39, 14.43s/batch, batch_loss=15.9, batch_index=22, batch_size=256]

Validation:   3%|▍               | 22/743 [05:28<2:52:55, 14.39s/batch, batch_loss=15.9, batch_index=22, batch_size=256]

Validation:   3%|▍               | 22/743 [05:43<2:52:55, 14.39s/batch, batch_loss=9.24, batch_index=23, batch_size=256]

Validation:   3%|▍               | 23/743 [05:43<2:54:53, 14.57s/batch, batch_loss=9.24, batch_index=23, batch_size=256]

Validation:   3%|▍               | 23/743 [05:58<2:54:53, 14.57s/batch, batch_loss=20.7, batch_index=24, batch_size=256]

Validation:   3%|▌               | 24/743 [05:58<2:54:44, 14.58s/batch, batch_loss=20.7, batch_index=24, batch_size=256]

Validation:   3%|▌               | 24/743 [06:12<2:54:44, 14.58s/batch, batch_loss=14.7, batch_index=25, batch_size=256]

Validation:   3%|▌               | 25/743 [06:12<2:53:58, 14.54s/batch, batch_loss=14.7, batch_index=25, batch_size=256]

Validation:   3%|▌               | 25/743 [06:27<2:53:58, 14.54s/batch, batch_loss=19.6, batch_index=26, batch_size=256]

Validation:   3%|▌               | 26/743 [06:27<2:53:40, 14.53s/batch, batch_loss=19.6, batch_index=26, batch_size=256]

Validation:   3%|▍            | 26/743 [06:42<2:53:40, 14.53s/batch, batch_loss=1.64e+3, batch_index=27, batch_size=256]

Validation:   4%|▍            | 27/743 [06:42<2:54:58, 14.66s/batch, batch_loss=1.64e+3, batch_index=27, batch_size=256]

Validation:   4%|▌               | 27/743 [06:57<2:54:58, 14.66s/batch, batch_loss=16.5, batch_index=28, batch_size=256]

Validation:   4%|▌               | 28/743 [06:57<2:57:13, 14.87s/batch, batch_loss=16.5, batch_index=28, batch_size=256]

Validation:   4%|▋                 | 28/743 [07:16<2:57:13, 14.87s/batch, batch_loss=16, batch_index=29, batch_size=256]

Validation:   4%|▋                 | 29/743 [07:16<3:09:57, 15.96s/batch, batch_loss=16, batch_index=29, batch_size=256]

Validation:   4%|▌            | 29/743 [07:31<3:09:57, 15.96s/batch, batch_loss=1.18e+4, batch_index=30, batch_size=256]

Validation:   4%|▌            | 30/743 [07:31<3:05:58, 15.65s/batch, batch_loss=1.18e+4, batch_index=30, batch_size=256]

Validation:   4%|▋               | 30/743 [07:46<3:05:58, 15.65s/batch, batch_loss=18.3, batch_index=31, batch_size=256]

Validation:   4%|▋               | 31/743 [07:46<3:03:10, 15.44s/batch, batch_loss=18.3, batch_index=31, batch_size=256]

Validation:   4%|▋               | 31/743 [08:00<3:03:10, 15.44s/batch, batch_loss=17.2, batch_index=32, batch_size=256]

Validation:   4%|▋               | 32/743 [08:00<2:59:01, 15.11s/batch, batch_loss=17.2, batch_index=32, batch_size=256]

Validation:   4%|▋               | 32/743 [08:13<2:59:01, 15.11s/batch, batch_loss=18.6, batch_index=33, batch_size=256]

Validation:   4%|▋               | 33/743 [08:13<2:53:23, 14.65s/batch, batch_loss=18.6, batch_index=33, batch_size=256]

Validation:   4%|▋               | 33/743 [08:27<2:53:23, 14.65s/batch, batch_loss=16.2, batch_index=34, batch_size=256]

Validation:   5%|▋               | 34/743 [08:27<2:48:22, 14.25s/batch, batch_loss=16.2, batch_index=34, batch_size=256]

Validation:   5%|▌            | 34/743 [08:40<2:48:22, 14.25s/batch, batch_loss=2.82e+3, batch_index=35, batch_size=256]

Validation:   5%|▌            | 35/743 [08:40<2:45:16, 14.01s/batch, batch_loss=2.82e+3, batch_index=35, batch_size=256]

Validation:   5%|▊               | 35/743 [08:55<2:45:16, 14.01s/batch, batch_loss=15.4, batch_index=36, batch_size=256]

Validation:   5%|▊               | 36/743 [08:55<2:48:24, 14.29s/batch, batch_loss=15.4, batch_index=36, batch_size=256]

Validation:   5%|▊                | 36/743 [09:11<2:48:24, 14.29s/batch, batch_loss=163, batch_index=37, batch_size=256]

Validation:   5%|▊                | 37/743 [09:11<2:52:25, 14.65s/batch, batch_loss=163, batch_index=37, batch_size=256]

Validation:   5%|▋            | 37/743 [09:25<2:52:25, 14.65s/batch, batch_loss=6.51e+3, batch_index=38, batch_size=256]

Validation:   5%|▋            | 38/743 [09:25<2:52:39, 14.69s/batch, batch_loss=6.51e+3, batch_index=38, batch_size=256]

Validation:   5%|▊               | 38/743 [09:42<2:52:39, 14.69s/batch, batch_loss=15.7, batch_index=39, batch_size=256]

Validation:   5%|▊               | 39/743 [09:42<3:00:15, 15.36s/batch, batch_loss=15.7, batch_index=39, batch_size=256]

Validation:   5%|▊               | 39/743 [09:57<3:00:15, 15.36s/batch, batch_loss=18.6, batch_index=40, batch_size=256]

Validation:   5%|▊               | 40/743 [09:57<2:57:24, 15.14s/batch, batch_loss=18.6, batch_index=40, batch_size=256]

Validation:   5%|▊               | 40/743 [10:13<2:57:24, 15.14s/batch, batch_loss=15.6, batch_index=41, batch_size=256]

Validation:   6%|▉               | 41/743 [10:13<2:59:25, 15.34s/batch, batch_loss=15.6, batch_index=41, batch_size=256]

Validation:   6%|▉               | 41/743 [10:28<2:59:25, 15.34s/batch, batch_loss=17.1, batch_index=42, batch_size=256]

Validation:   6%|▉               | 42/743 [10:28<2:58:12, 15.25s/batch, batch_loss=17.1, batch_index=42, batch_size=256]

Validation:   6%|▉               | 42/743 [10:43<2:58:12, 15.25s/batch, batch_loss=11.1, batch_index=43, batch_size=256]

Validation:   6%|▉               | 43/743 [10:43<2:57:13, 15.19s/batch, batch_loss=11.1, batch_index=43, batch_size=256]

Validation:   6%|▉               | 43/743 [10:56<2:57:13, 15.19s/batch, batch_loss=16.7, batch_index=44, batch_size=256]

Validation:   6%|▉               | 44/743 [10:56<2:51:18, 14.70s/batch, batch_loss=16.7, batch_index=44, batch_size=256]

Validation:   6%|▉               | 44/743 [11:10<2:51:18, 14.70s/batch, batch_loss=19.2, batch_index=45, batch_size=256]

Validation:   6%|▉               | 45/743 [11:10<2:47:36, 14.41s/batch, batch_loss=19.2, batch_index=45, batch_size=256]

Validation:   6%|▉               | 45/743 [11:27<2:47:36, 14.41s/batch, batch_loss=12.6, batch_index=46, batch_size=256]

Validation:   6%|▉               | 46/743 [11:27<2:55:28, 15.10s/batch, batch_loss=12.6, batch_index=46, batch_size=256]

Validation:   6%|▉               | 46/743 [11:40<2:55:28, 15.10s/batch, batch_loss=16.8, batch_index=47, batch_size=256]

Validation:   6%|█               | 47/743 [11:40<2:48:00, 14.48s/batch, batch_loss=16.8, batch_index=47, batch_size=256]

Validation:   6%|█               | 47/743 [11:54<2:48:00, 14.48s/batch, batch_loss=20.7, batch_index=48, batch_size=256]

Validation:   6%|█               | 48/743 [11:54<2:46:31, 14.38s/batch, batch_loss=20.7, batch_index=48, batch_size=256]

Validation:   6%|█               | 48/743 [12:09<2:46:31, 14.38s/batch, batch_loss=20.3, batch_index=49, batch_size=256]

Validation:   7%|█               | 49/743 [12:09<2:48:59, 14.61s/batch, batch_loss=20.3, batch_index=49, batch_size=256]

Validation:   7%|█               | 49/743 [12:24<2:48:59, 14.61s/batch, batch_loss=14.8, batch_index=50, batch_size=256]

Validation:   7%|█               | 50/743 [12:24<2:48:52, 14.62s/batch, batch_loss=14.8, batch_index=50, batch_size=256]

Validation:   7%|█               | 50/743 [12:39<2:48:52, 14.62s/batch, batch_loss=14.9, batch_index=51, batch_size=256]

Validation:   7%|█               | 51/743 [12:39<2:49:41, 14.71s/batch, batch_loss=14.9, batch_index=51, batch_size=256]

Validation:   7%|█               | 51/743 [12:54<2:49:41, 14.71s/batch, batch_loss=15.6, batch_index=52, batch_size=256]

Validation:   7%|█               | 52/743 [12:54<2:49:45, 14.74s/batch, batch_loss=15.6, batch_index=52, batch_size=256]

Validation:   7%|█               | 52/743 [13:09<2:49:45, 14.74s/batch, batch_loss=22.3, batch_index=53, batch_size=256]

Validation:   7%|█▏              | 53/743 [13:09<2:50:44, 14.85s/batch, batch_loss=22.3, batch_index=53, batch_size=256]

Validation:   7%|█▏              | 53/743 [13:25<2:50:44, 14.85s/batch, batch_loss=15.5, batch_index=54, batch_size=256]

Validation:   7%|█▏              | 54/743 [13:25<2:54:58, 15.24s/batch, batch_loss=15.5, batch_index=54, batch_size=256]

Validation:   7%|█▏              | 54/743 [13:39<2:54:58, 15.24s/batch, batch_loss=18.4, batch_index=55, batch_size=256]

Validation:   7%|█▏              | 55/743 [13:39<2:51:48, 14.98s/batch, batch_loss=18.4, batch_index=55, batch_size=256]

Validation:   7%|█▏              | 55/743 [13:54<2:51:48, 14.98s/batch, batch_loss=17.4, batch_index=56, batch_size=256]

Validation:   8%|█▏              | 56/743 [13:54<2:50:58, 14.93s/batch, batch_loss=17.4, batch_index=56, batch_size=256]

Validation:   8%|█▏              | 56/743 [14:09<2:50:58, 14.93s/batch, batch_loss=15.2, batch_index=57, batch_size=256]

Validation:   8%|█▏              | 57/743 [14:09<2:49:48, 14.85s/batch, batch_loss=15.2, batch_index=57, batch_size=256]

Validation:   8%|█▏              | 57/743 [14:24<2:49:48, 14.85s/batch, batch_loss=17.6, batch_index=58, batch_size=256]

Validation:   8%|█▏              | 58/743 [14:24<2:50:50, 14.96s/batch, batch_loss=17.6, batch_index=58, batch_size=256]

Validation:   8%|█▎               | 58/743 [14:39<2:50:50, 14.96s/batch, batch_loss=105, batch_index=59, batch_size=256]

Validation:   8%|█▎               | 59/743 [14:39<2:49:37, 14.88s/batch, batch_loss=105, batch_index=59, batch_size=256]

Validation:   8%|█            | 59/743 [14:53<2:49:37, 14.88s/batch, batch_loss=6.14e+3, batch_index=60, batch_size=256]

Validation:   8%|█            | 60/743 [14:53<2:47:42, 14.73s/batch, batch_loss=6.14e+3, batch_index=60, batch_size=256]

Validation:   8%|█▎              | 60/743 [15:08<2:47:42, 14.73s/batch, batch_loss=10.1, batch_index=61, batch_size=256]

Validation:   8%|█▎              | 61/743 [15:08<2:47:47, 14.76s/batch, batch_loss=10.1, batch_index=61, batch_size=256]

Validation:   8%|█▎              | 61/743 [15:22<2:47:47, 14.76s/batch, batch_loss=10.6, batch_index=62, batch_size=256]

Validation:   8%|█▎              | 62/743 [15:22<2:45:55, 14.62s/batch, batch_loss=10.6, batch_index=62, batch_size=256]

Validation:   8%|█▎              | 62/743 [15:39<2:45:55, 14.62s/batch, batch_loss=21.6, batch_index=63, batch_size=256]

Validation:   8%|█▎              | 63/743 [15:39<2:52:47, 15.25s/batch, batch_loss=21.6, batch_index=63, batch_size=256]

Validation:   8%|█▎              | 63/743 [15:53<2:52:47, 15.25s/batch, batch_loss=12.1, batch_index=64, batch_size=256]

Validation:   9%|█▍              | 64/743 [15:53<2:50:23, 15.06s/batch, batch_loss=12.1, batch_index=64, batch_size=256]

Validation:   9%|█▍              | 64/743 [16:09<2:50:23, 15.06s/batch, batch_loss=16.6, batch_index=65, batch_size=256]

Validation:   9%|█▍              | 65/743 [16:09<2:50:27, 15.08s/batch, batch_loss=16.6, batch_index=65, batch_size=256]

Validation:   9%|█▏           | 65/743 [16:24<2:50:27, 15.08s/batch, batch_loss=1.27e+3, batch_index=66, batch_size=256]

Validation:   9%|█▏           | 66/743 [16:24<2:50:37, 15.12s/batch, batch_loss=1.27e+3, batch_index=66, batch_size=256]

Validation:   9%|█▍              | 66/743 [16:38<2:50:37, 15.12s/batch, batch_loss=15.1, batch_index=67, batch_size=256]

Validation:   9%|█▍              | 67/743 [16:38<2:47:49, 14.90s/batch, batch_loss=15.1, batch_index=67, batch_size=256]

Validation:   9%|█▍              | 67/743 [16:52<2:47:49, 14.90s/batch, batch_loss=16.5, batch_index=68, batch_size=256]

Validation:   9%|█▍              | 68/743 [16:52<2:43:52, 14.57s/batch, batch_loss=16.5, batch_index=68, batch_size=256]

Validation:   9%|█▍              | 68/743 [17:06<2:43:52, 14.57s/batch, batch_loss=10.4, batch_index=69, batch_size=256]

Validation:   9%|█▍              | 69/743 [17:06<2:42:52, 14.50s/batch, batch_loss=10.4, batch_index=69, batch_size=256]

Validation:   9%|█▍              | 69/743 [17:21<2:42:52, 14.50s/batch, batch_loss=15.2, batch_index=70, batch_size=256]

Validation:   9%|█▌              | 70/743 [17:21<2:43:57, 14.62s/batch, batch_loss=15.2, batch_index=70, batch_size=256]

Validation:   9%|█▌              | 70/743 [17:36<2:43:57, 14.62s/batch, batch_loss=9.66, batch_index=71, batch_size=256]

Validation:  10%|█▌              | 71/743 [17:36<2:42:57, 14.55s/batch, batch_loss=9.66, batch_index=71, batch_size=256]

Validation:  10%|█▌              | 71/743 [17:51<2:42:57, 14.55s/batch, batch_loss=15.8, batch_index=72, batch_size=256]

Validation:  10%|█▌              | 72/743 [17:51<2:46:17, 14.87s/batch, batch_loss=15.8, batch_index=72, batch_size=256]

Validation:  10%|█▌              | 72/743 [18:06<2:46:17, 14.87s/batch, batch_loss=17.7, batch_index=73, batch_size=256]

Validation:  10%|█▌              | 73/743 [18:06<2:47:00, 14.96s/batch, batch_loss=17.7, batch_index=73, batch_size=256]

Validation:  10%|█▌              | 73/743 [18:21<2:47:00, 14.96s/batch, batch_loss=20.1, batch_index=74, batch_size=256]

Validation:  10%|█▌              | 74/743 [18:21<2:45:51, 14.88s/batch, batch_loss=20.1, batch_index=74, batch_size=256]

Validation:  10%|█▌              | 74/743 [18:36<2:45:51, 14.88s/batch, batch_loss=13.4, batch_index=75, batch_size=256]

Validation:  10%|█▌              | 75/743 [18:36<2:45:08, 14.83s/batch, batch_loss=13.4, batch_index=75, batch_size=256]

Validation:  10%|█▌              | 75/743 [18:50<2:45:08, 14.83s/batch, batch_loss=16.7, batch_index=76, batch_size=256]

Validation:  10%|█▋              | 76/743 [18:50<2:43:25, 14.70s/batch, batch_loss=16.7, batch_index=76, batch_size=256]

Validation:  10%|█▋              | 76/743 [19:05<2:43:25, 14.70s/batch, batch_loss=13.6, batch_index=77, batch_size=256]

Validation:  10%|█▋              | 77/743 [19:05<2:44:10, 14.79s/batch, batch_loss=13.6, batch_index=77, batch_size=256]

Validation:  10%|█▋              | 77/743 [19:22<2:44:10, 14.79s/batch, batch_loss=15.7, batch_index=78, batch_size=256]

Validation:  10%|█▋              | 78/743 [19:22<2:49:32, 15.30s/batch, batch_loss=15.7, batch_index=78, batch_size=256]

Validation:  10%|█▋              | 78/743 [19:38<2:49:32, 15.30s/batch, batch_loss=11.6, batch_index=79, batch_size=256]

Validation:  11%|█▋              | 79/743 [19:38<2:52:23, 15.58s/batch, batch_loss=11.6, batch_index=79, batch_size=256]

Validation:  11%|█▋              | 79/743 [19:53<2:52:23, 15.58s/batch, batch_loss=7.86, batch_index=80, batch_size=256]

Validation:  11%|█▋              | 80/743 [19:53<2:50:17, 15.41s/batch, batch_loss=7.86, batch_index=80, batch_size=256]

Validation:  11%|█▊               | 80/743 [20:08<2:50:17, 15.41s/batch, batch_loss=153, batch_index=81, batch_size=256]

Validation:  11%|█▊               | 81/743 [20:08<2:47:38, 15.19s/batch, batch_loss=153, batch_index=81, batch_size=256]

Validation:  11%|█▍           | 81/743 [20:22<2:47:38, 15.19s/batch, batch_loss=1.51e+3, batch_index=82, batch_size=256]

Validation:  11%|█▍           | 82/743 [20:22<2:45:27, 15.02s/batch, batch_loss=1.51e+3, batch_index=82, batch_size=256]

Validation:  11%|█▊              | 82/743 [20:37<2:45:27, 15.02s/batch, batch_loss=32.8, batch_index=83, batch_size=256]

Validation:  11%|█▊              | 83/743 [20:37<2:44:52, 14.99s/batch, batch_loss=32.8, batch_index=83, batch_size=256]

Validation:  11%|█▊              | 83/743 [20:53<2:44:52, 14.99s/batch, batch_loss=17.1, batch_index=84, batch_size=256]

Validation:  11%|█▊              | 84/743 [20:53<2:46:37, 15.17s/batch, batch_loss=17.1, batch_index=84, batch_size=256]

Validation:  11%|█▊              | 84/743 [21:07<2:46:37, 15.17s/batch, batch_loss=20.3, batch_index=85, batch_size=256]

Validation:  11%|█▊              | 85/743 [21:07<2:45:02, 15.05s/batch, batch_loss=20.3, batch_index=85, batch_size=256]

Validation:  11%|█▊              | 85/743 [21:23<2:45:02, 15.05s/batch, batch_loss=23.4, batch_index=86, batch_size=256]

Validation:  12%|█▊              | 86/743 [21:23<2:46:10, 15.18s/batch, batch_loss=23.4, batch_index=86, batch_size=256]

Validation:  12%|█▊              | 86/743 [21:39<2:46:10, 15.18s/batch, batch_loss=32.6, batch_index=87, batch_size=256]

Validation:  12%|█▊              | 87/743 [21:39<2:48:17, 15.39s/batch, batch_loss=32.6, batch_index=87, batch_size=256]

Validation:  12%|█▊              | 87/743 [21:54<2:48:17, 15.39s/batch, batch_loss=21.7, batch_index=88, batch_size=256]

Validation:  12%|█▉              | 88/743 [21:54<2:47:32, 15.35s/batch, batch_loss=21.7, batch_index=88, batch_size=256]

Validation:  12%|█▌           | 88/743 [22:10<2:47:32, 15.35s/batch, batch_loss=1.46e+4, batch_index=89, batch_size=256]

Validation:  12%|█▌           | 89/743 [22:10<2:50:29, 15.64s/batch, batch_loss=1.46e+4, batch_index=89, batch_size=256]

Validation:  12%|█▉              | 89/743 [22:25<2:50:29, 15.64s/batch, batch_loss=11.7, batch_index=90, batch_size=256]

Validation:  12%|█▉              | 90/743 [22:25<2:47:19, 15.38s/batch, batch_loss=11.7, batch_index=90, batch_size=256]

Validation:  12%|█▉              | 90/743 [22:41<2:47:19, 15.38s/batch, batch_loss=34.3, batch_index=91, batch_size=256]

Validation:  12%|█▉              | 91/743 [22:41<2:47:39, 15.43s/batch, batch_loss=34.3, batch_index=91, batch_size=256]

Validation:  12%|█▉              | 91/743 [22:55<2:47:39, 15.43s/batch, batch_loss=30.6, batch_index=92, batch_size=256]

Validation:  12%|█▉              | 92/743 [22:55<2:43:01, 15.02s/batch, batch_loss=30.6, batch_index=92, batch_size=256]

Validation:  12%|█▉              | 92/743 [23:09<2:43:01, 15.02s/batch, batch_loss=25.4, batch_index=93, batch_size=256]

Validation:  13%|██              | 93/743 [23:09<2:38:27, 14.63s/batch, batch_loss=25.4, batch_index=93, batch_size=256]

Validation:  13%|██              | 93/743 [23:24<2:38:27, 14.63s/batch, batch_loss=35.6, batch_index=94, batch_size=256]

Validation:  13%|██              | 94/743 [23:24<2:39:29, 14.74s/batch, batch_loss=35.6, batch_index=94, batch_size=256]

Validation:  13%|██              | 94/743 [23:41<2:39:29, 14.74s/batch, batch_loss=13.7, batch_index=95, batch_size=256]

Validation:  13%|██              | 95/743 [23:41<2:46:44, 15.44s/batch, batch_loss=13.7, batch_index=95, batch_size=256]

Validation:  13%|██              | 95/743 [23:56<2:46:44, 15.44s/batch, batch_loss=19.1, batch_index=96, batch_size=256]

Validation:  13%|██              | 96/743 [23:56<2:45:31, 15.35s/batch, batch_loss=19.1, batch_index=96, batch_size=256]

Validation:  13%|██▎               | 96/743 [24:11<2:45:31, 15.35s/batch, batch_loss=29, batch_index=97, batch_size=256]

Validation:  13%|██▎               | 97/743 [24:11<2:45:12, 15.34s/batch, batch_loss=29, batch_index=97, batch_size=256]

Validation:  13%|██              | 97/743 [24:26<2:45:12, 15.34s/batch, batch_loss=16.6, batch_index=98, batch_size=256]

Validation:  13%|██              | 98/743 [24:26<2:43:22, 15.20s/batch, batch_loss=16.6, batch_index=98, batch_size=256]

Validation:  13%|██              | 98/743 [24:40<2:43:22, 15.20s/batch, batch_loss=24.4, batch_index=99, batch_size=256]

Validation:  13%|██▏             | 99/743 [24:40<2:40:49, 14.98s/batch, batch_loss=24.4, batch_index=99, batch_size=256]

Validation:  13%|█▉             | 99/743 [24:55<2:40:49, 14.98s/batch, batch_loss=15.2, batch_index=100, batch_size=256]

Validation:  13%|█▉            | 100/743 [24:55<2:39:42, 14.90s/batch, batch_loss=15.2, batch_index=100, batch_size=256]

Validation:  13%|█▉            | 100/743 [25:13<2:39:42, 14.90s/batch, batch_loss=16.9, batch_index=101, batch_size=256]

Validation:  14%|█▉            | 101/743 [25:13<2:47:24, 15.65s/batch, batch_loss=16.9, batch_index=101, batch_size=256]

Validation:  14%|█▉            | 101/743 [25:28<2:47:24, 15.65s/batch, batch_loss=11.5, batch_index=102, batch_size=256]

Validation:  14%|█▉            | 102/743 [25:28<2:45:12, 15.46s/batch, batch_loss=11.5, batch_index=102, batch_size=256]

Validation:  14%|█▌         | 102/743 [25:43<2:45:12, 15.46s/batch, batch_loss=3.39e+3, batch_index=103, batch_size=256]

Validation:  14%|█▌         | 103/743 [25:43<2:45:14, 15.49s/batch, batch_loss=3.39e+3, batch_index=103, batch_size=256]

Validation:  14%|█▉            | 103/743 [25:58<2:45:14, 15.49s/batch, batch_loss=16.2, batch_index=104, batch_size=256]

Validation:  14%|█▉            | 104/743 [25:58<2:44:38, 15.46s/batch, batch_loss=16.2, batch_index=104, batch_size=256]

Validation:  14%|█▉            | 104/743 [26:13<2:44:38, 15.46s/batch, batch_loss=7.86, batch_index=105, batch_size=256]

Validation:  14%|█▉            | 105/743 [26:13<2:41:53, 15.22s/batch, batch_loss=7.86, batch_index=105, batch_size=256]

Validation:  14%|█▉            | 105/743 [26:28<2:41:53, 15.22s/batch, batch_loss=19.2, batch_index=106, batch_size=256]

Validation:  14%|█▉            | 106/743 [26:28<2:41:24, 15.20s/batch, batch_loss=19.2, batch_index=106, batch_size=256]

Validation:  14%|██▏            | 106/743 [26:43<2:41:24, 15.20s/batch, batch_loss=723, batch_index=107, batch_size=256]

Validation:  14%|██▏            | 107/743 [26:43<2:40:07, 15.11s/batch, batch_loss=723, batch_index=107, batch_size=256]

Validation:  14%|█▋          | 107/743 [26:58<2:40:07, 15.11s/batch, batch_loss=1.6e+3, batch_index=108, batch_size=256]

Validation:  15%|█▋          | 108/743 [26:58<2:38:53, 15.01s/batch, batch_loss=1.6e+3, batch_index=108, batch_size=256]

Validation:  15%|██▏            | 108/743 [27:13<2:38:53, 15.01s/batch, batch_loss=204, batch_index=109, batch_size=256]

Validation:  15%|██▏            | 109/743 [27:13<2:39:24, 15.09s/batch, batch_loss=204, batch_index=109, batch_size=256]

Validation:  15%|██            | 109/743 [27:27<2:39:24, 15.09s/batch, batch_loss=24.3, batch_index=110, batch_size=256]

Validation:  15%|██            | 110/743 [27:27<2:36:20, 14.82s/batch, batch_loss=24.3, batch_index=110, batch_size=256]

Validation:  15%|██▎             | 110/743 [27:42<2:36:20, 14.82s/batch, batch_loss=15, batch_index=111, batch_size=256]

Validation:  15%|██▍             | 111/743 [27:42<2:36:28, 14.86s/batch, batch_loss=15, batch_index=111, batch_size=256]

Validation:  15%|██            | 111/743 [27:58<2:36:28, 14.86s/batch, batch_loss=26.3, batch_index=112, batch_size=256]

Validation:  15%|██            | 112/743 [27:58<2:37:51, 15.01s/batch, batch_loss=26.3, batch_index=112, batch_size=256]

Validation:  15%|█▋         | 112/743 [28:12<2:37:51, 15.01s/batch, batch_loss=1.06e+4, batch_index=113, batch_size=256]

Validation:  15%|█▋         | 113/743 [28:12<2:35:40, 14.83s/batch, batch_loss=1.06e+4, batch_index=113, batch_size=256]

Validation:  15%|██▏           | 113/743 [28:27<2:35:40, 14.83s/batch, batch_loss=20.4, batch_index=114, batch_size=256]

Validation:  15%|██▏           | 114/743 [28:27<2:34:23, 14.73s/batch, batch_loss=20.4, batch_index=114, batch_size=256]

Validation:  15%|██▏           | 114/743 [28:41<2:34:23, 14.73s/batch, batch_loss=21.7, batch_index=115, batch_size=256]

Validation:  15%|██▏           | 115/743 [28:41<2:33:29, 14.67s/batch, batch_loss=21.7, batch_index=115, batch_size=256]

Validation:  15%|██▍             | 115/743 [28:56<2:33:29, 14.67s/batch, batch_loss=16, batch_index=116, batch_size=256]

Validation:  16%|██▍             | 116/743 [28:56<2:34:07, 14.75s/batch, batch_loss=16, batch_index=116, batch_size=256]

Validation:  16%|██▏           | 116/743 [29:10<2:34:07, 14.75s/batch, batch_loss=26.3, batch_index=117, batch_size=256]

Validation:  16%|██▏           | 117/743 [29:10<2:32:32, 14.62s/batch, batch_loss=26.3, batch_index=117, batch_size=256]

Validation:  16%|██▏           | 117/743 [29:26<2:32:32, 14.62s/batch, batch_loss=27.4, batch_index=118, batch_size=256]

Validation:  16%|██▏           | 118/743 [29:26<2:35:21, 14.91s/batch, batch_loss=27.4, batch_index=118, batch_size=256]

Validation:  16%|██▌             | 118/743 [29:41<2:35:21, 14.91s/batch, batch_loss=19, batch_index=119, batch_size=256]

Validation:  16%|██▌             | 119/743 [29:41<2:34:47, 14.88s/batch, batch_loss=19, batch_index=119, batch_size=256]

Validation:  16%|██▏           | 119/743 [29:56<2:34:47, 14.88s/batch, batch_loss=22.2, batch_index=120, batch_size=256]

Validation:  16%|██▎           | 120/743 [29:56<2:35:23, 14.97s/batch, batch_loss=22.2, batch_index=120, batch_size=256]

Validation:  16%|██▎           | 120/743 [30:11<2:35:23, 14.97s/batch, batch_loss=14.3, batch_index=121, batch_size=256]

Validation:  16%|██▎           | 121/743 [30:11<2:35:19, 14.98s/batch, batch_loss=14.3, batch_index=121, batch_size=256]

Validation:  16%|██▎           | 121/743 [30:26<2:35:19, 14.98s/batch, batch_loss=7.06, batch_index=122, batch_size=256]

Validation:  16%|██▎           | 122/743 [30:26<2:35:19, 15.01s/batch, batch_loss=7.06, batch_index=122, batch_size=256]

Validation:  16%|██▎           | 122/743 [30:41<2:35:19, 15.01s/batch, batch_loss=8.66, batch_index=123, batch_size=256]

Validation:  17%|██▎           | 123/743 [30:41<2:33:47, 14.88s/batch, batch_loss=8.66, batch_index=123, batch_size=256]

Validation:  17%|██▎           | 123/743 [30:56<2:33:47, 14.88s/batch, batch_loss=13.1, batch_index=124, batch_size=256]

Validation:  17%|██▎           | 124/743 [30:56<2:35:45, 15.10s/batch, batch_loss=13.1, batch_index=124, batch_size=256]

Validation:  17%|██▎           | 124/743 [31:11<2:35:45, 15.10s/batch, batch_loss=27.6, batch_index=125, batch_size=256]

Validation:  17%|██▎           | 125/743 [31:11<2:33:59, 14.95s/batch, batch_loss=27.6, batch_index=125, batch_size=256]

Validation:  17%|██▎           | 125/743 [31:25<2:33:59, 14.95s/batch, batch_loss=14.3, batch_index=126, batch_size=256]

Validation:  17%|██▎           | 126/743 [31:25<2:32:06, 14.79s/batch, batch_loss=14.3, batch_index=126, batch_size=256]

Validation:  17%|██▎           | 126/743 [31:40<2:32:06, 14.79s/batch, batch_loss=15.3, batch_index=127, batch_size=256]

Validation:  17%|██▍           | 127/743 [31:40<2:32:41, 14.87s/batch, batch_loss=15.3, batch_index=127, batch_size=256]

Validation:  17%|██▋             | 127/743 [31:54<2:32:41, 14.87s/batch, batch_loss=21, batch_index=128, batch_size=256]

Validation:  17%|██▊             | 128/743 [31:54<2:29:46, 14.61s/batch, batch_loss=21, batch_index=128, batch_size=256]

Validation:  17%|██▍           | 128/743 [32:08<2:29:46, 14.61s/batch, batch_loss=16.3, batch_index=129, batch_size=256]

Validation:  17%|██▍           | 129/743 [32:08<2:27:02, 14.37s/batch, batch_loss=16.3, batch_index=129, batch_size=256]

Validation:  17%|██▍           | 129/743 [32:23<2:27:02, 14.37s/batch, batch_loss=20.6, batch_index=130, batch_size=256]

Validation:  17%|██▍           | 130/743 [32:23<2:28:50, 14.57s/batch, batch_loss=20.6, batch_index=130, batch_size=256]

Validation:  17%|██▍           | 130/743 [32:38<2:28:50, 14.57s/batch, batch_loss=21.6, batch_index=131, batch_size=256]

Validation:  18%|██▍           | 131/743 [32:38<2:27:46, 14.49s/batch, batch_loss=21.6, batch_index=131, batch_size=256]

Validation:  18%|██▍           | 131/743 [32:53<2:27:46, 14.49s/batch, batch_loss=24.8, batch_index=132, batch_size=256]

Validation:  18%|██▍           | 132/743 [32:53<2:29:22, 14.67s/batch, batch_loss=24.8, batch_index=132, batch_size=256]

Validation:  18%|██▍           | 132/743 [33:07<2:29:22, 14.67s/batch, batch_loss=39.5, batch_index=133, batch_size=256]

Validation:  18%|██▌           | 133/743 [33:07<2:28:32, 14.61s/batch, batch_loss=39.5, batch_index=133, batch_size=256]

Validation:  18%|██▊             | 133/743 [33:22<2:28:32, 14.61s/batch, batch_loss=19, batch_index=134, batch_size=256]

Validation:  18%|██▉             | 134/743 [33:22<2:30:19, 14.81s/batch, batch_loss=19, batch_index=134, batch_size=256]

Validation:  18%|██▌           | 134/743 [33:36<2:30:19, 14.81s/batch, batch_loss=37.3, batch_index=135, batch_size=256]

Validation:  18%|██▌           | 135/743 [33:36<2:27:40, 14.57s/batch, batch_loss=37.3, batch_index=135, batch_size=256]

Validation:  18%|██▌           | 135/743 [33:51<2:27:40, 14.57s/batch, batch_loss=19.9, batch_index=136, batch_size=256]

Validation:  18%|██▌           | 136/743 [33:51<2:28:24, 14.67s/batch, batch_loss=19.9, batch_index=136, batch_size=256]

Validation:  18%|██▌           | 136/743 [34:06<2:28:24, 14.67s/batch, batch_loss=25.9, batch_index=137, batch_size=256]

Validation:  18%|██▌           | 137/743 [34:06<2:26:56, 14.55s/batch, batch_loss=25.9, batch_index=137, batch_size=256]

Validation:  18%|██▌           | 137/743 [34:20<2:26:56, 14.55s/batch, batch_loss=8.71, batch_index=138, batch_size=256]

Validation:  19%|██▌           | 138/743 [34:20<2:27:05, 14.59s/batch, batch_loss=8.71, batch_index=138, batch_size=256]

Validation:  19%|██▊            | 138/743 [34:34<2:27:05, 14.59s/batch, batch_loss=255, batch_index=139, batch_size=256]

Validation:  19%|██▊            | 139/743 [34:34<2:23:58, 14.30s/batch, batch_loss=255, batch_index=139, batch_size=256]

Validation:  19%|██▌           | 139/743 [34:48<2:23:58, 14.30s/batch, batch_loss=17.2, batch_index=140, batch_size=256]

Validation:  19%|██▋           | 140/743 [34:48<2:24:13, 14.35s/batch, batch_loss=17.2, batch_index=140, batch_size=256]

Validation:  19%|██▋           | 140/743 [35:03<2:24:13, 14.35s/batch, batch_loss=15.1, batch_index=141, batch_size=256]

Validation:  19%|██▋           | 141/743 [35:03<2:25:02, 14.46s/batch, batch_loss=15.1, batch_index=141, batch_size=256]

Validation:  19%|██▋           | 141/743 [35:18<2:25:02, 14.46s/batch, batch_loss=15.9, batch_index=142, batch_size=256]

Validation:  19%|██▋           | 142/743 [35:18<2:26:37, 14.64s/batch, batch_loss=15.9, batch_index=142, batch_size=256]

Validation:  19%|██▋           | 142/743 [35:33<2:26:37, 14.64s/batch, batch_loss=15.9, batch_index=143, batch_size=256]

Validation:  19%|██▋           | 143/743 [35:33<2:26:24, 14.64s/batch, batch_loss=15.9, batch_index=143, batch_size=256]

Validation:  19%|███             | 143/743 [35:47<2:26:24, 14.64s/batch, batch_loss=19, batch_index=144, batch_size=256]

Validation:  19%|███             | 144/743 [35:47<2:25:58, 14.62s/batch, batch_loss=19, batch_index=144, batch_size=256]

Validation:  19%|██▋           | 144/743 [36:02<2:25:58, 14.62s/batch, batch_loss=16.9, batch_index=145, batch_size=256]

Validation:  20%|██▋           | 145/743 [36:02<2:27:24, 14.79s/batch, batch_loss=16.9, batch_index=145, batch_size=256]

Validation:  20%|██▋           | 145/743 [36:18<2:27:24, 14.79s/batch, batch_loss=17.2, batch_index=146, batch_size=256]

Validation:  20%|██▊           | 146/743 [36:18<2:30:29, 15.12s/batch, batch_loss=17.2, batch_index=146, batch_size=256]

Validation:  20%|██▊           | 146/743 [36:33<2:30:29, 15.12s/batch, batch_loss=20.4, batch_index=147, batch_size=256]

Validation:  20%|██▊           | 147/743 [36:33<2:28:58, 15.00s/batch, batch_loss=20.4, batch_index=147, batch_size=256]

Validation:  20%|██▎         | 147/743 [36:47<2:28:58, 15.00s/batch, batch_loss=3.2e+4, batch_index=148, batch_size=256]

Validation:  20%|██▍         | 148/743 [36:47<2:25:37, 14.69s/batch, batch_loss=3.2e+4, batch_index=148, batch_size=256]

Validation:  20%|██▊           | 148/743 [37:01<2:25:37, 14.69s/batch, batch_loss=25.1, batch_index=149, batch_size=256]

Validation:  20%|██▊           | 149/743 [37:01<2:23:46, 14.52s/batch, batch_loss=25.1, batch_index=149, batch_size=256]

Validation:  20%|██▊           | 149/743 [37:19<2:23:46, 14.52s/batch, batch_loss=23.3, batch_index=150, batch_size=256]

Validation:  20%|██▊           | 150/743 [37:19<2:33:26, 15.52s/batch, batch_loss=23.3, batch_index=150, batch_size=256]

Validation:  20%|██▊           | 150/743 [37:34<2:33:26, 15.52s/batch, batch_loss=15.4, batch_index=151, batch_size=256]

Validation:  20%|██▊           | 151/743 [37:34<2:30:26, 15.25s/batch, batch_loss=15.4, batch_index=151, batch_size=256]

Validation:  20%|██▏        | 151/743 [37:48<2:30:26, 15.25s/batch, batch_loss=1.04e+4, batch_index=152, batch_size=256]

Validation:  20%|██▎        | 152/743 [37:48<2:28:51, 15.11s/batch, batch_loss=1.04e+4, batch_index=152, batch_size=256]

Validation:  20%|██▊           | 152/743 [38:06<2:28:51, 15.11s/batch, batch_loss=14.5, batch_index=153, batch_size=256]

Validation:  21%|██▉           | 153/743 [38:06<2:36:48, 15.95s/batch, batch_loss=14.5, batch_index=153, batch_size=256]

Validation:  21%|██▉           | 153/743 [38:22<2:36:48, 15.95s/batch, batch_loss=17.7, batch_index=154, batch_size=256]

Validation:  21%|██▉           | 154/743 [38:22<2:35:37, 15.85s/batch, batch_loss=17.7, batch_index=154, batch_size=256]

Validation:  21%|██▉           | 154/743 [38:36<2:35:37, 15.85s/batch, batch_loss=21.1, batch_index=155, batch_size=256]

Validation:  21%|██▉           | 155/743 [38:36<2:29:30, 15.26s/batch, batch_loss=21.1, batch_index=155, batch_size=256]

Validation:  21%|██▉           | 155/743 [38:51<2:29:30, 15.26s/batch, batch_loss=15.8, batch_index=156, batch_size=256]

Validation:  21%|██▉           | 156/743 [38:51<2:29:27, 15.28s/batch, batch_loss=15.8, batch_index=156, batch_size=256]

Validation:  21%|██▉           | 156/743 [39:06<2:29:27, 15.28s/batch, batch_loss=20.4, batch_index=157, batch_size=256]

Validation:  21%|██▉           | 157/743 [39:06<2:29:13, 15.28s/batch, batch_loss=20.4, batch_index=157, batch_size=256]

Validation:  21%|██▉           | 157/743 [39:22<2:29:13, 15.28s/batch, batch_loss=24.3, batch_index=158, batch_size=256]

Validation:  21%|██▉           | 158/743 [39:22<2:29:56, 15.38s/batch, batch_loss=24.3, batch_index=158, batch_size=256]

Validation:  21%|██▉           | 158/743 [39:37<2:29:56, 15.38s/batch, batch_loss=25.1, batch_index=159, batch_size=256]

Validation:  21%|██▉           | 159/743 [39:37<2:28:10, 15.22s/batch, batch_loss=25.1, batch_index=159, batch_size=256]

Validation:  21%|██▉           | 159/743 [39:51<2:28:10, 15.22s/batch, batch_loss=16.5, batch_index=160, batch_size=256]

Validation:  22%|███           | 160/743 [39:51<2:24:46, 14.90s/batch, batch_loss=16.5, batch_index=160, batch_size=256]

Validation:  22%|███           | 160/743 [40:05<2:24:46, 14.90s/batch, batch_loss=17.7, batch_index=161, batch_size=256]

Validation:  22%|███           | 161/743 [40:05<2:23:00, 14.74s/batch, batch_loss=17.7, batch_index=161, batch_size=256]

Validation:  22%|███           | 161/743 [40:20<2:23:00, 14.74s/batch, batch_loss=22.1, batch_index=162, batch_size=256]

Validation:  22%|███           | 162/743 [40:20<2:21:42, 14.63s/batch, batch_loss=22.1, batch_index=162, batch_size=256]

Validation:  22%|███           | 162/743 [40:34<2:21:42, 14.63s/batch, batch_loss=12.8, batch_index=163, batch_size=256]

Validation:  22%|███           | 163/743 [40:34<2:20:42, 14.56s/batch, batch_loss=12.8, batch_index=163, batch_size=256]

Validation:  22%|███           | 163/743 [40:49<2:20:42, 14.56s/batch, batch_loss=12.7, batch_index=164, batch_size=256]

Validation:  22%|███           | 164/743 [40:49<2:19:55, 14.50s/batch, batch_loss=12.7, batch_index=164, batch_size=256]

Validation:  22%|███           | 164/743 [41:05<2:19:55, 14.50s/batch, batch_loss=13.3, batch_index=165, batch_size=256]

Validation:  22%|███           | 165/743 [41:05<2:26:25, 15.20s/batch, batch_loss=13.3, batch_index=165, batch_size=256]

Validation:  22%|███           | 165/743 [41:21<2:26:25, 15.20s/batch, batch_loss=12.6, batch_index=166, batch_size=256]

Validation:  22%|███▏          | 166/743 [41:21<2:25:55, 15.17s/batch, batch_loss=12.6, batch_index=166, batch_size=256]

Validation:  22%|███▏          | 166/743 [41:35<2:25:55, 15.17s/batch, batch_loss=15.6, batch_index=167, batch_size=256]

Validation:  22%|███▏          | 167/743 [41:35<2:23:21, 14.93s/batch, batch_loss=15.6, batch_index=167, batch_size=256]

Validation:  22%|███▏          | 167/743 [41:49<2:23:21, 14.93s/batch, batch_loss=22.6, batch_index=168, batch_size=256]

Validation:  23%|███▏          | 168/743 [41:49<2:21:10, 14.73s/batch, batch_loss=22.6, batch_index=168, batch_size=256]

Validation:  23%|███▏          | 168/743 [42:04<2:21:10, 14.73s/batch, batch_loss=22.8, batch_index=169, batch_size=256]

Validation:  23%|███▏          | 169/743 [42:04<2:22:11, 14.86s/batch, batch_loss=22.8, batch_index=169, batch_size=256]

Validation:  23%|███▏          | 169/743 [42:19<2:22:11, 14.86s/batch, batch_loss=20.1, batch_index=170, batch_size=256]

Validation:  23%|███▏          | 170/743 [42:19<2:21:44, 14.84s/batch, batch_loss=20.1, batch_index=170, batch_size=256]

Validation:  23%|███▏          | 170/743 [42:34<2:21:44, 14.84s/batch, batch_loss=19.2, batch_index=171, batch_size=256]

Validation:  23%|███▏          | 171/743 [42:34<2:20:21, 14.72s/batch, batch_loss=19.2, batch_index=171, batch_size=256]

Validation:  23%|███▏          | 171/743 [42:49<2:20:21, 14.72s/batch, batch_loss=18.2, batch_index=172, batch_size=256]

Validation:  23%|███▏          | 172/743 [42:49<2:20:52, 14.80s/batch, batch_loss=18.2, batch_index=172, batch_size=256]

Validation:  23%|███▏          | 172/743 [43:03<2:20:52, 14.80s/batch, batch_loss=21.9, batch_index=173, batch_size=256]

Validation:  23%|███▎          | 173/743 [43:03<2:20:52, 14.83s/batch, batch_loss=21.9, batch_index=173, batch_size=256]

Validation:  23%|███▎          | 173/743 [43:19<2:20:52, 14.83s/batch, batch_loss=13.8, batch_index=174, batch_size=256]

Validation:  23%|███▎          | 174/743 [43:19<2:22:18, 15.01s/batch, batch_loss=13.8, batch_index=174, batch_size=256]

Validation:  23%|███▎          | 174/743 [43:37<2:22:18, 15.01s/batch, batch_loss=19.7, batch_index=175, batch_size=256]

Validation:  24%|███▎          | 175/743 [43:37<2:31:45, 16.03s/batch, batch_loss=19.7, batch_index=175, batch_size=256]

Validation:  24%|███▎          | 175/743 [43:53<2:31:45, 16.03s/batch, batch_loss=16.6, batch_index=176, batch_size=256]

Validation:  24%|███▎          | 176/743 [43:53<2:30:47, 15.96s/batch, batch_loss=16.6, batch_index=176, batch_size=256]

Validation:  24%|███▎          | 176/743 [44:08<2:30:47, 15.96s/batch, batch_loss=19.1, batch_index=177, batch_size=256]

Validation:  24%|███▎          | 177/743 [44:08<2:27:36, 15.65s/batch, batch_loss=19.1, batch_index=177, batch_size=256]

Validation:  24%|███▎          | 177/743 [44:24<2:27:36, 15.65s/batch, batch_loss=23.2, batch_index=178, batch_size=256]

Validation:  24%|███▎          | 178/743 [44:24<2:28:14, 15.74s/batch, batch_loss=23.2, batch_index=178, batch_size=256]

Validation:  24%|███▎          | 178/743 [44:40<2:28:14, 15.74s/batch, batch_loss=19.8, batch_index=179, batch_size=256]

Validation:  24%|███▎          | 179/743 [44:40<2:28:45, 15.82s/batch, batch_loss=19.8, batch_index=179, batch_size=256]

Validation:  24%|██▋        | 179/743 [44:55<2:28:45, 15.82s/batch, batch_loss=7.24e+3, batch_index=180, batch_size=256]

Validation:  24%|██▋        | 180/743 [44:55<2:25:30, 15.51s/batch, batch_loss=7.24e+3, batch_index=180, batch_size=256]

Validation:  24%|███▍          | 180/743 [45:09<2:25:30, 15.51s/batch, batch_loss=16.2, batch_index=181, batch_size=256]

Validation:  24%|███▍          | 181/743 [45:09<2:21:04, 15.06s/batch, batch_loss=16.2, batch_index=181, batch_size=256]

Validation:  24%|███▍          | 181/743 [45:23<2:21:04, 15.06s/batch, batch_loss=20.3, batch_index=182, batch_size=256]

Validation:  24%|███▍          | 182/743 [45:23<2:18:57, 14.86s/batch, batch_loss=20.3, batch_index=182, batch_size=256]

Validation:  24%|███▍          | 182/743 [45:39<2:18:57, 14.86s/batch, batch_loss=18.2, batch_index=183, batch_size=256]

Validation:  25%|███▍          | 183/743 [45:39<2:20:17, 15.03s/batch, batch_loss=18.2, batch_index=183, batch_size=256]

Validation:  25%|███▍          | 183/743 [45:54<2:20:17, 15.03s/batch, batch_loss=12.5, batch_index=184, batch_size=256]

Validation:  25%|███▍          | 184/743 [45:54<2:22:28, 15.29s/batch, batch_loss=12.5, batch_index=184, batch_size=256]

Validation:  25%|███▍          | 184/743 [46:09<2:22:28, 15.29s/batch, batch_loss=15.8, batch_index=185, batch_size=256]

Validation:  25%|███▍          | 185/743 [46:09<2:21:19, 15.20s/batch, batch_loss=15.8, batch_index=185, batch_size=256]

Validation:  25%|███▍          | 185/743 [46:26<2:21:19, 15.20s/batch, batch_loss=22.7, batch_index=186, batch_size=256]

Validation:  25%|███▌          | 186/743 [46:26<2:24:17, 15.54s/batch, batch_loss=22.7, batch_index=186, batch_size=256]

Validation:  25%|███▌          | 186/743 [46:41<2:24:17, 15.54s/batch, batch_loss=27.2, batch_index=187, batch_size=256]

Validation:  25%|███▌          | 187/743 [46:41<2:22:05, 15.33s/batch, batch_loss=27.2, batch_index=187, batch_size=256]

Validation:  25%|███▌          | 187/743 [46:54<2:22:05, 15.33s/batch, batch_loss=14.7, batch_index=188, batch_size=256]

Validation:  25%|███▌          | 188/743 [46:54<2:17:42, 14.89s/batch, batch_loss=14.7, batch_index=188, batch_size=256]

Validation:  25%|███▌          | 188/743 [47:10<2:17:42, 14.89s/batch, batch_loss=18.2, batch_index=189, batch_size=256]

Validation:  25%|███▌          | 189/743 [47:10<2:19:14, 15.08s/batch, batch_loss=18.2, batch_index=189, batch_size=256]

Validation:  25%|███▊           | 189/743 [47:27<2:19:14, 15.08s/batch, batch_loss=976, batch_index=190, batch_size=256]

Validation:  26%|███▊           | 190/743 [47:27<2:25:25, 15.78s/batch, batch_loss=976, batch_index=190, batch_size=256]

Validation:  26%|███▌          | 190/743 [47:42<2:25:25, 15.78s/batch, batch_loss=22.5, batch_index=191, batch_size=256]

Validation:  26%|███▌          | 191/743 [47:42<2:20:48, 15.31s/batch, batch_loss=22.5, batch_index=191, batch_size=256]

Validation:  26%|████            | 191/743 [47:56<2:20:48, 15.31s/batch, batch_loss=12, batch_index=192, batch_size=256]

Validation:  26%|████▏           | 192/743 [47:56<2:17:39, 14.99s/batch, batch_loss=12, batch_index=192, batch_size=256]

Validation:  26%|███▌          | 192/743 [48:10<2:17:39, 14.99s/batch, batch_loss=17.3, batch_index=193, batch_size=256]

Validation:  26%|███▋          | 193/743 [48:10<2:15:30, 14.78s/batch, batch_loss=17.3, batch_index=193, batch_size=256]

Validation:  26%|████▏           | 193/743 [48:26<2:15:30, 14.78s/batch, batch_loss=19, batch_index=194, batch_size=256]

Validation:  26%|████▏           | 194/743 [48:26<2:16:57, 14.97s/batch, batch_loss=19, batch_index=194, batch_size=256]

Validation:  26%|███▋          | 194/743 [48:40<2:16:57, 14.97s/batch, batch_loss=12.4, batch_index=195, batch_size=256]

Validation:  26%|███▋          | 195/743 [48:40<2:15:49, 14.87s/batch, batch_loss=12.4, batch_index=195, batch_size=256]

Validation:  26%|███▋          | 195/743 [48:55<2:15:49, 14.87s/batch, batch_loss=17.7, batch_index=196, batch_size=256]

Validation:  26%|███▋          | 196/743 [48:55<2:15:34, 14.87s/batch, batch_loss=17.7, batch_index=196, batch_size=256]

Validation:  26%|███▋          | 196/743 [49:09<2:15:34, 14.87s/batch, batch_loss=11.1, batch_index=197, batch_size=256]

Validation:  27%|███▋          | 197/743 [49:09<2:12:39, 14.58s/batch, batch_loss=11.1, batch_index=197, batch_size=256]

Validation:  27%|███▋          | 197/743 [49:24<2:12:39, 14.58s/batch, batch_loss=20.6, batch_index=198, batch_size=256]

Validation:  27%|███▋          | 198/743 [49:24<2:12:26, 14.58s/batch, batch_loss=20.6, batch_index=198, batch_size=256]

Validation:  27%|███▋          | 198/743 [49:37<2:12:26, 14.58s/batch, batch_loss=20.2, batch_index=199, batch_size=256]

Validation:  27%|███▋          | 199/743 [49:37<2:10:15, 14.37s/batch, batch_loss=20.2, batch_index=199, batch_size=256]

Validation:  27%|████           | 199/743 [49:52<2:10:15, 14.37s/batch, batch_loss=281, batch_index=200, batch_size=256]

Validation:  27%|████           | 200/743 [49:52<2:09:18, 14.29s/batch, batch_loss=281, batch_index=200, batch_size=256]

Validation:  27%|███▊          | 200/743 [50:06<2:09:18, 14.29s/batch, batch_loss=38.5, batch_index=201, batch_size=256]

Validation:  27%|███▊          | 201/743 [50:06<2:08:58, 14.28s/batch, batch_loss=38.5, batch_index=201, batch_size=256]

Validation:  27%|███▊          | 201/743 [50:20<2:08:58, 14.28s/batch, batch_loss=20.1, batch_index=202, batch_size=256]

Validation:  27%|███▊          | 202/743 [50:20<2:08:28, 14.25s/batch, batch_loss=20.1, batch_index=202, batch_size=256]

Validation:  27%|███▊          | 202/743 [50:34<2:08:28, 14.25s/batch, batch_loss=15.4, batch_index=203, batch_size=256]

Validation:  27%|███▊          | 203/743 [50:34<2:07:17, 14.14s/batch, batch_loss=15.4, batch_index=203, batch_size=256]

Validation:  27%|███▊          | 203/743 [50:48<2:07:17, 14.14s/batch, batch_loss=20.5, batch_index=204, batch_size=256]

Validation:  27%|███▊          | 204/743 [50:48<2:07:37, 14.21s/batch, batch_loss=20.5, batch_index=204, batch_size=256]

Validation:  27%|███▊          | 204/743 [51:03<2:07:37, 14.21s/batch, batch_loss=18.9, batch_index=205, batch_size=256]

Validation:  28%|███▊          | 205/743 [51:03<2:10:05, 14.51s/batch, batch_loss=18.9, batch_index=205, batch_size=256]

Validation:  28%|███▊          | 205/743 [51:19<2:10:05, 14.51s/batch, batch_loss=16.2, batch_index=206, batch_size=256]

Validation:  28%|███▉          | 206/743 [51:19<2:12:42, 14.83s/batch, batch_loss=16.2, batch_index=206, batch_size=256]

Validation:  28%|███▉          | 206/743 [51:36<2:12:42, 14.83s/batch, batch_loss=19.3, batch_index=207, batch_size=256]

Validation:  28%|███▉          | 207/743 [51:36<2:19:09, 15.58s/batch, batch_loss=19.3, batch_index=207, batch_size=256]

Validation:  28%|███▉          | 207/743 [51:52<2:19:09, 15.58s/batch, batch_loss=17.4, batch_index=208, batch_size=256]

Validation:  28%|███▉          | 208/743 [51:52<2:18:12, 15.50s/batch, batch_loss=17.4, batch_index=208, batch_size=256]

Validation:  28%|███▉          | 208/743 [52:06<2:18:12, 15.50s/batch, batch_loss=8.57, batch_index=209, batch_size=256]

Validation:  28%|███▉          | 209/743 [52:06<2:16:06, 15.29s/batch, batch_loss=8.57, batch_index=209, batch_size=256]

Validation:  28%|███▉          | 209/743 [52:22<2:16:06, 15.29s/batch, batch_loss=11.5, batch_index=210, batch_size=256]

Validation:  28%|███▉          | 210/743 [52:22<2:15:47, 15.29s/batch, batch_loss=11.5, batch_index=210, batch_size=256]

Validation:  28%|███▉          | 210/743 [52:37<2:15:47, 15.29s/batch, batch_loss=14.4, batch_index=211, batch_size=256]

Validation:  28%|███▉          | 211/743 [52:37<2:16:12, 15.36s/batch, batch_loss=14.4, batch_index=211, batch_size=256]

Validation:  28%|███▉          | 211/743 [52:53<2:16:12, 15.36s/batch, batch_loss=15.3, batch_index=212, batch_size=256]

Validation:  29%|███▉          | 212/743 [52:53<2:17:05, 15.49s/batch, batch_loss=15.3, batch_index=212, batch_size=256]

Validation:  29%|████▎          | 212/743 [53:12<2:17:05, 15.49s/batch, batch_loss=541, batch_index=213, batch_size=256]

Validation:  29%|████▎          | 213/743 [53:12<2:25:03, 16.42s/batch, batch_loss=541, batch_index=213, batch_size=256]

Validation:  29%|████          | 213/743 [53:28<2:25:03, 16.42s/batch, batch_loss=13.3, batch_index=214, batch_size=256]

Validation:  29%|████          | 214/743 [53:28<2:24:48, 16.42s/batch, batch_loss=13.3, batch_index=214, batch_size=256]

Validation:  29%|████          | 214/743 [53:44<2:24:48, 16.42s/batch, batch_loss=17.1, batch_index=215, batch_size=256]

Validation:  29%|████          | 215/743 [53:44<2:23:48, 16.34s/batch, batch_loss=17.1, batch_index=215, batch_size=256]

Validation:  29%|███▏       | 215/743 [54:00<2:23:48, 16.34s/batch, batch_loss=2.57e+3, batch_index=216, batch_size=256]

Validation:  29%|███▏       | 216/743 [54:00<2:22:14, 16.19s/batch, batch_loss=2.57e+3, batch_index=216, batch_size=256]

Validation:  29%|████▋           | 216/743 [54:16<2:22:14, 16.19s/batch, batch_loss=20, batch_index=217, batch_size=256]

Validation:  29%|████▋           | 217/743 [54:16<2:22:16, 16.23s/batch, batch_loss=20, batch_index=217, batch_size=256]

Validation:  29%|████          | 217/743 [54:32<2:22:16, 16.23s/batch, batch_loss=14.6, batch_index=218, batch_size=256]

Validation:  29%|████          | 218/743 [54:32<2:19:36, 15.96s/batch, batch_loss=14.6, batch_index=218, batch_size=256]

Validation:  29%|████          | 218/743 [54:48<2:19:36, 15.96s/batch, batch_loss=25.9, batch_index=219, batch_size=256]

Validation:  29%|████▏         | 219/743 [54:48<2:20:11, 16.05s/batch, batch_loss=25.9, batch_index=219, batch_size=256]

Validation:  29%|████▏         | 219/743 [55:02<2:20:11, 16.05s/batch, batch_loss=28.9, batch_index=220, batch_size=256]

Validation:  30%|████▏         | 220/743 [55:02<2:15:43, 15.57s/batch, batch_loss=28.9, batch_index=220, batch_size=256]

Validation:  30%|████▏         | 220/743 [55:17<2:15:43, 15.57s/batch, batch_loss=17.3, batch_index=221, batch_size=256]

Validation:  30%|████▏         | 221/743 [55:17<2:12:38, 15.25s/batch, batch_loss=17.3, batch_index=221, batch_size=256]

Validation:  30%|████▏         | 221/743 [55:31<2:12:38, 15.25s/batch, batch_loss=12.2, batch_index=222, batch_size=256]

Validation:  30%|████▏         | 222/743 [55:31<2:09:10, 14.88s/batch, batch_loss=12.2, batch_index=222, batch_size=256]

Validation:  30%|████▏         | 222/743 [55:45<2:09:10, 14.88s/batch, batch_loss=12.7, batch_index=223, batch_size=256]

Validation:  30%|████▏         | 223/743 [55:45<2:07:35, 14.72s/batch, batch_loss=12.7, batch_index=223, batch_size=256]

Validation:  30%|████▊           | 223/743 [56:00<2:07:35, 14.72s/batch, batch_loss=12, batch_index=224, batch_size=256]

Validation:  30%|████▊           | 224/743 [56:00<2:08:12, 14.82s/batch, batch_loss=12, batch_index=224, batch_size=256]

Validation:  30%|███▎       | 224/743 [56:16<2:08:12, 14.82s/batch, batch_loss=4.93e+3, batch_index=225, batch_size=256]

Validation:  30%|███▎       | 225/743 [56:16<2:08:57, 14.94s/batch, batch_loss=4.93e+3, batch_index=225, batch_size=256]

Validation:  30%|████▏         | 225/743 [56:30<2:08:57, 14.94s/batch, batch_loss=19.6, batch_index=226, batch_size=256]

Validation:  30%|████▎         | 226/743 [56:30<2:06:10, 14.64s/batch, batch_loss=19.6, batch_index=226, batch_size=256]

Validation:  30%|████▎         | 226/743 [56:44<2:06:10, 14.64s/batch, batch_loss=20.6, batch_index=227, batch_size=256]

Validation:  31%|████▎         | 227/743 [56:44<2:05:54, 14.64s/batch, batch_loss=20.6, batch_index=227, batch_size=256]

Validation:  31%|████▎         | 227/743 [56:59<2:05:54, 14.64s/batch, batch_loss=19.7, batch_index=228, batch_size=256]

Validation:  31%|████▎         | 228/743 [56:59<2:06:13, 14.71s/batch, batch_loss=19.7, batch_index=228, batch_size=256]

Validation:  31%|████▎         | 228/743 [57:14<2:06:13, 14.71s/batch, batch_loss=19.7, batch_index=229, batch_size=256]

Validation:  31%|████▎         | 229/743 [57:14<2:06:23, 14.75s/batch, batch_loss=19.7, batch_index=229, batch_size=256]

Validation:  31%|████▎         | 229/743 [57:28<2:06:23, 14.75s/batch, batch_loss=23.8, batch_index=230, batch_size=256]

Validation:  31%|████▎         | 230/743 [57:28<2:05:17, 14.65s/batch, batch_loss=23.8, batch_index=230, batch_size=256]

Validation:  31%|███▍       | 230/743 [57:43<2:05:17, 14.65s/batch, batch_loss=3.23e+4, batch_index=231, batch_size=256]

Validation:  31%|███▍       | 231/743 [57:43<2:04:54, 14.64s/batch, batch_loss=3.23e+4, batch_index=231, batch_size=256]

Validation:  31%|████▎         | 231/743 [57:58<2:04:54, 14.64s/batch, batch_loss=18.9, batch_index=232, batch_size=256]

Validation:  31%|████▎         | 232/743 [57:58<2:05:39, 14.75s/batch, batch_loss=18.9, batch_index=232, batch_size=256]

Validation:  31%|████▎         | 232/743 [58:13<2:05:39, 14.75s/batch, batch_loss=10.4, batch_index=233, batch_size=256]

Validation:  31%|████▍         | 233/743 [58:13<2:05:39, 14.78s/batch, batch_loss=10.4, batch_index=233, batch_size=256]

Validation:  31%|████▍         | 233/743 [58:27<2:05:39, 14.78s/batch, batch_loss=14.8, batch_index=234, batch_size=256]

Validation:  31%|████▍         | 234/743 [58:27<2:04:47, 14.71s/batch, batch_loss=14.8, batch_index=234, batch_size=256]

Validation:  31%|████▍         | 234/743 [58:43<2:04:47, 14.71s/batch, batch_loss=15.2, batch_index=235, batch_size=256]

Validation:  32%|████▍         | 235/743 [58:43<2:06:18, 14.92s/batch, batch_loss=15.2, batch_index=235, batch_size=256]

Validation:  32%|████▍         | 235/743 [58:58<2:06:18, 14.92s/batch, batch_loss=4.35, batch_index=236, batch_size=256]

Validation:  32%|████▍         | 236/743 [58:58<2:06:43, 15.00s/batch, batch_loss=4.35, batch_index=236, batch_size=256]

Validation:  32%|████▍         | 236/743 [59:12<2:06:43, 15.00s/batch, batch_loss=22.1, batch_index=237, batch_size=256]

Validation:  32%|████▍         | 237/743 [59:12<2:03:07, 14.60s/batch, batch_loss=22.1, batch_index=237, batch_size=256]

Validation:  32%|████▍         | 237/743 [59:29<2:03:07, 14.60s/batch, batch_loss=17.2, batch_index=238, batch_size=256]

Validation:  32%|████▍         | 238/743 [59:29<2:09:41, 15.41s/batch, batch_loss=17.2, batch_index=238, batch_size=256]

Validation:  32%|███▊        | 238/743 [59:44<2:09:41, 15.41s/batch, batch_loss=4.5e+3, batch_index=239, batch_size=256]

Validation:  32%|███▊        | 239/743 [59:44<2:08:35, 15.31s/batch, batch_loss=4.5e+3, batch_index=239, batch_size=256]

Validation:  32%|████▌         | 239/743 [59:58<2:08:35, 15.31s/batch, batch_loss=19.7, batch_index=240, batch_size=256]

Validation:  32%|████▌         | 240/743 [59:58<2:05:59, 15.03s/batch, batch_loss=19.7, batch_index=240, batch_size=256]

Validation:  32%|███▉        | 240/743 [1:00:14<2:05:59, 15.03s/batch, batch_loss=16.8, batch_index=241, batch_size=256]

Validation:  32%|███▉        | 241/743 [1:00:14<2:07:16, 15.21s/batch, batch_loss=16.8, batch_index=241, batch_size=256]

Validation:  32%|████▏        | 241/743 [1:00:29<2:07:16, 15.21s/batch, batch_loss=230, batch_index=242, batch_size=256]

Validation:  33%|████▏        | 242/743 [1:00:29<2:06:20, 15.13s/batch, batch_loss=230, batch_index=242, batch_size=256]

Validation:  33%|████▌         | 242/743 [1:00:44<2:06:20, 15.13s/batch, batch_loss=11, batch_index=243, batch_size=256]

Validation:  33%|████▌         | 243/743 [1:00:44<2:05:55, 15.11s/batch, batch_loss=11, batch_index=243, batch_size=256]

Validation:  33%|███▉        | 243/743 [1:00:59<2:05:55, 15.11s/batch, batch_loss=17.2, batch_index=244, batch_size=256]

Validation:  33%|███▉        | 244/743 [1:00:59<2:04:56, 15.02s/batch, batch_loss=17.2, batch_index=244, batch_size=256]

Validation:  33%|███▉        | 244/743 [1:01:14<2:04:56, 15.02s/batch, batch_loss=20.9, batch_index=245, batch_size=256]

Validation:  33%|███▉        | 245/743 [1:01:14<2:05:18, 15.10s/batch, batch_loss=20.9, batch_index=245, batch_size=256]

Validation:  33%|███▉        | 245/743 [1:01:31<2:05:18, 15.10s/batch, batch_loss=6.55, batch_index=246, batch_size=256]

Validation:  33%|███▉        | 246/743 [1:01:31<2:09:55, 15.69s/batch, batch_loss=6.55, batch_index=246, batch_size=256]

Validation:  33%|███▉        | 246/743 [1:01:47<2:09:55, 15.69s/batch, batch_loss=18.4, batch_index=247, batch_size=256]

Validation:  33%|███▉        | 247/743 [1:01:47<2:08:52, 15.59s/batch, batch_loss=18.4, batch_index=247, batch_size=256]

Validation:  33%|████▋         | 247/743 [1:02:02<2:08:52, 15.59s/batch, batch_loss=40, batch_index=248, batch_size=256]

Validation:  33%|████▋         | 248/743 [1:02:02<2:07:29, 15.45s/batch, batch_loss=40, batch_index=248, batch_size=256]

Validation:  33%|████▋         | 248/743 [1:02:18<2:07:29, 15.45s/batch, batch_loss=14, batch_index=249, batch_size=256]

Validation:  34%|████▋         | 249/743 [1:02:18<2:08:21, 15.59s/batch, batch_loss=14, batch_index=249, batch_size=256]

Validation:  34%|████        | 249/743 [1:02:33<2:08:21, 15.59s/batch, batch_loss=19.8, batch_index=250, batch_size=256]

Validation:  34%|████        | 250/743 [1:02:33<2:07:08, 15.47s/batch, batch_loss=19.8, batch_index=250, batch_size=256]

Validation:  34%|████        | 250/743 [1:02:47<2:07:08, 15.47s/batch, batch_loss=20.8, batch_index=251, batch_size=256]

Validation:  34%|████        | 251/743 [1:02:47<2:05:04, 15.25s/batch, batch_loss=20.8, batch_index=251, batch_size=256]

Validation:  34%|████        | 251/743 [1:03:02<2:05:04, 15.25s/batch, batch_loss=21.9, batch_index=252, batch_size=256]

Validation:  34%|████        | 252/743 [1:03:02<2:03:00, 15.03s/batch, batch_loss=21.9, batch_index=252, batch_size=256]

Validation:  34%|████        | 252/743 [1:03:17<2:03:00, 15.03s/batch, batch_loss=21.5, batch_index=253, batch_size=256]

Validation:  34%|████        | 253/743 [1:03:17<2:02:17, 14.97s/batch, batch_loss=21.5, batch_index=253, batch_size=256]

Validation:  34%|███      | 253/743 [1:03:33<2:02:17, 14.97s/batch, batch_loss=1.14e+4, batch_index=254, batch_size=256]

Validation:  34%|███      | 254/743 [1:03:33<2:04:02, 15.22s/batch, batch_loss=1.14e+4, batch_index=254, batch_size=256]

Validation:  34%|███      | 254/743 [1:03:47<2:04:02, 15.22s/batch, batch_loss=2.45e+3, batch_index=255, batch_size=256]

Validation:  34%|███      | 255/743 [1:03:47<2:01:08, 14.89s/batch, batch_loss=2.45e+3, batch_index=255, batch_size=256]

Validation:  34%|████        | 255/743 [1:04:01<2:01:08, 14.89s/batch, batch_loss=20.7, batch_index=256, batch_size=256]

Validation:  34%|████▏       | 256/743 [1:04:01<1:58:24, 14.59s/batch, batch_loss=20.7, batch_index=256, batch_size=256]

Validation:  34%|████▏       | 256/743 [1:04:15<1:58:24, 14.59s/batch, batch_loss=21.3, batch_index=257, batch_size=256]

Validation:  35%|████▏       | 257/743 [1:04:15<1:57:01, 14.45s/batch, batch_loss=21.3, batch_index=257, batch_size=256]

Validation:  35%|████▏       | 257/743 [1:04:29<1:57:01, 14.45s/batch, batch_loss=13.1, batch_index=258, batch_size=256]

Validation:  35%|████▏       | 258/743 [1:04:29<1:56:38, 14.43s/batch, batch_loss=13.1, batch_index=258, batch_size=256]

Validation:  35%|████▏       | 258/743 [1:04:43<1:56:38, 14.43s/batch, batch_loss=3.89, batch_index=259, batch_size=256]

Validation:  35%|████▏       | 259/743 [1:04:43<1:55:56, 14.37s/batch, batch_loss=3.89, batch_index=259, batch_size=256]

Validation:  35%|████▏       | 259/743 [1:04:58<1:55:56, 14.37s/batch, batch_loss=2.67, batch_index=260, batch_size=256]

Validation:  35%|████▏       | 260/743 [1:04:58<1:56:27, 14.47s/batch, batch_loss=2.67, batch_index=260, batch_size=256]

Validation:  35%|████▏       | 260/743 [1:05:12<1:56:27, 14.47s/batch, batch_loss=10.8, batch_index=261, batch_size=256]

Validation:  35%|████▏       | 261/743 [1:05:12<1:55:53, 14.43s/batch, batch_loss=10.8, batch_index=261, batch_size=256]

Validation:  35%|████▏       | 261/743 [1:05:27<1:55:53, 14.43s/batch, batch_loss=29.1, batch_index=262, batch_size=256]

Validation:  35%|████▏       | 262/743 [1:05:27<1:55:59, 14.47s/batch, batch_loss=29.1, batch_index=262, batch_size=256]

Validation:  35%|███▏     | 262/743 [1:05:41<1:55:59, 14.47s/batch, batch_loss=2.72e+3, batch_index=263, batch_size=256]

Validation:  35%|███▏     | 263/743 [1:05:41<1:55:38, 14.45s/batch, batch_loss=2.72e+3, batch_index=263, batch_size=256]

Validation:  35%|████▏       | 263/743 [1:05:56<1:55:38, 14.45s/batch, batch_loss=11.7, batch_index=264, batch_size=256]

Validation:  36%|████▎       | 264/743 [1:05:56<1:55:26, 14.46s/batch, batch_loss=11.7, batch_index=264, batch_size=256]

Validation:  36%|████▎       | 264/743 [1:06:10<1:55:26, 14.46s/batch, batch_loss=22.6, batch_index=265, batch_size=256]

Validation:  36%|████▎       | 265/743 [1:06:10<1:54:55, 14.43s/batch, batch_loss=22.6, batch_index=265, batch_size=256]

Validation:  36%|████▎       | 265/743 [1:06:24<1:54:55, 14.43s/batch, batch_loss=23.2, batch_index=266, batch_size=256]

Validation:  36%|████▎       | 266/743 [1:06:24<1:52:47, 14.19s/batch, batch_loss=23.2, batch_index=266, batch_size=256]

Validation:  36%|████▎       | 266/743 [1:06:38<1:52:47, 14.19s/batch, batch_loss=23.4, batch_index=267, batch_size=256]

Validation:  36%|████▎       | 267/743 [1:06:38<1:51:51, 14.10s/batch, batch_loss=23.4, batch_index=267, batch_size=256]

Validation:  36%|███▏     | 267/743 [1:06:52<1:51:51, 14.10s/batch, batch_loss=3.01e+3, batch_index=268, batch_size=256]

Validation:  36%|███▏     | 268/743 [1:06:52<1:52:46, 14.24s/batch, batch_loss=3.01e+3, batch_index=268, batch_size=256]

Validation:  36%|████▎       | 268/743 [1:07:07<1:52:46, 14.24s/batch, batch_loss=35.2, batch_index=269, batch_size=256]

Validation:  36%|████▎       | 269/743 [1:07:07<1:52:43, 14.27s/batch, batch_loss=35.2, batch_index=269, batch_size=256]

Validation:  36%|████▎       | 269/743 [1:07:23<1:52:43, 14.27s/batch, batch_loss=30.6, batch_index=270, batch_size=256]

Validation:  36%|████▎       | 270/743 [1:07:23<1:57:43, 14.93s/batch, batch_loss=30.6, batch_index=270, batch_size=256]

Validation:  36%|████▎       | 270/743 [1:07:38<1:57:43, 14.93s/batch, batch_loss=32.9, batch_index=271, batch_size=256]

Validation:  36%|████▍       | 271/743 [1:07:38<1:58:11, 15.02s/batch, batch_loss=32.9, batch_index=271, batch_size=256]

Validation:  36%|███▎     | 271/743 [1:07:52<1:58:11, 15.02s/batch, batch_loss=1.05e+3, batch_index=272, batch_size=256]

Validation:  37%|███▎     | 272/743 [1:07:52<1:55:43, 14.74s/batch, batch_loss=1.05e+3, batch_index=272, batch_size=256]

Validation:  37%|████▍       | 272/743 [1:08:06<1:55:43, 14.74s/batch, batch_loss=18.2, batch_index=273, batch_size=256]

Validation:  37%|████▍       | 273/743 [1:08:06<1:52:31, 14.37s/batch, batch_loss=18.2, batch_index=273, batch_size=256]

Validation:  37%|████▍       | 273/743 [1:08:19<1:52:31, 14.37s/batch, batch_loss=21.7, batch_index=274, batch_size=256]

Validation:  37%|████▍       | 274/743 [1:08:19<1:48:50, 13.92s/batch, batch_loss=21.7, batch_index=274, batch_size=256]

Validation:  37%|████▍       | 274/743 [1:08:32<1:48:50, 13.92s/batch, batch_loss=18.1, batch_index=275, batch_size=256]

Validation:  37%|████▍       | 275/743 [1:08:32<1:46:18, 13.63s/batch, batch_loss=18.1, batch_index=275, batch_size=256]

Validation:  37%|████▍       | 275/743 [1:08:46<1:46:18, 13.63s/batch, batch_loss=14.2, batch_index=276, batch_size=256]

Validation:  37%|████▍       | 276/743 [1:08:46<1:47:21, 13.79s/batch, batch_loss=14.2, batch_index=276, batch_size=256]

Validation:  37%|████▍       | 276/743 [1:09:00<1:47:21, 13.79s/batch, batch_loss=25.4, batch_index=277, batch_size=256]

Validation:  37%|████▍       | 277/743 [1:09:00<1:48:19, 13.95s/batch, batch_loss=25.4, batch_index=277, batch_size=256]

Validation:  37%|████▍       | 277/743 [1:09:16<1:48:19, 13.95s/batch, batch_loss=19.1, batch_index=278, batch_size=256]

Validation:  37%|████▍       | 278/743 [1:09:16<1:51:20, 14.37s/batch, batch_loss=19.1, batch_index=278, batch_size=256]

Validation:  37%|████▍       | 278/743 [1:09:30<1:51:20, 14.37s/batch, batch_loss=8.51, batch_index=279, batch_size=256]

Validation:  38%|████▌       | 279/743 [1:09:30<1:51:23, 14.40s/batch, batch_loss=8.51, batch_index=279, batch_size=256]

Validation:  38%|████▌       | 279/743 [1:09:47<1:51:23, 14.40s/batch, batch_loss=14.2, batch_index=280, batch_size=256]

Validation:  38%|████▌       | 280/743 [1:09:47<1:57:49, 15.27s/batch, batch_loss=14.2, batch_index=280, batch_size=256]

Validation:  38%|████▌       | 280/743 [1:10:02<1:57:49, 15.27s/batch, batch_loss=18.1, batch_index=281, batch_size=256]

Validation:  38%|████▌       | 281/743 [1:10:02<1:55:48, 15.04s/batch, batch_loss=18.1, batch_index=281, batch_size=256]

Validation:  38%|████▌       | 281/743 [1:10:15<1:55:48, 15.04s/batch, batch_loss=25.3, batch_index=282, batch_size=256]

Validation:  38%|████▌       | 282/743 [1:10:15<1:52:13, 14.61s/batch, batch_loss=25.3, batch_index=282, batch_size=256]

Validation:  38%|████▌       | 282/743 [1:10:30<1:52:13, 14.61s/batch, batch_loss=17.8, batch_index=283, batch_size=256]

Validation:  38%|████▌       | 283/743 [1:10:30<1:51:44, 14.57s/batch, batch_loss=17.8, batch_index=283, batch_size=256]

Validation:  38%|████▌       | 283/743 [1:10:45<1:51:44, 14.57s/batch, batch_loss=18.9, batch_index=284, batch_size=256]

Validation:  38%|████▌       | 284/743 [1:10:45<1:51:22, 14.56s/batch, batch_loss=18.9, batch_index=284, batch_size=256]

Validation:  38%|████▌       | 284/743 [1:10:59<1:51:22, 14.56s/batch, batch_loss=14.1, batch_index=285, batch_size=256]

Validation:  38%|████▌       | 285/743 [1:10:59<1:50:31, 14.48s/batch, batch_loss=14.1, batch_index=285, batch_size=256]

Validation:  38%|████▌       | 285/743 [1:11:13<1:50:31, 14.48s/batch, batch_loss=16.1, batch_index=286, batch_size=256]

Validation:  38%|████▌       | 286/743 [1:11:13<1:50:32, 14.51s/batch, batch_loss=16.1, batch_index=286, batch_size=256]

Validation:  38%|███▍     | 286/743 [1:11:30<1:50:32, 14.51s/batch, batch_loss=1.19e+4, batch_index=287, batch_size=256]

Validation:  39%|███▍     | 287/743 [1:11:30<1:55:42, 15.22s/batch, batch_loss=1.19e+4, batch_index=287, batch_size=256]

Validation:  39%|████▋       | 287/743 [1:11:45<1:55:42, 15.22s/batch, batch_loss=26.5, batch_index=288, batch_size=256]

Validation:  39%|████▋       | 288/743 [1:11:45<1:54:33, 15.11s/batch, batch_loss=26.5, batch_index=288, batch_size=256]

Validation:  39%|████▋       | 288/743 [1:12:01<1:54:33, 15.11s/batch, batch_loss=22.1, batch_index=289, batch_size=256]

Validation:  39%|████▋       | 289/743 [1:12:01<1:55:46, 15.30s/batch, batch_loss=22.1, batch_index=289, batch_size=256]

Validation:  39%|█████        | 289/743 [1:12:16<1:55:46, 15.30s/batch, batch_loss=485, batch_index=290, batch_size=256]

Validation:  39%|█████        | 290/743 [1:12:16<1:54:37, 15.18s/batch, batch_loss=485, batch_index=290, batch_size=256]

Validation:  39%|███▌     | 290/743 [1:12:29<1:54:37, 15.18s/batch, batch_loss=1.52e+3, batch_index=291, batch_size=256]

Validation:  39%|███▌     | 291/743 [1:12:29<1:50:58, 14.73s/batch, batch_loss=1.52e+3, batch_index=291, batch_size=256]

Validation:  39%|███▉      | 291/743 [1:12:44<1:50:58, 14.73s/batch, batch_loss=1.2e+3, batch_index=292, batch_size=256]

Validation:  39%|███▉      | 292/743 [1:12:44<1:50:32, 14.71s/batch, batch_loss=1.2e+3, batch_index=292, batch_size=256]

Validation:  39%|████▋       | 292/743 [1:12:59<1:50:32, 14.71s/batch, batch_loss=28.4, batch_index=293, batch_size=256]

Validation:  39%|████▋       | 293/743 [1:12:59<1:49:48, 14.64s/batch, batch_loss=28.4, batch_index=293, batch_size=256]

Validation:  39%|███▉      | 293/743 [1:13:13<1:49:48, 14.64s/batch, batch_loss=1.1e+3, batch_index=294, batch_size=256]

Validation:  40%|███▉      | 294/743 [1:13:13<1:49:59, 14.70s/batch, batch_loss=1.1e+3, batch_index=294, batch_size=256]

Validation:  40%|█████▌        | 294/743 [1:13:30<1:49:59, 14.70s/batch, batch_loss=19, batch_index=295, batch_size=256]

Validation:  40%|█████▌        | 295/743 [1:13:30<1:53:09, 15.16s/batch, batch_loss=19, batch_index=295, batch_size=256]

Validation:  40%|████▊       | 295/743 [1:13:43<1:53:09, 15.16s/batch, batch_loss=17.8, batch_index=296, batch_size=256]

Validation:  40%|████▊       | 296/743 [1:13:43<1:49:44, 14.73s/batch, batch_loss=17.8, batch_index=296, batch_size=256]

Validation:  40%|████▊       | 296/743 [1:13:57<1:49:44, 14.73s/batch, batch_loss=11.6, batch_index=297, batch_size=256]

Validation:  40%|████▊       | 297/743 [1:13:57<1:47:17, 14.43s/batch, batch_loss=11.6, batch_index=297, batch_size=256]

Validation:  40%|█████▌        | 297/743 [1:14:11<1:47:17, 14.43s/batch, batch_loss=23, batch_index=298, batch_size=256]

Validation:  40%|█████▌        | 298/743 [1:14:11<1:46:14, 14.32s/batch, batch_loss=23, batch_index=298, batch_size=256]

Validation:  40%|████▊       | 298/743 [1:14:25<1:46:14, 14.32s/batch, batch_loss=32.3, batch_index=299, batch_size=256]

Validation:  40%|████▊       | 299/743 [1:14:25<1:45:17, 14.23s/batch, batch_loss=32.3, batch_index=299, batch_size=256]

Validation:  40%|████▊       | 299/743 [1:14:38<1:45:17, 14.23s/batch, batch_loss=35.9, batch_index=300, batch_size=256]

Validation:  40%|████▊       | 300/743 [1:14:38<1:42:52, 13.93s/batch, batch_loss=35.9, batch_index=300, batch_size=256]

Validation:  40%|█████▏       | 300/743 [1:14:53<1:42:52, 13.93s/batch, batch_loss=839, batch_index=301, batch_size=256]

Validation:  41%|█████▎       | 301/743 [1:14:53<1:43:43, 14.08s/batch, batch_loss=839, batch_index=301, batch_size=256]

Validation:  41%|████▊       | 301/743 [1:15:09<1:43:43, 14.08s/batch, batch_loss=10.5, batch_index=302, batch_size=256]

Validation:  41%|████▉       | 302/743 [1:15:09<1:47:16, 14.60s/batch, batch_loss=10.5, batch_index=302, batch_size=256]

Validation:  41%|████▉       | 302/743 [1:15:26<1:47:16, 14.60s/batch, batch_loss=15.1, batch_index=303, batch_size=256]

Validation:  41%|████▉       | 303/743 [1:15:26<1:53:14, 15.44s/batch, batch_loss=15.1, batch_index=303, batch_size=256]

Validation:  41%|████▉       | 303/743 [1:15:41<1:53:14, 15.44s/batch, batch_loss=18.9, batch_index=304, batch_size=256]

Validation:  41%|████▉       | 304/743 [1:15:41<1:51:10, 15.19s/batch, batch_loss=18.9, batch_index=304, batch_size=256]

Validation:  41%|████▉       | 304/743 [1:15:55<1:51:10, 15.19s/batch, batch_loss=13.1, batch_index=305, batch_size=256]

Validation:  41%|████▉       | 305/743 [1:15:55<1:48:05, 14.81s/batch, batch_loss=13.1, batch_index=305, batch_size=256]

Validation:  41%|████▉       | 305/743 [1:16:09<1:48:05, 14.81s/batch, batch_loss=20.2, batch_index=306, batch_size=256]

Validation:  41%|████▉       | 306/743 [1:16:09<1:46:13, 14.58s/batch, batch_loss=20.2, batch_index=306, batch_size=256]

Validation:  41%|████▉       | 306/743 [1:16:23<1:46:13, 14.58s/batch, batch_loss=21.7, batch_index=307, batch_size=256]

Validation:  41%|████▉       | 307/743 [1:16:23<1:45:51, 14.57s/batch, batch_loss=21.7, batch_index=307, batch_size=256]

Validation:  41%|█████▎       | 307/743 [1:16:37<1:45:51, 14.57s/batch, batch_loss=883, batch_index=308, batch_size=256]

Validation:  41%|█████▍       | 308/743 [1:16:37<1:44:39, 14.44s/batch, batch_loss=883, batch_index=308, batch_size=256]

Validation:  41%|████▉       | 308/743 [1:16:52<1:44:39, 14.44s/batch, batch_loss=30.6, batch_index=309, batch_size=256]

Validation:  42%|████▉       | 309/743 [1:16:52<1:44:58, 14.51s/batch, batch_loss=30.6, batch_index=309, batch_size=256]

Validation:  42%|████▉       | 309/743 [1:17:06<1:44:58, 14.51s/batch, batch_loss=18.2, batch_index=310, batch_size=256]

Validation:  42%|█████       | 310/743 [1:17:06<1:44:22, 14.46s/batch, batch_loss=18.2, batch_index=310, batch_size=256]

Validation:  42%|█████▊        | 310/743 [1:17:21<1:44:22, 14.46s/batch, batch_loss=19, batch_index=311, batch_size=256]

Validation:  42%|█████▊        | 311/743 [1:17:21<1:45:27, 14.65s/batch, batch_loss=19, batch_index=311, batch_size=256]

Validation:  42%|█████       | 311/743 [1:17:37<1:45:27, 14.65s/batch, batch_loss=17.4, batch_index=312, batch_size=256]

Validation:  42%|█████       | 312/743 [1:17:37<1:46:16, 14.80s/batch, batch_loss=17.4, batch_index=312, batch_size=256]

Validation:  42%|█████       | 312/743 [1:17:52<1:46:16, 14.80s/batch, batch_loss=8.51, batch_index=313, batch_size=256]

Validation:  42%|█████       | 313/743 [1:17:52<1:47:18, 14.97s/batch, batch_loss=8.51, batch_index=313, batch_size=256]

Validation:  42%|█████       | 313/743 [1:18:06<1:47:18, 14.97s/batch, batch_loss=12.8, batch_index=314, batch_size=256]

Validation:  42%|█████       | 314/743 [1:18:06<1:45:31, 14.76s/batch, batch_loss=12.8, batch_index=314, batch_size=256]

Validation:  42%|█████▉        | 314/743 [1:18:21<1:45:31, 14.76s/batch, batch_loss=22, batch_index=315, batch_size=256]

Validation:  42%|█████▉        | 315/743 [1:18:21<1:45:57, 14.86s/batch, batch_loss=22, batch_index=315, batch_size=256]

Validation:  42%|█████       | 315/743 [1:18:36<1:45:57, 14.86s/batch, batch_loss=21.9, batch_index=316, batch_size=256]

Validation:  43%|█████       | 316/743 [1:18:36<1:45:13, 14.78s/batch, batch_loss=21.9, batch_index=316, batch_size=256]

Validation:  43%|█████       | 316/743 [1:18:51<1:45:13, 14.78s/batch, batch_loss=20.7, batch_index=317, batch_size=256]

Validation:  43%|█████       | 317/743 [1:18:51<1:44:48, 14.76s/batch, batch_loss=20.7, batch_index=317, batch_size=256]

Validation:  43%|█████       | 317/743 [1:19:04<1:44:48, 14.76s/batch, batch_loss=16.2, batch_index=318, batch_size=256]

Validation:  43%|█████▏      | 318/743 [1:19:04<1:41:24, 14.32s/batch, batch_loss=16.2, batch_index=318, batch_size=256]

Validation:  43%|█████▉        | 318/743 [1:19:17<1:41:24, 14.32s/batch, batch_loss=21, batch_index=319, batch_size=256]

Validation:  43%|██████        | 319/743 [1:19:17<1:39:32, 14.09s/batch, batch_loss=21, batch_index=319, batch_size=256]

Validation:  43%|█████▏      | 319/743 [1:19:34<1:39:32, 14.09s/batch, batch_loss=19.5, batch_index=320, batch_size=256]

Validation:  43%|█████▏      | 320/743 [1:19:34<1:43:58, 14.75s/batch, batch_loss=19.5, batch_index=320, batch_size=256]

Validation:  43%|█████▏      | 320/743 [1:19:48<1:43:58, 14.75s/batch, batch_loss=17.9, batch_index=321, batch_size=256]

Validation:  43%|█████▏      | 321/743 [1:19:48<1:42:32, 14.58s/batch, batch_loss=17.9, batch_index=321, batch_size=256]

Validation:  43%|█████▏      | 321/743 [1:20:03<1:42:32, 14.58s/batch, batch_loss=15.8, batch_index=322, batch_size=256]

Validation:  43%|█████▏      | 322/743 [1:20:03<1:42:35, 14.62s/batch, batch_loss=15.8, batch_index=322, batch_size=256]

Validation:  43%|█████▏      | 322/743 [1:20:17<1:42:35, 14.62s/batch, batch_loss=17.5, batch_index=323, batch_size=256]

Validation:  43%|█████▏      | 323/743 [1:20:17<1:41:14, 14.46s/batch, batch_loss=17.5, batch_index=323, batch_size=256]

Validation:  43%|█████▋       | 323/743 [1:20:32<1:41:14, 14.46s/batch, batch_loss=298, batch_index=324, batch_size=256]

Validation:  44%|█████▋       | 324/743 [1:20:32<1:42:04, 14.62s/batch, batch_loss=298, batch_index=324, batch_size=256]

Validation:  44%|█████▏      | 324/743 [1:20:46<1:42:04, 14.62s/batch, batch_loss=19.7, batch_index=325, batch_size=256]

Validation:  44%|█████▏      | 325/743 [1:20:46<1:40:42, 14.46s/batch, batch_loss=19.7, batch_index=325, batch_size=256]

Validation:  44%|█████▏      | 325/743 [1:21:00<1:40:42, 14.46s/batch, batch_loss=20.6, batch_index=326, batch_size=256]

Validation:  44%|█████▎      | 326/743 [1:21:00<1:40:50, 14.51s/batch, batch_loss=20.6, batch_index=326, batch_size=256]

Validation:  44%|█████▎      | 326/743 [1:21:15<1:40:50, 14.51s/batch, batch_loss=18.7, batch_index=327, batch_size=256]

Validation:  44%|█████▎      | 327/743 [1:21:15<1:40:53, 14.55s/batch, batch_loss=18.7, batch_index=327, batch_size=256]

Validation:  44%|█████▎      | 327/743 [1:21:30<1:40:53, 14.55s/batch, batch_loss=17.3, batch_index=328, batch_size=256]

Validation:  44%|█████▎      | 328/743 [1:21:30<1:41:24, 14.66s/batch, batch_loss=17.3, batch_index=328, batch_size=256]

Validation:  44%|█████▎      | 328/743 [1:21:45<1:41:24, 14.66s/batch, batch_loss=9.31, batch_index=329, batch_size=256]

Validation:  44%|█████▎      | 329/743 [1:21:45<1:41:07, 14.66s/batch, batch_loss=9.31, batch_index=329, batch_size=256]

Validation:  44%|█████▎      | 329/743 [1:21:59<1:41:07, 14.66s/batch, batch_loss=15.2, batch_index=330, batch_size=256]

Validation:  44%|█████▎      | 330/743 [1:21:59<1:39:54, 14.52s/batch, batch_loss=15.2, batch_index=330, batch_size=256]

Validation:  44%|█████▎      | 330/743 [1:22:14<1:39:54, 14.52s/batch, batch_loss=24.3, batch_index=331, batch_size=256]

Validation:  45%|█████▎      | 331/743 [1:22:14<1:41:33, 14.79s/batch, batch_loss=24.3, batch_index=331, batch_size=256]

Validation:  45%|████     | 331/743 [1:22:29<1:41:33, 14.79s/batch, batch_loss=1.15e+4, batch_index=332, batch_size=256]

Validation:  45%|████     | 332/743 [1:22:29<1:40:31, 14.67s/batch, batch_loss=1.15e+4, batch_index=332, batch_size=256]

Validation:  45%|█████▎      | 332/743 [1:22:44<1:40:31, 14.67s/batch, batch_loss=30.9, batch_index=333, batch_size=256]

Validation:  45%|█████▍      | 333/743 [1:22:44<1:41:25, 14.84s/batch, batch_loss=30.9, batch_index=333, batch_size=256]

Validation:  45%|█████▍      | 333/743 [1:22:59<1:41:25, 14.84s/batch, batch_loss=25.2, batch_index=334, batch_size=256]

Validation:  45%|█████▍      | 334/743 [1:22:59<1:42:37, 15.06s/batch, batch_loss=25.2, batch_index=334, batch_size=256]

Validation:  45%|█████▍      | 334/743 [1:23:14<1:42:37, 15.06s/batch, batch_loss=32.4, batch_index=335, batch_size=256]

Validation:  45%|█████▍      | 335/743 [1:23:14<1:40:48, 14.82s/batch, batch_loss=32.4, batch_index=335, batch_size=256]

Validation:  45%|█████▍      | 335/743 [1:23:32<1:40:48, 14.82s/batch, batch_loss=14.8, batch_index=336, batch_size=256]

Validation:  45%|█████▍      | 336/743 [1:23:32<1:48:18, 15.97s/batch, batch_loss=14.8, batch_index=336, batch_size=256]

Validation:  45%|█████▍      | 336/743 [1:23:48<1:48:18, 15.97s/batch, batch_loss=23.2, batch_index=337, batch_size=256]

Validation:  45%|█████▍      | 337/743 [1:23:48<1:48:00, 15.96s/batch, batch_loss=23.2, batch_index=337, batch_size=256]

Validation:  45%|█████▍      | 337/743 [1:24:04<1:48:00, 15.96s/batch, batch_loss=36.7, batch_index=338, batch_size=256]

Validation:  45%|█████▍      | 338/743 [1:24:04<1:47:26, 15.92s/batch, batch_loss=36.7, batch_index=338, batch_size=256]

Validation:  45%|█████▍      | 338/743 [1:24:20<1:47:26, 15.92s/batch, batch_loss=39.5, batch_index=339, batch_size=256]

Validation:  46%|█████▍      | 339/743 [1:24:20<1:46:13, 15.78s/batch, batch_loss=39.5, batch_index=339, batch_size=256]

Validation:  46%|█████▍      | 339/743 [1:24:35<1:46:13, 15.78s/batch, batch_loss=32.1, batch_index=340, batch_size=256]

Validation:  46%|█████▍      | 340/743 [1:24:35<1:45:37, 15.73s/batch, batch_loss=32.1, batch_index=340, batch_size=256]

Validation:  46%|██████▍       | 340/743 [1:24:51<1:45:37, 15.73s/batch, batch_loss=17, batch_index=341, batch_size=256]

Validation:  46%|██████▍       | 341/743 [1:24:51<1:44:35, 15.61s/batch, batch_loss=17, batch_index=341, batch_size=256]

Validation:  46%|█████▌      | 341/743 [1:25:09<1:44:35, 15.61s/batch, batch_loss=24.3, batch_index=342, batch_size=256]

Validation:  46%|█████▌      | 342/743 [1:25:09<1:49:32, 16.39s/batch, batch_loss=24.3, batch_index=342, batch_size=256]

Validation:  46%|█████▌      | 342/743 [1:25:24<1:49:32, 16.39s/batch, batch_loss=26.9, batch_index=343, batch_size=256]

Validation:  46%|█████▌      | 343/743 [1:25:24<1:47:38, 16.15s/batch, batch_loss=26.9, batch_index=343, batch_size=256]

Validation:  46%|█████▌      | 343/743 [1:25:39<1:47:38, 16.15s/batch, batch_loss=25.6, batch_index=344, batch_size=256]

Validation:  46%|█████▌      | 344/743 [1:25:39<1:44:23, 15.70s/batch, batch_loss=25.6, batch_index=344, batch_size=256]

Validation:  46%|█████▌      | 344/743 [1:25:54<1:44:23, 15.70s/batch, batch_loss=19.6, batch_index=345, batch_size=256]

Validation:  46%|█████▌      | 345/743 [1:25:54<1:43:37, 15.62s/batch, batch_loss=19.6, batch_index=345, batch_size=256]

Validation:  46%|█████▌      | 345/743 [1:26:09<1:43:37, 15.62s/batch, batch_loss=27.7, batch_index=346, batch_size=256]

Validation:  47%|█████▌      | 346/743 [1:26:09<1:40:42, 15.22s/batch, batch_loss=27.7, batch_index=346, batch_size=256]

Validation:  47%|█████▌      | 346/743 [1:26:23<1:40:42, 15.22s/batch, batch_loss=21.8, batch_index=347, batch_size=256]

Validation:  47%|█████▌      | 347/743 [1:26:23<1:39:25, 15.07s/batch, batch_loss=21.8, batch_index=347, batch_size=256]

Validation:  47%|█████▌      | 347/743 [1:26:41<1:39:25, 15.07s/batch, batch_loss=29.2, batch_index=348, batch_size=256]

Validation:  47%|█████▌      | 348/743 [1:26:41<1:43:49, 15.77s/batch, batch_loss=29.2, batch_index=348, batch_size=256]

Validation:  47%|█████▌      | 348/743 [1:26:55<1:43:49, 15.77s/batch, batch_loss=24.1, batch_index=349, batch_size=256]

Validation:  47%|█████▋      | 349/743 [1:26:55<1:40:39, 15.33s/batch, batch_loss=24.1, batch_index=349, batch_size=256]

Validation:  47%|█████▋      | 349/743 [1:27:09<1:40:39, 15.33s/batch, batch_loss=23.3, batch_index=350, batch_size=256]

Validation:  47%|█████▋      | 350/743 [1:27:09<1:38:10, 14.99s/batch, batch_loss=23.3, batch_index=350, batch_size=256]

Validation:  47%|████▏    | 350/743 [1:27:24<1:38:10, 14.99s/batch, batch_loss=1.31e+4, batch_index=351, batch_size=256]

Validation:  47%|████▎    | 351/743 [1:27:24<1:38:10, 15.03s/batch, batch_loss=1.31e+4, batch_index=351, batch_size=256]

Validation:  47%|█████▋      | 351/743 [1:27:39<1:38:10, 15.03s/batch, batch_loss=32.4, batch_index=352, batch_size=256]

Validation:  47%|█████▋      | 352/743 [1:27:39<1:36:18, 14.78s/batch, batch_loss=32.4, batch_index=352, batch_size=256]

Validation:  47%|█████▋      | 352/743 [1:27:52<1:36:18, 14.78s/batch, batch_loss=19.1, batch_index=353, batch_size=256]

Validation:  48%|█████▋      | 353/743 [1:27:52<1:34:15, 14.50s/batch, batch_loss=19.1, batch_index=353, batch_size=256]

Validation:  48%|█████▋      | 353/743 [1:28:06<1:34:15, 14.50s/batch, batch_loss=21.9, batch_index=354, batch_size=256]

Validation:  48%|█████▋      | 354/743 [1:28:06<1:32:57, 14.34s/batch, batch_loss=21.9, batch_index=354, batch_size=256]

Validation:  48%|██████▋       | 354/743 [1:28:21<1:32:57, 14.34s/batch, batch_loss=26, batch_index=355, batch_size=256]

Validation:  48%|██████▋       | 355/743 [1:28:21<1:33:29, 14.46s/batch, batch_loss=26, batch_index=355, batch_size=256]

Validation:  48%|█████▋      | 355/743 [1:28:35<1:33:29, 14.46s/batch, batch_loss=38.5, batch_index=356, batch_size=256]

Validation:  48%|█████▋      | 356/743 [1:28:35<1:32:31, 14.35s/batch, batch_loss=38.5, batch_index=356, batch_size=256]

Validation:  48%|████▎    | 356/743 [1:28:49<1:32:31, 14.35s/batch, batch_loss=5.99e+4, batch_index=357, batch_size=256]

Validation:  48%|████▎    | 357/743 [1:28:49<1:31:55, 14.29s/batch, batch_loss=5.99e+4, batch_index=357, batch_size=256]

Validation:  48%|██████▋       | 357/743 [1:29:04<1:31:55, 14.29s/batch, batch_loss=17, batch_index=358, batch_size=256]

Validation:  48%|██████▋       | 358/743 [1:29:04<1:31:45, 14.30s/batch, batch_loss=17, batch_index=358, batch_size=256]

Validation:  48%|█████▊      | 358/743 [1:29:19<1:31:45, 14.30s/batch, batch_loss=14.7, batch_index=359, batch_size=256]

Validation:  48%|█████▊      | 359/743 [1:29:19<1:32:41, 14.48s/batch, batch_loss=14.7, batch_index=359, batch_size=256]

Validation:  48%|█████▊      | 359/743 [1:29:33<1:32:41, 14.48s/batch, batch_loss=22.9, batch_index=360, batch_size=256]

Validation:  48%|█████▊      | 360/743 [1:29:33<1:32:09, 14.44s/batch, batch_loss=22.9, batch_index=360, batch_size=256]

Validation:  48%|██████▊       | 360/743 [1:29:47<1:32:09, 14.44s/batch, batch_loss=19, batch_index=361, batch_size=256]

Validation:  49%|██████▊       | 361/743 [1:29:47<1:31:39, 14.40s/batch, batch_loss=19, batch_index=361, batch_size=256]

Validation:  49%|█████▊      | 361/743 [1:30:02<1:31:39, 14.40s/batch, batch_loss=23.7, batch_index=362, batch_size=256]

Validation:  49%|█████▊      | 362/743 [1:30:02<1:31:47, 14.46s/batch, batch_loss=23.7, batch_index=362, batch_size=256]

Validation:  49%|█████▊      | 362/743 [1:30:16<1:31:47, 14.46s/batch, batch_loss=29.9, batch_index=363, batch_size=256]

Validation:  49%|█████▊      | 363/743 [1:30:16<1:31:33, 14.46s/batch, batch_loss=29.9, batch_index=363, batch_size=256]

Validation:  49%|█████▊      | 363/743 [1:30:31<1:31:33, 14.46s/batch, batch_loss=23.1, batch_index=364, batch_size=256]

Validation:  49%|█████▉      | 364/743 [1:30:31<1:31:03, 14.42s/batch, batch_loss=23.1, batch_index=364, batch_size=256]

Validation:  49%|██████▊       | 364/743 [1:30:45<1:31:03, 14.42s/batch, batch_loss=21, batch_index=365, batch_size=256]

Validation:  49%|██████▉       | 365/743 [1:30:45<1:30:35, 14.38s/batch, batch_loss=21, batch_index=365, batch_size=256]

Validation:  49%|█████▉      | 365/743 [1:31:00<1:30:35, 14.38s/batch, batch_loss=14.1, batch_index=366, batch_size=256]

Validation:  49%|█████▉      | 366/743 [1:31:00<1:30:54, 14.47s/batch, batch_loss=14.1, batch_index=366, batch_size=256]

Validation:  49%|█████▉      | 366/743 [1:31:14<1:30:54, 14.47s/batch, batch_loss=19.5, batch_index=367, batch_size=256]

Validation:  49%|█████▉      | 367/743 [1:31:14<1:29:41, 14.31s/batch, batch_loss=19.5, batch_index=367, batch_size=256]

Validation:  49%|████▍    | 367/743 [1:31:28<1:29:41, 14.31s/batch, batch_loss=4.83e+3, batch_index=368, batch_size=256]

Validation:  50%|████▍    | 368/743 [1:31:28<1:30:20, 14.45s/batch, batch_loss=4.83e+3, batch_index=368, batch_size=256]

Validation:  50%|██████▉       | 368/743 [1:31:43<1:30:20, 14.45s/batch, batch_loss=20, batch_index=369, batch_size=256]

Validation:  50%|██████▉       | 369/743 [1:31:43<1:31:21, 14.66s/batch, batch_loss=20, batch_index=369, batch_size=256]

Validation:  50%|█████▉      | 369/743 [1:31:59<1:31:21, 14.66s/batch, batch_loss=26.9, batch_index=370, batch_size=256]

Validation:  50%|█████▉      | 370/743 [1:31:59<1:31:52, 14.78s/batch, batch_loss=26.9, batch_index=370, batch_size=256]

Validation:  50%|█████▉      | 370/743 [1:32:12<1:31:52, 14.78s/batch, batch_loss=22.8, batch_index=371, batch_size=256]

Validation:  50%|█████▉      | 371/743 [1:32:12<1:29:41, 14.47s/batch, batch_loss=22.8, batch_index=371, batch_size=256]

Validation:  50%|█████▉      | 371/743 [1:32:27<1:29:41, 14.47s/batch, batch_loss=20.6, batch_index=372, batch_size=256]

Validation:  50%|██████      | 372/743 [1:32:27<1:29:46, 14.52s/batch, batch_loss=20.6, batch_index=372, batch_size=256]

Validation:  50%|██████      | 372/743 [1:32:41<1:29:46, 14.52s/batch, batch_loss=27.8, batch_index=373, batch_size=256]

Validation:  50%|██████      | 373/743 [1:32:41<1:29:31, 14.52s/batch, batch_loss=27.8, batch_index=373, batch_size=256]

Validation:  50%|██████      | 373/743 [1:32:56<1:29:31, 14.52s/batch, batch_loss=16.5, batch_index=374, batch_size=256]

Validation:  50%|██████      | 374/743 [1:32:56<1:29:16, 14.52s/batch, batch_loss=16.5, batch_index=374, batch_size=256]

Validation:  50%|██████      | 374/743 [1:33:11<1:29:16, 14.52s/batch, batch_loss=9.73, batch_index=375, batch_size=256]

Validation:  50%|██████      | 375/743 [1:33:11<1:29:19, 14.56s/batch, batch_loss=9.73, batch_index=375, batch_size=256]

Validation:  50%|██████      | 375/743 [1:33:25<1:29:19, 14.56s/batch, batch_loss=30.2, batch_index=376, batch_size=256]

Validation:  51%|██████      | 376/743 [1:33:25<1:27:52, 14.37s/batch, batch_loss=30.2, batch_index=376, batch_size=256]

Validation:  51%|██████      | 376/743 [1:33:39<1:27:52, 14.37s/batch, batch_loss=12.4, batch_index=377, batch_size=256]

Validation:  51%|██████      | 377/743 [1:33:39<1:27:58, 14.42s/batch, batch_loss=12.4, batch_index=377, batch_size=256]

Validation:  51%|██████      | 377/743 [1:33:53<1:27:58, 14.42s/batch, batch_loss=21.9, batch_index=378, batch_size=256]

Validation:  51%|██████      | 378/743 [1:33:53<1:26:58, 14.30s/batch, batch_loss=21.9, batch_index=378, batch_size=256]

Validation:  51%|██████      | 378/743 [1:34:07<1:26:58, 14.30s/batch, batch_loss=7.74, batch_index=379, batch_size=256]

Validation:  51%|██████      | 379/743 [1:34:07<1:26:05, 14.19s/batch, batch_loss=7.74, batch_index=379, batch_size=256]

Validation:  51%|███████▏      | 379/743 [1:34:21<1:26:05, 14.19s/batch, batch_loss=12, batch_index=380, batch_size=256]

Validation:  51%|███████▏      | 380/743 [1:34:21<1:25:49, 14.19s/batch, batch_loss=12, batch_index=380, batch_size=256]

Validation:  51%|████▌    | 380/743 [1:34:35<1:25:49, 14.19s/batch, batch_loss=7.04e+4, batch_index=381, batch_size=256]

Validation:  51%|████▌    | 381/743 [1:34:35<1:24:41, 14.04s/batch, batch_loss=7.04e+4, batch_index=381, batch_size=256]

Validation:  51%|██████▋      | 381/743 [1:34:50<1:24:41, 14.04s/batch, batch_loss=914, batch_index=382, batch_size=256]

Validation:  51%|██████▋      | 382/743 [1:34:50<1:26:03, 14.30s/batch, batch_loss=914, batch_index=382, batch_size=256]

Validation:  51%|██████▋      | 382/743 [1:35:04<1:26:03, 14.30s/batch, batch_loss=212, batch_index=383, batch_size=256]

Validation:  52%|██████▋      | 383/743 [1:35:04<1:25:43, 14.29s/batch, batch_loss=212, batch_index=383, batch_size=256]

Validation:  52%|██████▋      | 383/743 [1:35:18<1:25:43, 14.29s/batch, batch_loss=282, batch_index=384, batch_size=256]

Validation:  52%|██████▋      | 384/743 [1:35:18<1:25:28, 14.29s/batch, batch_loss=282, batch_index=384, batch_size=256]

Validation:  52%|██████▏     | 384/743 [1:35:33<1:25:28, 14.29s/batch, batch_loss=19.5, batch_index=385, batch_size=256]

Validation:  52%|██████▏     | 385/743 [1:35:33<1:25:55, 14.40s/batch, batch_loss=19.5, batch_index=385, batch_size=256]

Validation:  52%|██████▏     | 385/743 [1:35:47<1:25:55, 14.40s/batch, batch_loss=10.2, batch_index=386, batch_size=256]

Validation:  52%|██████▏     | 386/743 [1:35:47<1:25:11, 14.32s/batch, batch_loss=10.2, batch_index=386, batch_size=256]

Validation:  52%|██████▏     | 386/743 [1:36:02<1:25:11, 14.32s/batch, batch_loss=8.99, batch_index=387, batch_size=256]

Validation:  52%|██████▎     | 387/743 [1:36:02<1:25:54, 14.48s/batch, batch_loss=8.99, batch_index=387, batch_size=256]

Validation:  52%|██████▎     | 387/743 [1:36:17<1:25:54, 14.48s/batch, batch_loss=17.5, batch_index=388, batch_size=256]

Validation:  52%|██████▎     | 388/743 [1:36:17<1:25:58, 14.53s/batch, batch_loss=17.5, batch_index=388, batch_size=256]

Validation:  52%|██████▎     | 388/743 [1:36:32<1:25:58, 14.53s/batch, batch_loss=12.7, batch_index=389, batch_size=256]

Validation:  52%|██████▎     | 389/743 [1:36:32<1:26:55, 14.73s/batch, batch_loss=12.7, batch_index=389, batch_size=256]

Validation:  52%|██████▎     | 389/743 [1:36:46<1:26:55, 14.73s/batch, batch_loss=16.6, batch_index=390, batch_size=256]

Validation:  52%|██████▎     | 390/743 [1:36:46<1:26:27, 14.70s/batch, batch_loss=16.6, batch_index=390, batch_size=256]

Validation:  52%|██████▎     | 390/743 [1:37:01<1:26:27, 14.70s/batch, batch_loss=14.7, batch_index=391, batch_size=256]

Validation:  53%|██████▎     | 391/743 [1:37:01<1:25:25, 14.56s/batch, batch_loss=14.7, batch_index=391, batch_size=256]

Validation:  53%|██████▎     | 391/743 [1:37:18<1:25:25, 14.56s/batch, batch_loss=15.2, batch_index=392, batch_size=256]

Validation:  53%|██████▎     | 392/743 [1:37:18<1:30:25, 15.46s/batch, batch_loss=15.2, batch_index=392, batch_size=256]

Validation:  53%|██████▎     | 392/743 [1:37:34<1:30:25, 15.46s/batch, batch_loss=19.3, batch_index=393, batch_size=256]

Validation:  53%|██████▎     | 393/743 [1:37:34<1:30:22, 15.49s/batch, batch_loss=19.3, batch_index=393, batch_size=256]

Validation:  53%|██████▎     | 393/743 [1:37:49<1:30:22, 15.49s/batch, batch_loss=17.3, batch_index=394, batch_size=256]

Validation:  53%|██████▎     | 394/743 [1:37:49<1:30:07, 15.49s/batch, batch_loss=17.3, batch_index=394, batch_size=256]

Validation:  53%|██████▎     | 394/743 [1:38:06<1:30:07, 15.49s/batch, batch_loss=14.4, batch_index=395, batch_size=256]

Validation:  53%|██████▍     | 395/743 [1:38:06<1:31:59, 15.86s/batch, batch_loss=14.4, batch_index=395, batch_size=256]

Validation:  53%|███████▍      | 395/743 [1:38:20<1:31:59, 15.86s/batch, batch_loss=18, batch_index=396, batch_size=256]

Validation:  53%|███████▍      | 396/743 [1:38:20<1:28:23, 15.28s/batch, batch_loss=18, batch_index=396, batch_size=256]

Validation:  53%|██████▍     | 396/743 [1:38:35<1:28:23, 15.28s/batch, batch_loss=11.7, batch_index=397, batch_size=256]

Validation:  53%|██████▍     | 397/743 [1:38:35<1:26:46, 15.05s/batch, batch_loss=11.7, batch_index=397, batch_size=256]

Validation:  53%|██████▍     | 397/743 [1:38:52<1:26:46, 15.05s/batch, batch_loss=24.9, batch_index=398, batch_size=256]

Validation:  54%|██████▍     | 398/743 [1:38:52<1:29:54, 15.64s/batch, batch_loss=24.9, batch_index=398, batch_size=256]

Validation:  54%|██████▍     | 398/743 [1:39:07<1:29:54, 15.64s/batch, batch_loss=15.9, batch_index=399, batch_size=256]

Validation:  54%|██████▍     | 399/743 [1:39:07<1:29:11, 15.56s/batch, batch_loss=15.9, batch_index=399, batch_size=256]

Validation:  54%|██████▍     | 399/743 [1:39:22<1:29:11, 15.56s/batch, batch_loss=21.6, batch_index=400, batch_size=256]

Validation:  54%|██████▍     | 400/743 [1:39:22<1:28:19, 15.45s/batch, batch_loss=21.6, batch_index=400, batch_size=256]

Validation:  54%|██████▍     | 400/743 [1:39:39<1:28:19, 15.45s/batch, batch_loss=20.9, batch_index=401, batch_size=256]

Validation:  54%|██████▍     | 401/743 [1:39:39<1:30:37, 15.90s/batch, batch_loss=20.9, batch_index=401, batch_size=256]

Validation:  54%|██████▍     | 401/743 [1:39:55<1:30:37, 15.90s/batch, batch_loss=7.65, batch_index=402, batch_size=256]

Validation:  54%|██████▍     | 402/743 [1:39:55<1:30:29, 15.92s/batch, batch_loss=7.65, batch_index=402, batch_size=256]

Validation:  54%|██████▍     | 402/743 [1:40:09<1:30:29, 15.92s/batch, batch_loss=17.8, batch_index=403, batch_size=256]

Validation:  54%|██████▌     | 403/743 [1:40:09<1:27:25, 15.43s/batch, batch_loss=17.8, batch_index=403, batch_size=256]

Validation:  54%|██████▌     | 403/743 [1:40:25<1:27:25, 15.43s/batch, batch_loss=16.3, batch_index=404, batch_size=256]

Validation:  54%|██████▌     | 404/743 [1:40:25<1:28:14, 15.62s/batch, batch_loss=16.3, batch_index=404, batch_size=256]

Validation:  54%|██████▌     | 404/743 [1:40:41<1:28:14, 15.62s/batch, batch_loss=10.4, batch_index=405, batch_size=256]

Validation:  55%|██████▌     | 405/743 [1:40:41<1:27:18, 15.50s/batch, batch_loss=10.4, batch_index=405, batch_size=256]

Validation:  55%|██████▌     | 405/743 [1:40:57<1:27:18, 15.50s/batch, batch_loss=14.5, batch_index=406, batch_size=256]

Validation:  55%|██████▌     | 406/743 [1:40:57<1:28:47, 15.81s/batch, batch_loss=14.5, batch_index=406, batch_size=256]

Validation:  55%|██████▌     | 406/743 [1:41:16<1:28:47, 15.81s/batch, batch_loss=18.6, batch_index=407, batch_size=256]

Validation:  55%|██████▌     | 407/743 [1:41:16<1:33:02, 16.61s/batch, batch_loss=18.6, batch_index=407, batch_size=256]

Validation:  55%|██████▌     | 407/743 [1:41:32<1:33:02, 16.61s/batch, batch_loss=20.5, batch_index=408, batch_size=256]

Validation:  55%|██████▌     | 408/743 [1:41:32<1:31:45, 16.43s/batch, batch_loss=20.5, batch_index=408, batch_size=256]

Validation:  55%|██████▌     | 408/743 [1:41:46<1:31:45, 16.43s/batch, batch_loss=11.2, batch_index=409, batch_size=256]

Validation:  55%|██████▌     | 409/743 [1:41:46<1:28:30, 15.90s/batch, batch_loss=11.2, batch_index=409, batch_size=256]

Validation:  55%|██████▌     | 409/743 [1:42:02<1:28:30, 15.90s/batch, batch_loss=16.8, batch_index=410, batch_size=256]

Validation:  55%|██████▌     | 410/743 [1:42:02<1:27:54, 15.84s/batch, batch_loss=16.8, batch_index=410, batch_size=256]

Validation:  55%|██████▌     | 410/743 [1:42:16<1:27:54, 15.84s/batch, batch_loss=18.6, batch_index=411, batch_size=256]

Validation:  55%|██████▋     | 411/743 [1:42:16<1:25:13, 15.40s/batch, batch_loss=18.6, batch_index=411, batch_size=256]

Validation:  55%|██████▋     | 411/743 [1:42:31<1:25:13, 15.40s/batch, batch_loss=15.9, batch_index=412, batch_size=256]

Validation:  55%|██████▋     | 412/743 [1:42:31<1:24:28, 15.31s/batch, batch_loss=15.9, batch_index=412, batch_size=256]

Validation:  55%|████▉    | 412/743 [1:42:47<1:24:28, 15.31s/batch, batch_loss=1.92e+3, batch_index=413, batch_size=256]

Validation:  56%|█████    | 413/743 [1:42:47<1:24:22, 15.34s/batch, batch_loss=1.92e+3, batch_index=413, batch_size=256]

Validation:  56%|██████▋     | 413/743 [1:43:00<1:24:22, 15.34s/batch, batch_loss=23.6, batch_index=414, batch_size=256]

Validation:  56%|██████▋     | 414/743 [1:43:00<1:20:24, 14.66s/batch, batch_loss=23.6, batch_index=414, batch_size=256]

Validation:  56%|██████▋     | 414/743 [1:43:17<1:20:24, 14.66s/batch, batch_loss=24.7, batch_index=415, batch_size=256]

Validation:  56%|██████▋     | 415/743 [1:43:17<1:23:47, 15.33s/batch, batch_loss=24.7, batch_index=415, batch_size=256]

Validation:  56%|█████    | 415/743 [1:43:32<1:23:47, 15.33s/batch, batch_loss=6.47e+3, batch_index=416, batch_size=256]

Validation:  56%|█████    | 416/743 [1:43:32<1:22:36, 15.16s/batch, batch_loss=6.47e+3, batch_index=416, batch_size=256]

Validation:  56%|██████▋     | 416/743 [1:43:44<1:22:36, 15.16s/batch, batch_loss=17.8, batch_index=417, batch_size=256]

Validation:  56%|██████▋     | 417/743 [1:43:44<1:18:03, 14.37s/batch, batch_loss=17.8, batch_index=417, batch_size=256]

Validation:  56%|██████▋     | 417/743 [1:43:57<1:18:03, 14.37s/batch, batch_loss=15.8, batch_index=418, batch_size=256]

Validation:  56%|██████▊     | 418/743 [1:43:57<1:16:02, 14.04s/batch, batch_loss=15.8, batch_index=418, batch_size=256]

Validation:  56%|██████▊     | 418/743 [1:44:12<1:16:02, 14.04s/batch, batch_loss=18.6, batch_index=419, batch_size=256]

Validation:  56%|██████▊     | 419/743 [1:44:12<1:17:10, 14.29s/batch, batch_loss=18.6, batch_index=419, batch_size=256]

Validation:  56%|██████▊     | 419/743 [1:44:26<1:17:10, 14.29s/batch, batch_loss=15.4, batch_index=420, batch_size=256]

Validation:  57%|██████▊     | 420/743 [1:44:26<1:16:47, 14.27s/batch, batch_loss=15.4, batch_index=420, batch_size=256]

Validation:  57%|██████▊     | 420/743 [1:44:42<1:16:47, 14.27s/batch, batch_loss=31.6, batch_index=421, batch_size=256]

Validation:  57%|██████▊     | 421/743 [1:44:42<1:17:49, 14.50s/batch, batch_loss=31.6, batch_index=421, batch_size=256]

Validation:  57%|██████▊     | 421/743 [1:44:56<1:17:49, 14.50s/batch, batch_loss=10.9, batch_index=422, batch_size=256]

Validation:  57%|██████▊     | 422/743 [1:44:56<1:17:04, 14.41s/batch, batch_loss=10.9, batch_index=422, batch_size=256]

Validation:  57%|██████▊     | 422/743 [1:45:13<1:17:04, 14.41s/batch, batch_loss=22.4, batch_index=423, batch_size=256]

Validation:  57%|██████▊     | 423/743 [1:45:13<1:21:17, 15.24s/batch, batch_loss=22.4, batch_index=423, batch_size=256]

Validation:  57%|███████▍     | 423/743 [1:45:28<1:21:17, 15.24s/batch, batch_loss=327, batch_index=424, batch_size=256]

Validation:  57%|███████▍     | 424/743 [1:45:28<1:20:49, 15.20s/batch, batch_loss=327, batch_index=424, batch_size=256]

Validation:  57%|██████▊     | 424/743 [1:45:43<1:20:49, 15.20s/batch, batch_loss=22.6, batch_index=425, batch_size=256]

Validation:  57%|██████▊     | 425/743 [1:45:43<1:20:15, 15.14s/batch, batch_loss=22.6, batch_index=425, batch_size=256]

Validation:  57%|██████▊     | 425/743 [1:45:58<1:20:15, 15.14s/batch, batch_loss=23.3, batch_index=426, batch_size=256]

Validation:  57%|██████▉     | 426/743 [1:45:58<1:20:18, 15.20s/batch, batch_loss=23.3, batch_index=426, batch_size=256]

Validation:  57%|██████▉     | 426/743 [1:46:13<1:20:18, 15.20s/batch, batch_loss=20.9, batch_index=427, batch_size=256]

Validation:  57%|██████▉     | 427/743 [1:46:13<1:19:24, 15.08s/batch, batch_loss=20.9, batch_index=427, batch_size=256]

Validation:  57%|█████▏   | 427/743 [1:46:28<1:19:24, 15.08s/batch, batch_loss=5.28e+3, batch_index=428, batch_size=256]

Validation:  58%|█████▏   | 428/743 [1:46:28<1:18:39, 14.98s/batch, batch_loss=5.28e+3, batch_index=428, batch_size=256]

Validation:  58%|██████▉     | 428/743 [1:46:43<1:18:39, 14.98s/batch, batch_loss=18.7, batch_index=429, batch_size=256]

Validation:  58%|██████▉     | 429/743 [1:46:43<1:18:38, 15.03s/batch, batch_loss=18.7, batch_index=429, batch_size=256]

Validation:  58%|█████▏   | 429/743 [1:46:57<1:18:38, 15.03s/batch, batch_loss=5.39e+3, batch_index=430, batch_size=256]

Validation:  58%|█████▏   | 430/743 [1:46:57<1:17:05, 14.78s/batch, batch_loss=5.39e+3, batch_index=430, batch_size=256]

Validation:  58%|█████▏   | 430/743 [1:47:13<1:17:05, 14.78s/batch, batch_loss=1.39e+4, batch_index=431, batch_size=256]

Validation:  58%|█████▏   | 431/743 [1:47:13<1:19:01, 15.20s/batch, batch_loss=1.39e+4, batch_index=431, batch_size=256]

Validation:  58%|███████▌     | 431/743 [1:47:28<1:19:01, 15.20s/batch, batch_loss=966, batch_index=432, batch_size=256]

Validation:  58%|███████▌     | 432/743 [1:47:28<1:18:25, 15.13s/batch, batch_loss=966, batch_index=432, batch_size=256]

Validation:  58%|██████▉     | 432/743 [1:47:43<1:18:25, 15.13s/batch, batch_loss=17.1, batch_index=433, batch_size=256]

Validation:  58%|██████▉     | 433/743 [1:47:43<1:17:34, 15.01s/batch, batch_loss=17.1, batch_index=433, batch_size=256]

Validation:  58%|██████▉     | 433/743 [1:47:58<1:17:34, 15.01s/batch, batch_loss=12.7, batch_index=434, batch_size=256]

Validation:  58%|███████     | 434/743 [1:47:58<1:17:30, 15.05s/batch, batch_loss=12.7, batch_index=434, batch_size=256]

Validation:  58%|███████     | 434/743 [1:48:12<1:17:30, 15.05s/batch, batch_loss=16.4, batch_index=435, batch_size=256]

Validation:  59%|███████     | 435/743 [1:48:12<1:15:41, 14.74s/batch, batch_loss=16.4, batch_index=435, batch_size=256]

Validation:  59%|███████     | 435/743 [1:48:27<1:15:41, 14.74s/batch, batch_loss=14.2, batch_index=436, batch_size=256]

Validation:  59%|███████     | 436/743 [1:48:27<1:15:01, 14.66s/batch, batch_loss=14.2, batch_index=436, batch_size=256]

Validation:  59%|███████     | 436/743 [1:48:41<1:15:01, 14.66s/batch, batch_loss=21.9, batch_index=437, batch_size=256]

Validation:  59%|███████     | 437/743 [1:48:41<1:13:58, 14.50s/batch, batch_loss=21.9, batch_index=437, batch_size=256]

Validation:  59%|███████▋     | 437/743 [1:48:56<1:13:58, 14.50s/batch, batch_loss=977, batch_index=438, batch_size=256]

Validation:  59%|███████▋     | 438/743 [1:48:56<1:14:31, 14.66s/batch, batch_loss=977, batch_index=438, batch_size=256]

Validation:  59%|███████▋     | 438/743 [1:49:10<1:14:31, 14.66s/batch, batch_loss=901, batch_index=439, batch_size=256]

Validation:  59%|███████▋     | 439/743 [1:49:10<1:13:35, 14.53s/batch, batch_loss=901, batch_index=439, batch_size=256]

Validation:  59%|███████     | 439/743 [1:49:24<1:13:35, 14.53s/batch, batch_loss=19.2, batch_index=440, batch_size=256]

Validation:  59%|███████     | 440/743 [1:49:24<1:12:19, 14.32s/batch, batch_loss=19.2, batch_index=440, batch_size=256]

Validation:  59%|███████     | 440/743 [1:49:39<1:12:19, 14.32s/batch, batch_loss=14.8, batch_index=441, batch_size=256]

Validation:  59%|███████     | 441/743 [1:49:39<1:12:28, 14.40s/batch, batch_loss=14.8, batch_index=441, batch_size=256]

Validation:  59%|███████     | 441/743 [1:49:53<1:12:28, 14.40s/batch, batch_loss=19.2, batch_index=442, batch_size=256]

Validation:  59%|███████▏    | 442/743 [1:49:53<1:12:23, 14.43s/batch, batch_loss=19.2, batch_index=442, batch_size=256]

Validation:  59%|███████▏    | 442/743 [1:50:08<1:12:23, 14.43s/batch, batch_loss=12.8, batch_index=443, batch_size=256]

Validation:  60%|███████▏    | 443/743 [1:50:08<1:12:50, 14.57s/batch, batch_loss=12.8, batch_index=443, batch_size=256]

Validation:  60%|███████▏    | 443/743 [1:50:22<1:12:50, 14.57s/batch, batch_loss=18.8, batch_index=444, batch_size=256]

Validation:  60%|███████▏    | 444/743 [1:50:22<1:12:27, 14.54s/batch, batch_loss=18.8, batch_index=444, batch_size=256]

Validation:  60%|███████▏    | 444/743 [1:50:37<1:12:27, 14.54s/batch, batch_loss=10.7, batch_index=445, batch_size=256]

Validation:  60%|███████▏    | 445/743 [1:50:37<1:12:17, 14.56s/batch, batch_loss=10.7, batch_index=445, batch_size=256]

Validation:  60%|███████▏    | 445/743 [1:50:52<1:12:17, 14.56s/batch, batch_loss=17.4, batch_index=446, batch_size=256]

Validation:  60%|███████▏    | 446/743 [1:50:52<1:12:25, 14.63s/batch, batch_loss=17.4, batch_index=446, batch_size=256]

Validation:  60%|█████▍   | 446/743 [1:51:07<1:12:25, 14.63s/batch, batch_loss=6.85e+3, batch_index=447, batch_size=256]

Validation:  60%|█████▍   | 447/743 [1:51:07<1:12:31, 14.70s/batch, batch_loss=6.85e+3, batch_index=447, batch_size=256]

Validation:  60%|███████▏    | 447/743 [1:51:23<1:12:31, 14.70s/batch, batch_loss=7.88, batch_index=448, batch_size=256]

Validation:  60%|███████▏    | 448/743 [1:51:23<1:14:43, 15.20s/batch, batch_loss=7.88, batch_index=448, batch_size=256]

Validation:  60%|███████▏    | 448/743 [1:51:39<1:14:43, 15.20s/batch, batch_loss=12.2, batch_index=449, batch_size=256]

Validation:  60%|███████▎    | 449/743 [1:51:39<1:15:00, 15.31s/batch, batch_loss=12.2, batch_index=449, batch_size=256]

Validation:  60%|███████▎    | 449/743 [1:51:53<1:15:00, 15.31s/batch, batch_loss=18.6, batch_index=450, batch_size=256]

Validation:  61%|███████▎    | 450/743 [1:51:53<1:13:28, 15.05s/batch, batch_loss=18.6, batch_index=450, batch_size=256]

Validation:  61%|███████▎    | 450/743 [1:52:08<1:13:28, 15.05s/batch, batch_loss=14.5, batch_index=451, batch_size=256]

Validation:  61%|███████▎    | 451/743 [1:52:08<1:12:53, 14.98s/batch, batch_loss=14.5, batch_index=451, batch_size=256]

Validation:  61%|███████▎    | 451/743 [1:52:22<1:12:53, 14.98s/batch, batch_loss=19.9, batch_index=452, batch_size=256]

Validation:  61%|███████▎    | 452/743 [1:52:22<1:11:34, 14.76s/batch, batch_loss=19.9, batch_index=452, batch_size=256]

Validation:  61%|███████▎    | 452/743 [1:52:37<1:11:34, 14.76s/batch, batch_loss=15.6, batch_index=453, batch_size=256]

Validation:  61%|███████▎    | 453/743 [1:52:37<1:11:08, 14.72s/batch, batch_loss=15.6, batch_index=453, batch_size=256]

Validation:  61%|███████▎    | 453/743 [1:52:51<1:11:08, 14.72s/batch, batch_loss=9.72, batch_index=454, batch_size=256]

Validation:  61%|███████▎    | 454/743 [1:52:51<1:10:46, 14.69s/batch, batch_loss=9.72, batch_index=454, batch_size=256]

Validation:  61%|████████▌     | 454/743 [1:53:08<1:10:46, 14.69s/batch, batch_loss=12, batch_index=455, batch_size=256]

Validation:  61%|████████▌     | 455/743 [1:53:08<1:13:07, 15.24s/batch, batch_loss=12, batch_index=455, batch_size=256]

Validation:  61%|███████▎    | 455/743 [1:53:22<1:13:07, 15.24s/batch, batch_loss=12.6, batch_index=456, batch_size=256]

Validation:  61%|███████▎    | 456/743 [1:53:22<1:12:00, 15.05s/batch, batch_loss=12.6, batch_index=456, batch_size=256]

Validation:  61%|███████▎    | 456/743 [1:53:37<1:12:00, 15.05s/batch, batch_loss=18.1, batch_index=457, batch_size=256]

Validation:  62%|███████▍    | 457/743 [1:53:37<1:10:43, 14.84s/batch, batch_loss=18.1, batch_index=457, batch_size=256]

Validation:  62%|███████▍    | 457/743 [1:53:51<1:10:43, 14.84s/batch, batch_loss=23.8, batch_index=458, batch_size=256]

Validation:  62%|███████▍    | 458/743 [1:53:51<1:10:13, 14.78s/batch, batch_loss=23.8, batch_index=458, batch_size=256]

Validation:  62%|███████▍    | 458/743 [1:54:06<1:10:13, 14.78s/batch, batch_loss=17.1, batch_index=459, batch_size=256]

Validation:  62%|███████▍    | 459/743 [1:54:06<1:10:14, 14.84s/batch, batch_loss=17.1, batch_index=459, batch_size=256]

Validation:  62%|███████▍    | 459/743 [1:54:22<1:10:14, 14.84s/batch, batch_loss=22.3, batch_index=460, batch_size=256]

Validation:  62%|███████▍    | 460/743 [1:54:22<1:10:38, 14.98s/batch, batch_loss=22.3, batch_index=460, batch_size=256]

Validation:  62%|███████▍    | 460/743 [1:54:37<1:10:38, 14.98s/batch, batch_loss=20.1, batch_index=461, batch_size=256]

Validation:  62%|███████▍    | 461/743 [1:54:37<1:10:32, 15.01s/batch, batch_loss=20.1, batch_index=461, batch_size=256]

Validation:  62%|███████▍    | 461/743 [1:54:51<1:10:32, 15.01s/batch, batch_loss=15.9, batch_index=462, batch_size=256]

Validation:  62%|███████▍    | 462/743 [1:54:51<1:09:24, 14.82s/batch, batch_loss=15.9, batch_index=462, batch_size=256]

Validation:  62%|███████▍    | 462/743 [1:55:06<1:09:24, 14.82s/batch, batch_loss=13.3, batch_index=463, batch_size=256]

Validation:  62%|███████▍    | 463/743 [1:55:06<1:08:52, 14.76s/batch, batch_loss=13.3, batch_index=463, batch_size=256]

Validation:  62%|█████▌   | 463/743 [1:55:21<1:08:52, 14.76s/batch, batch_loss=1.35e+4, batch_index=464, batch_size=256]

Validation:  62%|█████▌   | 464/743 [1:55:21<1:08:49, 14.80s/batch, batch_loss=1.35e+4, batch_index=464, batch_size=256]

Validation:  62%|███████▍    | 464/743 [1:55:35<1:08:49, 14.80s/batch, batch_loss=22.1, batch_index=465, batch_size=256]

Validation:  63%|███████▌    | 465/743 [1:55:35<1:08:07, 14.70s/batch, batch_loss=22.1, batch_index=465, batch_size=256]

Validation:  63%|███████▌    | 465/743 [1:55:50<1:08:07, 14.70s/batch, batch_loss=17.9, batch_index=466, batch_size=256]

Validation:  63%|███████▌    | 466/743 [1:55:50<1:07:38, 14.65s/batch, batch_loss=17.9, batch_index=466, batch_size=256]

Validation:  63%|███████▌    | 466/743 [1:56:04<1:07:38, 14.65s/batch, batch_loss=23.8, batch_index=467, batch_size=256]

Validation:  63%|███████▌    | 467/743 [1:56:04<1:07:17, 14.63s/batch, batch_loss=23.8, batch_index=467, batch_size=256]

Validation:  63%|███████▌    | 467/743 [1:56:19<1:07:17, 14.63s/batch, batch_loss=12.8, batch_index=468, batch_size=256]

Validation:  63%|███████▌    | 468/743 [1:56:19<1:06:41, 14.55s/batch, batch_loss=12.8, batch_index=468, batch_size=256]

Validation:  63%|███████▌    | 468/743 [1:56:33<1:06:41, 14.55s/batch, batch_loss=18.7, batch_index=469, batch_size=256]

Validation:  63%|███████▌    | 469/743 [1:56:33<1:05:44, 14.40s/batch, batch_loss=18.7, batch_index=469, batch_size=256]

Validation:  63%|█████▋   | 469/743 [1:56:48<1:05:44, 14.40s/batch, batch_loss=6.35e+4, batch_index=470, batch_size=256]

Validation:  63%|█████▋   | 470/743 [1:56:48<1:06:03, 14.52s/batch, batch_loss=6.35e+4, batch_index=470, batch_size=256]

Validation:  63%|███████▌    | 470/743 [1:57:01<1:06:03, 14.52s/batch, batch_loss=10.9, batch_index=471, batch_size=256]

Validation:  63%|███████▌    | 471/743 [1:57:01<1:05:00, 14.34s/batch, batch_loss=10.9, batch_index=471, batch_size=256]

Validation:  63%|███████▌    | 471/743 [1:57:16<1:05:00, 14.34s/batch, batch_loss=22.4, batch_index=472, batch_size=256]

Validation:  64%|███████▌    | 472/743 [1:57:16<1:05:17, 14.46s/batch, batch_loss=22.4, batch_index=472, batch_size=256]

Validation:  64%|████████▎    | 472/743 [1:57:31<1:05:17, 14.46s/batch, batch_loss=586, batch_index=473, batch_size=256]

Validation:  64%|████████▎    | 473/743 [1:57:31<1:04:57, 14.44s/batch, batch_loss=586, batch_index=473, batch_size=256]

Validation:  64%|███████▋    | 473/743 [1:57:45<1:04:57, 14.44s/batch, batch_loss=17.6, batch_index=474, batch_size=256]

Validation:  64%|███████▋    | 474/743 [1:57:45<1:05:16, 14.56s/batch, batch_loss=17.6, batch_index=474, batch_size=256]

Validation:  64%|███████▋    | 474/743 [1:58:00<1:05:16, 14.56s/batch, batch_loss=21.1, batch_index=475, batch_size=256]

Validation:  64%|███████▋    | 475/743 [1:58:00<1:04:37, 14.47s/batch, batch_loss=21.1, batch_index=475, batch_size=256]

Validation:  64%|████████▎    | 475/743 [1:58:15<1:04:37, 14.47s/batch, batch_loss=9.6, batch_index=476, batch_size=256]

Validation:  64%|████████▎    | 476/743 [1:58:15<1:05:09, 14.64s/batch, batch_loss=9.6, batch_index=476, batch_size=256]

Validation:  64%|███████▋    | 476/743 [1:58:29<1:05:09, 14.64s/batch, batch_loss=14.3, batch_index=477, batch_size=256]

Validation:  64%|███████▋    | 477/743 [1:58:29<1:04:02, 14.45s/batch, batch_loss=14.3, batch_index=477, batch_size=256]

Validation:  64%|█████▊   | 477/743 [1:58:43<1:04:02, 14.45s/batch, batch_loss=2.45e+3, batch_index=478, batch_size=256]

Validation:  64%|█████▊   | 478/743 [1:58:43<1:03:25, 14.36s/batch, batch_loss=2.45e+3, batch_index=478, batch_size=256]

Validation:  64%|█████▊   | 478/743 [1:58:57<1:03:25, 14.36s/batch, batch_loss=2.08e+4, batch_index=479, batch_size=256]

Validation:  64%|█████▊   | 479/743 [1:58:57<1:02:45, 14.26s/batch, batch_loss=2.08e+4, batch_index=479, batch_size=256]

Validation:  64%|███████▋    | 479/743 [1:59:12<1:02:45, 14.26s/batch, batch_loss=11.8, batch_index=480, batch_size=256]

Validation:  65%|███████▊    | 480/743 [1:59:12<1:03:14, 14.43s/batch, batch_loss=11.8, batch_index=480, batch_size=256]

Validation:  65%|█████████     | 480/743 [1:59:30<1:03:14, 14.43s/batch, batch_loss=14, batch_index=481, batch_size=256]

Validation:  65%|█████████     | 481/743 [1:59:30<1:07:26, 15.45s/batch, batch_loss=14, batch_index=481, batch_size=256]

Validation:  65%|█████▊   | 481/743 [1:59:43<1:07:26, 15.45s/batch, batch_loss=6.96e+3, batch_index=482, batch_size=256]

Validation:  65%|█████▊   | 482/743 [1:59:43<1:05:14, 15.00s/batch, batch_loss=6.96e+3, batch_index=482, batch_size=256]

Validation:  65%|███████▊    | 482/743 [1:59:58<1:05:14, 15.00s/batch, batch_loss=19.2, batch_index=483, batch_size=256]

Validation:  65%|███████▊    | 483/743 [1:59:58<1:04:44, 14.94s/batch, batch_loss=19.2, batch_index=483, batch_size=256]

Validation:  65%|█████▊   | 483/743 [2:00:13<1:04:44, 14.94s/batch, batch_loss=2.31e+4, batch_index=484, batch_size=256]

Validation:  65%|█████▊   | 484/743 [2:00:13<1:03:41, 14.75s/batch, batch_loss=2.31e+4, batch_index=484, batch_size=256]

Validation:  65%|█████▊   | 484/743 [2:00:28<1:03:41, 14.75s/batch, batch_loss=3.13e+4, batch_index=485, batch_size=256]

Validation:  65%|█████▊   | 485/743 [2:00:28<1:04:03, 14.90s/batch, batch_loss=3.13e+4, batch_index=485, batch_size=256]

Validation:  65%|███████▊    | 485/743 [2:00:41<1:04:03, 14.90s/batch, batch_loss=15.9, batch_index=486, batch_size=256]

Validation:  65%|███████▊    | 486/743 [2:00:41<1:01:55, 14.46s/batch, batch_loss=15.9, batch_index=486, batch_size=256]

Validation:  65%|███████▊    | 486/743 [2:00:56<1:01:55, 14.46s/batch, batch_loss=34.3, batch_index=487, batch_size=256]

Validation:  66%|███████▊    | 487/743 [2:00:56<1:01:25, 14.40s/batch, batch_loss=34.3, batch_index=487, batch_size=256]

Validation:  66%|███████▊    | 487/743 [2:01:10<1:01:25, 14.40s/batch, batch_loss=25.4, batch_index=488, batch_size=256]

Validation:  66%|███████▉    | 488/743 [2:01:10<1:01:37, 14.50s/batch, batch_loss=25.4, batch_index=488, batch_size=256]

Validation:  66%|███████▉    | 488/743 [2:01:27<1:01:37, 14.50s/batch, batch_loss=13.5, batch_index=489, batch_size=256]

Validation:  66%|███████▉    | 489/743 [2:01:27<1:04:16, 15.18s/batch, batch_loss=13.5, batch_index=489, batch_size=256]

Validation:  66%|███████▉    | 489/743 [2:01:42<1:04:16, 15.18s/batch, batch_loss=18.3, batch_index=490, batch_size=256]

Validation:  66%|███████▉    | 490/743 [2:01:42<1:03:46, 15.12s/batch, batch_loss=18.3, batch_index=490, batch_size=256]

Validation:  66%|███████▉    | 490/743 [2:01:57<1:03:46, 15.12s/batch, batch_loss=18.9, batch_index=491, batch_size=256]

Validation:  66%|███████▉    | 491/743 [2:01:57<1:02:46, 14.95s/batch, batch_loss=18.9, batch_index=491, batch_size=256]

Validation:  66%|█████▉   | 491/743 [2:02:10<1:02:46, 14.95s/batch, batch_loss=1.04e+3, batch_index=492, batch_size=256]

Validation:  66%|█████▉   | 492/743 [2:02:10<1:01:09, 14.62s/batch, batch_loss=1.04e+3, batch_index=492, batch_size=256]

Validation:  66%|█████▉   | 492/743 [2:02:25<1:01:09, 14.62s/batch, batch_loss=1.43e+4, batch_index=493, batch_size=256]

Validation:  66%|█████▉   | 493/743 [2:02:25<1:01:17, 14.71s/batch, batch_loss=1.43e+4, batch_index=493, batch_size=256]

Validation:  66%|███████▉    | 493/743 [2:02:41<1:01:17, 14.71s/batch, batch_loss=10.1, batch_index=494, batch_size=256]

Validation:  66%|███████▉    | 494/743 [2:02:41<1:02:11, 14.99s/batch, batch_loss=10.1, batch_index=494, batch_size=256]

Validation:  66%|█████▉   | 494/743 [2:02:56<1:02:11, 14.99s/batch, batch_loss=1.18e+4, batch_index=495, batch_size=256]

Validation:  67%|█████▉   | 495/743 [2:02:56<1:02:00, 15.00s/batch, batch_loss=1.18e+4, batch_index=495, batch_size=256]

Validation:  67%|███████▉    | 495/743 [2:03:11<1:02:00, 15.00s/batch, batch_loss=16.6, batch_index=496, batch_size=256]

Validation:  67%|████████    | 496/743 [2:03:11<1:01:26, 14.93s/batch, batch_loss=16.6, batch_index=496, batch_size=256]

Validation:  67%|████████    | 496/743 [2:03:27<1:01:26, 14.93s/batch, batch_loss=14.3, batch_index=497, batch_size=256]

Validation:  67%|████████    | 497/743 [2:03:27<1:02:55, 15.35s/batch, batch_loss=14.3, batch_index=497, batch_size=256]

Validation:  67%|████████    | 497/743 [2:03:41<1:02:55, 15.35s/batch, batch_loss=15.9, batch_index=498, batch_size=256]

Validation:  67%|████████    | 498/743 [2:03:41<1:00:47, 14.89s/batch, batch_loss=15.9, batch_index=498, batch_size=256]

Validation:  67%|████████    | 498/743 [2:03:56<1:00:47, 14.89s/batch, batch_loss=7.74, batch_index=499, batch_size=256]

Validation:  67%|████████    | 499/743 [2:03:56<1:00:17, 14.82s/batch, batch_loss=7.74, batch_index=499, batch_size=256]

Validation:  67%|██████   | 499/743 [2:04:10<1:00:17, 14.82s/batch, batch_loss=2.51e+4, batch_index=500, batch_size=256]

Validation:  67%|███████▍   | 500/743 [2:04:10<59:39, 14.73s/batch, batch_loss=2.51e+4, batch_index=500, batch_size=256]

Validation:  67%|█████████▍    | 500/743 [2:04:24<59:39, 14.73s/batch, batch_loss=22.1, batch_index=501, batch_size=256]

Validation:  67%|█████████▍    | 501/743 [2:04:24<58:45, 14.57s/batch, batch_loss=22.1, batch_index=501, batch_size=256]

Validation:  67%|███████▍   | 501/743 [2:04:39<58:45, 14.57s/batch, batch_loss=3.15e+3, batch_index=502, batch_size=256]

Validation:  68%|███████▍   | 502/743 [2:04:39<58:16, 14.51s/batch, batch_loss=3.15e+3, batch_index=502, batch_size=256]

Validation:  68%|█████████▍    | 502/743 [2:04:53<58:16, 14.51s/batch, batch_loss=15.5, batch_index=503, batch_size=256]

Validation:  68%|█████████▍    | 503/743 [2:04:53<57:49, 14.46s/batch, batch_loss=15.5, batch_index=503, batch_size=256]

Validation:  68%|█████████▍    | 503/743 [2:05:07<57:49, 14.46s/batch, batch_loss=13.7, batch_index=504, batch_size=256]

Validation:  68%|█████████▍    | 504/743 [2:05:07<57:37, 14.47s/batch, batch_loss=13.7, batch_index=504, batch_size=256]

Validation:  68%|█████████▍    | 504/743 [2:05:22<57:37, 14.47s/batch, batch_loss=22.2, batch_index=505, batch_size=256]

Validation:  68%|█████████▌    | 505/743 [2:05:22<57:51, 14.58s/batch, batch_loss=22.2, batch_index=505, batch_size=256]

Validation:  68%|███████▍   | 505/743 [2:05:38<57:51, 14.58s/batch, batch_loss=2.84e+3, batch_index=506, batch_size=256]

Validation:  68%|███████▍   | 506/743 [2:05:38<58:25, 14.79s/batch, batch_loss=2.84e+3, batch_index=506, batch_size=256]

Validation:  68%|███████▍   | 506/743 [2:05:53<58:25, 14.79s/batch, batch_loss=1.99e+3, batch_index=507, batch_size=256]

Validation:  68%|███████▌   | 507/743 [2:05:53<58:35, 14.90s/batch, batch_loss=1.99e+3, batch_index=507, batch_size=256]

Validation:  68%|███████▌   | 507/743 [2:06:08<58:35, 14.90s/batch, batch_loss=8.39e+3, batch_index=508, batch_size=256]

Validation:  68%|███████▌   | 508/743 [2:06:08<58:41, 14.99s/batch, batch_loss=8.39e+3, batch_index=508, batch_size=256]

Validation:  68%|███████▌   | 508/743 [2:06:23<58:41, 14.99s/batch, batch_loss=8.48e+3, batch_index=509, batch_size=256]

Validation:  69%|███████▌   | 509/743 [2:06:23<58:30, 15.00s/batch, batch_loss=8.48e+3, batch_index=509, batch_size=256]

Validation:  69%|█████████▌    | 509/743 [2:06:37<58:30, 15.00s/batch, batch_loss=15.7, batch_index=510, batch_size=256]

Validation:  69%|█████████▌    | 510/743 [2:06:37<57:35, 14.83s/batch, batch_loss=15.7, batch_index=510, batch_size=256]

Validation:  69%|█████████▌    | 510/743 [2:06:52<57:35, 14.83s/batch, batch_loss=19.5, batch_index=511, batch_size=256]

Validation:  69%|█████████▋    | 511/743 [2:06:52<57:04, 14.76s/batch, batch_loss=19.5, batch_index=511, batch_size=256]

Validation:  69%|█████████▋    | 511/743 [2:07:07<57:04, 14.76s/batch, batch_loss=17.4, batch_index=512, batch_size=256]

Validation:  69%|█████████▋    | 512/743 [2:07:07<57:02, 14.82s/batch, batch_loss=17.4, batch_index=512, batch_size=256]

Validation:  69%|█████████▋    | 512/743 [2:07:21<57:02, 14.82s/batch, batch_loss=18.7, batch_index=513, batch_size=256]

Validation:  69%|█████████▋    | 513/743 [2:07:21<55:35, 14.50s/batch, batch_loss=18.7, batch_index=513, batch_size=256]

Validation:  69%|█████████▋    | 513/743 [2:07:38<55:35, 14.50s/batch, batch_loss=17.4, batch_index=514, batch_size=256]

Validation:  69%|█████████▋    | 514/743 [2:07:38<58:57, 15.45s/batch, batch_loss=17.4, batch_index=514, batch_size=256]

Validation:  69%|█████████▋    | 514/743 [2:07:54<58:57, 15.45s/batch, batch_loss=13.4, batch_index=515, batch_size=256]

Validation:  69%|█████████▋    | 515/743 [2:07:54<58:26, 15.38s/batch, batch_loss=13.4, batch_index=515, batch_size=256]

Validation:  69%|█████████▋    | 515/743 [2:08:08<58:26, 15.38s/batch, batch_loss=16.6, batch_index=516, batch_size=256]

Validation:  69%|█████████▋    | 516/743 [2:08:08<56:34, 14.96s/batch, batch_loss=16.6, batch_index=516, batch_size=256]

Validation:  69%|███████▋   | 516/743 [2:08:21<56:34, 14.96s/batch, batch_loss=6.15e+4, batch_index=517, batch_size=256]

Validation:  70%|███████▋   | 517/743 [2:08:21<54:30, 14.47s/batch, batch_loss=6.15e+4, batch_index=517, batch_size=256]

Validation:  70%|██████████▍    | 517/743 [2:08:34<54:30, 14.47s/batch, batch_loss=503, batch_index=518, batch_size=256]

Validation:  70%|██████████▍    | 518/743 [2:08:34<52:40, 14.05s/batch, batch_loss=503, batch_index=518, batch_size=256]

Validation:  70%|█████████▊    | 518/743 [2:08:49<52:40, 14.05s/batch, batch_loss=12.2, batch_index=519, batch_size=256]

Validation:  70%|█████████▊    | 519/743 [2:08:49<53:41, 14.38s/batch, batch_loss=12.2, batch_index=519, batch_size=256]

Validation:  70%|█████████▊    | 519/743 [2:09:04<53:41, 14.38s/batch, batch_loss=21.6, batch_index=520, batch_size=256]

Validation:  70%|█████████▊    | 520/743 [2:09:04<54:26, 14.65s/batch, batch_loss=21.6, batch_index=520, batch_size=256]

Validation:  70%|█████████▊    | 520/743 [2:09:20<54:26, 14.65s/batch, batch_loss=15.9, batch_index=521, batch_size=256]

Validation:  70%|█████████▊    | 521/743 [2:09:20<54:58, 14.86s/batch, batch_loss=15.9, batch_index=521, batch_size=256]

Validation:  70%|█████████▊    | 521/743 [2:09:35<54:58, 14.86s/batch, batch_loss=16.2, batch_index=522, batch_size=256]

Validation:  70%|█████████▊    | 522/743 [2:09:35<55:08, 14.97s/batch, batch_loss=16.2, batch_index=522, batch_size=256]

Validation:  70%|██████████▌    | 522/743 [2:09:50<55:08, 14.97s/batch, batch_loss=427, batch_index=523, batch_size=256]

Validation:  70%|██████████▌    | 523/743 [2:09:50<54:42, 14.92s/batch, batch_loss=427, batch_index=523, batch_size=256]

Validation:  70%|█████████▊    | 523/743 [2:10:05<54:42, 14.92s/batch, batch_loss=16.7, batch_index=524, batch_size=256]

Validation:  71%|█████████▊    | 524/743 [2:10:05<54:45, 15.00s/batch, batch_loss=16.7, batch_index=524, batch_size=256]

Validation:  71%|███████████▎    | 524/743 [2:10:20<54:45, 15.00s/batch, batch_loss=21, batch_index=525, batch_size=256]

Validation:  71%|███████████▎    | 525/743 [2:10:20<54:58, 15.13s/batch, batch_loss=21, batch_index=525, batch_size=256]

Validation:  71%|█████████▉    | 525/743 [2:10:36<54:58, 15.13s/batch, batch_loss=11.7, batch_index=526, batch_size=256]

Validation:  71%|█████████▉    | 526/743 [2:10:36<55:18, 15.29s/batch, batch_loss=11.7, batch_index=526, batch_size=256]

Validation:  71%|███████▊   | 526/743 [2:10:52<55:18, 15.29s/batch, batch_loss=3.75e+3, batch_index=527, batch_size=256]

Validation:  71%|███████▊   | 527/743 [2:10:52<55:34, 15.44s/batch, batch_loss=3.75e+3, batch_index=527, batch_size=256]

Validation:  71%|██████████▋    | 527/743 [2:11:07<55:34, 15.44s/batch, batch_loss=512, batch_index=528, batch_size=256]

Validation:  71%|██████████▋    | 528/743 [2:11:07<55:19, 15.44s/batch, batch_loss=512, batch_index=528, batch_size=256]

Validation:  71%|███████▊   | 528/743 [2:11:22<55:19, 15.44s/batch, batch_loss=6.51e+3, batch_index=529, batch_size=256]

Validation:  71%|███████▊   | 529/743 [2:11:22<54:44, 15.35s/batch, batch_loss=6.51e+3, batch_index=529, batch_size=256]

Validation:  71%|██████████▋    | 529/743 [2:11:40<54:44, 15.35s/batch, batch_loss=209, batch_index=530, batch_size=256]

Validation:  71%|██████████▋    | 530/743 [2:11:40<56:26, 15.90s/batch, batch_loss=209, batch_index=530, batch_size=256]

Validation:  71%|███████████▍    | 530/743 [2:11:55<56:26, 15.90s/batch, batch_loss=42, batch_index=531, batch_size=256]

Validation:  71%|███████████▍    | 531/743 [2:11:55<55:52, 15.81s/batch, batch_loss=42, batch_index=531, batch_size=256]

Validation:  71%|██████████▋    | 531/743 [2:12:11<55:52, 15.81s/batch, batch_loss=254, batch_index=532, batch_size=256]

Validation:  72%|██████████▋    | 532/743 [2:12:11<55:52, 15.89s/batch, batch_loss=254, batch_index=532, batch_size=256]

Validation:  72%|██████████    | 532/743 [2:12:27<55:52, 15.89s/batch, batch_loss=10.9, batch_index=533, batch_size=256]

Validation:  72%|██████████    | 533/743 [2:12:27<54:55, 15.69s/batch, batch_loss=10.9, batch_index=533, batch_size=256]

Validation:  72%|██████████    | 533/743 [2:12:42<54:55, 15.69s/batch, batch_loss=12.5, batch_index=534, batch_size=256]

Validation:  72%|██████████    | 534/743 [2:12:42<54:32, 15.66s/batch, batch_loss=12.5, batch_index=534, batch_size=256]

Validation:  72%|██████████    | 534/743 [2:12:57<54:32, 15.66s/batch, batch_loss=17.4, batch_index=535, batch_size=256]

Validation:  72%|██████████    | 535/743 [2:12:57<53:18, 15.38s/batch, batch_loss=17.4, batch_index=535, batch_size=256]

Validation:  72%|██████████    | 535/743 [2:13:12<53:18, 15.38s/batch, batch_loss=18.5, batch_index=536, batch_size=256]

Validation:  72%|██████████    | 536/743 [2:13:12<53:03, 15.38s/batch, batch_loss=18.5, batch_index=536, batch_size=256]

Validation:  72%|██████████    | 536/743 [2:13:27<53:03, 15.38s/batch, batch_loss=14.8, batch_index=537, batch_size=256]

Validation:  72%|██████████    | 537/743 [2:13:27<52:42, 15.35s/batch, batch_loss=14.8, batch_index=537, batch_size=256]

Validation:  72%|██████████    | 537/743 [2:13:43<52:42, 15.35s/batch, batch_loss=17.4, batch_index=538, batch_size=256]

Validation:  72%|██████████▏   | 538/743 [2:13:43<52:52, 15.48s/batch, batch_loss=17.4, batch_index=538, batch_size=256]

Validation:  72%|██████████▊    | 538/743 [2:14:01<52:52, 15.48s/batch, batch_loss=252, batch_index=539, batch_size=256]

Validation:  73%|██████████▉    | 539/743 [2:14:01<55:18, 16.27s/batch, batch_loss=252, batch_index=539, batch_size=256]

Validation:  73%|███████████▌    | 539/743 [2:14:16<55:18, 16.27s/batch, batch_loss=20, batch_index=540, batch_size=256]

Validation:  73%|███████████▋    | 540/743 [2:14:16<53:41, 15.87s/batch, batch_loss=20, batch_index=540, batch_size=256]

Validation:  73%|██████████▏   | 540/743 [2:14:32<53:41, 15.87s/batch, batch_loss=29.1, batch_index=541, batch_size=256]

Validation:  73%|██████████▏   | 541/743 [2:14:32<53:25, 15.87s/batch, batch_loss=29.1, batch_index=541, batch_size=256]

Validation:  73%|████████   | 541/743 [2:14:48<53:25, 15.87s/batch, batch_loss=1.95e+3, batch_index=542, batch_size=256]

Validation:  73%|████████   | 542/743 [2:14:48<52:41, 15.73s/batch, batch_loss=1.95e+3, batch_index=542, batch_size=256]

Validation:  73%|██████████▏   | 542/743 [2:15:02<52:41, 15.73s/batch, batch_loss=18.9, batch_index=543, batch_size=256]

Validation:  73%|██████████▏   | 543/743 [2:15:02<50:47, 15.24s/batch, batch_loss=18.9, batch_index=543, batch_size=256]

Validation:  73%|████████   | 543/743 [2:15:17<50:47, 15.24s/batch, batch_loss=1.09e+4, batch_index=544, batch_size=256]

Validation:  73%|████████   | 544/743 [2:15:17<50:14, 15.15s/batch, batch_loss=1.09e+4, batch_index=544, batch_size=256]

Validation:  73%|████████   | 544/743 [2:15:33<50:14, 15.15s/batch, batch_loss=2.75e+3, batch_index=545, batch_size=256]

Validation:  73%|████████   | 545/743 [2:15:33<51:21, 15.56s/batch, batch_loss=2.75e+3, batch_index=545, batch_size=256]

Validation:  73%|██████████▎   | 545/743 [2:15:48<51:21, 15.56s/batch, batch_loss=9.06, batch_index=546, batch_size=256]

Validation:  73%|██████████▎   | 546/743 [2:15:48<50:07, 15.27s/batch, batch_loss=9.06, batch_index=546, batch_size=256]

Validation:  73%|███████████    | 546/743 [2:16:02<50:07, 15.27s/batch, batch_loss=260, batch_index=547, batch_size=256]

Validation:  74%|███████████    | 547/743 [2:16:02<48:40, 14.90s/batch, batch_loss=260, batch_index=547, batch_size=256]

Validation:  74%|██████████▎   | 547/743 [2:16:16<48:40, 14.90s/batch, batch_loss=25.6, batch_index=548, batch_size=256]

Validation:  74%|██████████▎   | 548/743 [2:16:16<48:13, 14.84s/batch, batch_loss=25.6, batch_index=548, batch_size=256]

Validation:  74%|████████▊   | 548/743 [2:16:32<48:13, 14.84s/batch, batch_loss=4.1e+3, batch_index=549, batch_size=256]

Validation:  74%|████████▊   | 549/743 [2:16:32<48:52, 15.12s/batch, batch_loss=4.1e+3, batch_index=549, batch_size=256]

Validation:  74%|████████▏  | 549/743 [2:16:47<48:52, 15.12s/batch, batch_loss=1.28e+4, batch_index=550, batch_size=256]

Validation:  74%|████████▏  | 550/743 [2:16:47<48:31, 15.08s/batch, batch_loss=1.28e+4, batch_index=550, batch_size=256]

Validation:  74%|██████████▎   | 550/743 [2:17:02<48:31, 15.08s/batch, batch_loss=17.3, batch_index=551, batch_size=256]

Validation:  74%|██████████▍   | 551/743 [2:17:02<48:17, 15.09s/batch, batch_loss=17.3, batch_index=551, batch_size=256]

Validation:  74%|████████▏  | 551/743 [2:17:17<48:17, 15.09s/batch, batch_loss=6.76e+3, batch_index=552, batch_size=256]

Validation:  74%|████████▏  | 552/743 [2:17:17<47:10, 14.82s/batch, batch_loss=6.76e+3, batch_index=552, batch_size=256]

Validation:  74%|███████████▉    | 552/743 [2:17:31<47:10, 14.82s/batch, batch_loss=25, batch_index=553, batch_size=256]

Validation:  74%|███████████▉    | 553/743 [2:17:31<47:00, 14.84s/batch, batch_loss=25, batch_index=553, batch_size=256]

Validation:  74%|██████████▍   | 553/743 [2:17:46<47:00, 14.84s/batch, batch_loss=24.3, batch_index=554, batch_size=256]

Validation:  75%|██████████▍   | 554/743 [2:17:46<46:50, 14.87s/batch, batch_loss=24.3, batch_index=554, batch_size=256]

Validation:  75%|████████▏  | 554/743 [2:18:01<46:50, 14.87s/batch, batch_loss=2.47e+3, batch_index=555, batch_size=256]

Validation:  75%|████████▏  | 555/743 [2:18:01<46:38, 14.89s/batch, batch_loss=2.47e+3, batch_index=555, batch_size=256]

Validation:  75%|██████████▍   | 555/743 [2:18:16<46:38, 14.89s/batch, batch_loss=32.3, batch_index=556, batch_size=256]

Validation:  75%|██████████▍   | 556/743 [2:18:16<46:09, 14.81s/batch, batch_loss=32.3, batch_index=556, batch_size=256]

Validation:  75%|███████████▉    | 556/743 [2:18:30<46:09, 14.81s/batch, batch_loss=12, batch_index=557, batch_size=256]

Validation:  75%|███████████▉    | 557/743 [2:18:30<45:20, 14.63s/batch, batch_loss=12, batch_index=557, batch_size=256]

Validation:  75%|████████▏  | 557/743 [2:18:45<45:20, 14.63s/batch, batch_loss=1.51e+4, batch_index=558, batch_size=256]

Validation:  75%|████████▎  | 558/743 [2:18:45<45:21, 14.71s/batch, batch_loss=1.51e+4, batch_index=558, batch_size=256]

Validation:  75%|█████████   | 558/743 [2:18:59<45:21, 14.71s/batch, batch_loss=3.6e+3, batch_index=559, batch_size=256]

Validation:  75%|█████████   | 559/743 [2:18:59<44:23, 14.47s/batch, batch_loss=3.6e+3, batch_index=559, batch_size=256]

Validation:  75%|████████▎  | 559/743 [2:19:14<44:23, 14.47s/batch, batch_loss=2.93e+3, batch_index=560, batch_size=256]

Validation:  75%|████████▎  | 560/743 [2:19:14<44:28, 14.58s/batch, batch_loss=2.93e+3, batch_index=560, batch_size=256]

Validation:  75%|██████████▌   | 560/743 [2:19:31<44:28, 14.58s/batch, batch_loss=12.7, batch_index=561, batch_size=256]

Validation:  76%|██████████▌   | 561/743 [2:19:31<46:46, 15.42s/batch, batch_loss=12.7, batch_index=561, batch_size=256]

Validation:  76%|██████████▌   | 561/743 [2:19:45<46:46, 15.42s/batch, batch_loss=16.7, batch_index=562, batch_size=256]

Validation:  76%|██████████▌   | 562/743 [2:19:45<44:56, 14.90s/batch, batch_loss=16.7, batch_index=562, batch_size=256]

Validation:  76%|██████████▌   | 562/743 [2:19:59<44:56, 14.90s/batch, batch_loss=18.5, batch_index=563, batch_size=256]

Validation:  76%|██████████▌   | 563/743 [2:19:59<43:38, 14.55s/batch, batch_loss=18.5, batch_index=563, batch_size=256]

Validation:  76%|████████▎  | 563/743 [2:20:13<43:38, 14.55s/batch, batch_loss=1.08e+3, batch_index=564, batch_size=256]

Validation:  76%|████████▎  | 564/743 [2:20:13<43:04, 14.44s/batch, batch_loss=1.08e+3, batch_index=564, batch_size=256]

Validation:  76%|████████▎  | 564/743 [2:20:27<43:04, 14.44s/batch, batch_loss=3.69e+3, batch_index=565, batch_size=256]

Validation:  76%|████████▎  | 565/743 [2:20:27<42:38, 14.37s/batch, batch_loss=3.69e+3, batch_index=565, batch_size=256]

Validation:  76%|██████████▋   | 565/743 [2:20:41<42:38, 14.37s/batch, batch_loss=13.1, batch_index=566, batch_size=256]

Validation:  76%|██████████▋   | 566/743 [2:20:41<41:50, 14.18s/batch, batch_loss=13.1, batch_index=566, batch_size=256]

Validation:  76%|██████████▋   | 566/743 [2:20:54<41:50, 14.18s/batch, batch_loss=15.9, batch_index=567, batch_size=256]

Validation:  76%|██████████▋   | 567/743 [2:20:54<41:12, 14.05s/batch, batch_loss=15.9, batch_index=567, batch_size=256]

Validation:  76%|██████████▋   | 567/743 [2:21:08<41:12, 14.05s/batch, batch_loss=15.2, batch_index=568, batch_size=256]

Validation:  76%|██████████▋   | 568/743 [2:21:08<40:54, 14.02s/batch, batch_loss=15.2, batch_index=568, batch_size=256]

Validation:  76%|██████████▋   | 568/743 [2:21:23<40:54, 14.02s/batch, batch_loss=15.6, batch_index=569, batch_size=256]

Validation:  77%|██████████▋   | 569/743 [2:21:23<41:27, 14.29s/batch, batch_loss=15.6, batch_index=569, batch_size=256]

Validation:  77%|██████████▋   | 569/743 [2:21:38<41:27, 14.29s/batch, batch_loss=18.3, batch_index=570, batch_size=256]

Validation:  77%|██████████▋   | 570/743 [2:21:38<41:40, 14.46s/batch, batch_loss=18.3, batch_index=570, batch_size=256]

Validation:  77%|██████████▋   | 570/743 [2:21:53<41:40, 14.46s/batch, batch_loss=11.2, batch_index=571, batch_size=256]

Validation:  77%|██████████▊   | 571/743 [2:21:53<41:30, 14.48s/batch, batch_loss=11.2, batch_index=571, batch_size=256]

Validation:  77%|██████████▊   | 571/743 [2:22:07<41:30, 14.48s/batch, batch_loss=22.8, batch_index=572, batch_size=256]

Validation:  77%|██████████▊   | 572/743 [2:22:07<41:14, 14.47s/batch, batch_loss=22.8, batch_index=572, batch_size=256]

Validation:  77%|██████████▊   | 572/743 [2:22:21<41:14, 14.47s/batch, batch_loss=15.6, batch_index=573, batch_size=256]

Validation:  77%|██████████▊   | 573/743 [2:22:21<40:52, 14.43s/batch, batch_loss=15.6, batch_index=573, batch_size=256]

Validation:  77%|████████████▎   | 573/743 [2:22:36<40:52, 14.43s/batch, batch_loss=16, batch_index=574, batch_size=256]

Validation:  77%|████████████▎   | 574/743 [2:22:36<40:51, 14.51s/batch, batch_loss=16, batch_index=574, batch_size=256]

Validation:  77%|████████████▎   | 574/743 [2:22:50<40:51, 14.51s/batch, batch_loss=17, batch_index=575, batch_size=256]

Validation:  77%|████████████▍   | 575/743 [2:22:50<39:50, 14.23s/batch, batch_loss=17, batch_index=575, batch_size=256]

Validation:  77%|██████████▊   | 575/743 [2:23:05<39:50, 14.23s/batch, batch_loss=22.2, batch_index=576, batch_size=256]

Validation:  78%|██████████▊   | 576/743 [2:23:05<40:32, 14.56s/batch, batch_loss=22.2, batch_index=576, batch_size=256]

Validation:  78%|██████████▊   | 576/743 [2:23:22<40:32, 14.56s/batch, batch_loss=21.3, batch_index=577, batch_size=256]

Validation:  78%|██████████▊   | 577/743 [2:23:22<42:16, 15.28s/batch, batch_loss=21.3, batch_index=577, batch_size=256]

Validation:  78%|██████████▊   | 577/743 [2:23:38<42:16, 15.28s/batch, batch_loss=25.7, batch_index=578, batch_size=256]

Validation:  78%|██████████▉   | 578/743 [2:23:38<42:15, 15.37s/batch, batch_loss=25.7, batch_index=578, batch_size=256]

Validation:  78%|███████████▋   | 578/743 [2:23:52<42:15, 15.37s/batch, batch_loss=313, batch_index=579, batch_size=256]

Validation:  78%|███████████▋   | 579/743 [2:23:52<41:11, 15.07s/batch, batch_loss=313, batch_index=579, batch_size=256]

Validation:  78%|██████████▉   | 579/743 [2:24:06<41:11, 15.07s/batch, batch_loss=6.39, batch_index=580, batch_size=256]

Validation:  78%|██████████▉   | 580/743 [2:24:06<40:27, 14.89s/batch, batch_loss=6.39, batch_index=580, batch_size=256]

Validation:  78%|██████████▉   | 580/743 [2:24:21<40:27, 14.89s/batch, batch_loss=13.4, batch_index=581, batch_size=256]

Validation:  78%|██████████▉   | 581/743 [2:24:21<39:29, 14.63s/batch, batch_loss=13.4, batch_index=581, batch_size=256]

Validation:  78%|██████████▉   | 581/743 [2:24:34<39:29, 14.63s/batch, batch_loss=16.3, batch_index=582, batch_size=256]

Validation:  78%|██████████▉   | 582/743 [2:24:34<38:39, 14.41s/batch, batch_loss=16.3, batch_index=582, batch_size=256]

Validation:  78%|█████████▍  | 582/743 [2:24:49<38:39, 14.41s/batch, batch_loss=2.4e+3, batch_index=583, batch_size=256]

Validation:  78%|█████████▍  | 583/743 [2:24:49<38:54, 14.59s/batch, batch_loss=2.4e+3, batch_index=583, batch_size=256]

Validation:  78%|██████████▉   | 583/743 [2:25:06<38:54, 14.59s/batch, batch_loss=4.28, batch_index=584, batch_size=256]

Validation:  79%|███████████   | 584/743 [2:25:06<39:57, 15.08s/batch, batch_loss=4.28, batch_index=584, batch_size=256]

Validation:  79%|████████████▌   | 584/743 [2:25:21<39:57, 15.08s/batch, batch_loss=23, batch_index=585, batch_size=256]

Validation:  79%|████████████▌   | 585/743 [2:25:21<39:35, 15.03s/batch, batch_loss=23, batch_index=585, batch_size=256]

Validation:  79%|███████████▊   | 585/743 [2:25:36<39:35, 15.03s/batch, batch_loss=549, batch_index=586, batch_size=256]

Validation:  79%|███████████▊   | 586/743 [2:25:36<39:43, 15.18s/batch, batch_loss=549, batch_index=586, batch_size=256]

Validation:  79%|███████████   | 586/743 [2:25:51<39:43, 15.18s/batch, batch_loss=9.47, batch_index=587, batch_size=256]

Validation:  79%|███████████   | 587/743 [2:25:51<39:06, 15.04s/batch, batch_loss=9.47, batch_index=587, batch_size=256]

Validation:  79%|███████████▊   | 587/743 [2:26:06<39:06, 15.04s/batch, batch_loss=400, batch_index=588, batch_size=256]

Validation:  79%|███████████▊   | 588/743 [2:26:06<38:51, 15.04s/batch, batch_loss=400, batch_index=588, batch_size=256]

Validation:  79%|████████▋  | 588/743 [2:26:21<38:51, 15.04s/batch, batch_loss=2.51e+4, batch_index=589, batch_size=256]

Validation:  79%|████████▋  | 589/743 [2:26:21<38:41, 15.08s/batch, batch_loss=2.51e+4, batch_index=589, batch_size=256]

Validation:  79%|███████████   | 589/743 [2:26:36<38:41, 15.08s/batch, batch_loss=21.8, batch_index=590, batch_size=256]

Validation:  79%|███████████   | 590/743 [2:26:36<38:24, 15.06s/batch, batch_loss=21.8, batch_index=590, batch_size=256]

Validation:  79%|███████████   | 590/743 [2:26:50<38:24, 15.06s/batch, batch_loss=15.6, batch_index=591, batch_size=256]

Validation:  80%|███████████▏  | 591/743 [2:26:50<37:42, 14.88s/batch, batch_loss=15.6, batch_index=591, batch_size=256]

Validation:  80%|███████████▏  | 591/743 [2:27:05<37:42, 14.88s/batch, batch_loss=15.6, batch_index=592, batch_size=256]

Validation:  80%|███████████▏  | 592/743 [2:27:05<37:01, 14.71s/batch, batch_loss=15.6, batch_index=592, batch_size=256]

Validation:  80%|████████▊  | 592/743 [2:27:19<37:01, 14.71s/batch, batch_loss=2.39e+4, batch_index=593, batch_size=256]

Validation:  80%|████████▊  | 593/743 [2:27:19<36:17, 14.52s/batch, batch_loss=2.39e+4, batch_index=593, batch_size=256]

Validation:  80%|███████████▏  | 593/743 [2:27:33<36:17, 14.52s/batch, batch_loss=4.18, batch_index=594, batch_size=256]

Validation:  80%|███████████▏  | 594/743 [2:27:33<35:46, 14.41s/batch, batch_loss=4.18, batch_index=594, batch_size=256]

Validation:  80%|███████████▏  | 594/743 [2:27:47<35:46, 14.41s/batch, batch_loss=6.72, batch_index=595, batch_size=256]

Validation:  80%|███████████▏  | 595/743 [2:27:47<35:05, 14.22s/batch, batch_loss=6.72, batch_index=595, batch_size=256]

Validation:  80%|███████████▏  | 595/743 [2:28:01<35:05, 14.22s/batch, batch_loss=7.27, batch_index=596, batch_size=256]

Validation:  80%|███████████▏  | 596/743 [2:28:01<35:03, 14.31s/batch, batch_loss=7.27, batch_index=596, batch_size=256]

Validation:  80%|████████▊  | 596/743 [2:28:16<35:03, 14.31s/batch, batch_loss=1.79e+3, batch_index=597, batch_size=256]

Validation:  80%|████████▊  | 597/743 [2:28:16<35:03, 14.41s/batch, batch_loss=1.79e+3, batch_index=597, batch_size=256]

Validation:  80%|███████████▏  | 597/743 [2:28:31<35:03, 14.41s/batch, batch_loss=16.1, batch_index=598, batch_size=256]

Validation:  80%|███████████▎  | 598/743 [2:28:31<34:57, 14.46s/batch, batch_loss=16.1, batch_index=598, batch_size=256]

Validation:  80%|███████████▎  | 598/743 [2:28:48<34:57, 14.46s/batch, batch_loss=14.7, batch_index=599, batch_size=256]

Validation:  81%|███████████▎  | 599/743 [2:28:48<36:30, 15.21s/batch, batch_loss=14.7, batch_index=599, batch_size=256]

Validation:  81%|███████████▎  | 599/743 [2:29:02<36:30, 15.21s/batch, batch_loss=21.3, batch_index=600, batch_size=256]

Validation:  81%|███████████▎  | 600/743 [2:29:02<35:51, 15.05s/batch, batch_loss=21.3, batch_index=600, batch_size=256]

Validation:  81%|███████████▎  | 600/743 [2:29:16<35:51, 15.05s/batch, batch_loss=14.2, batch_index=601, batch_size=256]

Validation:  81%|███████████▎  | 601/743 [2:29:16<34:42, 14.66s/batch, batch_loss=14.2, batch_index=601, batch_size=256]

Validation:  81%|███████████▎  | 601/743 [2:29:31<34:42, 14.66s/batch, batch_loss=18.1, batch_index=602, batch_size=256]

Validation:  81%|███████████▎  | 602/743 [2:29:31<34:43, 14.77s/batch, batch_loss=18.1, batch_index=602, batch_size=256]

Validation:  81%|████████▉  | 602/743 [2:29:45<34:43, 14.77s/batch, batch_loss=1.15e+4, batch_index=603, batch_size=256]

Validation:  81%|████████▉  | 603/743 [2:29:45<33:55, 14.54s/batch, batch_loss=1.15e+4, batch_index=603, batch_size=256]

Validation:  81%|███████████▎  | 603/743 [2:29:59<33:55, 14.54s/batch, batch_loss=22.5, batch_index=604, batch_size=256]

Validation:  81%|███████████▍  | 604/743 [2:29:59<33:19, 14.38s/batch, batch_loss=22.5, batch_index=604, batch_size=256]

Validation:  81%|███████████▍  | 604/743 [2:30:13<33:19, 14.38s/batch, batch_loss=23.8, batch_index=605, batch_size=256]

Validation:  81%|███████████▍  | 605/743 [2:30:13<33:10, 14.42s/batch, batch_loss=23.8, batch_index=605, batch_size=256]

Validation:  81%|████████████▏  | 605/743 [2:30:29<33:10, 14.42s/batch, batch_loss=251, batch_index=606, batch_size=256]

Validation:  82%|████████████▏  | 606/743 [2:30:29<33:25, 14.64s/batch, batch_loss=251, batch_index=606, batch_size=256]

Validation:  82%|███████████▍  | 606/743 [2:30:43<33:25, 14.64s/batch, batch_loss=31.4, batch_index=607, batch_size=256]

Validation:  82%|███████████▍  | 607/743 [2:30:43<33:03, 14.58s/batch, batch_loss=31.4, batch_index=607, batch_size=256]

Validation:  82%|█████████████   | 607/743 [2:30:58<33:03, 14.58s/batch, batch_loss=21, batch_index=608, batch_size=256]

Validation:  82%|█████████████   | 608/743 [2:30:58<33:07, 14.72s/batch, batch_loss=21, batch_index=608, batch_size=256]

Validation:  82%|███████████▍  | 608/743 [2:31:14<33:07, 14.72s/batch, batch_loss=17.8, batch_index=609, batch_size=256]

Validation:  82%|███████████▍  | 609/743 [2:31:14<33:28, 14.99s/batch, batch_loss=17.8, batch_index=609, batch_size=256]

Validation:  82%|███████████▍  | 609/743 [2:31:29<33:28, 14.99s/batch, batch_loss=16.8, batch_index=610, batch_size=256]

Validation:  82%|███████████▍  | 610/743 [2:31:29<33:24, 15.07s/batch, batch_loss=16.8, batch_index=610, batch_size=256]

Validation:  82%|███████████▍  | 610/743 [2:31:44<33:24, 15.07s/batch, batch_loss=20.5, batch_index=611, batch_size=256]

Validation:  82%|███████████▌  | 611/743 [2:31:44<33:00, 15.01s/batch, batch_loss=20.5, batch_index=611, batch_size=256]

Validation:  82%|█████████████▏  | 611/743 [2:31:59<33:00, 15.01s/batch, batch_loss=12, batch_index=612, batch_size=256]

Validation:  82%|█████████████▏  | 612/743 [2:31:59<32:47, 15.02s/batch, batch_loss=12, batch_index=612, batch_size=256]

Validation:  82%|███████████▌  | 612/743 [2:32:14<32:47, 15.02s/batch, batch_loss=17.1, batch_index=613, batch_size=256]

Validation:  83%|███████████▌  | 613/743 [2:32:14<32:21, 14.94s/batch, batch_loss=17.1, batch_index=613, batch_size=256]

Validation:  83%|█████████  | 613/743 [2:32:28<32:21, 14.94s/batch, batch_loss=5.64e+3, batch_index=614, batch_size=256]

Validation:  83%|█████████  | 614/743 [2:32:28<31:34, 14.69s/batch, batch_loss=5.64e+3, batch_index=614, batch_size=256]

Validation:  83%|███████████▌  | 614/743 [2:32:43<31:34, 14.69s/batch, batch_loss=15.3, batch_index=615, batch_size=256]

Validation:  83%|███████████▌  | 615/743 [2:32:43<31:32, 14.79s/batch, batch_loss=15.3, batch_index=615, batch_size=256]

Validation:  83%|███████████▌  | 615/743 [2:32:58<31:32, 14.79s/batch, batch_loss=13.8, batch_index=616, batch_size=256]

Validation:  83%|███████████▌  | 616/743 [2:32:58<31:39, 14.96s/batch, batch_loss=13.8, batch_index=616, batch_size=256]

Validation:  83%|███████████▌  | 616/743 [2:33:13<31:39, 14.96s/batch, batch_loss=9.56, batch_index=617, batch_size=256]

Validation:  83%|███████████▋  | 617/743 [2:33:13<31:21, 14.93s/batch, batch_loss=9.56, batch_index=617, batch_size=256]

Validation:  83%|███████████▋  | 617/743 [2:33:28<31:21, 14.93s/batch, batch_loss=11.6, batch_index=618, batch_size=256]

Validation:  83%|███████████▋  | 618/743 [2:33:28<31:01, 14.89s/batch, batch_loss=11.6, batch_index=618, batch_size=256]

Validation:  83%|████████████▍  | 618/743 [2:33:42<31:01, 14.89s/batch, batch_loss=342, batch_index=619, batch_size=256]

Validation:  83%|████████████▍  | 619/743 [2:33:42<30:35, 14.81s/batch, batch_loss=342, batch_index=619, batch_size=256]

Validation:  83%|███████████▋  | 619/743 [2:33:57<30:35, 14.81s/batch, batch_loss=14.3, batch_index=620, batch_size=256]

Validation:  83%|███████████▋  | 620/743 [2:33:57<29:57, 14.61s/batch, batch_loss=14.3, batch_index=620, batch_size=256]

Validation:  83%|███████████▋  | 620/743 [2:34:11<29:57, 14.61s/batch, batch_loss=10.1, batch_index=621, batch_size=256]

Validation:  84%|███████████▋  | 621/743 [2:34:11<29:22, 14.45s/batch, batch_loss=10.1, batch_index=621, batch_size=256]

Validation:  84%|███████████▋  | 621/743 [2:34:26<29:22, 14.45s/batch, batch_loss=14.3, batch_index=622, batch_size=256]

Validation:  84%|███████████▋  | 622/743 [2:34:26<29:46, 14.76s/batch, batch_loss=14.3, batch_index=622, batch_size=256]

Validation:  84%|████████████▌  | 622/743 [2:34:41<29:46, 14.76s/batch, batch_loss=193, batch_index=623, batch_size=256]

Validation:  84%|████████████▌  | 623/743 [2:34:41<29:47, 14.89s/batch, batch_loss=193, batch_index=623, batch_size=256]

Validation:  84%|███████████▋  | 623/743 [2:34:56<29:47, 14.89s/batch, batch_loss=15.8, batch_index=624, batch_size=256]

Validation:  84%|███████████▊  | 624/743 [2:34:56<29:13, 14.74s/batch, batch_loss=15.8, batch_index=624, batch_size=256]

Validation:  84%|█████████▏ | 624/743 [2:35:10<29:13, 14.74s/batch, batch_loss=2.38e+3, batch_index=625, batch_size=256]

Validation:  84%|█████████▎ | 625/743 [2:35:10<28:43, 14.60s/batch, batch_loss=2.38e+3, batch_index=625, batch_size=256]

Validation:  84%|███████████▊  | 625/743 [2:35:25<28:43, 14.60s/batch, batch_loss=19.7, batch_index=626, batch_size=256]

Validation:  84%|███████████▊  | 626/743 [2:35:25<28:30, 14.62s/batch, batch_loss=19.7, batch_index=626, batch_size=256]

Validation:  84%|███████████▊  | 626/743 [2:35:38<28:30, 14.62s/batch, batch_loss=18.9, batch_index=627, batch_size=256]

Validation:  84%|███████████▊  | 627/743 [2:35:38<27:46, 14.37s/batch, batch_loss=18.9, batch_index=627, batch_size=256]

Validation:  84%|███████████▊  | 627/743 [2:35:53<27:46, 14.37s/batch, batch_loss=17.2, batch_index=628, batch_size=256]

Validation:  85%|███████████▊  | 628/743 [2:35:53<27:37, 14.42s/batch, batch_loss=17.2, batch_index=628, batch_size=256]

Validation:  85%|███████████▊  | 628/743 [2:36:07<27:37, 14.42s/batch, batch_loss=13.4, batch_index=629, batch_size=256]

Validation:  85%|███████████▊  | 629/743 [2:36:07<27:02, 14.23s/batch, batch_loss=13.4, batch_index=629, batch_size=256]

Validation:  85%|███████████▊  | 629/743 [2:36:21<27:02, 14.23s/batch, batch_loss=17.2, batch_index=630, batch_size=256]

Validation:  85%|███████████▊  | 630/743 [2:36:21<26:40, 14.16s/batch, batch_loss=17.2, batch_index=630, batch_size=256]

Validation:  85%|████████████▋  | 630/743 [2:36:35<26:40, 14.16s/batch, batch_loss=243, batch_index=631, batch_size=256]

Validation:  85%|████████████▋  | 631/743 [2:36:35<26:23, 14.13s/batch, batch_loss=243, batch_index=631, batch_size=256]

Validation:  85%|███████████▉  | 631/743 [2:36:49<26:23, 14.13s/batch, batch_loss=17.8, batch_index=632, batch_size=256]

Validation:  85%|███████████▉  | 632/743 [2:36:49<26:20, 14.24s/batch, batch_loss=17.8, batch_index=632, batch_size=256]

Validation:  85%|███████████▉  | 632/743 [2:37:06<26:20, 14.24s/batch, batch_loss=15.8, batch_index=633, batch_size=256]

Validation:  85%|███████████▉  | 633/743 [2:37:06<27:17, 14.88s/batch, batch_loss=15.8, batch_index=633, batch_size=256]

Validation:  85%|███████████▉  | 633/743 [2:37:20<27:17, 14.88s/batch, batch_loss=12.4, batch_index=634, batch_size=256]

Validation:  85%|███████████▉  | 634/743 [2:37:20<26:53, 14.80s/batch, batch_loss=12.4, batch_index=634, batch_size=256]

Validation:  85%|███████████▉  | 634/743 [2:37:35<26:53, 14.80s/batch, batch_loss=9.17, batch_index=635, batch_size=256]

Validation:  85%|███████████▉  | 635/743 [2:37:35<26:28, 14.71s/batch, batch_loss=9.17, batch_index=635, batch_size=256]

Validation:  85%|████████████▊  | 635/743 [2:37:49<26:28, 14.71s/batch, batch_loss=797, batch_index=636, batch_size=256]

Validation:  86%|████████████▊  | 636/743 [2:37:49<26:02, 14.60s/batch, batch_loss=797, batch_index=636, batch_size=256]

Validation:  86%|████████████▊  | 636/743 [2:38:06<26:02, 14.60s/batch, batch_loss=715, batch_index=637, batch_size=256]

Validation:  86%|████████████▊  | 637/743 [2:38:06<26:48, 15.17s/batch, batch_loss=715, batch_index=637, batch_size=256]

Validation:  86%|████████████  | 637/743 [2:38:20<26:48, 15.17s/batch, batch_loss=20.7, batch_index=638, batch_size=256]

Validation:  86%|████████████  | 638/743 [2:38:20<26:05, 14.90s/batch, batch_loss=20.7, batch_index=638, batch_size=256]

Validation:  86%|█████████▍ | 638/743 [2:38:33<26:05, 14.90s/batch, batch_loss=1.21e+4, batch_index=639, batch_size=256]

Validation:  86%|█████████▍ | 639/743 [2:38:33<24:42, 14.25s/batch, batch_loss=1.21e+4, batch_index=639, batch_size=256]

Validation:  86%|████████████  | 639/743 [2:38:46<24:42, 14.25s/batch, batch_loss=22.5, batch_index=640, batch_size=256]

Validation:  86%|████████████  | 640/743 [2:38:46<24:14, 14.12s/batch, batch_loss=22.5, batch_index=640, batch_size=256]

Validation:  86%|████████████  | 640/743 [2:39:02<24:14, 14.12s/batch, batch_loss=32.6, batch_index=641, batch_size=256]

Validation:  86%|████████████  | 641/743 [2:39:02<24:56, 14.68s/batch, batch_loss=32.6, batch_index=641, batch_size=256]

Validation:  86%|████████████  | 641/743 [2:39:18<24:56, 14.68s/batch, batch_loss=31.7, batch_index=642, batch_size=256]

Validation:  86%|████████████  | 642/743 [2:39:18<25:21, 15.07s/batch, batch_loss=31.7, batch_index=642, batch_size=256]

Validation:  86%|█████████▌ | 642/743 [2:39:36<25:21, 15.07s/batch, batch_loss=1.04e+3, batch_index=643, batch_size=256]

Validation:  87%|█████████▌ | 643/743 [2:39:36<26:20, 15.80s/batch, batch_loss=1.04e+3, batch_index=643, batch_size=256]

Validation:  87%|████████████  | 643/743 [2:39:52<26:20, 15.80s/batch, batch_loss=22.7, batch_index=644, batch_size=256]

Validation:  87%|████████████▏ | 644/743 [2:39:52<26:00, 15.77s/batch, batch_loss=22.7, batch_index=644, batch_size=256]

Validation:  87%|████████████▏ | 644/743 [2:40:07<26:00, 15.77s/batch, batch_loss=20.5, batch_index=645, batch_size=256]

Validation:  87%|████████████▏ | 645/743 [2:40:07<25:19, 15.51s/batch, batch_loss=20.5, batch_index=645, batch_size=256]

Validation:  87%|█████████▌ | 645/743 [2:40:20<25:19, 15.51s/batch, batch_loss=6.24e+3, batch_index=646, batch_size=256]

Validation:  87%|█████████▌ | 646/743 [2:40:20<24:18, 15.04s/batch, batch_loss=6.24e+3, batch_index=646, batch_size=256]

Validation:  87%|████████████▏ | 646/743 [2:40:35<24:18, 15.04s/batch, batch_loss=20.3, batch_index=647, batch_size=256]

Validation:  87%|████████████▏ | 647/743 [2:40:35<23:50, 14.91s/batch, batch_loss=20.3, batch_index=647, batch_size=256]

Validation:  87%|████████████▏ | 647/743 [2:40:50<23:50, 14.91s/batch, batch_loss=8.13, batch_index=648, batch_size=256]

Validation:  87%|████████████▏ | 648/743 [2:40:50<23:32, 14.87s/batch, batch_loss=8.13, batch_index=648, batch_size=256]

Validation:  87%|████████████▏ | 648/743 [2:41:08<23:32, 14.87s/batch, batch_loss=10.9, batch_index=649, batch_size=256]

Validation:  87%|████████████▏ | 649/743 [2:41:08<24:48, 15.83s/batch, batch_loss=10.9, batch_index=649, batch_size=256]

Validation:  87%|████████████▏ | 649/743 [2:41:23<24:48, 15.83s/batch, batch_loss=15.6, batch_index=650, batch_size=256]

Validation:  87%|████████████▏ | 650/743 [2:41:23<24:20, 15.70s/batch, batch_loss=15.6, batch_index=650, batch_size=256]

Validation:  87%|████████████▏ | 650/743 [2:41:38<24:20, 15.70s/batch, batch_loss=26.6, batch_index=651, batch_size=256]

Validation:  88%|████████████▎ | 651/743 [2:41:38<23:49, 15.54s/batch, batch_loss=26.6, batch_index=651, batch_size=256]

Validation:  88%|████████████▎ | 651/743 [2:41:54<23:49, 15.54s/batch, batch_loss=25.4, batch_index=652, batch_size=256]

Validation:  88%|████████████▎ | 652/743 [2:41:54<23:34, 15.54s/batch, batch_loss=25.4, batch_index=652, batch_size=256]

Validation:  88%|████████████▎ | 652/743 [2:42:09<23:34, 15.54s/batch, batch_loss=14.9, batch_index=653, batch_size=256]

Validation:  88%|████████████▎ | 653/743 [2:42:09<23:04, 15.38s/batch, batch_loss=14.9, batch_index=653, batch_size=256]

Validation:  88%|████████████▎ | 653/743 [2:42:23<23:04, 15.38s/batch, batch_loss=20.7, batch_index=654, batch_size=256]

Validation:  88%|████████████▎ | 654/743 [2:42:23<22:08, 14.93s/batch, batch_loss=20.7, batch_index=654, batch_size=256]

Validation:  88%|████████████▎ | 654/743 [2:42:37<22:08, 14.93s/batch, batch_loss=22.1, batch_index=655, batch_size=256]

Validation:  88%|████████████▎ | 655/743 [2:42:37<21:31, 14.68s/batch, batch_loss=22.1, batch_index=655, batch_size=256]

Validation:  88%|██████████████  | 655/743 [2:42:52<21:31, 14.68s/batch, batch_loss=19, batch_index=656, batch_size=256]

Validation:  88%|██████████████▏ | 656/743 [2:42:52<21:14, 14.65s/batch, batch_loss=19, batch_index=656, batch_size=256]

Validation:  88%|████████████▎ | 656/743 [2:43:06<21:14, 14.65s/batch, batch_loss=14.8, batch_index=657, batch_size=256]

Validation:  88%|████████████▍ | 657/743 [2:43:06<21:06, 14.72s/batch, batch_loss=14.8, batch_index=657, batch_size=256]

Validation:  88%|████████████▍ | 657/743 [2:43:23<21:06, 14.72s/batch, batch_loss=15.4, batch_index=658, batch_size=256]

Validation:  89%|████████████▍ | 658/743 [2:43:23<21:34, 15.23s/batch, batch_loss=15.4, batch_index=658, batch_size=256]

Validation:  89%|████████████▍ | 658/743 [2:43:37<21:34, 15.23s/batch, batch_loss=22.1, batch_index=659, batch_size=256]

Validation:  89%|████████████▍ | 659/743 [2:43:37<20:53, 14.92s/batch, batch_loss=22.1, batch_index=659, batch_size=256]

Validation:  89%|████████████▍ | 659/743 [2:43:52<20:53, 14.92s/batch, batch_loss=23.4, batch_index=660, batch_size=256]

Validation:  89%|████████████▍ | 660/743 [2:43:52<20:26, 14.78s/batch, batch_loss=23.4, batch_index=660, batch_size=256]

Validation:  89%|██████████████▏ | 660/743 [2:44:07<20:26, 14.78s/batch, batch_loss=20, batch_index=661, batch_size=256]

Validation:  89%|██████████████▏ | 661/743 [2:44:07<20:16, 14.84s/batch, batch_loss=20, batch_index=661, batch_size=256]

Validation:  89%|████████████▍ | 661/743 [2:44:22<20:16, 14.84s/batch, batch_loss=10.6, batch_index=662, batch_size=256]

Validation:  89%|████████████▍ | 662/743 [2:44:22<20:16, 15.02s/batch, batch_loss=10.6, batch_index=662, batch_size=256]

Validation:  89%|█████████▊ | 662/743 [2:44:36<20:16, 15.02s/batch, batch_loss=3.58e+3, batch_index=663, batch_size=256]

Validation:  89%|█████████▊ | 663/743 [2:44:36<19:47, 14.84s/batch, batch_loss=3.58e+3, batch_index=663, batch_size=256]

Validation:  89%|████████████▍ | 663/743 [2:44:51<19:47, 14.84s/batch, batch_loss=15.3, batch_index=664, batch_size=256]

Validation:  89%|████████████▌ | 664/743 [2:44:51<19:32, 14.84s/batch, batch_loss=15.3, batch_index=664, batch_size=256]

Validation:  89%|████████████▌ | 664/743 [2:45:09<19:32, 14.84s/batch, batch_loss=19.7, batch_index=665, batch_size=256]

Validation:  90%|████████████▌ | 665/743 [2:45:09<20:35, 15.83s/batch, batch_loss=19.7, batch_index=665, batch_size=256]

Validation:  90%|████████████▌ | 665/743 [2:45:25<20:35, 15.83s/batch, batch_loss=14.6, batch_index=666, batch_size=256]

Validation:  90%|████████████▌ | 666/743 [2:45:25<20:06, 15.67s/batch, batch_loss=14.6, batch_index=666, batch_size=256]

Validation:  90%|█████████▊ | 666/743 [2:45:38<20:06, 15.67s/batch, batch_loss=2.09e+4, batch_index=667, batch_size=256]

Validation:  90%|█████████▊ | 667/743 [2:45:38<19:07, 15.09s/batch, batch_loss=2.09e+4, batch_index=667, batch_size=256]

Validation:  90%|████████████▌ | 667/743 [2:45:54<19:07, 15.09s/batch, batch_loss=19.2, batch_index=668, batch_size=256]

Validation:  90%|████████████▌ | 668/743 [2:45:54<18:57, 15.16s/batch, batch_loss=19.2, batch_index=668, batch_size=256]

Validation:  90%|████████████▌ | 668/743 [2:46:07<18:57, 15.16s/batch, batch_loss=24.9, batch_index=669, batch_size=256]

Validation:  90%|████████████▌ | 669/743 [2:46:07<18:09, 14.72s/batch, batch_loss=24.9, batch_index=669, batch_size=256]

Validation:  90%|████████████▌ | 669/743 [2:46:23<18:09, 14.72s/batch, batch_loss=26.5, batch_index=670, batch_size=256]

Validation:  90%|████████████▌ | 670/743 [2:46:23<18:06, 14.88s/batch, batch_loss=26.5, batch_index=670, batch_size=256]

Validation:  90%|█████████▉ | 670/743 [2:46:37<18:06, 14.88s/batch, batch_loss=3.11e+3, batch_index=671, batch_size=256]

Validation:  90%|█████████▉ | 671/743 [2:46:37<17:45, 14.80s/batch, batch_loss=3.11e+3, batch_index=671, batch_size=256]

Validation:  90%|████████████▋ | 671/743 [2:46:52<17:45, 14.80s/batch, batch_loss=21.9, batch_index=672, batch_size=256]

Validation:  90%|████████████▋ | 672/743 [2:46:52<17:19, 14.64s/batch, batch_loss=21.9, batch_index=672, batch_size=256]

Validation:  90%|████████████▋ | 672/743 [2:47:07<17:19, 14.64s/batch, batch_loss=15.9, batch_index=673, batch_size=256]

Validation:  91%|████████████▋ | 673/743 [2:47:07<17:15, 14.79s/batch, batch_loss=15.9, batch_index=673, batch_size=256]

Validation:  91%|████████████▋ | 673/743 [2:47:24<17:15, 14.79s/batch, batch_loss=12.4, batch_index=674, batch_size=256]

Validation:  91%|████████████▋ | 674/743 [2:47:24<17:58, 15.63s/batch, batch_loss=12.4, batch_index=674, batch_size=256]

Validation:  91%|████████████▋ | 674/743 [2:47:40<17:58, 15.63s/batch, batch_loss=25.4, batch_index=675, batch_size=256]

Validation:  91%|████████████▋ | 675/743 [2:47:40<17:40, 15.59s/batch, batch_loss=25.4, batch_index=675, batch_size=256]

Validation:  91%|████████████▋ | 675/743 [2:47:55<17:40, 15.59s/batch, batch_loss=20.5, batch_index=676, batch_size=256]

Validation:  91%|████████████▋ | 676/743 [2:47:55<17:24, 15.60s/batch, batch_loss=20.5, batch_index=676, batch_size=256]

Validation:  91%|████████████▋ | 676/743 [2:48:10<17:24, 15.60s/batch, batch_loss=26.6, batch_index=677, batch_size=256]

Validation:  91%|████████████▊ | 677/743 [2:48:10<16:47, 15.27s/batch, batch_loss=26.6, batch_index=677, batch_size=256]

Validation:  91%|████████████▊ | 677/743 [2:48:25<16:47, 15.27s/batch, batch_loss=16.5, batch_index=678, batch_size=256]

Validation:  91%|████████████▊ | 678/743 [2:48:25<16:32, 15.27s/batch, batch_loss=16.5, batch_index=678, batch_size=256]

Validation:  91%|████████████▊ | 678/743 [2:48:40<16:32, 15.27s/batch, batch_loss=15.1, batch_index=679, batch_size=256]

Validation:  91%|████████████▊ | 679/743 [2:48:40<16:09, 15.15s/batch, batch_loss=15.1, batch_index=679, batch_size=256]

Validation:  91%|████████████▊ | 679/743 [2:48:55<16:09, 15.15s/batch, batch_loss=20.7, batch_index=680, batch_size=256]

Validation:  92%|████████████▊ | 680/743 [2:48:55<15:46, 15.03s/batch, batch_loss=20.7, batch_index=680, batch_size=256]

Validation:  92%|████████████▊ | 680/743 [2:49:09<15:46, 15.03s/batch, batch_loss=20.9, batch_index=681, batch_size=256]

Validation:  92%|████████████▊ | 681/743 [2:49:09<15:23, 14.90s/batch, batch_loss=20.9, batch_index=681, batch_size=256]

Validation:  92%|████████████▊ | 681/743 [2:49:24<15:23, 14.90s/batch, batch_loss=26.1, batch_index=682, batch_size=256]

Validation:  92%|████████████▊ | 682/743 [2:49:24<14:59, 14.75s/batch, batch_loss=26.1, batch_index=682, batch_size=256]

Validation:  92%|████████████▊ | 682/743 [2:49:40<14:59, 14.75s/batch, batch_loss=19.6, batch_index=683, batch_size=256]

Validation:  92%|████████████▊ | 683/743 [2:49:40<15:08, 15.14s/batch, batch_loss=19.6, batch_index=683, batch_size=256]

Validation:  92%|██████████████▋ | 683/743 [2:49:54<15:08, 15.14s/batch, batch_loss=17, batch_index=684, batch_size=256]

Validation:  92%|██████████████▋ | 684/743 [2:49:54<14:43, 14.98s/batch, batch_loss=17, batch_index=684, batch_size=256]

Validation:  92%|████████████▉ | 684/743 [2:50:10<14:43, 14.98s/batch, batch_loss=14.1, batch_index=685, batch_size=256]

Validation:  92%|████████████▉ | 685/743 [2:50:10<14:31, 15.02s/batch, batch_loss=14.1, batch_index=685, batch_size=256]

Validation:  92%|██████████▏| 685/743 [2:50:25<14:31, 15.02s/batch, batch_loss=1.66e+3, batch_index=686, batch_size=256]

Validation:  92%|██████████▏| 686/743 [2:50:25<14:19, 15.08s/batch, batch_loss=1.66e+3, batch_index=686, batch_size=256]

Validation:  92%|████████████▉ | 686/743 [2:50:39<14:19, 15.08s/batch, batch_loss=24.2, batch_index=687, batch_size=256]

Validation:  92%|████████████▉ | 687/743 [2:50:39<13:54, 14.91s/batch, batch_loss=24.2, batch_index=687, batch_size=256]

Validation:  92%|████████████▉ | 687/743 [2:50:53<13:54, 14.91s/batch, batch_loss=13.7, batch_index=688, batch_size=256]

Validation:  93%|████████████▉ | 688/743 [2:50:53<13:26, 14.66s/batch, batch_loss=13.7, batch_index=688, batch_size=256]

Validation:  93%|████████████▉ | 688/743 [2:51:07<13:26, 14.66s/batch, batch_loss=15.8, batch_index=689, batch_size=256]

Validation:  93%|████████████▉ | 689/743 [2:51:07<12:59, 14.44s/batch, batch_loss=15.8, batch_index=689, batch_size=256]

Validation:  93%|████████████▉ | 689/743 [2:51:25<12:59, 14.44s/batch, batch_loss=19.7, batch_index=690, batch_size=256]

Validation:  93%|█████████████ | 690/743 [2:51:25<13:34, 15.36s/batch, batch_loss=19.7, batch_index=690, batch_size=256]

Validation:  93%|█████████████ | 690/743 [2:51:39<13:34, 15.36s/batch, batch_loss=13.4, batch_index=691, batch_size=256]

Validation:  93%|█████████████ | 691/743 [2:51:39<13:06, 15.12s/batch, batch_loss=13.4, batch_index=691, batch_size=256]

Validation:  93%|█████████████ | 691/743 [2:51:54<13:06, 15.12s/batch, batch_loss=23.5, batch_index=692, batch_size=256]

Validation:  93%|█████████████ | 692/743 [2:51:54<12:49, 15.09s/batch, batch_loss=23.5, batch_index=692, batch_size=256]

Validation:  93%|█████████████ | 692/743 [2:52:09<12:49, 15.09s/batch, batch_loss=23.4, batch_index=693, batch_size=256]

Validation:  93%|█████████████ | 693/743 [2:52:09<12:25, 14.92s/batch, batch_loss=23.4, batch_index=693, batch_size=256]

Validation:  93%|█████████████ | 693/743 [2:52:24<12:25, 14.92s/batch, batch_loss=25.2, batch_index=694, batch_size=256]

Validation:  93%|█████████████ | 694/743 [2:52:24<12:10, 14.92s/batch, batch_loss=25.2, batch_index=694, batch_size=256]

Validation:  93%|██████████▎| 694/743 [2:52:38<12:10, 14.92s/batch, batch_loss=3.13e+3, batch_index=695, batch_size=256]

Validation:  94%|██████████▎| 695/743 [2:52:38<11:50, 14.79s/batch, batch_loss=3.13e+3, batch_index=695, batch_size=256]

Validation:  94%|█████████████ | 695/743 [2:52:53<11:50, 14.79s/batch, batch_loss=9.38, batch_index=696, batch_size=256]

Validation:  94%|█████████████ | 696/743 [2:52:53<11:33, 14.77s/batch, batch_loss=9.38, batch_index=696, batch_size=256]

Validation:  94%|█████████████ | 696/743 [2:53:07<11:33, 14.77s/batch, batch_loss=33.8, batch_index=697, batch_size=256]

Validation:  94%|█████████████▏| 697/743 [2:53:07<11:05, 14.47s/batch, batch_loss=33.8, batch_index=697, batch_size=256]

Validation:  94%|██████████████ | 697/743 [2:53:21<11:05, 14.47s/batch, batch_loss=758, batch_index=698, batch_size=256]

Validation:  94%|██████████████ | 698/743 [2:53:21<10:51, 14.49s/batch, batch_loss=758, batch_index=698, batch_size=256]

Validation:  94%|█████████████▏| 698/743 [2:53:35<10:51, 14.49s/batch, batch_loss=8.11, batch_index=699, batch_size=256]

Validation:  94%|█████████████▏| 699/743 [2:53:35<10:31, 14.36s/batch, batch_loss=8.11, batch_index=699, batch_size=256]

Validation:  94%|██████████████ | 699/743 [2:53:52<10:31, 14.36s/batch, batch_loss=947, batch_index=700, batch_size=256]

Validation:  94%|██████████████▏| 700/743 [2:53:52<10:41, 14.91s/batch, batch_loss=947, batch_index=700, batch_size=256]

Validation:  94%|█████████████▏| 700/743 [2:54:07<10:41, 14.91s/batch, batch_loss=7.84, batch_index=701, batch_size=256]

Validation:  94%|█████████████▏| 701/743 [2:54:07<10:30, 15.01s/batch, batch_loss=7.84, batch_index=701, batch_size=256]

Validation:  94%|█████████████▏| 701/743 [2:54:21<10:30, 15.01s/batch, batch_loss=8.53, batch_index=702, batch_size=256]

Validation:  94%|█████████████▏| 702/743 [2:54:21<10:03, 14.73s/batch, batch_loss=8.53, batch_index=702, batch_size=256]

Validation:  94%|██████████████▏| 702/743 [2:54:36<10:03, 14.73s/batch, batch_loss=176, batch_index=703, batch_size=256]

Validation:  95%|██████████████▏| 703/743 [2:54:36<09:49, 14.73s/batch, batch_loss=176, batch_index=703, batch_size=256]

Validation:  95%|██████████████▏| 703/743 [2:54:50<09:49, 14.73s/batch, batch_loss=474, batch_index=704, batch_size=256]

Validation:  95%|██████████████▏| 704/743 [2:54:50<09:26, 14.52s/batch, batch_loss=474, batch_index=704, batch_size=256]

Validation:  95%|█████████████▎| 704/743 [2:55:04<09:26, 14.52s/batch, batch_loss=10.2, batch_index=705, batch_size=256]

Validation:  95%|█████████████▎| 705/743 [2:55:04<09:13, 14.58s/batch, batch_loss=10.2, batch_index=705, batch_size=256]

Validation:  95%|███████████████▏| 705/743 [2:55:19<09:13, 14.58s/batch, batch_loss=18, batch_index=706, batch_size=256]

Validation:  95%|███████████████▏| 706/743 [2:55:19<08:58, 14.55s/batch, batch_loss=18, batch_index=706, batch_size=256]

Validation:  95%|██████████████▎| 706/743 [2:55:33<08:58, 14.55s/batch, batch_loss=418, batch_index=707, batch_size=256]

Validation:  95%|██████████████▎| 707/743 [2:55:33<08:43, 14.54s/batch, batch_loss=418, batch_index=707, batch_size=256]

Validation:  95%|█████████████▎| 707/743 [2:55:49<08:43, 14.54s/batch, batch_loss=16.5, batch_index=708, batch_size=256]

Validation:  95%|█████████████▎| 708/743 [2:55:49<08:35, 14.72s/batch, batch_loss=16.5, batch_index=708, batch_size=256]

Validation:  95%|███████████████▏| 708/743 [2:56:03<08:35, 14.72s/batch, batch_loss=26, batch_index=709, batch_size=256]

Validation:  95%|███████████████▎| 709/743 [2:56:03<08:15, 14.56s/batch, batch_loss=26, batch_index=709, batch_size=256]

Validation:  95%|█████████████▎| 709/743 [2:56:18<08:15, 14.56s/batch, batch_loss=18.3, batch_index=710, batch_size=256]

Validation:  96%|█████████████▍| 710/743 [2:56:18<08:03, 14.66s/batch, batch_loss=18.3, batch_index=710, batch_size=256]

Validation:  96%|█████████████▍| 710/743 [2:56:32<08:03, 14.66s/batch, batch_loss=15.4, batch_index=711, batch_size=256]

Validation:  96%|█████████████▍| 711/743 [2:56:32<07:47, 14.59s/batch, batch_loss=15.4, batch_index=711, batch_size=256]

Validation:  96%|███████████████▎| 711/743 [2:56:47<07:47, 14.59s/batch, batch_loss=22, batch_index=712, batch_size=256]

Validation:  96%|███████████████▎| 712/743 [2:56:47<07:37, 14.75s/batch, batch_loss=22, batch_index=712, batch_size=256]

Validation:  96%|█████████████▍| 712/743 [2:57:02<07:37, 14.75s/batch, batch_loss=17.4, batch_index=713, batch_size=256]

Validation:  96%|█████████████▍| 713/743 [2:57:02<07:26, 14.88s/batch, batch_loss=17.4, batch_index=713, batch_size=256]

Validation:  96%|█████████████▍| 713/743 [2:57:17<07:26, 14.88s/batch, batch_loss=7.41, batch_index=714, batch_size=256]

Validation:  96%|█████████████▍| 714/743 [2:57:17<07:07, 14.74s/batch, batch_loss=7.41, batch_index=714, batch_size=256]

Validation:  96%|█████████████▍| 714/743 [2:57:31<07:07, 14.74s/batch, batch_loss=13.7, batch_index=715, batch_size=256]

Validation:  96%|█████████████▍| 715/743 [2:57:31<06:51, 14.71s/batch, batch_loss=13.7, batch_index=715, batch_size=256]

Validation:  96%|█████████████▍| 715/743 [2:57:46<06:51, 14.71s/batch, batch_loss=21.7, batch_index=716, batch_size=256]

Validation:  96%|█████████████▍| 716/743 [2:57:46<06:37, 14.71s/batch, batch_loss=21.7, batch_index=716, batch_size=256]

Validation:  96%|██████████████▍| 716/743 [2:58:01<06:37, 14.71s/batch, batch_loss=388, batch_index=717, batch_size=256]

Validation:  97%|██████████████▍| 717/743 [2:58:01<06:24, 14.77s/batch, batch_loss=388, batch_index=717, batch_size=256]

Validation:  97%|█████████████▌| 717/743 [2:58:16<06:24, 14.77s/batch, batch_loss=17.9, batch_index=718, batch_size=256]

Validation:  97%|█████████████▌| 718/743 [2:58:16<06:10, 14.80s/batch, batch_loss=17.9, batch_index=718, batch_size=256]

Validation:  97%|█████████████▌| 718/743 [2:58:31<06:10, 14.80s/batch, batch_loss=16.4, batch_index=719, batch_size=256]

Validation:  97%|█████████████▌| 719/743 [2:58:31<05:55, 14.83s/batch, batch_loss=16.4, batch_index=719, batch_size=256]

Validation:  97%|█████████████▌| 719/743 [2:58:45<05:55, 14.83s/batch, batch_loss=17.7, batch_index=720, batch_size=256]

Validation:  97%|█████████████▌| 720/743 [2:58:45<05:35, 14.58s/batch, batch_loss=17.7, batch_index=720, batch_size=256]

Validation:  97%|███████████████▌| 720/743 [2:58:59<05:35, 14.58s/batch, batch_loss=14, batch_index=721, batch_size=256]

Validation:  97%|███████████████▌| 721/743 [2:58:59<05:17, 14.45s/batch, batch_loss=14, batch_index=721, batch_size=256]

Validation:  97%|█████████████▌| 721/743 [2:59:13<05:17, 14.45s/batch, batch_loss=24.9, batch_index=722, batch_size=256]

Validation:  97%|█████████████▌| 722/743 [2:59:13<05:00, 14.32s/batch, batch_loss=24.9, batch_index=722, batch_size=256]

Validation:  97%|██████████▋| 722/743 [2:59:29<05:00, 14.32s/batch, batch_loss=5.29e+3, batch_index=723, batch_size=256]

Validation:  97%|██████████▋| 723/743 [2:59:29<04:55, 14.75s/batch, batch_loss=5.29e+3, batch_index=723, batch_size=256]

Validation:  97%|███████████████▌| 723/743 [2:59:44<04:55, 14.75s/batch, batch_loss=18, batch_index=724, batch_size=256]

Validation:  97%|███████████████▌| 724/743 [2:59:44<04:41, 14.80s/batch, batch_loss=18, batch_index=724, batch_size=256]

Validation:  97%|█████████████▋| 724/743 [2:59:58<04:41, 14.80s/batch, batch_loss=17.1, batch_index=725, batch_size=256]

Validation:  98%|█████████████▋| 725/743 [2:59:58<04:24, 14.71s/batch, batch_loss=17.1, batch_index=725, batch_size=256]

Validation:  98%|█████████████▋| 725/743 [3:00:13<04:24, 14.71s/batch, batch_loss=20.8, batch_index=726, batch_size=256]

Validation:  98%|█████████████▋| 726/743 [3:00:13<04:08, 14.64s/batch, batch_loss=20.8, batch_index=726, batch_size=256]

Validation:  98%|██████████▋| 726/743 [3:00:27<04:08, 14.64s/batch, batch_loss=2.41e+4, batch_index=727, batch_size=256]

Validation:  98%|██████████▊| 727/743 [3:00:27<03:52, 14.54s/batch, batch_loss=2.41e+4, batch_index=727, batch_size=256]

Validation:  98%|█████████████▋| 727/743 [3:00:41<03:52, 14.54s/batch, batch_loss=30.1, batch_index=728, batch_size=256]

Validation:  98%|█████████████▋| 728/743 [3:00:41<03:35, 14.39s/batch, batch_loss=30.1, batch_index=728, batch_size=256]

Validation:  98%|█████████████▋| 728/743 [3:00:55<03:35, 14.39s/batch, batch_loss=27.3, batch_index=729, batch_size=256]

Validation:  98%|█████████████▋| 729/743 [3:00:55<03:21, 14.42s/batch, batch_loss=27.3, batch_index=729, batch_size=256]

Validation:  98%|█████████████▋| 729/743 [3:01:09<03:21, 14.42s/batch, batch_loss=19.9, batch_index=730, batch_size=256]

Validation:  98%|█████████████▊| 730/743 [3:01:09<03:04, 14.18s/batch, batch_loss=19.9, batch_index=730, batch_size=256]

Validation:  98%|███████████████▋| 730/743 [3:01:27<03:04, 14.18s/batch, batch_loss=16, batch_index=731, batch_size=256]

Validation:  98%|███████████████▋| 731/743 [3:01:27<03:04, 15.38s/batch, batch_loss=16, batch_index=731, batch_size=256]

Validation:  98%|█████████████▊| 731/743 [3:01:42<03:04, 15.38s/batch, batch_loss=13.6, batch_index=732, batch_size=256]

Validation:  99%|█████████████▊| 732/743 [3:01:42<02:46, 15.14s/batch, batch_loss=13.6, batch_index=732, batch_size=256]

Validation:  99%|███████████████▊| 732/743 [3:01:56<02:46, 15.14s/batch, batch_loss=24, batch_index=733, batch_size=256]

Validation:  99%|███████████████▊| 733/743 [3:01:56<02:29, 14.91s/batch, batch_loss=24, batch_index=733, batch_size=256]

Validation:  99%|█████████████▊| 733/743 [3:02:10<02:29, 14.91s/batch, batch_loss=5.92, batch_index=734, batch_size=256]

Validation:  99%|█████████████▊| 734/743 [3:02:10<02:12, 14.69s/batch, batch_loss=5.92, batch_index=734, batch_size=256]

Validation:  99%|█████████████▊| 734/743 [3:02:25<02:12, 14.69s/batch, batch_loss=8.57, batch_index=735, batch_size=256]

Validation:  99%|█████████████▊| 735/743 [3:02:25<01:56, 14.60s/batch, batch_loss=8.57, batch_index=735, batch_size=256]

Validation:  99%|█████████████▊| 735/743 [3:02:39<01:56, 14.60s/batch, batch_loss=1.85, batch_index=736, batch_size=256]

Validation:  99%|█████████████▊| 736/743 [3:02:39<01:41, 14.47s/batch, batch_loss=1.85, batch_index=736, batch_size=256]

Validation:  99%|████████████▉| 736/743 [3:02:52<01:41, 14.47s/batch, batch_loss=0.581, batch_index=737, batch_size=256]

Validation:  99%|████████████▉| 737/743 [3:02:52<01:24, 14.00s/batch, batch_loss=0.581, batch_index=737, batch_size=256]

Validation:  99%|████████████▉| 737/743 [3:03:04<01:24, 14.00s/batch, batch_loss=0.581, batch_index=738, batch_size=256]

Validation:  99%|████████████▉| 738/743 [3:03:04<01:07, 13.54s/batch, batch_loss=0.581, batch_index=738, batch_size=256]

Validation:  99%|████████████▉| 738/743 [3:03:18<01:07, 13.54s/batch, batch_loss=0.581, batch_index=739, batch_size=256]

Validation:  99%|████████████▉| 739/743 [3:03:18<00:53, 13.48s/batch, batch_loss=0.581, batch_index=739, batch_size=256]

Validation:  99%|████████████▉| 739/743 [3:03:32<00:53, 13.48s/batch, batch_loss=0.581, batch_index=740, batch_size=256]

Validation: 100%|████████████▉| 740/743 [3:03:32<00:41, 13.79s/batch, batch_loss=0.581, batch_index=740, batch_size=256]

Validation: 100%|████████████▉| 740/743 [3:03:45<00:41, 13.79s/batch, batch_loss=0.581, batch_index=741, batch_size=256]

Validation: 100%|████████████▉| 741/743 [3:03:45<00:27, 13.59s/batch, batch_loss=0.581, batch_index=741, batch_size=256]

Validation: 100%|████████████▉| 741/743 [3:03:58<00:27, 13.59s/batch, batch_loss=0.581, batch_index=742, batch_size=256]

Validation: 100%|████████████▉| 742/743 [3:03:58<00:13, 13.45s/batch, batch_loss=0.581, batch_index=742, batch_size=256]

Validation: 100%|████████████▉| 742/743 [3:04:11<00:13, 13.45s/batch, batch_loss=0.586, batch_index=743, batch_size=238]

Validation: 100%|█████████████| 743/743 [3:04:11<00:00, 13.12s/batch, batch_loss=0.586, batch_index=743, batch_size=238]

Validation: 100%|█████████████| 743/743 [3:04:11<00:00, 14.87s/batch, batch_loss=0.586, batch_index=743, batch_size=238]




Val Loss: 1297.5935


Epoch 3/10:   0%|                                                                            | 0/991 [00:00<?, ?batch/s]

Epoch 3/10:   0%|                              | 0/991 [00:15<?, ?batch/s, batch_loss=19, batch_index=1, batch_size=256]

Epoch 3/10:   0%|                    | 1/991 [00:15<4:08:56, 15.09s/batch, batch_loss=19, batch_index=1, batch_size=256]

Epoch 3/10:   0%|                  | 1/991 [00:30<4:08:56, 15.09s/batch, batch_loss=20.1, batch_index=2, batch_size=256]

Epoch 3/10:   0%|                  | 2/991 [00:30<4:10:53, 15.22s/batch, batch_loss=20.1, batch_index=2, batch_size=256]

Epoch 3/10:   0%|                  | 2/991 [00:45<4:10:53, 15.22s/batch, batch_loss=13.2, batch_index=3, batch_size=256]

Epoch 3/10:   0%|                  | 3/991 [00:45<4:09:11, 15.13s/batch, batch_loss=13.2, batch_index=3, batch_size=256]

Epoch 3/10:   0%|                  | 3/991 [00:59<4:09:11, 15.13s/batch, batch_loss=8.93, batch_index=4, batch_size=256]

Epoch 3/10:   0%|                  | 4/991 [00:59<4:01:38, 14.69s/batch, batch_loss=8.93, batch_index=4, batch_size=256]

Epoch 3/10:   0%|                  | 4/991 [01:14<4:01:38, 14.69s/batch, batch_loss=21.8, batch_index=5, batch_size=256]

Epoch 3/10:   1%|                  | 5/991 [01:14<4:04:04, 14.85s/batch, batch_loss=21.8, batch_index=5, batch_size=256]

Epoch 3/10:   1%|                  | 5/991 [01:29<4:04:04, 14.85s/batch, batch_loss=23.4, batch_index=6, batch_size=256]

Epoch 3/10:   1%|                  | 6/991 [01:29<4:03:33, 14.84s/batch, batch_loss=23.4, batch_index=6, batch_size=256]

Epoch 3/10:   1%|                  | 6/991 [01:44<4:03:33, 14.84s/batch, batch_loss=17.6, batch_index=7, batch_size=256]

Epoch 3/10:   1%|▏                 | 7/991 [01:44<4:06:20, 15.02s/batch, batch_loss=17.6, batch_index=7, batch_size=256]

Epoch 3/10:   1%|▏                  | 7/991 [01:59<4:06:20, 15.02s/batch, batch_loss=602, batch_index=8, batch_size=256]

Epoch 3/10:   1%|▏                  | 8/991 [01:59<4:04:05, 14.90s/batch, batch_loss=602, batch_index=8, batch_size=256]

Epoch 3/10:   1%|▏                 | 8/991 [02:14<4:04:05, 14.90s/batch, batch_loss=16.9, batch_index=9, batch_size=256]

Epoch 3/10:   1%|▏                 | 9/991 [02:14<4:06:30, 15.06s/batch, batch_loss=16.9, batch_index=9, batch_size=256]

Epoch 3/10:   1%|▏                | 9/991 [02:30<4:06:30, 15.06s/batch, batch_loss=16.1, batch_index=10, batch_size=256]

Epoch 3/10:   1%|▏               | 10/991 [02:30<4:08:53, 15.22s/batch, batch_loss=16.1, batch_index=10, batch_size=256]

Epoch 3/10:   1%|▏               | 10/991 [02:46<4:08:53, 15.22s/batch, batch_loss=11.1, batch_index=11, batch_size=256]

Epoch 3/10:   1%|▏               | 11/991 [02:46<4:10:48, 15.36s/batch, batch_loss=11.1, batch_index=11, batch_size=256]

Epoch 3/10:   1%|▏            | 11/991 [03:00<4:10:48, 15.36s/batch, batch_loss=1.99e+3, batch_index=12, batch_size=256]

Epoch 3/10:   1%|▏            | 12/991 [03:00<4:05:55, 15.07s/batch, batch_loss=1.99e+3, batch_index=12, batch_size=256]

Epoch 3/10:   1%|▏               | 12/991 [03:15<4:05:55, 15.07s/batch, batch_loss=18.5, batch_index=13, batch_size=256]

Epoch 3/10:   1%|▏               | 13/991 [03:15<4:06:55, 15.15s/batch, batch_loss=18.5, batch_index=13, batch_size=256]

Epoch 3/10:   1%|▏               | 13/991 [03:34<4:06:55, 15.15s/batch, batch_loss=11.3, batch_index=14, batch_size=256]

Epoch 3/10:   1%|▏               | 14/991 [03:34<4:22:40, 16.13s/batch, batch_loss=11.3, batch_index=14, batch_size=256]

Epoch 3/10:   1%|▎                 | 14/991 [03:49<4:22:40, 16.13s/batch, batch_loss=12, batch_index=15, batch_size=256]

Epoch 3/10:   2%|▎                 | 15/991 [03:49<4:16:49, 15.79s/batch, batch_loss=12, batch_index=15, batch_size=256]

Epoch 3/10:   2%|▏               | 15/991 [04:03<4:16:49, 15.79s/batch, batch_loss=13.9, batch_index=16, batch_size=256]

Epoch 3/10:   2%|▎               | 16/991 [04:03<4:08:17, 15.28s/batch, batch_loss=13.9, batch_index=16, batch_size=256]

Epoch 3/10:   2%|▎               | 16/991 [04:16<4:08:17, 15.28s/batch, batch_loss=14.3, batch_index=17, batch_size=256]

Epoch 3/10:   2%|▎               | 17/991 [04:16<3:58:08, 14.67s/batch, batch_loss=14.3, batch_index=17, batch_size=256]

Epoch 3/10:   2%|▎               | 17/991 [04:30<3:58:08, 14.67s/batch, batch_loss=9.64, batch_index=18, batch_size=256]

Epoch 3/10:   2%|▎               | 18/991 [04:30<3:56:37, 14.59s/batch, batch_loss=9.64, batch_index=18, batch_size=256]

Epoch 3/10:   2%|▏            | 18/991 [04:46<3:56:37, 14.59s/batch, batch_loss=8.97e+3, batch_index=19, batch_size=256]

Epoch 3/10:   2%|▏            | 19/991 [04:46<4:00:24, 14.84s/batch, batch_loss=8.97e+3, batch_index=19, batch_size=256]

Epoch 3/10:   2%|▎               | 19/991 [05:01<4:00:24, 14.84s/batch, batch_loss=10.8, batch_index=20, batch_size=256]

Epoch 3/10:   2%|▎               | 20/991 [05:01<4:02:44, 15.00s/batch, batch_loss=10.8, batch_index=20, batch_size=256]

Epoch 3/10:   2%|▎               | 20/991 [05:17<4:02:44, 15.00s/batch, batch_loss=17.1, batch_index=21, batch_size=256]

Epoch 3/10:   2%|▎               | 21/991 [05:17<4:03:34, 15.07s/batch, batch_loss=17.1, batch_index=21, batch_size=256]

Epoch 3/10:   2%|▎            | 21/991 [05:32<4:03:34, 15.07s/batch, batch_loss=1.08e+3, batch_index=22, batch_size=256]

Epoch 3/10:   2%|▎            | 22/991 [05:32<4:04:56, 15.17s/batch, batch_loss=1.08e+3, batch_index=22, batch_size=256]

Epoch 3/10:   2%|▎               | 22/991 [05:48<4:04:56, 15.17s/batch, batch_loss=8.16, batch_index=23, batch_size=256]

Epoch 3/10:   2%|▎               | 23/991 [05:48<4:07:41, 15.35s/batch, batch_loss=8.16, batch_index=23, batch_size=256]

Epoch 3/10:   2%|▎               | 23/991 [06:03<4:07:41, 15.35s/batch, batch_loss=13.5, batch_index=24, batch_size=256]

Epoch 3/10:   2%|▍               | 24/991 [06:03<4:08:37, 15.43s/batch, batch_loss=13.5, batch_index=24, batch_size=256]

Epoch 3/10:   2%|▍                 | 24/991 [06:19<4:08:37, 15.43s/batch, batch_loss=12, batch_index=25, batch_size=256]

Epoch 3/10:   3%|▍                 | 25/991 [06:19<4:11:15, 15.61s/batch, batch_loss=12, batch_index=25, batch_size=256]

Epoch 3/10:   3%|▍               | 25/991 [06:35<4:11:15, 15.61s/batch, batch_loss=18.1, batch_index=26, batch_size=256]

Epoch 3/10:   3%|▍               | 26/991 [06:35<4:11:19, 15.63s/batch, batch_loss=18.1, batch_index=26, batch_size=256]

Epoch 3/10:   3%|▍               | 26/991 [06:50<4:11:19, 15.63s/batch, batch_loss=15.9, batch_index=27, batch_size=256]

Epoch 3/10:   3%|▍               | 27/991 [06:50<4:08:02, 15.44s/batch, batch_loss=15.9, batch_index=27, batch_size=256]

Epoch 3/10:   3%|▎            | 27/991 [07:05<4:08:02, 15.44s/batch, batch_loss=1.13e+3, batch_index=28, batch_size=256]

Epoch 3/10:   3%|▎            | 28/991 [07:05<4:06:47, 15.38s/batch, batch_loss=1.13e+3, batch_index=28, batch_size=256]

Epoch 3/10:   3%|▍               | 28/991 [07:22<4:06:47, 15.38s/batch, batch_loss=10.1, batch_index=29, batch_size=256]

Epoch 3/10:   3%|▍               | 29/991 [07:22<4:14:52, 15.90s/batch, batch_loss=10.1, batch_index=29, batch_size=256]

Epoch 3/10:   3%|▌                 | 29/991 [07:37<4:14:52, 15.90s/batch, batch_loss=11, batch_index=30, batch_size=256]

Epoch 3/10:   3%|▌                 | 30/991 [07:37<4:10:09, 15.62s/batch, batch_loss=11, batch_index=30, batch_size=256]

Epoch 3/10:   3%|▍               | 30/991 [07:53<4:10:09, 15.62s/batch, batch_loss=9.34, batch_index=31, batch_size=256]

Epoch 3/10:   3%|▌               | 31/991 [07:53<4:08:18, 15.52s/batch, batch_loss=9.34, batch_index=31, batch_size=256]

Epoch 3/10:   3%|▍             | 31/991 [08:07<4:08:18, 15.52s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 3/10:   3%|▍             | 32/991 [08:07<4:04:37, 15.30s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 3/10:   3%|▌               | 32/991 [08:23<4:04:37, 15.30s/batch, batch_loss=13.1, batch_index=33, batch_size=256]

Epoch 3/10:   3%|▌               | 33/991 [08:23<4:05:18, 15.36s/batch, batch_loss=13.1, batch_index=33, batch_size=256]

Epoch 3/10:   3%|▋                  | 33/991 [08:38<4:05:18, 15.36s/batch, batch_loss=9, batch_index=34, batch_size=256]

Epoch 3/10:   3%|▋                  | 34/991 [08:38<4:03:54, 15.29s/batch, batch_loss=9, batch_index=34, batch_size=256]

Epoch 3/10:   3%|▌               | 34/991 [08:53<4:03:54, 15.29s/batch, batch_loss=12.8, batch_index=35, batch_size=256]

Epoch 3/10:   4%|▌               | 35/991 [08:53<4:03:22, 15.27s/batch, batch_loss=12.8, batch_index=35, batch_size=256]

Epoch 3/10:   4%|▌               | 35/991 [09:08<4:03:22, 15.27s/batch, batch_loss=10.2, batch_index=36, batch_size=256]

Epoch 3/10:   4%|▌               | 36/991 [09:08<4:01:55, 15.20s/batch, batch_loss=10.2, batch_index=36, batch_size=256]

Epoch 3/10:   4%|▌               | 36/991 [09:26<4:01:55, 15.20s/batch, batch_loss=10.4, batch_index=37, batch_size=256]

Epoch 3/10:   4%|▌               | 37/991 [09:26<4:13:51, 15.97s/batch, batch_loss=10.4, batch_index=37, batch_size=256]

Epoch 3/10:   4%|▌               | 37/991 [09:42<4:13:51, 15.97s/batch, batch_loss=7.08, batch_index=38, batch_size=256]

Epoch 3/10:   4%|▌               | 38/991 [09:42<4:12:13, 15.88s/batch, batch_loss=7.08, batch_index=38, batch_size=256]

Epoch 3/10:   4%|▍            | 38/991 [09:57<4:12:13, 15.88s/batch, batch_loss=1.73e+3, batch_index=39, batch_size=256]

Epoch 3/10:   4%|▌            | 39/991 [09:57<4:09:37, 15.73s/batch, batch_loss=1.73e+3, batch_index=39, batch_size=256]

Epoch 3/10:   4%|▋               | 39/991 [10:13<4:09:37, 15.73s/batch, batch_loss=13.6, batch_index=40, batch_size=256]

Epoch 3/10:   4%|▋               | 40/991 [10:13<4:08:43, 15.69s/batch, batch_loss=13.6, batch_index=40, batch_size=256]

Epoch 3/10:   4%|▌             | 40/991 [10:28<4:08:43, 15.69s/batch, batch_loss=6.2e+3, batch_index=41, batch_size=256]

Epoch 3/10:   4%|▌             | 41/991 [10:28<4:04:49, 15.46s/batch, batch_loss=6.2e+3, batch_index=41, batch_size=256]

Epoch 3/10:   4%|▋               | 41/991 [10:43<4:04:49, 15.46s/batch, batch_loss=15.4, batch_index=42, batch_size=256]

Epoch 3/10:   4%|▋               | 42/991 [10:43<4:02:10, 15.31s/batch, batch_loss=15.4, batch_index=42, batch_size=256]

Epoch 3/10:   4%|▋               | 42/991 [10:58<4:02:10, 15.31s/batch, batch_loss=8.77, batch_index=43, batch_size=256]

Epoch 3/10:   4%|▋               | 43/991 [10:58<4:00:59, 15.25s/batch, batch_loss=8.77, batch_index=43, batch_size=256]

Epoch 3/10:   4%|▋               | 43/991 [11:13<4:00:59, 15.25s/batch, batch_loss=14.2, batch_index=44, batch_size=256]

Epoch 3/10:   4%|▋               | 44/991 [11:13<3:59:59, 15.20s/batch, batch_loss=14.2, batch_index=44, batch_size=256]

Epoch 3/10:   4%|▋               | 44/991 [11:29<3:59:59, 15.20s/batch, batch_loss=15.5, batch_index=45, batch_size=256]

Epoch 3/10:   5%|▋               | 45/991 [11:29<4:04:25, 15.50s/batch, batch_loss=15.5, batch_index=45, batch_size=256]

Epoch 3/10:   5%|▋               | 45/991 [11:44<4:04:25, 15.50s/batch, batch_loss=12.9, batch_index=46, batch_size=256]

Epoch 3/10:   5%|▋               | 46/991 [11:44<4:01:00, 15.30s/batch, batch_loss=12.9, batch_index=46, batch_size=256]

Epoch 3/10:   5%|▋               | 46/991 [11:58<4:01:00, 15.30s/batch, batch_loss=6.21, batch_index=47, batch_size=256]

Epoch 3/10:   5%|▊               | 47/991 [11:58<3:54:58, 14.93s/batch, batch_loss=6.21, batch_index=47, batch_size=256]

Epoch 3/10:   5%|▊               | 47/991 [12:13<3:54:58, 14.93s/batch, batch_loss=12.7, batch_index=48, batch_size=256]

Epoch 3/10:   5%|▊               | 48/991 [12:13<3:55:07, 14.96s/batch, batch_loss=12.7, batch_index=48, batch_size=256]

Epoch 3/10:   5%|▊               | 48/991 [12:27<3:55:07, 14.96s/batch, batch_loss=12.4, batch_index=49, batch_size=256]

Epoch 3/10:   5%|▊               | 49/991 [12:27<3:52:14, 14.79s/batch, batch_loss=12.4, batch_index=49, batch_size=256]

Epoch 3/10:   5%|▊               | 49/991 [12:42<3:52:14, 14.79s/batch, batch_loss=13.4, batch_index=50, batch_size=256]

Epoch 3/10:   5%|▊               | 50/991 [12:42<3:52:38, 14.83s/batch, batch_loss=13.4, batch_index=50, batch_size=256]

Epoch 3/10:   5%|▊               | 50/991 [12:56<3:52:38, 14.83s/batch, batch_loss=8.75, batch_index=51, batch_size=256]

Epoch 3/10:   5%|▊               | 51/991 [12:56<3:48:43, 14.60s/batch, batch_loss=8.75, batch_index=51, batch_size=256]

Epoch 3/10:   5%|▊               | 51/991 [13:10<3:48:43, 14.60s/batch, batch_loss=12.6, batch_index=52, batch_size=256]

Epoch 3/10:   5%|▊               | 52/991 [13:10<3:46:07, 14.45s/batch, batch_loss=12.6, batch_index=52, batch_size=256]

Epoch 3/10:   5%|▉                 | 52/991 [13:26<3:46:07, 14.45s/batch, batch_loss=13, batch_index=53, batch_size=256]

Epoch 3/10:   5%|▉                 | 53/991 [13:26<3:49:02, 14.65s/batch, batch_loss=13, batch_index=53, batch_size=256]

Epoch 3/10:   5%|▊               | 53/991 [13:39<3:49:02, 14.65s/batch, batch_loss=9.86, batch_index=54, batch_size=256]

Epoch 3/10:   5%|▊               | 54/991 [13:39<3:45:10, 14.42s/batch, batch_loss=9.86, batch_index=54, batch_size=256]

Epoch 3/10:   5%|▊               | 54/991 [13:55<3:45:10, 14.42s/batch, batch_loss=9.32, batch_index=55, batch_size=256]

Epoch 3/10:   6%|▉               | 55/991 [13:55<3:50:09, 14.75s/batch, batch_loss=9.32, batch_index=55, batch_size=256]

Epoch 3/10:   6%|▉               | 55/991 [14:10<3:50:09, 14.75s/batch, batch_loss=10.8, batch_index=56, batch_size=256]

Epoch 3/10:   6%|▉               | 56/991 [14:10<3:52:50, 14.94s/batch, batch_loss=10.8, batch_index=56, batch_size=256]

Epoch 3/10:   6%|▉               | 56/991 [14:25<3:52:50, 14.94s/batch, batch_loss=6.13, batch_index=57, batch_size=256]

Epoch 3/10:   6%|▉               | 57/991 [14:25<3:51:34, 14.88s/batch, batch_loss=6.13, batch_index=57, batch_size=256]

Epoch 3/10:   6%|█                 | 57/991 [14:40<3:51:34, 14.88s/batch, batch_loss=16, batch_index=58, batch_size=256]

Epoch 3/10:   6%|█                 | 58/991 [14:40<3:52:23, 14.95s/batch, batch_loss=16, batch_index=58, batch_size=256]

Epoch 3/10:   6%|▉               | 58/991 [14:55<3:52:23, 14.95s/batch, batch_loss=9.67, batch_index=59, batch_size=256]

Epoch 3/10:   6%|▉               | 59/991 [14:55<3:51:39, 14.91s/batch, batch_loss=9.67, batch_index=59, batch_size=256]

Epoch 3/10:   6%|▉               | 59/991 [15:13<3:51:39, 14.91s/batch, batch_loss=16.2, batch_index=60, batch_size=256]

Epoch 3/10:   6%|▉               | 60/991 [15:13<4:06:31, 15.89s/batch, batch_loss=16.2, batch_index=60, batch_size=256]

Epoch 3/10:   6%|█                 | 60/991 [15:28<4:06:31, 15.89s/batch, batch_loss=12, batch_index=61, batch_size=256]

Epoch 3/10:   6%|█                 | 61/991 [15:28<4:02:27, 15.64s/batch, batch_loss=12, batch_index=61, batch_size=256]

Epoch 3/10:   6%|▉               | 61/991 [15:44<4:02:27, 15.64s/batch, batch_loss=12.9, batch_index=62, batch_size=256]

Epoch 3/10:   6%|█               | 62/991 [15:44<4:01:48, 15.62s/batch, batch_loss=12.9, batch_index=62, batch_size=256]

Epoch 3/10:   6%|█                | 62/991 [15:59<4:01:48, 15.62s/batch, batch_loss=422, batch_index=63, batch_size=256]

Epoch 3/10:   6%|█                | 63/991 [15:59<4:00:18, 15.54s/batch, batch_loss=422, batch_index=63, batch_size=256]

Epoch 3/10:   6%|█                | 63/991 [16:14<4:00:18, 15.54s/batch, batch_loss=794, batch_index=64, batch_size=256]

Epoch 3/10:   6%|█                | 64/991 [16:14<3:57:22, 15.36s/batch, batch_loss=794, batch_index=64, batch_size=256]

Epoch 3/10:   6%|▊            | 64/991 [16:29<3:57:22, 15.36s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 3/10:   7%|▊            | 65/991 [16:29<3:56:56, 15.35s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 3/10:   7%|█               | 65/991 [16:44<3:56:56, 15.35s/batch, batch_loss=3.81, batch_index=66, batch_size=256]

Epoch 3/10:   7%|█               | 66/991 [16:44<3:54:38, 15.22s/batch, batch_loss=3.81, batch_index=66, batch_size=256]

Epoch 3/10:   7%|█               | 66/991 [17:00<3:54:38, 15.22s/batch, batch_loss=9.96, batch_index=67, batch_size=256]

Epoch 3/10:   7%|█               | 67/991 [17:00<3:56:53, 15.38s/batch, batch_loss=9.96, batch_index=67, batch_size=256]

Epoch 3/10:   7%|█               | 67/991 [17:16<3:56:53, 15.38s/batch, batch_loss=7.86, batch_index=68, batch_size=256]

Epoch 3/10:   7%|█               | 68/991 [17:16<3:57:00, 15.41s/batch, batch_loss=7.86, batch_index=68, batch_size=256]

Epoch 3/10:   7%|█               | 68/991 [17:30<3:57:00, 15.41s/batch, batch_loss=19.1, batch_index=69, batch_size=256]

Epoch 3/10:   7%|█               | 69/991 [17:30<3:51:40, 15.08s/batch, batch_loss=19.1, batch_index=69, batch_size=256]

Epoch 3/10:   7%|█               | 69/991 [17:44<3:51:40, 15.08s/batch, batch_loss=7.89, batch_index=70, batch_size=256]

Epoch 3/10:   7%|█▏              | 70/991 [17:44<3:46:31, 14.76s/batch, batch_loss=7.89, batch_index=70, batch_size=256]

Epoch 3/10:   7%|█▏              | 70/991 [17:58<3:46:31, 14.76s/batch, batch_loss=12.8, batch_index=71, batch_size=256]

Epoch 3/10:   7%|█▏              | 71/991 [17:58<3:45:39, 14.72s/batch, batch_loss=12.8, batch_index=71, batch_size=256]

Epoch 3/10:   7%|█▏              | 71/991 [18:13<3:45:39, 14.72s/batch, batch_loss=13.3, batch_index=72, batch_size=256]

Epoch 3/10:   7%|█▏              | 72/991 [18:13<3:46:03, 14.76s/batch, batch_loss=13.3, batch_index=72, batch_size=256]

Epoch 3/10:   7%|█▏              | 72/991 [18:29<3:46:03, 14.76s/batch, batch_loss=25.2, batch_index=73, batch_size=256]

Epoch 3/10:   7%|█▏              | 73/991 [18:29<3:49:02, 14.97s/batch, batch_loss=25.2, batch_index=73, batch_size=256]

Epoch 3/10:   7%|▉            | 73/991 [18:44<3:49:02, 14.97s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 3/10:   7%|▉            | 74/991 [18:44<3:50:25, 15.08s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 3/10:   7%|█▏              | 74/991 [18:59<3:50:25, 15.08s/batch, batch_loss=15.5, batch_index=75, batch_size=256]

Epoch 3/10:   8%|█▏              | 75/991 [18:59<3:50:20, 15.09s/batch, batch_loss=15.5, batch_index=75, batch_size=256]

Epoch 3/10:   8%|█▏              | 75/991 [19:16<3:50:20, 15.09s/batch, batch_loss=10.3, batch_index=76, batch_size=256]

Epoch 3/10:   8%|█▏              | 76/991 [19:16<3:59:53, 15.73s/batch, batch_loss=10.3, batch_index=76, batch_size=256]

Epoch 3/10:   8%|█▏              | 76/991 [19:32<3:59:53, 15.73s/batch, batch_loss=10.4, batch_index=77, batch_size=256]

Epoch 3/10:   8%|█▏              | 77/991 [19:32<3:58:02, 15.63s/batch, batch_loss=10.4, batch_index=77, batch_size=256]

Epoch 3/10:   8%|█▏              | 77/991 [19:48<3:58:02, 15.63s/batch, batch_loss=11.9, batch_index=78, batch_size=256]

Epoch 3/10:   8%|█▎              | 78/991 [19:48<4:00:10, 15.78s/batch, batch_loss=11.9, batch_index=78, batch_size=256]

Epoch 3/10:   8%|█▎              | 78/991 [20:04<4:00:10, 15.78s/batch, batch_loss=8.91, batch_index=79, batch_size=256]

Epoch 3/10:   8%|█▎              | 79/991 [20:04<4:01:09, 15.87s/batch, batch_loss=8.91, batch_index=79, batch_size=256]

Epoch 3/10:   8%|█▎              | 79/991 [20:20<4:01:09, 15.87s/batch, batch_loss=7.61, batch_index=80, batch_size=256]

Epoch 3/10:   8%|█▎              | 80/991 [20:20<4:02:45, 15.99s/batch, batch_loss=7.61, batch_index=80, batch_size=256]

Epoch 3/10:   8%|█▎              | 80/991 [20:35<4:02:45, 15.99s/batch, batch_loss=12.3, batch_index=81, batch_size=256]

Epoch 3/10:   8%|█▎              | 81/991 [20:35<3:56:23, 15.59s/batch, batch_loss=12.3, batch_index=81, batch_size=256]

Epoch 3/10:   8%|█▎              | 81/991 [20:49<3:56:23, 15.59s/batch, batch_loss=12.5, batch_index=82, batch_size=256]

Epoch 3/10:   8%|█▎              | 82/991 [20:49<3:51:03, 15.25s/batch, batch_loss=12.5, batch_index=82, batch_size=256]

Epoch 3/10:   8%|█▍               | 82/991 [21:07<3:51:03, 15.25s/batch, batch_loss=7.2, batch_index=83, batch_size=256]

Epoch 3/10:   8%|█▍               | 83/991 [21:07<4:02:09, 16.00s/batch, batch_loss=7.2, batch_index=83, batch_size=256]

Epoch 3/10:   8%|█▎              | 83/991 [21:22<4:02:09, 16.00s/batch, batch_loss=10.6, batch_index=84, batch_size=256]

Epoch 3/10:   8%|█▎              | 84/991 [21:22<3:57:09, 15.69s/batch, batch_loss=10.6, batch_index=84, batch_size=256]

Epoch 3/10:   8%|█▎              | 84/991 [21:37<3:57:09, 15.69s/batch, batch_loss=7.76, batch_index=85, batch_size=256]

Epoch 3/10:   9%|█▎              | 85/991 [21:37<3:54:33, 15.53s/batch, batch_loss=7.76, batch_index=85, batch_size=256]

Epoch 3/10:   9%|█▎              | 85/991 [21:52<3:54:33, 15.53s/batch, batch_loss=9.46, batch_index=86, batch_size=256]

Epoch 3/10:   9%|█▍              | 86/991 [21:52<3:50:54, 15.31s/batch, batch_loss=9.46, batch_index=86, batch_size=256]

Epoch 3/10:   9%|█▍              | 86/991 [22:08<3:50:54, 15.31s/batch, batch_loss=9.74, batch_index=87, batch_size=256]

Epoch 3/10:   9%|█▍              | 87/991 [22:08<3:52:41, 15.44s/batch, batch_loss=9.74, batch_index=87, batch_size=256]

Epoch 3/10:   9%|█▍              | 87/991 [22:24<3:52:41, 15.44s/batch, batch_loss=9.52, batch_index=88, batch_size=256]

Epoch 3/10:   9%|█▍              | 88/991 [22:24<3:57:09, 15.76s/batch, batch_loss=9.52, batch_index=88, batch_size=256]

Epoch 3/10:   9%|█▍              | 88/991 [22:40<3:57:09, 15.76s/batch, batch_loss=5.03, batch_index=89, batch_size=256]

Epoch 3/10:   9%|█▍              | 89/991 [22:40<3:54:41, 15.61s/batch, batch_loss=5.03, batch_index=89, batch_size=256]

Epoch 3/10:   9%|█▌               | 89/991 [22:55<3:54:41, 15.61s/batch, batch_loss=246, batch_index=90, batch_size=256]

Epoch 3/10:   9%|█▌               | 90/991 [22:55<3:54:49, 15.64s/batch, batch_loss=246, batch_index=90, batch_size=256]

Epoch 3/10:   9%|█▏           | 90/991 [23:09<3:54:49, 15.64s/batch, batch_loss=1.54e+3, batch_index=91, batch_size=256]

Epoch 3/10:   9%|█▏           | 91/991 [23:09<3:47:21, 15.16s/batch, batch_loss=1.54e+3, batch_index=91, batch_size=256]

Epoch 3/10:   9%|█▍              | 91/991 [23:24<3:47:21, 15.16s/batch, batch_loss=15.2, batch_index=92, batch_size=256]

Epoch 3/10:   9%|█▍              | 92/991 [23:24<3:46:48, 15.14s/batch, batch_loss=15.2, batch_index=92, batch_size=256]

Epoch 3/10:   9%|█▍              | 92/991 [23:40<3:46:48, 15.14s/batch, batch_loss=19.8, batch_index=93, batch_size=256]

Epoch 3/10:   9%|█▌              | 93/991 [23:40<3:47:40, 15.21s/batch, batch_loss=19.8, batch_index=93, batch_size=256]

Epoch 3/10:   9%|█▌              | 93/991 [23:54<3:47:40, 15.21s/batch, batch_loss=21.6, batch_index=94, batch_size=256]

Epoch 3/10:   9%|█▌              | 94/991 [23:54<3:44:38, 15.03s/batch, batch_loss=21.6, batch_index=94, batch_size=256]

Epoch 3/10:   9%|█▌              | 94/991 [24:11<3:44:38, 15.03s/batch, batch_loss=18.4, batch_index=95, batch_size=256]

Epoch 3/10:  10%|█▌              | 95/991 [24:11<3:52:08, 15.55s/batch, batch_loss=18.4, batch_index=95, batch_size=256]

Epoch 3/10:  10%|█▌              | 95/991 [24:26<3:52:08, 15.55s/batch, batch_loss=17.7, batch_index=96, batch_size=256]

Epoch 3/10:  10%|█▌              | 96/991 [24:26<3:48:07, 15.29s/batch, batch_loss=17.7, batch_index=96, batch_size=256]

Epoch 3/10:  10%|█▌              | 96/991 [24:41<3:48:07, 15.29s/batch, batch_loss=16.8, batch_index=97, batch_size=256]

Epoch 3/10:  10%|█▌              | 97/991 [24:41<3:45:56, 15.16s/batch, batch_loss=16.8, batch_index=97, batch_size=256]

Epoch 3/10:  10%|█▌              | 97/991 [24:56<3:45:56, 15.16s/batch, batch_loss=15.7, batch_index=98, batch_size=256]

Epoch 3/10:  10%|█▌              | 98/991 [24:56<3:46:33, 15.22s/batch, batch_loss=15.7, batch_index=98, batch_size=256]

Epoch 3/10:  10%|█▊                | 98/991 [25:13<3:46:33, 15.22s/batch, batch_loss=15, batch_index=99, batch_size=256]

Epoch 3/10:  10%|█▊                | 99/991 [25:13<3:52:13, 15.62s/batch, batch_loss=15, batch_index=99, batch_size=256]

Epoch 3/10:  10%|█▍             | 99/991 [25:29<3:52:13, 15.62s/batch, batch_loss=15.8, batch_index=100, batch_size=256]

Epoch 3/10:  10%|█▍            | 100/991 [25:29<3:55:03, 15.83s/batch, batch_loss=15.8, batch_index=100, batch_size=256]

Epoch 3/10:  10%|█▍            | 100/991 [25:45<3:55:03, 15.83s/batch, batch_loss=12.7, batch_index=101, batch_size=256]

Epoch 3/10:  10%|█▍            | 101/991 [25:45<3:54:07, 15.78s/batch, batch_loss=12.7, batch_index=101, batch_size=256]

Epoch 3/10:  10%|█▋              | 101/991 [26:00<3:54:07, 15.78s/batch, batch_loss=23, batch_index=102, batch_size=256]

Epoch 3/10:  10%|█▋              | 102/991 [26:00<3:52:04, 15.66s/batch, batch_loss=23, batch_index=102, batch_size=256]

Epoch 3/10:  10%|█▌             | 102/991 [26:15<3:52:04, 15.66s/batch, batch_loss=912, batch_index=103, batch_size=256]

Epoch 3/10:  10%|█▌             | 103/991 [26:15<3:46:31, 15.31s/batch, batch_loss=912, batch_index=103, batch_size=256]

Epoch 3/10:  10%|█▍            | 103/991 [26:30<3:46:31, 15.31s/batch, batch_loss=14.3, batch_index=104, batch_size=256]

Epoch 3/10:  10%|█▍            | 104/991 [26:30<3:46:11, 15.30s/batch, batch_loss=14.3, batch_index=104, batch_size=256]

Epoch 3/10:  10%|█▍            | 104/991 [26:45<3:46:11, 15.30s/batch, batch_loss=9.77, batch_index=105, batch_size=256]

Epoch 3/10:  11%|█▍            | 105/991 [26:45<3:43:41, 15.15s/batch, batch_loss=9.77, batch_index=105, batch_size=256]

Epoch 3/10:  11%|█▍            | 105/991 [26:59<3:43:41, 15.15s/batch, batch_loss=10.6, batch_index=106, batch_size=256]

Epoch 3/10:  11%|█▍            | 106/991 [26:59<3:39:14, 14.86s/batch, batch_loss=10.6, batch_index=106, batch_size=256]

Epoch 3/10:  11%|█▋              | 106/991 [27:14<3:39:14, 14.86s/batch, batch_loss=18, batch_index=107, batch_size=256]

Epoch 3/10:  11%|█▋              | 107/991 [27:14<3:38:14, 14.81s/batch, batch_loss=18, batch_index=107, batch_size=256]

Epoch 3/10:  11%|█▌            | 107/991 [27:28<3:38:14, 14.81s/batch, batch_loss=24.2, batch_index=108, batch_size=256]

Epoch 3/10:  11%|█▌            | 108/991 [27:28<3:34:37, 14.58s/batch, batch_loss=24.2, batch_index=108, batch_size=256]

Epoch 3/10:  11%|█▌            | 108/991 [27:43<3:34:37, 14.58s/batch, batch_loss=12.3, batch_index=109, batch_size=256]

Epoch 3/10:  11%|█▌            | 109/991 [27:43<3:36:10, 14.71s/batch, batch_loss=12.3, batch_index=109, batch_size=256]

Epoch 3/10:  11%|█▌            | 109/991 [27:57<3:36:10, 14.71s/batch, batch_loss=13.8, batch_index=110, batch_size=256]

Epoch 3/10:  11%|█▌            | 110/991 [27:57<3:33:48, 14.56s/batch, batch_loss=13.8, batch_index=110, batch_size=256]

Epoch 3/10:  11%|█▌            | 110/991 [28:12<3:33:48, 14.56s/batch, batch_loss=18.4, batch_index=111, batch_size=256]

Epoch 3/10:  11%|█▌            | 111/991 [28:12<3:37:59, 14.86s/batch, batch_loss=18.4, batch_index=111, batch_size=256]

Epoch 3/10:  11%|█▌            | 111/991 [28:27<3:37:59, 14.86s/batch, batch_loss=16.9, batch_index=112, batch_size=256]

Epoch 3/10:  11%|█▌            | 112/991 [28:27<3:38:27, 14.91s/batch, batch_loss=16.9, batch_index=112, batch_size=256]

Epoch 3/10:  11%|█▌            | 112/991 [28:43<3:38:27, 14.91s/batch, batch_loss=8.71, batch_index=113, batch_size=256]

Epoch 3/10:  11%|█▌            | 113/991 [28:43<3:39:38, 15.01s/batch, batch_loss=8.71, batch_index=113, batch_size=256]

Epoch 3/10:  11%|█▌            | 113/991 [28:58<3:39:38, 15.01s/batch, batch_loss=15.5, batch_index=114, batch_size=256]

Epoch 3/10:  12%|█▌            | 114/991 [28:58<3:41:17, 15.14s/batch, batch_loss=15.5, batch_index=114, batch_size=256]

Epoch 3/10:  12%|█▌            | 114/991 [29:13<3:41:17, 15.14s/batch, batch_loss=22.6, batch_index=115, batch_size=256]

Epoch 3/10:  12%|█▌            | 115/991 [29:13<3:38:57, 15.00s/batch, batch_loss=22.6, batch_index=115, batch_size=256]

Epoch 3/10:  12%|█▌            | 115/991 [29:28<3:38:57, 15.00s/batch, batch_loss=10.4, batch_index=116, batch_size=256]

Epoch 3/10:  12%|█▋            | 116/991 [29:28<3:37:58, 14.95s/batch, batch_loss=10.4, batch_index=116, batch_size=256]

Epoch 3/10:  12%|█▋            | 116/991 [29:42<3:37:58, 14.95s/batch, batch_loss=16.9, batch_index=117, batch_size=256]

Epoch 3/10:  12%|█▋            | 117/991 [29:42<3:37:34, 14.94s/batch, batch_loss=16.9, batch_index=117, batch_size=256]

Epoch 3/10:  12%|█▋            | 117/991 [29:57<3:37:34, 14.94s/batch, batch_loss=14.6, batch_index=118, batch_size=256]

Epoch 3/10:  12%|█▋            | 118/991 [29:57<3:36:20, 14.87s/batch, batch_loss=14.6, batch_index=118, batch_size=256]

Epoch 3/10:  12%|█▋            | 118/991 [30:12<3:36:20, 14.87s/batch, batch_loss=27.4, batch_index=119, batch_size=256]

Epoch 3/10:  12%|█▋            | 119/991 [30:12<3:36:08, 14.87s/batch, batch_loss=27.4, batch_index=119, batch_size=256]

Epoch 3/10:  12%|█▋            | 119/991 [30:27<3:36:08, 14.87s/batch, batch_loss=17.1, batch_index=120, batch_size=256]

Epoch 3/10:  12%|█▋            | 120/991 [30:27<3:35:59, 14.88s/batch, batch_loss=17.1, batch_index=120, batch_size=256]

Epoch 3/10:  12%|█▋            | 120/991 [30:42<3:35:59, 14.88s/batch, batch_loss=22.6, batch_index=121, batch_size=256]

Epoch 3/10:  12%|█▋            | 121/991 [30:42<3:35:57, 14.89s/batch, batch_loss=22.6, batch_index=121, batch_size=256]

Epoch 3/10:  12%|█▋            | 121/991 [30:56<3:35:57, 14.89s/batch, batch_loss=9.93, batch_index=122, batch_size=256]

Epoch 3/10:  12%|█▋            | 122/991 [30:56<3:33:19, 14.73s/batch, batch_loss=9.93, batch_index=122, batch_size=256]

Epoch 3/10:  12%|█▋            | 122/991 [31:12<3:33:19, 14.73s/batch, batch_loss=15.3, batch_index=123, batch_size=256]

Epoch 3/10:  12%|█▋            | 123/991 [31:12<3:36:41, 14.98s/batch, batch_loss=15.3, batch_index=123, batch_size=256]

Epoch 3/10:  12%|█▎         | 123/991 [31:27<3:36:41, 14.98s/batch, batch_loss=3.47e+3, batch_index=124, batch_size=256]

Epoch 3/10:  13%|█▍         | 124/991 [31:27<3:36:50, 15.01s/batch, batch_loss=3.47e+3, batch_index=124, batch_size=256]

Epoch 3/10:  13%|█▊            | 124/991 [31:42<3:36:50, 15.01s/batch, batch_loss=7.82, batch_index=125, batch_size=256]

Epoch 3/10:  13%|█▊            | 125/991 [31:42<3:38:47, 15.16s/batch, batch_loss=7.82, batch_index=125, batch_size=256]

Epoch 3/10:  13%|█▊            | 125/991 [31:59<3:38:47, 15.16s/batch, batch_loss=11.5, batch_index=126, batch_size=256]

Epoch 3/10:  13%|█▊            | 126/991 [31:59<3:44:10, 15.55s/batch, batch_loss=11.5, batch_index=126, batch_size=256]

Epoch 3/10:  13%|█▍         | 126/991 [32:14<3:44:10, 15.55s/batch, batch_loss=1.88e+3, batch_index=127, batch_size=256]

Epoch 3/10:  13%|█▍         | 127/991 [32:14<3:44:25, 15.59s/batch, batch_loss=1.88e+3, batch_index=127, batch_size=256]

Epoch 3/10:  13%|█▍         | 127/991 [32:30<3:44:25, 15.59s/batch, batch_loss=1.56e+3, batch_index=128, batch_size=256]

Epoch 3/10:  13%|█▍         | 128/991 [32:30<3:42:17, 15.45s/batch, batch_loss=1.56e+3, batch_index=128, batch_size=256]

Epoch 3/10:  13%|█▉             | 128/991 [32:45<3:42:17, 15.45s/batch, batch_loss=223, batch_index=129, batch_size=256]

Epoch 3/10:  13%|█▉             | 129/991 [32:45<3:40:33, 15.35s/batch, batch_loss=223, batch_index=129, batch_size=256]

Epoch 3/10:  13%|█▉             | 129/991 [33:06<3:40:33, 15.35s/batch, batch_loss=990, batch_index=130, batch_size=256]

Epoch 3/10:  13%|█▉             | 130/991 [33:06<4:05:11, 17.09s/batch, batch_loss=990, batch_index=130, batch_size=256]

Epoch 3/10:  13%|█▍         | 130/991 [33:23<4:05:11, 17.09s/batch, batch_loss=8.33e+3, batch_index=131, batch_size=256]

Epoch 3/10:  13%|█▍         | 131/991 [33:23<4:03:26, 16.98s/batch, batch_loss=8.33e+3, batch_index=131, batch_size=256]

Epoch 3/10:  13%|█▊            | 131/991 [33:38<4:03:26, 16.98s/batch, batch_loss=17.6, batch_index=132, batch_size=256]

Epoch 3/10:  13%|█▊            | 132/991 [33:38<3:55:48, 16.47s/batch, batch_loss=17.6, batch_index=132, batch_size=256]

Epoch 3/10:  13%|█▊            | 132/991 [33:54<3:55:48, 16.47s/batch, batch_loss=7.84, batch_index=133, batch_size=256]

Epoch 3/10:  13%|█▉            | 133/991 [33:54<3:52:15, 16.24s/batch, batch_loss=7.84, batch_index=133, batch_size=256]

Epoch 3/10:  13%|█▉            | 133/991 [34:09<3:52:15, 16.24s/batch, batch_loss=12.3, batch_index=134, batch_size=256]

Epoch 3/10:  14%|█▉            | 134/991 [34:09<3:48:37, 16.01s/batch, batch_loss=12.3, batch_index=134, batch_size=256]

Epoch 3/10:  14%|██▏             | 134/991 [34:23<3:48:37, 16.01s/batch, batch_loss=19, batch_index=135, batch_size=256]

Epoch 3/10:  14%|██▏             | 135/991 [34:23<3:38:38, 15.33s/batch, batch_loss=19, batch_index=135, batch_size=256]

Epoch 3/10:  14%|█▉            | 135/991 [34:37<3:38:38, 15.33s/batch, batch_loss=7.99, batch_index=136, batch_size=256]

Epoch 3/10:  14%|█▉            | 136/991 [34:37<3:33:37, 14.99s/batch, batch_loss=7.99, batch_index=136, batch_size=256]

Epoch 3/10:  14%|█▉            | 136/991 [34:52<3:33:37, 14.99s/batch, batch_loss=12.7, batch_index=137, batch_size=256]

Epoch 3/10:  14%|█▉            | 137/991 [34:52<3:34:02, 15.04s/batch, batch_loss=12.7, batch_index=137, batch_size=256]

Epoch 3/10:  14%|██▏             | 137/991 [35:07<3:34:02, 15.04s/batch, batch_loss=17, batch_index=138, batch_size=256]

Epoch 3/10:  14%|██▏             | 138/991 [35:07<3:34:23, 15.08s/batch, batch_loss=17, batch_index=138, batch_size=256]

Epoch 3/10:  14%|█▉            | 138/991 [35:22<3:34:23, 15.08s/batch, batch_loss=6.94, batch_index=139, batch_size=256]

Epoch 3/10:  14%|█▉            | 139/991 [35:22<3:33:13, 15.02s/batch, batch_loss=6.94, batch_index=139, batch_size=256]

Epoch 3/10:  14%|█▉            | 139/991 [35:37<3:33:13, 15.02s/batch, batch_loss=10.9, batch_index=140, batch_size=256]

Epoch 3/10:  14%|█▉            | 140/991 [35:37<3:34:01, 15.09s/batch, batch_loss=10.9, batch_index=140, batch_size=256]

Epoch 3/10:  14%|██             | 140/991 [35:55<3:34:01, 15.09s/batch, batch_loss=5.9, batch_index=141, batch_size=256]

Epoch 3/10:  14%|██▏            | 141/991 [35:55<3:42:37, 15.71s/batch, batch_loss=5.9, batch_index=141, batch_size=256]

Epoch 3/10:  14%|█▉            | 141/991 [36:09<3:42:37, 15.71s/batch, batch_loss=7.23, batch_index=142, batch_size=256]

Epoch 3/10:  14%|██            | 142/991 [36:09<3:35:42, 15.24s/batch, batch_loss=7.23, batch_index=142, batch_size=256]

Epoch 3/10:  14%|██            | 142/991 [36:24<3:35:42, 15.24s/batch, batch_loss=13.2, batch_index=143, batch_size=256]

Epoch 3/10:  14%|██            | 143/991 [36:24<3:35:14, 15.23s/batch, batch_loss=13.2, batch_index=143, batch_size=256]

Epoch 3/10:  14%|██            | 143/991 [36:38<3:35:14, 15.23s/batch, batch_loss=15.5, batch_index=144, batch_size=256]

Epoch 3/10:  15%|██            | 144/991 [36:38<3:30:30, 14.91s/batch, batch_loss=15.5, batch_index=144, batch_size=256]

Epoch 3/10:  15%|██            | 144/991 [36:56<3:30:30, 14.91s/batch, batch_loss=15.2, batch_index=145, batch_size=256]

Epoch 3/10:  15%|██            | 145/991 [36:56<3:41:37, 15.72s/batch, batch_loss=15.2, batch_index=145, batch_size=256]

Epoch 3/10:  15%|██            | 145/991 [37:11<3:41:37, 15.72s/batch, batch_loss=13.7, batch_index=146, batch_size=256]

Epoch 3/10:  15%|██            | 146/991 [37:11<3:37:37, 15.45s/batch, batch_loss=13.7, batch_index=146, batch_size=256]

Epoch 3/10:  15%|██            | 146/991 [37:26<3:37:37, 15.45s/batch, batch_loss=7.88, batch_index=147, batch_size=256]

Epoch 3/10:  15%|██            | 147/991 [37:26<3:36:23, 15.38s/batch, batch_loss=7.88, batch_index=147, batch_size=256]

Epoch 3/10:  15%|██            | 147/991 [37:41<3:36:23, 15.38s/batch, batch_loss=20.7, batch_index=148, batch_size=256]

Epoch 3/10:  15%|██            | 148/991 [37:41<3:33:49, 15.22s/batch, batch_loss=20.7, batch_index=148, batch_size=256]

Epoch 3/10:  15%|██            | 148/991 [37:55<3:33:49, 15.22s/batch, batch_loss=10.7, batch_index=149, batch_size=256]

Epoch 3/10:  15%|██            | 149/991 [37:55<3:31:44, 15.09s/batch, batch_loss=10.7, batch_index=149, batch_size=256]

Epoch 3/10:  15%|██            | 149/991 [38:10<3:31:44, 15.09s/batch, batch_loss=9.38, batch_index=150, batch_size=256]

Epoch 3/10:  15%|██            | 150/991 [38:10<3:27:52, 14.83s/batch, batch_loss=9.38, batch_index=150, batch_size=256]

Epoch 3/10:  15%|██            | 150/991 [38:25<3:27:52, 14.83s/batch, batch_loss=17.9, batch_index=151, batch_size=256]

Epoch 3/10:  15%|██▏           | 151/991 [38:25<3:27:58, 14.86s/batch, batch_loss=17.9, batch_index=151, batch_size=256]

Epoch 3/10:  15%|██▏           | 151/991 [38:40<3:27:58, 14.86s/batch, batch_loss=15.9, batch_index=152, batch_size=256]

Epoch 3/10:  15%|██▏           | 152/991 [38:40<3:29:25, 14.98s/batch, batch_loss=15.9, batch_index=152, batch_size=256]

Epoch 3/10:  15%|██▏           | 152/991 [38:56<3:29:25, 14.98s/batch, batch_loss=18.2, batch_index=153, batch_size=256]

Epoch 3/10:  15%|██▏           | 153/991 [38:56<3:33:44, 15.30s/batch, batch_loss=18.2, batch_index=153, batch_size=256]

Epoch 3/10:  15%|██▏           | 153/991 [39:14<3:33:44, 15.30s/batch, batch_loss=20.9, batch_index=154, batch_size=256]

Epoch 3/10:  16%|██▏           | 154/991 [39:14<3:45:53, 16.19s/batch, batch_loss=20.9, batch_index=154, batch_size=256]

Epoch 3/10:  16%|██▏           | 154/991 [39:29<3:45:53, 16.19s/batch, batch_loss=20.6, batch_index=155, batch_size=256]

Epoch 3/10:  16%|██▏           | 155/991 [39:29<3:39:28, 15.75s/batch, batch_loss=20.6, batch_index=155, batch_size=256]

Epoch 3/10:  16%|██▏           | 155/991 [39:44<3:39:28, 15.75s/batch, batch_loss=7.55, batch_index=156, batch_size=256]

Epoch 3/10:  16%|██▏           | 156/991 [39:44<3:36:34, 15.56s/batch, batch_loss=7.55, batch_index=156, batch_size=256]

Epoch 3/10:  16%|██▏           | 156/991 [39:59<3:36:34, 15.56s/batch, batch_loss=22.9, batch_index=157, batch_size=256]

Epoch 3/10:  16%|██▏           | 157/991 [39:59<3:33:21, 15.35s/batch, batch_loss=22.9, batch_index=157, batch_size=256]

Epoch 3/10:  16%|██▏           | 157/991 [40:14<3:33:21, 15.35s/batch, batch_loss=7.83, batch_index=158, batch_size=256]

Epoch 3/10:  16%|██▏           | 158/991 [40:14<3:31:20, 15.22s/batch, batch_loss=7.83, batch_index=158, batch_size=256]

Epoch 3/10:  16%|██▏           | 158/991 [40:29<3:31:20, 15.22s/batch, batch_loss=6.16, batch_index=159, batch_size=256]

Epoch 3/10:  16%|██▏           | 159/991 [40:29<3:30:48, 15.20s/batch, batch_loss=6.16, batch_index=159, batch_size=256]

Epoch 3/10:  16%|██▏           | 159/991 [40:44<3:30:48, 15.20s/batch, batch_loss=12.2, batch_index=160, batch_size=256]

Epoch 3/10:  16%|██▎           | 160/991 [40:44<3:31:11, 15.25s/batch, batch_loss=12.2, batch_index=160, batch_size=256]

Epoch 3/10:  16%|██▍            | 160/991 [41:02<3:31:11, 15.25s/batch, batch_loss=457, batch_index=161, batch_size=256]

Epoch 3/10:  16%|██▍            | 161/991 [41:02<3:39:55, 15.90s/batch, batch_loss=457, batch_index=161, batch_size=256]

Epoch 3/10:  16%|██▎           | 161/991 [41:17<3:39:55, 15.90s/batch, batch_loss=14.6, batch_index=162, batch_size=256]

Epoch 3/10:  16%|██▎           | 162/991 [41:17<3:37:11, 15.72s/batch, batch_loss=14.6, batch_index=162, batch_size=256]

Epoch 3/10:  16%|██▎           | 162/991 [41:31<3:37:11, 15.72s/batch, batch_loss=7.86, batch_index=163, batch_size=256]

Epoch 3/10:  16%|██▎           | 163/991 [41:31<3:30:33, 15.26s/batch, batch_loss=7.86, batch_index=163, batch_size=256]

Epoch 3/10:  16%|██▎           | 163/991 [41:46<3:30:33, 15.26s/batch, batch_loss=11.3, batch_index=164, batch_size=256]

Epoch 3/10:  17%|██▎           | 164/991 [41:46<3:29:09, 15.17s/batch, batch_loss=11.3, batch_index=164, batch_size=256]

Epoch 3/10:  17%|██▎           | 164/991 [42:01<3:29:09, 15.17s/batch, batch_loss=10.4, batch_index=165, batch_size=256]

Epoch 3/10:  17%|██▎           | 165/991 [42:01<3:26:26, 15.00s/batch, batch_loss=10.4, batch_index=165, batch_size=256]

Epoch 3/10:  17%|██▎           | 165/991 [42:16<3:26:26, 15.00s/batch, batch_loss=11.6, batch_index=166, batch_size=256]

Epoch 3/10:  17%|██▎           | 166/991 [42:16<3:25:29, 14.95s/batch, batch_loss=11.6, batch_index=166, batch_size=256]

Epoch 3/10:  17%|██▎           | 166/991 [42:31<3:25:29, 14.95s/batch, batch_loss=18.3, batch_index=167, batch_size=256]

Epoch 3/10:  17%|██▎           | 167/991 [42:31<3:27:35, 15.12s/batch, batch_loss=18.3, batch_index=167, batch_size=256]

Epoch 3/10:  17%|██▋             | 167/991 [42:46<3:27:35, 15.12s/batch, batch_loss=12, batch_index=168, batch_size=256]

Epoch 3/10:  17%|██▋             | 168/991 [42:46<3:26:45, 15.07s/batch, batch_loss=12, batch_index=168, batch_size=256]

Epoch 3/10:  17%|██▎           | 168/991 [43:01<3:26:45, 15.07s/batch, batch_loss=13.3, batch_index=169, batch_size=256]

Epoch 3/10:  17%|██▍           | 169/991 [43:01<3:25:17, 14.99s/batch, batch_loss=13.3, batch_index=169, batch_size=256]

Epoch 3/10:  17%|██▍           | 169/991 [43:17<3:25:17, 14.99s/batch, batch_loss=9.47, batch_index=170, batch_size=256]

Epoch 3/10:  17%|██▍           | 170/991 [43:17<3:28:34, 15.24s/batch, batch_loss=9.47, batch_index=170, batch_size=256]

Epoch 3/10:  17%|██▍           | 170/991 [43:32<3:28:34, 15.24s/batch, batch_loss=4.37, batch_index=171, batch_size=256]

Epoch 3/10:  17%|██▍           | 171/991 [43:32<3:30:13, 15.38s/batch, batch_loss=4.37, batch_index=171, batch_size=256]

Epoch 3/10:  17%|██▍           | 171/991 [43:48<3:30:13, 15.38s/batch, batch_loss=8.18, batch_index=172, batch_size=256]

Epoch 3/10:  17%|██▍           | 172/991 [43:48<3:29:53, 15.38s/batch, batch_loss=8.18, batch_index=172, batch_size=256]

Epoch 3/10:  17%|██▍           | 172/991 [44:02<3:29:53, 15.38s/batch, batch_loss=7.06, batch_index=173, batch_size=256]

Epoch 3/10:  17%|██▍           | 173/991 [44:02<3:25:58, 15.11s/batch, batch_loss=7.06, batch_index=173, batch_size=256]

Epoch 3/10:  17%|█▉         | 173/991 [44:17<3:25:58, 15.11s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 3/10:  18%|█▉         | 174/991 [44:17<3:24:24, 15.01s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 3/10:  18%|██▍           | 174/991 [44:31<3:24:24, 15.01s/batch, batch_loss=19.4, batch_index=175, batch_size=256]

Epoch 3/10:  18%|██▍           | 175/991 [44:31<3:18:38, 14.61s/batch, batch_loss=19.4, batch_index=175, batch_size=256]

Epoch 3/10:  18%|██▍           | 175/991 [44:45<3:18:38, 14.61s/batch, batch_loss=25.7, batch_index=176, batch_size=256]

Epoch 3/10:  18%|██▍           | 176/991 [44:45<3:18:26, 14.61s/batch, batch_loss=25.7, batch_index=176, batch_size=256]

Epoch 3/10:  18%|██▍           | 176/991 [45:00<3:18:26, 14.61s/batch, batch_loss=25.8, batch_index=177, batch_size=256]

Epoch 3/10:  18%|██▌           | 177/991 [45:00<3:17:15, 14.54s/batch, batch_loss=25.8, batch_index=177, batch_size=256]

Epoch 3/10:  18%|██▌           | 177/991 [45:14<3:17:15, 14.54s/batch, batch_loss=21.8, batch_index=178, batch_size=256]

Epoch 3/10:  18%|██▌           | 178/991 [45:14<3:16:59, 14.54s/batch, batch_loss=21.8, batch_index=178, batch_size=256]

Epoch 3/10:  18%|██▌           | 178/991 [45:29<3:16:59, 14.54s/batch, batch_loss=12.2, batch_index=179, batch_size=256]

Epoch 3/10:  18%|██▌           | 179/991 [45:29<3:16:21, 14.51s/batch, batch_loss=12.2, batch_index=179, batch_size=256]

Epoch 3/10:  18%|██▌           | 179/991 [45:44<3:16:21, 14.51s/batch, batch_loss=6.97, batch_index=180, batch_size=256]

Epoch 3/10:  18%|██▌           | 180/991 [45:44<3:17:47, 14.63s/batch, batch_loss=6.97, batch_index=180, batch_size=256]

Epoch 3/10:  18%|█▉         | 180/991 [45:58<3:17:47, 14.63s/batch, batch_loss=2.51e+4, batch_index=181, batch_size=256]

Epoch 3/10:  18%|██         | 181/991 [45:58<3:16:36, 14.56s/batch, batch_loss=2.51e+4, batch_index=181, batch_size=256]

Epoch 3/10:  18%|██▌           | 181/991 [46:13<3:16:36, 14.56s/batch, batch_loss=11.4, batch_index=182, batch_size=256]

Epoch 3/10:  18%|██▌           | 182/991 [46:13<3:19:15, 14.78s/batch, batch_loss=11.4, batch_index=182, batch_size=256]

Epoch 3/10:  18%|██▌           | 182/991 [46:28<3:19:15, 14.78s/batch, batch_loss=17.8, batch_index=183, batch_size=256]

Epoch 3/10:  18%|██▌           | 183/991 [46:28<3:19:17, 14.80s/batch, batch_loss=17.8, batch_index=183, batch_size=256]

Epoch 3/10:  18%|██▌           | 183/991 [46:43<3:19:17, 14.80s/batch, batch_loss=18.2, batch_index=184, batch_size=256]

Epoch 3/10:  19%|██▌           | 184/991 [46:43<3:18:47, 14.78s/batch, batch_loss=18.2, batch_index=184, batch_size=256]

Epoch 3/10:  19%|██▌           | 184/991 [46:57<3:18:47, 14.78s/batch, batch_loss=13.5, batch_index=185, batch_size=256]

Epoch 3/10:  19%|██▌           | 185/991 [46:57<3:17:47, 14.72s/batch, batch_loss=13.5, batch_index=185, batch_size=256]

Epoch 3/10:  19%|██▉             | 185/991 [47:14<3:17:47, 14.72s/batch, batch_loss=17, batch_index=186, batch_size=256]

Epoch 3/10:  19%|███             | 186/991 [47:14<3:25:45, 15.34s/batch, batch_loss=17, batch_index=186, batch_size=256]

Epoch 3/10:  19%|███             | 186/991 [47:29<3:25:45, 15.34s/batch, batch_loss=14, batch_index=187, batch_size=256]

Epoch 3/10:  19%|███             | 187/991 [47:29<3:25:01, 15.30s/batch, batch_loss=14, batch_index=187, batch_size=256]

Epoch 3/10:  19%|██▋           | 187/991 [47:44<3:25:01, 15.30s/batch, batch_loss=16.1, batch_index=188, batch_size=256]

Epoch 3/10:  19%|██▋           | 188/991 [47:44<3:22:43, 15.15s/batch, batch_loss=16.1, batch_index=188, batch_size=256]

Epoch 3/10:  19%|██▋           | 188/991 [47:59<3:22:43, 15.15s/batch, batch_loss=17.8, batch_index=189, batch_size=256]

Epoch 3/10:  19%|██▋           | 189/991 [47:59<3:20:12, 14.98s/batch, batch_loss=17.8, batch_index=189, batch_size=256]

Epoch 3/10:  19%|██▋           | 189/991 [48:14<3:20:12, 14.98s/batch, batch_loss=20.6, batch_index=190, batch_size=256]

Epoch 3/10:  19%|██▋           | 190/991 [48:14<3:19:16, 14.93s/batch, batch_loss=20.6, batch_index=190, batch_size=256]

Epoch 3/10:  19%|██▋           | 190/991 [48:28<3:19:16, 14.93s/batch, batch_loss=16.7, batch_index=191, batch_size=256]

Epoch 3/10:  19%|██▋           | 191/991 [48:28<3:18:22, 14.88s/batch, batch_loss=16.7, batch_index=191, batch_size=256]

Epoch 3/10:  19%|██▋           | 191/991 [48:44<3:18:22, 14.88s/batch, batch_loss=10.5, batch_index=192, batch_size=256]

Epoch 3/10:  19%|██▋           | 192/991 [48:44<3:19:52, 15.01s/batch, batch_loss=10.5, batch_index=192, batch_size=256]

Epoch 3/10:  19%|██▋           | 192/991 [48:59<3:19:52, 15.01s/batch, batch_loss=18.8, batch_index=193, batch_size=256]

Epoch 3/10:  19%|██▋           | 193/991 [48:59<3:20:57, 15.11s/batch, batch_loss=18.8, batch_index=193, batch_size=256]

Epoch 3/10:  19%|██▉            | 193/991 [49:14<3:20:57, 15.11s/batch, batch_loss=8.2, batch_index=194, batch_size=256]

Epoch 3/10:  20%|██▉            | 194/991 [49:14<3:19:59, 15.06s/batch, batch_loss=8.2, batch_index=194, batch_size=256]

Epoch 3/10:  20%|██▋           | 194/991 [49:29<3:19:59, 15.06s/batch, batch_loss=3.84, batch_index=195, batch_size=256]

Epoch 3/10:  20%|██▊           | 195/991 [49:29<3:20:08, 15.09s/batch, batch_loss=3.84, batch_index=195, batch_size=256]

Epoch 3/10:  20%|██▊           | 195/991 [49:47<3:20:08, 15.09s/batch, batch_loss=5.25, batch_index=196, batch_size=256]

Epoch 3/10:  20%|██▊           | 196/991 [49:47<3:29:09, 15.79s/batch, batch_loss=5.25, batch_index=196, batch_size=256]

Epoch 3/10:  20%|██▊           | 196/991 [50:02<3:29:09, 15.79s/batch, batch_loss=12.2, batch_index=197, batch_size=256]

Epoch 3/10:  20%|██▊           | 197/991 [50:02<3:27:42, 15.70s/batch, batch_loss=12.2, batch_index=197, batch_size=256]

Epoch 3/10:  20%|██▊           | 197/991 [50:18<3:27:42, 15.70s/batch, batch_loss=8.43, batch_index=198, batch_size=256]

Epoch 3/10:  20%|██▊           | 198/991 [50:18<3:28:35, 15.78s/batch, batch_loss=8.43, batch_index=198, batch_size=256]

Epoch 3/10:  20%|██▊           | 198/991 [50:33<3:28:35, 15.78s/batch, batch_loss=14.8, batch_index=199, batch_size=256]

Epoch 3/10:  20%|██▊           | 199/991 [50:33<3:27:06, 15.69s/batch, batch_loss=14.8, batch_index=199, batch_size=256]

Epoch 3/10:  20%|██▊           | 199/991 [50:49<3:27:06, 15.69s/batch, batch_loss=7.35, batch_index=200, batch_size=256]

Epoch 3/10:  20%|██▊           | 200/991 [50:49<3:27:32, 15.74s/batch, batch_loss=7.35, batch_index=200, batch_size=256]

Epoch 3/10:  20%|██▊           | 200/991 [51:07<3:27:32, 15.74s/batch, batch_loss=12.4, batch_index=201, batch_size=256]

Epoch 3/10:  20%|██▊           | 201/991 [51:07<3:33:01, 16.18s/batch, batch_loss=12.4, batch_index=201, batch_size=256]

Epoch 3/10:  20%|██▊           | 201/991 [51:23<3:33:01, 16.18s/batch, batch_loss=10.5, batch_index=202, batch_size=256]

Epoch 3/10:  20%|██▊           | 202/991 [51:23<3:32:15, 16.14s/batch, batch_loss=10.5, batch_index=202, batch_size=256]

Epoch 3/10:  20%|██▊           | 202/991 [51:39<3:32:15, 16.14s/batch, batch_loss=13.7, batch_index=203, batch_size=256]

Epoch 3/10:  20%|██▊           | 203/991 [51:39<3:32:35, 16.19s/batch, batch_loss=13.7, batch_index=203, batch_size=256]

Epoch 3/10:  20%|██▊           | 203/991 [51:55<3:32:35, 16.19s/batch, batch_loss=19.5, batch_index=204, batch_size=256]

Epoch 3/10:  21%|██▉           | 204/991 [51:55<3:31:08, 16.10s/batch, batch_loss=19.5, batch_index=204, batch_size=256]

Epoch 3/10:  21%|██▉           | 204/991 [52:11<3:31:08, 16.10s/batch, batch_loss=16.1, batch_index=205, batch_size=256]

Epoch 3/10:  21%|██▉           | 205/991 [52:11<3:31:26, 16.14s/batch, batch_loss=16.1, batch_index=205, batch_size=256]

Epoch 3/10:  21%|██▉           | 205/991 [52:27<3:31:26, 16.14s/batch, batch_loss=7.64, batch_index=206, batch_size=256]

Epoch 3/10:  21%|██▉           | 206/991 [52:27<3:29:47, 16.03s/batch, batch_loss=7.64, batch_index=206, batch_size=256]

Epoch 3/10:  21%|██▉           | 206/991 [52:43<3:29:47, 16.03s/batch, batch_loss=8.67, batch_index=207, batch_size=256]

Epoch 3/10:  21%|██▉           | 207/991 [52:43<3:29:46, 16.05s/batch, batch_loss=8.67, batch_index=207, batch_size=256]

Epoch 3/10:  21%|██▉           | 207/991 [52:58<3:29:46, 16.05s/batch, batch_loss=11.8, batch_index=208, batch_size=256]

Epoch 3/10:  21%|██▉           | 208/991 [52:58<3:26:45, 15.84s/batch, batch_loss=11.8, batch_index=208, batch_size=256]

Epoch 3/10:  21%|██▉           | 208/991 [53:14<3:26:45, 15.84s/batch, batch_loss=8.53, batch_index=209, batch_size=256]

Epoch 3/10:  21%|██▉           | 209/991 [53:14<3:26:58, 15.88s/batch, batch_loss=8.53, batch_index=209, batch_size=256]

Epoch 3/10:  21%|██▉           | 209/991 [53:30<3:26:58, 15.88s/batch, batch_loss=18.8, batch_index=210, batch_size=256]

Epoch 3/10:  21%|██▉           | 210/991 [53:30<3:25:44, 15.81s/batch, batch_loss=18.8, batch_index=210, batch_size=256]

Epoch 3/10:  21%|██▉           | 210/991 [53:45<3:25:44, 15.81s/batch, batch_loss=10.8, batch_index=211, batch_size=256]

Epoch 3/10:  21%|██▉           | 211/991 [53:45<3:24:12, 15.71s/batch, batch_loss=10.8, batch_index=211, batch_size=256]

Epoch 3/10:  21%|██▉           | 211/991 [54:01<3:24:12, 15.71s/batch, batch_loss=14.1, batch_index=212, batch_size=256]

Epoch 3/10:  21%|██▉           | 212/991 [54:01<3:24:10, 15.73s/batch, batch_loss=14.1, batch_index=212, batch_size=256]

Epoch 3/10:  21%|██▉           | 212/991 [54:18<3:24:10, 15.73s/batch, batch_loss=2.77, batch_index=213, batch_size=256]

Epoch 3/10:  21%|███           | 213/991 [54:18<3:27:22, 15.99s/batch, batch_loss=2.77, batch_index=213, batch_size=256]

Epoch 3/10:  21%|███           | 213/991 [54:34<3:27:22, 15.99s/batch, batch_loss=11.9, batch_index=214, batch_size=256]

Epoch 3/10:  22%|███           | 214/991 [54:34<3:26:58, 15.98s/batch, batch_loss=11.9, batch_index=214, batch_size=256]

Epoch 3/10:  22%|███           | 214/991 [54:48<3:26:58, 15.98s/batch, batch_loss=14.6, batch_index=215, batch_size=256]

Epoch 3/10:  22%|███           | 215/991 [54:48<3:20:18, 15.49s/batch, batch_loss=14.6, batch_index=215, batch_size=256]

Epoch 3/10:  22%|███           | 215/991 [55:03<3:20:18, 15.49s/batch, batch_loss=9.06, batch_index=216, batch_size=256]

Epoch 3/10:  22%|███           | 216/991 [55:03<3:19:39, 15.46s/batch, batch_loss=9.06, batch_index=216, batch_size=256]

Epoch 3/10:  22%|███           | 216/991 [55:20<3:19:39, 15.46s/batch, batch_loss=11.3, batch_index=217, batch_size=256]

Epoch 3/10:  22%|███           | 217/991 [55:20<3:24:48, 15.88s/batch, batch_loss=11.3, batch_index=217, batch_size=256]

Epoch 3/10:  22%|███           | 217/991 [55:36<3:24:48, 15.88s/batch, batch_loss=18.4, batch_index=218, batch_size=256]

Epoch 3/10:  22%|███           | 218/991 [55:36<3:24:18, 15.86s/batch, batch_loss=18.4, batch_index=218, batch_size=256]

Epoch 3/10:  22%|███           | 218/991 [55:51<3:24:18, 15.86s/batch, batch_loss=18.7, batch_index=219, batch_size=256]

Epoch 3/10:  22%|███           | 219/991 [55:51<3:20:50, 15.61s/batch, batch_loss=18.7, batch_index=219, batch_size=256]

Epoch 3/10:  22%|███           | 219/991 [56:06<3:20:50, 15.61s/batch, batch_loss=21.6, batch_index=220, batch_size=256]

Epoch 3/10:  22%|███           | 220/991 [56:06<3:19:27, 15.52s/batch, batch_loss=21.6, batch_index=220, batch_size=256]

Epoch 3/10:  22%|███▌            | 220/991 [56:21<3:19:27, 15.52s/batch, batch_loss=20, batch_index=221, batch_size=256]

Epoch 3/10:  22%|███▌            | 221/991 [56:21<3:14:42, 15.17s/batch, batch_loss=20, batch_index=221, batch_size=256]

Epoch 3/10:  22%|███           | 221/991 [56:36<3:14:42, 15.17s/batch, batch_loss=14.1, batch_index=222, batch_size=256]

Epoch 3/10:  22%|███▏          | 222/991 [56:36<3:14:36, 15.18s/batch, batch_loss=14.1, batch_index=222, batch_size=256]

Epoch 3/10:  22%|███▌            | 222/991 [56:51<3:14:36, 15.18s/batch, batch_loss=20, batch_index=223, batch_size=256]

Epoch 3/10:  23%|███▌            | 223/991 [56:51<3:14:17, 15.18s/batch, batch_loss=20, batch_index=223, batch_size=256]

Epoch 3/10:  23%|███▏          | 223/991 [57:06<3:14:17, 15.18s/batch, batch_loss=12.5, batch_index=224, batch_size=256]

Epoch 3/10:  23%|███▏          | 224/991 [57:06<3:11:12, 14.96s/batch, batch_loss=12.5, batch_index=224, batch_size=256]

Epoch 3/10:  23%|███▌            | 224/991 [57:23<3:11:12, 14.96s/batch, batch_loss=10, batch_index=225, batch_size=256]

Epoch 3/10:  23%|███▋            | 225/991 [57:23<3:21:36, 15.79s/batch, batch_loss=10, batch_index=225, batch_size=256]

Epoch 3/10:  23%|███▏          | 225/991 [57:39<3:21:36, 15.79s/batch, batch_loss=20.1, batch_index=226, batch_size=256]

Epoch 3/10:  23%|███▏          | 226/991 [57:39<3:21:29, 15.80s/batch, batch_loss=20.1, batch_index=226, batch_size=256]

Epoch 3/10:  23%|██▌        | 226/991 [57:55<3:21:29, 15.80s/batch, batch_loss=2.41e+3, batch_index=227, batch_size=256]

Epoch 3/10:  23%|██▌        | 227/991 [57:55<3:20:33, 15.75s/batch, batch_loss=2.41e+3, batch_index=227, batch_size=256]

Epoch 3/10:  23%|██▌        | 227/991 [58:10<3:20:33, 15.75s/batch, batch_loss=3.61e+3, batch_index=228, batch_size=256]

Epoch 3/10:  23%|██▌        | 228/991 [58:10<3:19:56, 15.72s/batch, batch_loss=3.61e+3, batch_index=228, batch_size=256]

Epoch 3/10:  23%|███▏          | 228/991 [58:25<3:19:56, 15.72s/batch, batch_loss=12.8, batch_index=229, batch_size=256]

Epoch 3/10:  23%|███▏          | 229/991 [58:25<3:15:29, 15.39s/batch, batch_loss=12.8, batch_index=229, batch_size=256]

Epoch 3/10:  23%|███▏          | 229/991 [58:41<3:15:29, 15.39s/batch, batch_loss=8.67, batch_index=230, batch_size=256]

Epoch 3/10:  23%|███▏          | 230/991 [58:41<3:15:26, 15.41s/batch, batch_loss=8.67, batch_index=230, batch_size=256]

Epoch 3/10:  23%|███▋            | 230/991 [58:55<3:15:26, 15.41s/batch, batch_loss=11, batch_index=231, batch_size=256]

Epoch 3/10:  23%|███▋            | 231/991 [58:55<3:10:44, 15.06s/batch, batch_loss=11, batch_index=231, batch_size=256]

Epoch 3/10:  23%|███▎          | 231/991 [59:09<3:10:44, 15.06s/batch, batch_loss=9.98, batch_index=232, batch_size=256]

Epoch 3/10:  23%|███▎          | 232/991 [59:09<3:09:19, 14.97s/batch, batch_loss=9.98, batch_index=232, batch_size=256]

Epoch 3/10:  23%|███▌           | 232/991 [59:27<3:09:19, 14.97s/batch, batch_loss=8.4, batch_index=233, batch_size=256]

Epoch 3/10:  24%|███▌           | 233/991 [59:27<3:18:53, 15.74s/batch, batch_loss=8.4, batch_index=233, batch_size=256]

Epoch 3/10:  24%|███▊            | 233/991 [59:41<3:18:53, 15.74s/batch, batch_loss=14, batch_index=234, batch_size=256]

Epoch 3/10:  24%|███▊            | 234/991 [59:41<3:11:32, 15.18s/batch, batch_loss=14, batch_index=234, batch_size=256]

Epoch 3/10:  24%|███▎          | 234/991 [59:56<3:11:32, 15.18s/batch, batch_loss=17.4, batch_index=235, batch_size=256]

Epoch 3/10:  24%|███▎          | 235/991 [59:56<3:09:31, 15.04s/batch, batch_loss=17.4, batch_index=235, batch_size=256]

Epoch 3/10:  24%|██▊         | 235/991 [1:00:10<3:09:31, 15.04s/batch, batch_loss=26.6, batch_index=236, batch_size=256]

Epoch 3/10:  24%|██▊         | 236/991 [1:00:10<3:08:15, 14.96s/batch, batch_loss=26.6, batch_index=236, batch_size=256]

Epoch 3/10:  24%|██▊         | 236/991 [1:00:25<3:08:15, 14.96s/batch, batch_loss=37.4, batch_index=237, batch_size=256]

Epoch 3/10:  24%|██▊         | 237/991 [1:00:25<3:06:08, 14.81s/batch, batch_loss=37.4, batch_index=237, batch_size=256]

Epoch 3/10:  24%|██▊         | 237/991 [1:00:40<3:06:08, 14.81s/batch, batch_loss=29.5, batch_index=238, batch_size=256]

Epoch 3/10:  24%|██▉         | 238/991 [1:00:40<3:06:27, 14.86s/batch, batch_loss=29.5, batch_index=238, batch_size=256]

Epoch 3/10:  24%|██▉         | 238/991 [1:00:55<3:06:27, 14.86s/batch, batch_loss=6.01, batch_index=239, batch_size=256]

Epoch 3/10:  24%|██▉         | 239/991 [1:00:55<3:05:36, 14.81s/batch, batch_loss=6.01, batch_index=239, batch_size=256]

Epoch 3/10:  24%|██▉         | 239/991 [1:01:09<3:05:36, 14.81s/batch, batch_loss=10.3, batch_index=240, batch_size=256]

Epoch 3/10:  24%|██▉         | 240/991 [1:01:09<3:05:20, 14.81s/batch, batch_loss=10.3, batch_index=240, batch_size=256]

Epoch 3/10:  24%|██▉         | 240/991 [1:01:25<3:05:20, 14.81s/batch, batch_loss=11.4, batch_index=241, batch_size=256]

Epoch 3/10:  24%|██▉         | 241/991 [1:01:25<3:06:33, 14.92s/batch, batch_loss=11.4, batch_index=241, batch_size=256]

Epoch 3/10:  24%|██▉         | 241/991 [1:01:39<3:06:33, 14.92s/batch, batch_loss=26.7, batch_index=242, batch_size=256]

Epoch 3/10:  24%|██▉         | 242/991 [1:01:39<3:05:11, 14.84s/batch, batch_loss=26.7, batch_index=242, batch_size=256]

Epoch 3/10:  24%|███▏         | 242/991 [1:01:54<3:05:11, 14.84s/batch, batch_loss=275, batch_index=243, batch_size=256]

Epoch 3/10:  25%|███▏         | 243/991 [1:01:54<3:05:06, 14.85s/batch, batch_loss=275, batch_index=243, batch_size=256]

Epoch 3/10:  25%|██▉         | 243/991 [1:02:09<3:05:06, 14.85s/batch, batch_loss=30.9, batch_index=244, batch_size=256]

Epoch 3/10:  25%|██▉         | 244/991 [1:02:09<3:05:24, 14.89s/batch, batch_loss=30.9, batch_index=244, batch_size=256]

Epoch 3/10:  25%|██▉         | 244/991 [1:02:23<3:05:24, 14.89s/batch, batch_loss=6.71, batch_index=245, batch_size=256]

Epoch 3/10:  25%|██▉         | 245/991 [1:02:23<3:01:24, 14.59s/batch, batch_loss=6.71, batch_index=245, batch_size=256]

Epoch 3/10:  25%|██▉         | 245/991 [1:02:37<3:01:24, 14.59s/batch, batch_loss=5.59, batch_index=246, batch_size=256]

Epoch 3/10:  25%|██▉         | 246/991 [1:02:37<3:00:53, 14.57s/batch, batch_loss=5.59, batch_index=246, batch_size=256]

Epoch 3/10:  25%|███▍          | 246/991 [1:02:52<3:00:53, 14.57s/batch, batch_loss=20, batch_index=247, batch_size=256]

Epoch 3/10:  25%|███▍          | 247/991 [1:02:52<3:01:08, 14.61s/batch, batch_loss=20, batch_index=247, batch_size=256]

Epoch 3/10:  25%|██▉         | 247/991 [1:03:07<3:01:08, 14.61s/batch, batch_loss=5.35, batch_index=248, batch_size=256]

Epoch 3/10:  25%|███         | 248/991 [1:03:07<3:00:10, 14.55s/batch, batch_loss=5.35, batch_index=248, batch_size=256]

Epoch 3/10:  25%|███         | 248/991 [1:03:24<3:00:10, 14.55s/batch, batch_loss=15.1, batch_index=249, batch_size=256]

Epoch 3/10:  25%|███         | 249/991 [1:03:24<3:10:47, 15.43s/batch, batch_loss=15.1, batch_index=249, batch_size=256]

Epoch 3/10:  25%|███         | 249/991 [1:03:39<3:10:47, 15.43s/batch, batch_loss=8.33, batch_index=250, batch_size=256]

Epoch 3/10:  25%|███         | 250/991 [1:03:39<3:07:30, 15.18s/batch, batch_loss=8.33, batch_index=250, batch_size=256]

Epoch 3/10:  25%|███         | 250/991 [1:03:53<3:07:30, 15.18s/batch, batch_loss=7.01, batch_index=251, batch_size=256]

Epoch 3/10:  25%|███         | 251/991 [1:03:53<3:04:17, 14.94s/batch, batch_loss=7.01, batch_index=251, batch_size=256]

Epoch 3/10:  25%|███         | 251/991 [1:04:07<3:04:17, 14.94s/batch, batch_loss=20.2, batch_index=252, batch_size=256]

Epoch 3/10:  25%|███         | 252/991 [1:04:07<3:01:04, 14.70s/batch, batch_loss=20.2, batch_index=252, batch_size=256]

Epoch 3/10:  25%|███         | 252/991 [1:04:20<3:01:04, 14.70s/batch, batch_loss=6.79, batch_index=253, batch_size=256]

Epoch 3/10:  26%|███         | 253/991 [1:04:20<2:55:38, 14.28s/batch, batch_loss=6.79, batch_index=253, batch_size=256]

Epoch 3/10:  26%|███         | 253/991 [1:04:34<2:55:38, 14.28s/batch, batch_loss=20.5, batch_index=254, batch_size=256]

Epoch 3/10:  26%|███         | 254/991 [1:04:34<2:52:44, 14.06s/batch, batch_loss=20.5, batch_index=254, batch_size=256]

Epoch 3/10:  26%|███         | 254/991 [1:04:49<2:52:44, 14.06s/batch, batch_loss=15.8, batch_index=255, batch_size=256]

Epoch 3/10:  26%|███         | 255/991 [1:04:49<2:56:47, 14.41s/batch, batch_loss=15.8, batch_index=255, batch_size=256]

Epoch 3/10:  26%|███▎         | 255/991 [1:05:03<2:56:47, 14.41s/batch, batch_loss=696, batch_index=256, batch_size=256]

Epoch 3/10:  26%|███▎         | 256/991 [1:05:03<2:55:31, 14.33s/batch, batch_loss=696, batch_index=256, batch_size=256]

Epoch 3/10:  26%|███         | 256/991 [1:05:17<2:55:31, 14.33s/batch, batch_loss=19.6, batch_index=257, batch_size=256]

Epoch 3/10:  26%|███         | 257/991 [1:05:17<2:54:01, 14.23s/batch, batch_loss=19.6, batch_index=257, batch_size=256]

Epoch 3/10:  26%|███▎         | 257/991 [1:05:32<2:54:01, 14.23s/batch, batch_loss=209, batch_index=258, batch_size=256]

Epoch 3/10:  26%|███▍         | 258/991 [1:05:32<2:55:17, 14.35s/batch, batch_loss=209, batch_index=258, batch_size=256]

Epoch 3/10:  26%|███         | 258/991 [1:05:47<2:55:17, 14.35s/batch, batch_loss=14.1, batch_index=259, batch_size=256]

Epoch 3/10:  26%|███▏        | 259/991 [1:05:47<2:56:51, 14.50s/batch, batch_loss=14.1, batch_index=259, batch_size=256]

Epoch 3/10:  26%|███▏        | 259/991 [1:06:02<2:56:51, 14.50s/batch, batch_loss=49.7, batch_index=260, batch_size=256]

Epoch 3/10:  26%|███▏        | 260/991 [1:06:02<2:58:12, 14.63s/batch, batch_loss=49.7, batch_index=260, batch_size=256]

Epoch 3/10:  26%|███▏        | 260/991 [1:06:17<2:58:12, 14.63s/batch, batch_loss=18.5, batch_index=261, batch_size=256]

Epoch 3/10:  26%|███▏        | 261/991 [1:06:17<3:00:16, 14.82s/batch, batch_loss=18.5, batch_index=261, batch_size=256]

Epoch 3/10:  26%|███▏        | 261/991 [1:06:33<3:00:16, 14.82s/batch, batch_loss=11.8, batch_index=262, batch_size=256]

Epoch 3/10:  26%|███▏        | 262/991 [1:06:33<3:03:43, 15.12s/batch, batch_loss=11.8, batch_index=262, batch_size=256]

Epoch 3/10:  26%|███▏        | 262/991 [1:06:48<3:03:43, 15.12s/batch, batch_loss=15.9, batch_index=263, batch_size=256]

Epoch 3/10:  27%|███▏        | 263/991 [1:06:48<3:04:48, 15.23s/batch, batch_loss=15.9, batch_index=263, batch_size=256]

Epoch 3/10:  27%|███▏        | 263/991 [1:07:03<3:04:48, 15.23s/batch, batch_loss=13.8, batch_index=264, batch_size=256]

Epoch 3/10:  27%|███▏        | 264/991 [1:07:03<3:01:21, 14.97s/batch, batch_loss=13.8, batch_index=264, batch_size=256]

Epoch 3/10:  27%|███▏        | 264/991 [1:07:20<3:01:21, 14.97s/batch, batch_loss=19.3, batch_index=265, batch_size=256]

Epoch 3/10:  27%|███▏        | 265/991 [1:07:20<3:08:09, 15.55s/batch, batch_loss=19.3, batch_index=265, batch_size=256]

Epoch 3/10:  27%|███▏        | 265/991 [1:07:35<3:08:09, 15.55s/batch, batch_loss=17.2, batch_index=266, batch_size=256]

Epoch 3/10:  27%|███▏        | 266/991 [1:07:35<3:06:24, 15.43s/batch, batch_loss=17.2, batch_index=266, batch_size=256]

Epoch 3/10:  27%|███▏        | 266/991 [1:07:49<3:06:24, 15.43s/batch, batch_loss=20.3, batch_index=267, batch_size=256]

Epoch 3/10:  27%|███▏        | 267/991 [1:07:49<3:02:50, 15.15s/batch, batch_loss=20.3, batch_index=267, batch_size=256]

Epoch 3/10:  27%|███▏        | 267/991 [1:08:04<3:02:50, 15.15s/batch, batch_loss=7.26, batch_index=268, batch_size=256]

Epoch 3/10:  27%|███▏        | 268/991 [1:08:04<3:00:28, 14.98s/batch, batch_loss=7.26, batch_index=268, batch_size=256]

Epoch 3/10:  27%|███▏        | 268/991 [1:08:19<3:00:28, 14.98s/batch, batch_loss=17.8, batch_index=269, batch_size=256]

Epoch 3/10:  27%|███▎        | 269/991 [1:08:19<3:00:11, 14.97s/batch, batch_loss=17.8, batch_index=269, batch_size=256]

Epoch 3/10:  27%|███▎        | 269/991 [1:08:34<3:00:11, 14.97s/batch, batch_loss=1.96, batch_index=270, batch_size=256]

Epoch 3/10:  27%|███▎        | 270/991 [1:08:34<3:00:35, 15.03s/batch, batch_loss=1.96, batch_index=270, batch_size=256]

Epoch 3/10:  27%|███▎        | 270/991 [1:08:49<3:00:35, 15.03s/batch, batch_loss=15.4, batch_index=271, batch_size=256]

Epoch 3/10:  27%|███▎        | 271/991 [1:08:49<3:00:36, 15.05s/batch, batch_loss=15.4, batch_index=271, batch_size=256]

Epoch 3/10:  27%|███▎        | 271/991 [1:09:04<3:00:36, 15.05s/batch, batch_loss=16.1, batch_index=272, batch_size=256]

Epoch 3/10:  27%|███▎        | 272/991 [1:09:04<2:58:24, 14.89s/batch, batch_loss=16.1, batch_index=272, batch_size=256]

Epoch 3/10:  27%|███▊          | 272/991 [1:09:18<2:58:24, 14.89s/batch, batch_loss=38, batch_index=273, batch_size=256]

Epoch 3/10:  28%|███▊          | 273/991 [1:09:18<2:56:21, 14.74s/batch, batch_loss=38, batch_index=273, batch_size=256]

Epoch 3/10:  28%|███▎        | 273/991 [1:09:33<2:56:21, 14.74s/batch, batch_loss=12.2, batch_index=274, batch_size=256]

Epoch 3/10:  28%|███▎        | 274/991 [1:09:33<2:56:02, 14.73s/batch, batch_loss=12.2, batch_index=274, batch_size=256]

Epoch 3/10:  28%|██▍      | 274/991 [1:09:50<2:56:02, 14.73s/batch, batch_loss=3.32e+3, batch_index=275, batch_size=256]

Epoch 3/10:  28%|██▍      | 275/991 [1:09:50<3:03:27, 15.37s/batch, batch_loss=3.32e+3, batch_index=275, batch_size=256]

Epoch 3/10:  28%|███▎        | 275/991 [1:10:05<3:03:27, 15.37s/batch, batch_loss=17.3, batch_index=276, batch_size=256]

Epoch 3/10:  28%|███▎        | 276/991 [1:10:05<3:03:17, 15.38s/batch, batch_loss=17.3, batch_index=276, batch_size=256]

Epoch 3/10:  28%|██▌      | 276/991 [1:10:20<3:03:17, 15.38s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 3/10:  28%|██▌      | 277/991 [1:10:20<3:01:35, 15.26s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 3/10:  28%|███▎        | 277/991 [1:10:35<3:01:35, 15.26s/batch, batch_loss=13.1, batch_index=278, batch_size=256]

Epoch 3/10:  28%|███▎        | 278/991 [1:10:35<3:00:13, 15.17s/batch, batch_loss=13.1, batch_index=278, batch_size=256]

Epoch 3/10:  28%|███▎        | 278/991 [1:10:50<3:00:13, 15.17s/batch, batch_loss=19.6, batch_index=279, batch_size=256]

Epoch 3/10:  28%|███▍        | 279/991 [1:10:50<2:59:56, 15.16s/batch, batch_loss=19.6, batch_index=279, batch_size=256]

Epoch 3/10:  28%|███▍        | 279/991 [1:11:05<2:59:56, 15.16s/batch, batch_loss=14.8, batch_index=280, batch_size=256]

Epoch 3/10:  28%|███▍        | 280/991 [1:11:05<2:57:56, 15.02s/batch, batch_loss=14.8, batch_index=280, batch_size=256]

Epoch 3/10:  28%|███▍        | 280/991 [1:11:20<2:57:56, 15.02s/batch, batch_loss=11.5, batch_index=281, batch_size=256]

Epoch 3/10:  28%|███▍        | 281/991 [1:11:20<2:58:42, 15.10s/batch, batch_loss=11.5, batch_index=281, batch_size=256]

Epoch 3/10:  28%|███▍        | 281/991 [1:11:35<2:58:42, 15.10s/batch, batch_loss=7.12, batch_index=282, batch_size=256]

Epoch 3/10:  28%|███▍        | 282/991 [1:11:35<2:57:37, 15.03s/batch, batch_loss=7.12, batch_index=282, batch_size=256]

Epoch 3/10:  28%|███▉          | 282/991 [1:11:52<2:57:37, 15.03s/batch, batch_loss=16, batch_index=283, batch_size=256]

Epoch 3/10:  29%|███▉          | 283/991 [1:11:52<3:05:05, 15.69s/batch, batch_loss=16, batch_index=283, batch_size=256]

Epoch 3/10:  29%|███▍        | 283/991 [1:12:07<3:05:05, 15.69s/batch, batch_loss=21.7, batch_index=284, batch_size=256]

Epoch 3/10:  29%|███▍        | 284/991 [1:12:07<3:01:49, 15.43s/batch, batch_loss=21.7, batch_index=284, batch_size=256]

Epoch 3/10:  29%|███▍        | 284/991 [1:12:23<3:01:49, 15.43s/batch, batch_loss=13.3, batch_index=285, batch_size=256]

Epoch 3/10:  29%|███▍        | 285/991 [1:12:23<3:03:03, 15.56s/batch, batch_loss=13.3, batch_index=285, batch_size=256]

Epoch 3/10:  29%|███▍        | 285/991 [1:12:38<3:03:03, 15.56s/batch, batch_loss=8.15, batch_index=286, batch_size=256]

Epoch 3/10:  29%|███▍        | 286/991 [1:12:38<3:01:22, 15.44s/batch, batch_loss=8.15, batch_index=286, batch_size=256]

Epoch 3/10:  29%|███▍        | 286/991 [1:12:54<3:01:22, 15.44s/batch, batch_loss=7.52, batch_index=287, batch_size=256]

Epoch 3/10:  29%|███▍        | 287/991 [1:12:54<3:03:21, 15.63s/batch, batch_loss=7.52, batch_index=287, batch_size=256]

Epoch 3/10:  29%|██▌      | 287/991 [1:13:09<3:03:21, 15.63s/batch, batch_loss=2.58e+3, batch_index=288, batch_size=256]

Epoch 3/10:  29%|██▌      | 288/991 [1:13:09<3:00:36, 15.41s/batch, batch_loss=2.58e+3, batch_index=288, batch_size=256]

Epoch 3/10:  29%|██▌      | 288/991 [1:13:24<3:00:36, 15.41s/batch, batch_loss=1.26e+3, batch_index=289, batch_size=256]

Epoch 3/10:  29%|██▌      | 289/991 [1:13:24<3:00:37, 15.44s/batch, batch_loss=1.26e+3, batch_index=289, batch_size=256]

Epoch 3/10:  29%|███▍        | 289/991 [1:13:41<3:00:37, 15.44s/batch, batch_loss=14.4, batch_index=290, batch_size=256]

Epoch 3/10:  29%|███▌        | 290/991 [1:13:41<3:03:53, 15.74s/batch, batch_loss=14.4, batch_index=290, batch_size=256]

Epoch 3/10:  29%|███▌        | 290/991 [1:13:57<3:03:53, 15.74s/batch, batch_loss=4.89, batch_index=291, batch_size=256]

Epoch 3/10:  29%|███▌        | 291/991 [1:13:57<3:04:10, 15.79s/batch, batch_loss=4.89, batch_index=291, batch_size=256]

Epoch 3/10:  29%|███▌        | 291/991 [1:14:12<3:04:10, 15.79s/batch, batch_loss=11.3, batch_index=292, batch_size=256]

Epoch 3/10:  29%|███▌        | 292/991 [1:14:12<3:02:40, 15.68s/batch, batch_loss=11.3, batch_index=292, batch_size=256]

Epoch 3/10:  29%|███▌        | 292/991 [1:14:28<3:02:40, 15.68s/batch, batch_loss=16.3, batch_index=293, batch_size=256]

Epoch 3/10:  30%|███▌        | 293/991 [1:14:28<3:01:48, 15.63s/batch, batch_loss=16.3, batch_index=293, batch_size=256]

Epoch 3/10:  30%|███▌        | 293/991 [1:14:43<3:01:48, 15.63s/batch, batch_loss=12.9, batch_index=294, batch_size=256]

Epoch 3/10:  30%|███▌        | 294/991 [1:14:43<3:00:53, 15.57s/batch, batch_loss=12.9, batch_index=294, batch_size=256]

Epoch 3/10:  30%|███▌        | 294/991 [1:14:57<3:00:53, 15.57s/batch, batch_loss=10.6, batch_index=295, batch_size=256]

Epoch 3/10:  30%|███▌        | 295/991 [1:14:57<2:55:17, 15.11s/batch, batch_loss=10.6, batch_index=295, batch_size=256]

Epoch 3/10:  30%|███▌        | 295/991 [1:15:12<2:55:17, 15.11s/batch, batch_loss=17.8, batch_index=296, batch_size=256]

Epoch 3/10:  30%|███▌        | 296/991 [1:15:12<2:55:25, 15.15s/batch, batch_loss=17.8, batch_index=296, batch_size=256]

Epoch 3/10:  30%|███▌        | 296/991 [1:15:29<2:55:25, 15.15s/batch, batch_loss=15.1, batch_index=297, batch_size=256]

Epoch 3/10:  30%|███▌        | 297/991 [1:15:29<3:01:07, 15.66s/batch, batch_loss=15.1, batch_index=297, batch_size=256]

Epoch 3/10:  30%|██▋      | 297/991 [1:15:43<3:01:07, 15.66s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 3/10:  30%|██▋      | 298/991 [1:15:43<2:54:17, 15.09s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 3/10:  30%|███▌        | 298/991 [1:15:58<2:54:17, 15.09s/batch, batch_loss=20.7, batch_index=299, batch_size=256]

Epoch 3/10:  30%|███▌        | 299/991 [1:15:58<2:52:57, 15.00s/batch, batch_loss=20.7, batch_index=299, batch_size=256]

Epoch 3/10:  30%|███▌        | 299/991 [1:16:12<2:52:57, 15.00s/batch, batch_loss=5.95, batch_index=300, batch_size=256]

Epoch 3/10:  30%|███▋        | 300/991 [1:16:12<2:49:38, 14.73s/batch, batch_loss=5.95, batch_index=300, batch_size=256]

Epoch 3/10:  30%|███▉         | 300/991 [1:16:27<2:49:38, 14.73s/batch, batch_loss=9.3, batch_index=301, batch_size=256]

Epoch 3/10:  30%|███▉         | 301/991 [1:16:27<2:49:45, 14.76s/batch, batch_loss=9.3, batch_index=301, batch_size=256]

Epoch 3/10:  30%|███▋        | 301/991 [1:16:42<2:49:45, 14.76s/batch, batch_loss=10.8, batch_index=302, batch_size=256]

Epoch 3/10:  30%|███▋        | 302/991 [1:16:42<2:50:21, 14.84s/batch, batch_loss=10.8, batch_index=302, batch_size=256]

Epoch 3/10:  30%|████▎         | 302/991 [1:16:57<2:50:21, 14.84s/batch, batch_loss=10, batch_index=303, batch_size=256]

Epoch 3/10:  31%|████▎         | 303/991 [1:16:57<2:50:21, 14.86s/batch, batch_loss=10, batch_index=303, batch_size=256]

Epoch 3/10:  31%|███▋        | 303/991 [1:17:11<2:50:21, 14.86s/batch, batch_loss=3.83, batch_index=304, batch_size=256]

Epoch 3/10:  31%|███▋        | 304/991 [1:17:11<2:47:05, 14.59s/batch, batch_loss=3.83, batch_index=304, batch_size=256]

Epoch 3/10:  31%|███▋        | 304/991 [1:17:24<2:47:05, 14.59s/batch, batch_loss=16.1, batch_index=305, batch_size=256]

Epoch 3/10:  31%|███▋        | 305/991 [1:17:24<2:43:01, 14.26s/batch, batch_loss=16.1, batch_index=305, batch_size=256]

Epoch 3/10:  31%|████         | 305/991 [1:17:39<2:43:01, 14.26s/batch, batch_loss=9.7, batch_index=306, batch_size=256]

Epoch 3/10:  31%|████         | 306/991 [1:17:39<2:44:23, 14.40s/batch, batch_loss=9.7, batch_index=306, batch_size=256]

Epoch 3/10:  31%|██▊      | 306/991 [1:17:53<2:44:23, 14.40s/batch, batch_loss=6.29e+3, batch_index=307, batch_size=256]

Epoch 3/10:  31%|██▊      | 307/991 [1:17:53<2:43:01, 14.30s/batch, batch_loss=6.29e+3, batch_index=307, batch_size=256]

Epoch 3/10:  31%|███▋        | 307/991 [1:18:07<2:43:01, 14.30s/batch, batch_loss=12.6, batch_index=308, batch_size=256]

Epoch 3/10:  31%|███▋        | 308/991 [1:18:07<2:41:49, 14.22s/batch, batch_loss=12.6, batch_index=308, batch_size=256]

Epoch 3/10:  31%|███▋        | 308/991 [1:18:21<2:41:49, 14.22s/batch, batch_loss=22.4, batch_index=309, batch_size=256]

Epoch 3/10:  31%|███▋        | 309/991 [1:18:21<2:41:07, 14.18s/batch, batch_loss=22.4, batch_index=309, batch_size=256]

Epoch 3/10:  31%|███▋        | 309/991 [1:18:35<2:41:07, 14.18s/batch, batch_loss=14.2, batch_index=310, batch_size=256]

Epoch 3/10:  31%|███▊        | 310/991 [1:18:35<2:41:46, 14.25s/batch, batch_loss=14.2, batch_index=310, batch_size=256]

Epoch 3/10:  31%|███▊        | 310/991 [1:18:50<2:41:46, 14.25s/batch, batch_loss=22.8, batch_index=311, batch_size=256]

Epoch 3/10:  31%|███▊        | 311/991 [1:18:50<2:43:09, 14.40s/batch, batch_loss=22.8, batch_index=311, batch_size=256]

Epoch 3/10:  31%|████▍         | 311/991 [1:19:04<2:43:09, 14.40s/batch, batch_loss=15, batch_index=312, batch_size=256]

Epoch 3/10:  31%|████▍         | 312/991 [1:19:04<2:42:41, 14.38s/batch, batch_loss=15, batch_index=312, batch_size=256]

Epoch 3/10:  31%|██▊      | 312/991 [1:19:22<2:42:41, 14.38s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 3/10:  32%|██▊      | 313/991 [1:19:22<2:51:27, 15.17s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 3/10:  32%|███▊        | 313/991 [1:19:36<2:51:27, 15.17s/batch, batch_loss=9.46, batch_index=314, batch_size=256]

Epoch 3/10:  32%|███▊        | 314/991 [1:19:36<2:50:25, 15.10s/batch, batch_loss=9.46, batch_index=314, batch_size=256]

Epoch 3/10:  32%|███▊        | 314/991 [1:19:52<2:50:25, 15.10s/batch, batch_loss=14.7, batch_index=315, batch_size=256]

Epoch 3/10:  32%|███▊        | 315/991 [1:19:52<2:50:58, 15.17s/batch, batch_loss=14.7, batch_index=315, batch_size=256]

Epoch 3/10:  32%|███▊        | 315/991 [1:20:07<2:50:58, 15.17s/batch, batch_loss=22.1, batch_index=316, batch_size=256]

Epoch 3/10:  32%|███▊        | 316/991 [1:20:07<2:50:09, 15.12s/batch, batch_loss=22.1, batch_index=316, batch_size=256]

Epoch 3/10:  32%|███▊        | 316/991 [1:20:22<2:50:09, 15.12s/batch, batch_loss=22.8, batch_index=317, batch_size=256]

Epoch 3/10:  32%|███▊        | 317/991 [1:20:22<2:51:09, 15.24s/batch, batch_loss=22.8, batch_index=317, batch_size=256]

Epoch 3/10:  32%|███▊        | 317/991 [1:20:37<2:51:09, 15.24s/batch, batch_loss=20.8, batch_index=318, batch_size=256]

Epoch 3/10:  32%|███▊        | 318/991 [1:20:37<2:49:41, 15.13s/batch, batch_loss=20.8, batch_index=318, batch_size=256]

Epoch 3/10:  32%|███▊        | 318/991 [1:20:54<2:49:41, 15.13s/batch, batch_loss=19.7, batch_index=319, batch_size=256]

Epoch 3/10:  32%|███▊        | 319/991 [1:20:54<2:55:29, 15.67s/batch, batch_loss=19.7, batch_index=319, batch_size=256]

Epoch 3/10:  32%|███▊        | 319/991 [1:21:09<2:55:29, 15.67s/batch, batch_loss=15.5, batch_index=320, batch_size=256]

Epoch 3/10:  32%|███▊        | 320/991 [1:21:09<2:51:36, 15.34s/batch, batch_loss=15.5, batch_index=320, batch_size=256]

Epoch 3/10:  32%|███▊        | 320/991 [1:21:23<2:51:36, 15.34s/batch, batch_loss=23.3, batch_index=321, batch_size=256]

Epoch 3/10:  32%|███▉        | 321/991 [1:21:23<2:48:17, 15.07s/batch, batch_loss=23.3, batch_index=321, batch_size=256]

Epoch 3/10:  32%|███▉        | 321/991 [1:21:39<2:48:17, 15.07s/batch, batch_loss=7.27, batch_index=322, batch_size=256]

Epoch 3/10:  32%|███▉        | 322/991 [1:21:39<2:49:42, 15.22s/batch, batch_loss=7.27, batch_index=322, batch_size=256]

Epoch 3/10:  32%|███▉        | 322/991 [1:21:53<2:49:42, 15.22s/batch, batch_loss=10.3, batch_index=323, batch_size=256]

Epoch 3/10:  33%|███▉        | 323/991 [1:21:53<2:45:05, 14.83s/batch, batch_loss=10.3, batch_index=323, batch_size=256]

Epoch 3/10:  33%|███▉        | 323/991 [1:22:08<2:45:05, 14.83s/batch, batch_loss=22.4, batch_index=324, batch_size=256]

Epoch 3/10:  33%|███▉        | 324/991 [1:22:08<2:45:31, 14.89s/batch, batch_loss=22.4, batch_index=324, batch_size=256]

Epoch 3/10:  33%|███▉        | 324/991 [1:22:23<2:45:31, 14.89s/batch, batch_loss=9.82, batch_index=325, batch_size=256]

Epoch 3/10:  33%|███▉        | 325/991 [1:22:23<2:45:45, 14.93s/batch, batch_loss=9.82, batch_index=325, batch_size=256]

Epoch 3/10:  33%|███▉        | 325/991 [1:22:37<2:45:45, 14.93s/batch, batch_loss=29.2, batch_index=326, batch_size=256]

Epoch 3/10:  33%|███▉        | 326/991 [1:22:37<2:43:02, 14.71s/batch, batch_loss=29.2, batch_index=326, batch_size=256]

Epoch 3/10:  33%|██▉      | 326/991 [1:22:51<2:43:02, 14.71s/batch, batch_loss=3.05e+3, batch_index=327, batch_size=256]

Epoch 3/10:  33%|██▉      | 327/991 [1:22:51<2:41:34, 14.60s/batch, batch_loss=3.05e+3, batch_index=327, batch_size=256]

Epoch 3/10:  33%|███▉        | 327/991 [1:23:06<2:41:34, 14.60s/batch, batch_loss=7.39, batch_index=328, batch_size=256]

Epoch 3/10:  33%|███▉        | 328/991 [1:23:06<2:42:31, 14.71s/batch, batch_loss=7.39, batch_index=328, batch_size=256]

Epoch 3/10:  33%|███▉        | 328/991 [1:23:21<2:42:31, 14.71s/batch, batch_loss=22.2, batch_index=329, batch_size=256]

Epoch 3/10:  33%|███▉        | 329/991 [1:23:21<2:42:10, 14.70s/batch, batch_loss=22.2, batch_index=329, batch_size=256]

Epoch 3/10:  33%|███▉        | 329/991 [1:23:38<2:42:10, 14.70s/batch, batch_loss=16.2, batch_index=330, batch_size=256]

Epoch 3/10:  33%|███▉        | 330/991 [1:23:38<2:48:53, 15.33s/batch, batch_loss=16.2, batch_index=330, batch_size=256]

Epoch 3/10:  33%|███▉        | 330/991 [1:23:52<2:48:53, 15.33s/batch, batch_loss=13.5, batch_index=331, batch_size=256]

Epoch 3/10:  33%|████        | 331/991 [1:23:52<2:45:48, 15.07s/batch, batch_loss=13.5, batch_index=331, batch_size=256]

Epoch 3/10:  33%|████        | 331/991 [1:24:06<2:45:48, 15.07s/batch, batch_loss=15.2, batch_index=332, batch_size=256]

Epoch 3/10:  34%|████        | 332/991 [1:24:06<2:43:03, 14.85s/batch, batch_loss=15.2, batch_index=332, batch_size=256]

Epoch 3/10:  34%|████        | 332/991 [1:24:21<2:43:03, 14.85s/batch, batch_loss=16.2, batch_index=333, batch_size=256]

Epoch 3/10:  34%|████        | 333/991 [1:24:21<2:43:02, 14.87s/batch, batch_loss=16.2, batch_index=333, batch_size=256]

Epoch 3/10:  34%|████        | 333/991 [1:24:36<2:43:02, 14.87s/batch, batch_loss=14.6, batch_index=334, batch_size=256]

Epoch 3/10:  34%|████        | 334/991 [1:24:36<2:40:32, 14.66s/batch, batch_loss=14.6, batch_index=334, batch_size=256]

Epoch 3/10:  34%|████▍        | 334/991 [1:24:51<2:40:32, 14.66s/batch, batch_loss=4.3, batch_index=335, batch_size=256]

Epoch 3/10:  34%|████▍        | 335/991 [1:24:51<2:43:58, 15.00s/batch, batch_loss=4.3, batch_index=335, batch_size=256]

Epoch 3/10:  34%|███      | 335/991 [1:25:06<2:43:58, 15.00s/batch, batch_loss=8.47e+3, batch_index=336, batch_size=256]

Epoch 3/10:  34%|███      | 336/991 [1:25:06<2:42:21, 14.87s/batch, batch_loss=8.47e+3, batch_index=336, batch_size=256]

Epoch 3/10:  34%|███      | 336/991 [1:25:20<2:42:21, 14.87s/batch, batch_loss=2.35e+3, batch_index=337, batch_size=256]

Epoch 3/10:  34%|███      | 337/991 [1:25:20<2:39:07, 14.60s/batch, batch_loss=2.35e+3, batch_index=337, batch_size=256]

Epoch 3/10:  34%|████        | 337/991 [1:25:34<2:39:07, 14.60s/batch, batch_loss=8.75, batch_index=338, batch_size=256]

Epoch 3/10:  34%|████        | 338/991 [1:25:34<2:36:43, 14.40s/batch, batch_loss=8.75, batch_index=338, batch_size=256]

Epoch 3/10:  34%|████        | 338/991 [1:25:48<2:36:43, 14.40s/batch, batch_loss=23.3, batch_index=339, batch_size=256]

Epoch 3/10:  34%|████        | 339/991 [1:25:48<2:36:02, 14.36s/batch, batch_loss=23.3, batch_index=339, batch_size=256]

Epoch 3/10:  34%|████        | 339/991 [1:26:03<2:36:02, 14.36s/batch, batch_loss=11.1, batch_index=340, batch_size=256]

Epoch 3/10:  34%|████        | 340/991 [1:26:03<2:37:48, 14.55s/batch, batch_loss=11.1, batch_index=340, batch_size=256]

Epoch 3/10:  34%|████        | 340/991 [1:26:18<2:37:48, 14.55s/batch, batch_loss=9.33, batch_index=341, batch_size=256]

Epoch 3/10:  34%|████▏       | 341/991 [1:26:18<2:38:10, 14.60s/batch, batch_loss=9.33, batch_index=341, batch_size=256]

Epoch 3/10:  34%|███▊       | 341/991 [1:26:32<2:38:10, 14.60s/batch, batch_loss=0.693, batch_index=342, batch_size=256]

Epoch 3/10:  35%|███▊       | 342/991 [1:26:32<2:37:03, 14.52s/batch, batch_loss=0.693, batch_index=342, batch_size=256]

Epoch 3/10:  35%|████▏       | 342/991 [1:26:47<2:37:03, 14.52s/batch, batch_loss=7.06, batch_index=343, batch_size=256]

Epoch 3/10:  35%|████▏       | 343/991 [1:26:47<2:38:26, 14.67s/batch, batch_loss=7.06, batch_index=343, batch_size=256]

Epoch 3/10:  35%|████▏       | 343/991 [1:27:02<2:38:26, 14.67s/batch, batch_loss=16.9, batch_index=344, batch_size=256]

Epoch 3/10:  35%|████▏       | 344/991 [1:27:02<2:39:43, 14.81s/batch, batch_loss=16.9, batch_index=344, batch_size=256]

Epoch 3/10:  35%|████▌        | 344/991 [1:27:18<2:39:43, 14.81s/batch, batch_loss=115, batch_index=345, batch_size=256]

Epoch 3/10:  35%|████▌        | 345/991 [1:27:18<2:42:51, 15.13s/batch, batch_loss=115, batch_index=345, batch_size=256]

Epoch 3/10:  35%|████▊         | 345/991 [1:27:34<2:42:51, 15.13s/batch, batch_loss=16, batch_index=346, batch_size=256]

Epoch 3/10:  35%|████▉         | 346/991 [1:27:34<2:43:54, 15.25s/batch, batch_loss=16, batch_index=346, batch_size=256]

Epoch 3/10:  35%|████▏       | 346/991 [1:27:49<2:43:54, 15.25s/batch, batch_loss=12.1, batch_index=347, batch_size=256]

Epoch 3/10:  35%|████▏       | 347/991 [1:27:49<2:45:03, 15.38s/batch, batch_loss=12.1, batch_index=347, batch_size=256]

Epoch 3/10:  35%|████▏       | 347/991 [1:28:05<2:45:03, 15.38s/batch, batch_loss=14.3, batch_index=348, batch_size=256]

Epoch 3/10:  35%|████▏       | 348/991 [1:28:05<2:46:12, 15.51s/batch, batch_loss=14.3, batch_index=348, batch_size=256]

Epoch 3/10:  35%|████▏       | 348/991 [1:28:21<2:46:12, 15.51s/batch, batch_loss=8.56, batch_index=349, batch_size=256]

Epoch 3/10:  35%|████▏       | 349/991 [1:28:21<2:46:55, 15.60s/batch, batch_loss=8.56, batch_index=349, batch_size=256]

Epoch 3/10:  35%|████▏       | 349/991 [1:28:36<2:46:55, 15.60s/batch, batch_loss=12.7, batch_index=350, batch_size=256]

Epoch 3/10:  35%|████▏       | 350/991 [1:28:36<2:45:24, 15.48s/batch, batch_loss=12.7, batch_index=350, batch_size=256]

Epoch 3/10:  35%|████▏       | 350/991 [1:28:51<2:45:24, 15.48s/batch, batch_loss=7.73, batch_index=351, batch_size=256]

Epoch 3/10:  35%|████▎       | 351/991 [1:28:51<2:43:52, 15.36s/batch, batch_loss=7.73, batch_index=351, batch_size=256]

Epoch 3/10:  35%|████▎       | 351/991 [1:29:08<2:43:52, 15.36s/batch, batch_loss=14.3, batch_index=352, batch_size=256]

Epoch 3/10:  36%|████▎       | 352/991 [1:29:08<2:47:58, 15.77s/batch, batch_loss=14.3, batch_index=352, batch_size=256]

Epoch 3/10:  36%|████▎       | 352/991 [1:29:24<2:47:58, 15.77s/batch, batch_loss=19.4, batch_index=353, batch_size=256]

Epoch 3/10:  36%|████▎       | 353/991 [1:29:24<2:48:31, 15.85s/batch, batch_loss=19.4, batch_index=353, batch_size=256]

Epoch 3/10:  36%|████▎       | 353/991 [1:29:40<2:48:31, 15.85s/batch, batch_loss=20.3, batch_index=354, batch_size=256]

Epoch 3/10:  36%|████▎       | 354/991 [1:29:40<2:48:51, 15.91s/batch, batch_loss=20.3, batch_index=354, batch_size=256]

Epoch 3/10:  36%|████▎       | 354/991 [1:29:57<2:48:51, 15.91s/batch, batch_loss=9.09, batch_index=355, batch_size=256]

Epoch 3/10:  36%|████▎       | 355/991 [1:29:57<2:50:31, 16.09s/batch, batch_loss=9.09, batch_index=355, batch_size=256]

Epoch 3/10:  36%|████▎       | 355/991 [1:30:12<2:50:31, 16.09s/batch, batch_loss=14.4, batch_index=356, batch_size=256]

Epoch 3/10:  36%|████▎       | 356/991 [1:30:12<2:49:06, 15.98s/batch, batch_loss=14.4, batch_index=356, batch_size=256]

Epoch 3/10:  36%|████▎       | 356/991 [1:30:27<2:49:06, 15.98s/batch, batch_loss=15.9, batch_index=357, batch_size=256]

Epoch 3/10:  36%|████▎       | 357/991 [1:30:27<2:46:14, 15.73s/batch, batch_loss=15.9, batch_index=357, batch_size=256]

Epoch 3/10:  36%|█████         | 357/991 [1:30:42<2:46:14, 15.73s/batch, batch_loss=14, batch_index=358, batch_size=256]

Epoch 3/10:  36%|█████         | 358/991 [1:30:42<2:43:36, 15.51s/batch, batch_loss=14, batch_index=358, batch_size=256]

Epoch 3/10:  36%|████▋        | 358/991 [1:30:56<2:43:36, 15.51s/batch, batch_loss=5.1, batch_index=359, batch_size=256]

Epoch 3/10:  36%|████▋        | 359/991 [1:30:56<2:36:50, 14.89s/batch, batch_loss=5.1, batch_index=359, batch_size=256]

Epoch 3/10:  36%|████▎       | 359/991 [1:31:10<2:36:50, 14.89s/batch, batch_loss=9.11, batch_index=360, batch_size=256]

Epoch 3/10:  36%|████▎       | 360/991 [1:31:10<2:34:55, 14.73s/batch, batch_loss=9.11, batch_index=360, batch_size=256]

Epoch 3/10:  36%|████▎       | 360/991 [1:31:24<2:34:55, 14.73s/batch, batch_loss=25.2, batch_index=361, batch_size=256]

Epoch 3/10:  36%|████▎       | 361/991 [1:31:24<2:32:45, 14.55s/batch, batch_loss=25.2, batch_index=361, batch_size=256]

Epoch 3/10:  36%|████▎       | 361/991 [1:31:39<2:32:45, 14.55s/batch, batch_loss=18.9, batch_index=362, batch_size=256]

Epoch 3/10:  37%|████▍       | 362/991 [1:31:39<2:33:19, 14.63s/batch, batch_loss=18.9, batch_index=362, batch_size=256]

Epoch 3/10:  37%|████▍       | 362/991 [1:31:54<2:33:19, 14.63s/batch, batch_loss=10.8, batch_index=363, batch_size=256]

Epoch 3/10:  37%|████▍       | 363/991 [1:31:54<2:33:20, 14.65s/batch, batch_loss=10.8, batch_index=363, batch_size=256]

Epoch 3/10:  37%|████▍       | 363/991 [1:32:08<2:33:20, 14.65s/batch, batch_loss=11.9, batch_index=364, batch_size=256]

Epoch 3/10:  37%|████▍       | 364/991 [1:32:08<2:31:31, 14.50s/batch, batch_loss=11.9, batch_index=364, batch_size=256]

Epoch 3/10:  37%|████▍       | 364/991 [1:32:22<2:31:31, 14.50s/batch, batch_loss=8.52, batch_index=365, batch_size=256]

Epoch 3/10:  37%|████▍       | 365/991 [1:32:22<2:30:17, 14.41s/batch, batch_loss=8.52, batch_index=365, batch_size=256]

Epoch 3/10:  37%|████▍       | 365/991 [1:32:36<2:30:17, 14.41s/batch, batch_loss=12.9, batch_index=366, batch_size=256]

Epoch 3/10:  37%|████▍       | 366/991 [1:32:36<2:28:48, 14.29s/batch, batch_loss=12.9, batch_index=366, batch_size=256]

Epoch 3/10:  37%|████▍       | 366/991 [1:32:51<2:28:48, 14.29s/batch, batch_loss=12.1, batch_index=367, batch_size=256]

Epoch 3/10:  37%|████▍       | 367/991 [1:32:51<2:29:35, 14.38s/batch, batch_loss=12.1, batch_index=367, batch_size=256]

Epoch 3/10:  37%|████▍       | 367/991 [1:33:07<2:29:35, 14.38s/batch, batch_loss=11.9, batch_index=368, batch_size=256]

Epoch 3/10:  37%|████▍       | 368/991 [1:33:07<2:35:01, 14.93s/batch, batch_loss=11.9, batch_index=368, batch_size=256]

Epoch 3/10:  37%|████▍       | 368/991 [1:33:22<2:35:01, 14.93s/batch, batch_loss=13.5, batch_index=369, batch_size=256]

Epoch 3/10:  37%|████▍       | 369/991 [1:33:22<2:34:24, 14.89s/batch, batch_loss=13.5, batch_index=369, batch_size=256]

Epoch 3/10:  37%|███▎     | 369/991 [1:33:37<2:34:24, 14.89s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 3/10:  37%|███▎     | 370/991 [1:33:37<2:34:11, 14.90s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 3/10:  37%|████▍       | 370/991 [1:33:54<2:34:11, 14.90s/batch, batch_loss=30.2, batch_index=371, batch_size=256]

Epoch 3/10:  37%|████▍       | 371/991 [1:33:54<2:40:11, 15.50s/batch, batch_loss=30.2, batch_index=371, batch_size=256]

Epoch 3/10:  37%|████▍       | 371/991 [1:34:08<2:40:11, 15.50s/batch, batch_loss=15.7, batch_index=372, batch_size=256]

Epoch 3/10:  38%|████▌       | 372/991 [1:34:08<2:36:41, 15.19s/batch, batch_loss=15.7, batch_index=372, batch_size=256]

Epoch 3/10:  38%|████▌       | 372/991 [1:34:22<2:36:41, 15.19s/batch, batch_loss=27.9, batch_index=373, batch_size=256]

Epoch 3/10:  38%|████▌       | 373/991 [1:34:22<2:33:26, 14.90s/batch, batch_loss=27.9, batch_index=373, batch_size=256]

Epoch 3/10:  38%|████▉        | 373/991 [1:34:37<2:33:26, 14.90s/batch, batch_loss=485, batch_index=374, batch_size=256]

Epoch 3/10:  38%|████▉        | 374/991 [1:34:37<2:31:12, 14.70s/batch, batch_loss=485, batch_index=374, batch_size=256]

Epoch 3/10:  38%|███▍     | 374/991 [1:34:55<2:31:12, 14.70s/batch, batch_loss=1.43e+3, batch_index=375, batch_size=256]

Epoch 3/10:  38%|███▍     | 375/991 [1:34:55<2:40:55, 15.67s/batch, batch_loss=1.43e+3, batch_index=375, batch_size=256]

Epoch 3/10:  38%|███▍     | 375/991 [1:35:11<2:40:55, 15.67s/batch, batch_loss=1.21e+3, batch_index=376, batch_size=256]

Epoch 3/10:  38%|███▍     | 376/991 [1:35:11<2:43:34, 15.96s/batch, batch_loss=1.21e+3, batch_index=376, batch_size=256]

Epoch 3/10:  38%|████▌       | 376/991 [1:35:28<2:43:34, 15.96s/batch, batch_loss=22.5, batch_index=377, batch_size=256]

Epoch 3/10:  38%|████▌       | 377/991 [1:35:28<2:44:35, 16.08s/batch, batch_loss=22.5, batch_index=377, batch_size=256]

Epoch 3/10:  38%|███▍     | 377/991 [1:35:43<2:44:35, 16.08s/batch, batch_loss=1.17e+3, batch_index=378, batch_size=256]

Epoch 3/10:  38%|███▍     | 378/991 [1:35:43<2:41:30, 15.81s/batch, batch_loss=1.17e+3, batch_index=378, batch_size=256]

Epoch 3/10:  38%|████▌       | 378/991 [1:35:58<2:41:30, 15.81s/batch, batch_loss=13.4, batch_index=379, batch_size=256]

Epoch 3/10:  38%|████▌       | 379/991 [1:35:58<2:38:50, 15.57s/batch, batch_loss=13.4, batch_index=379, batch_size=256]

Epoch 3/10:  38%|████▌       | 379/991 [1:36:14<2:38:50, 15.57s/batch, batch_loss=16.4, batch_index=380, batch_size=256]

Epoch 3/10:  38%|████▌       | 380/991 [1:36:14<2:39:42, 15.68s/batch, batch_loss=16.4, batch_index=380, batch_size=256]

Epoch 3/10:  38%|████▌       | 380/991 [1:36:28<2:39:42, 15.68s/batch, batch_loss=25.9, batch_index=381, batch_size=256]

Epoch 3/10:  38%|████▌       | 381/991 [1:36:28<2:35:06, 15.26s/batch, batch_loss=25.9, batch_index=381, batch_size=256]

Epoch 3/10:  38%|████▌       | 381/991 [1:36:43<2:35:06, 15.26s/batch, batch_loss=12.4, batch_index=382, batch_size=256]

Epoch 3/10:  39%|████▋       | 382/991 [1:36:43<2:34:31, 15.22s/batch, batch_loss=12.4, batch_index=382, batch_size=256]

Epoch 3/10:  39%|████▋       | 382/991 [1:37:00<2:34:31, 15.22s/batch, batch_loss=12.7, batch_index=383, batch_size=256]

Epoch 3/10:  39%|████▋       | 383/991 [1:37:00<2:40:32, 15.84s/batch, batch_loss=12.7, batch_index=383, batch_size=256]

Epoch 3/10:  39%|████▋       | 383/991 [1:37:16<2:40:32, 15.84s/batch, batch_loss=26.8, batch_index=384, batch_size=256]

Epoch 3/10:  39%|████▋       | 384/991 [1:37:16<2:39:35, 15.77s/batch, batch_loss=26.8, batch_index=384, batch_size=256]

Epoch 3/10:  39%|████▋       | 384/991 [1:37:31<2:39:35, 15.77s/batch, batch_loss=12.4, batch_index=385, batch_size=256]

Epoch 3/10:  39%|████▋       | 385/991 [1:37:31<2:37:19, 15.58s/batch, batch_loss=12.4, batch_index=385, batch_size=256]

Epoch 3/10:  39%|████▋       | 385/991 [1:37:46<2:37:19, 15.58s/batch, batch_loss=18.1, batch_index=386, batch_size=256]

Epoch 3/10:  39%|████▋       | 386/991 [1:37:46<2:34:38, 15.34s/batch, batch_loss=18.1, batch_index=386, batch_size=256]

Epoch 3/10:  39%|█████▍        | 386/991 [1:38:01<2:34:38, 15.34s/batch, batch_loss=25, batch_index=387, batch_size=256]

Epoch 3/10:  39%|█████▍        | 387/991 [1:38:01<2:35:10, 15.41s/batch, batch_loss=25, batch_index=387, batch_size=256]

Epoch 3/10:  39%|█████        | 387/991 [1:38:16<2:35:10, 15.41s/batch, batch_loss=792, batch_index=388, batch_size=256]

Epoch 3/10:  39%|█████        | 388/991 [1:38:16<2:32:52, 15.21s/batch, batch_loss=792, batch_index=388, batch_size=256]

Epoch 3/10:  39%|████▋       | 388/991 [1:38:31<2:32:52, 15.21s/batch, batch_loss=14.2, batch_index=389, batch_size=256]

Epoch 3/10:  39%|████▋       | 389/991 [1:38:31<2:32:40, 15.22s/batch, batch_loss=14.2, batch_index=389, batch_size=256]

Epoch 3/10:  39%|█████        | 389/991 [1:38:46<2:32:40, 15.22s/batch, batch_loss=863, batch_index=390, batch_size=256]

Epoch 3/10:  39%|█████        | 390/991 [1:38:46<2:31:52, 15.16s/batch, batch_loss=863, batch_index=390, batch_size=256]

Epoch 3/10:  39%|████▋       | 390/991 [1:39:01<2:31:52, 15.16s/batch, batch_loss=17.5, batch_index=391, batch_size=256]

Epoch 3/10:  39%|████▋       | 391/991 [1:39:01<2:28:39, 14.87s/batch, batch_loss=17.5, batch_index=391, batch_size=256]

Epoch 3/10:  39%|████▋       | 391/991 [1:39:15<2:28:39, 14.87s/batch, batch_loss=15.5, batch_index=392, batch_size=256]

Epoch 3/10:  40%|████▋       | 392/991 [1:39:15<2:26:20, 14.66s/batch, batch_loss=15.5, batch_index=392, batch_size=256]

Epoch 3/10:  40%|████▋       | 392/991 [1:39:32<2:26:20, 14.66s/batch, batch_loss=19.6, batch_index=393, batch_size=256]

Epoch 3/10:  40%|████▊       | 393/991 [1:39:32<2:33:04, 15.36s/batch, batch_loss=19.6, batch_index=393, batch_size=256]

Epoch 3/10:  40%|█████▏       | 393/991 [1:39:46<2:33:04, 15.36s/batch, batch_loss=605, batch_index=394, batch_size=256]

Epoch 3/10:  40%|█████▏       | 394/991 [1:39:46<2:28:56, 14.97s/batch, batch_loss=605, batch_index=394, batch_size=256]

Epoch 3/10:  40%|█████▌        | 394/991 [1:40:01<2:28:56, 14.97s/batch, batch_loss=21, batch_index=395, batch_size=256]

Epoch 3/10:  40%|█████▌        | 395/991 [1:40:01<2:28:41, 14.97s/batch, batch_loss=21, batch_index=395, batch_size=256]

Epoch 3/10:  40%|████▊       | 395/991 [1:40:15<2:28:41, 14.97s/batch, batch_loss=13.2, batch_index=396, batch_size=256]

Epoch 3/10:  40%|████▊       | 396/991 [1:40:15<2:27:22, 14.86s/batch, batch_loss=13.2, batch_index=396, batch_size=256]

Epoch 3/10:  40%|████▊       | 396/991 [1:40:31<2:27:22, 14.86s/batch, batch_loss=15.6, batch_index=397, batch_size=256]

Epoch 3/10:  40%|████▊       | 397/991 [1:40:31<2:28:45, 15.03s/batch, batch_loss=15.6, batch_index=397, batch_size=256]

Epoch 3/10:  40%|████▊       | 397/991 [1:40:46<2:28:45, 15.03s/batch, batch_loss=14.5, batch_index=398, batch_size=256]

Epoch 3/10:  40%|████▊       | 398/991 [1:40:46<2:29:27, 15.12s/batch, batch_loss=14.5, batch_index=398, batch_size=256]

Epoch 3/10:  40%|████▊       | 398/991 [1:41:03<2:29:27, 15.12s/batch, batch_loss=23.1, batch_index=399, batch_size=256]

Epoch 3/10:  40%|████▊       | 399/991 [1:41:03<2:34:21, 15.65s/batch, batch_loss=23.1, batch_index=399, batch_size=256]

Epoch 3/10:  40%|████▊       | 399/991 [1:41:18<2:34:21, 15.65s/batch, batch_loss=10.9, batch_index=400, batch_size=256]

Epoch 3/10:  40%|████▊       | 400/991 [1:41:18<2:33:00, 15.53s/batch, batch_loss=10.9, batch_index=400, batch_size=256]

Epoch 3/10:  40%|████▊       | 400/991 [1:41:33<2:33:00, 15.53s/batch, batch_loss=11.6, batch_index=401, batch_size=256]

Epoch 3/10:  40%|████▊       | 401/991 [1:41:33<2:29:15, 15.18s/batch, batch_loss=11.6, batch_index=401, batch_size=256]

Epoch 3/10:  40%|████▊       | 401/991 [1:41:48<2:29:15, 15.18s/batch, batch_loss=16.2, batch_index=402, batch_size=256]

Epoch 3/10:  41%|████▊       | 402/991 [1:41:48<2:28:23, 15.12s/batch, batch_loss=16.2, batch_index=402, batch_size=256]

Epoch 3/10:  41%|████▊       | 402/991 [1:42:03<2:28:23, 15.12s/batch, batch_loss=15.1, batch_index=403, batch_size=256]

Epoch 3/10:  41%|████▉       | 403/991 [1:42:03<2:28:27, 15.15s/batch, batch_loss=15.1, batch_index=403, batch_size=256]

Epoch 3/10:  41%|████▉       | 403/991 [1:42:17<2:28:27, 15.15s/batch, batch_loss=10.2, batch_index=404, batch_size=256]

Epoch 3/10:  41%|████▉       | 404/991 [1:42:17<2:25:50, 14.91s/batch, batch_loss=10.2, batch_index=404, batch_size=256]

Epoch 3/10:  41%|████▉       | 404/991 [1:42:32<2:25:50, 14.91s/batch, batch_loss=13.8, batch_index=405, batch_size=256]

Epoch 3/10:  41%|████▉       | 405/991 [1:42:32<2:25:56, 14.94s/batch, batch_loss=13.8, batch_index=405, batch_size=256]

Epoch 3/10:  41%|████▉       | 405/991 [1:42:47<2:25:56, 14.94s/batch, batch_loss=7.32, batch_index=406, batch_size=256]

Epoch 3/10:  41%|████▉       | 406/991 [1:42:47<2:25:51, 14.96s/batch, batch_loss=7.32, batch_index=406, batch_size=256]

Epoch 3/10:  41%|████▉       | 406/991 [1:43:02<2:25:51, 14.96s/batch, batch_loss=23.5, batch_index=407, batch_size=256]

Epoch 3/10:  41%|████▉       | 407/991 [1:43:02<2:25:25, 14.94s/batch, batch_loss=23.5, batch_index=407, batch_size=256]

Epoch 3/10:  41%|████▉       | 407/991 [1:43:17<2:25:25, 14.94s/batch, batch_loss=7.29, batch_index=408, batch_size=256]

Epoch 3/10:  41%|████▉       | 408/991 [1:43:17<2:24:45, 14.90s/batch, batch_loss=7.29, batch_index=408, batch_size=256]

Epoch 3/10:  41%|████▉       | 408/991 [1:43:32<2:24:45, 14.90s/batch, batch_loss=20.9, batch_index=409, batch_size=256]

Epoch 3/10:  41%|████▉       | 409/991 [1:43:32<2:24:57, 14.94s/batch, batch_loss=20.9, batch_index=409, batch_size=256]

Epoch 3/10:  41%|█████▊        | 409/991 [1:43:48<2:24:57, 14.94s/batch, batch_loss=32, batch_index=410, batch_size=256]

Epoch 3/10:  41%|█████▊        | 410/991 [1:43:48<2:27:24, 15.22s/batch, batch_loss=32, batch_index=410, batch_size=256]

Epoch 3/10:  41%|████▉       | 410/991 [1:44:02<2:27:24, 15.22s/batch, batch_loss=15.1, batch_index=411, batch_size=256]

Epoch 3/10:  41%|████▉       | 411/991 [1:44:02<2:24:30, 14.95s/batch, batch_loss=15.1, batch_index=411, batch_size=256]

Epoch 3/10:  41%|█████▊        | 411/991 [1:44:16<2:24:30, 14.95s/batch, batch_loss=25, batch_index=412, batch_size=256]

Epoch 3/10:  42%|█████▊        | 412/991 [1:44:16<2:22:13, 14.74s/batch, batch_loss=25, batch_index=412, batch_size=256]

Epoch 3/10:  42%|████▉       | 412/991 [1:44:30<2:22:13, 14.74s/batch, batch_loss=16.2, batch_index=413, batch_size=256]

Epoch 3/10:  42%|█████       | 413/991 [1:44:30<2:18:47, 14.41s/batch, batch_loss=16.2, batch_index=413, batch_size=256]

Epoch 3/10:  42%|█████       | 413/991 [1:44:45<2:18:47, 14.41s/batch, batch_loss=13.2, batch_index=414, batch_size=256]

Epoch 3/10:  42%|█████       | 414/991 [1:44:45<2:18:39, 14.42s/batch, batch_loss=13.2, batch_index=414, batch_size=256]

Epoch 3/10:  42%|█████▍       | 414/991 [1:45:00<2:18:39, 14.42s/batch, batch_loss=8.2, batch_index=415, batch_size=256]

Epoch 3/10:  42%|█████▍       | 415/991 [1:45:00<2:22:29, 14.84s/batch, batch_loss=8.2, batch_index=415, batch_size=256]

Epoch 3/10:  42%|█████       | 415/991 [1:45:15<2:22:29, 14.84s/batch, batch_loss=12.6, batch_index=416, batch_size=256]

Epoch 3/10:  42%|█████       | 416/991 [1:45:15<2:22:59, 14.92s/batch, batch_loss=12.6, batch_index=416, batch_size=256]

Epoch 3/10:  42%|█████       | 416/991 [1:45:30<2:22:59, 14.92s/batch, batch_loss=11.5, batch_index=417, batch_size=256]

Epoch 3/10:  42%|█████       | 417/991 [1:45:30<2:22:58, 14.95s/batch, batch_loss=11.5, batch_index=417, batch_size=256]

Epoch 3/10:  42%|█████       | 417/991 [1:45:45<2:22:58, 14.95s/batch, batch_loss=14.4, batch_index=418, batch_size=256]

Epoch 3/10:  42%|█████       | 418/991 [1:45:45<2:22:35, 14.93s/batch, batch_loss=14.4, batch_index=418, batch_size=256]

Epoch 3/10:  42%|████▏     | 418/991 [1:46:01<2:22:35, 14.93s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 3/10:  42%|████▏     | 419/991 [1:46:01<2:23:21, 15.04s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 3/10:  42%|█████       | 419/991 [1:46:16<2:23:21, 15.04s/batch, batch_loss=18.4, batch_index=420, batch_size=256]

Epoch 3/10:  42%|█████       | 420/991 [1:46:16<2:23:49, 15.11s/batch, batch_loss=18.4, batch_index=420, batch_size=256]

Epoch 3/10:  42%|█████       | 420/991 [1:46:30<2:23:49, 15.11s/batch, batch_loss=15.8, batch_index=421, batch_size=256]

Epoch 3/10:  42%|█████       | 421/991 [1:46:30<2:20:57, 14.84s/batch, batch_loss=15.8, batch_index=421, batch_size=256]

Epoch 3/10:  42%|█████       | 421/991 [1:46:45<2:20:57, 14.84s/batch, batch_loss=9.55, batch_index=422, batch_size=256]

Epoch 3/10:  43%|█████       | 422/991 [1:46:45<2:21:25, 14.91s/batch, batch_loss=9.55, batch_index=422, batch_size=256]

Epoch 3/10:  43%|█████       | 422/991 [1:47:02<2:21:25, 14.91s/batch, batch_loss=8.63, batch_index=423, batch_size=256]

Epoch 3/10:  43%|█████       | 423/991 [1:47:02<2:26:43, 15.50s/batch, batch_loss=8.63, batch_index=423, batch_size=256]

Epoch 3/10:  43%|█████       | 423/991 [1:47:20<2:26:43, 15.50s/batch, batch_loss=9.11, batch_index=424, batch_size=256]

Epoch 3/10:  43%|█████▏      | 424/991 [1:47:20<2:33:54, 16.29s/batch, batch_loss=9.11, batch_index=424, batch_size=256]

Epoch 3/10:  43%|██████▍        | 424/991 [1:47:36<2:33:54, 16.29s/batch, batch_loss=6, batch_index=425, batch_size=256]

Epoch 3/10:  43%|██████▍        | 425/991 [1:47:36<2:31:41, 16.08s/batch, batch_loss=6, batch_index=425, batch_size=256]

Epoch 3/10:  43%|█████▏      | 425/991 [1:47:52<2:31:41, 16.08s/batch, batch_loss=2.51, batch_index=426, batch_size=256]

Epoch 3/10:  43%|█████▏      | 426/991 [1:47:52<2:32:05, 16.15s/batch, batch_loss=2.51, batch_index=426, batch_size=256]

Epoch 3/10:  43%|█████▏      | 426/991 [1:48:07<2:32:05, 16.15s/batch, batch_loss=9.87, batch_index=427, batch_size=256]

Epoch 3/10:  43%|█████▏      | 427/991 [1:48:07<2:27:35, 15.70s/batch, batch_loss=9.87, batch_index=427, batch_size=256]

Epoch 3/10:  43%|██████        | 427/991 [1:48:22<2:27:35, 15.70s/batch, batch_loss=22, batch_index=428, batch_size=256]

Epoch 3/10:  43%|██████        | 428/991 [1:48:22<2:26:55, 15.66s/batch, batch_loss=22, batch_index=428, batch_size=256]

Epoch 3/10:  43%|█████▏      | 428/991 [1:48:38<2:26:55, 15.66s/batch, batch_loss=28.9, batch_index=429, batch_size=256]

Epoch 3/10:  43%|█████▏      | 429/991 [1:48:38<2:26:23, 15.63s/batch, batch_loss=28.9, batch_index=429, batch_size=256]

Epoch 3/10:  43%|███▉     | 429/991 [1:48:53<2:26:23, 15.63s/batch, batch_loss=9.29e+3, batch_index=430, batch_size=256]

Epoch 3/10:  43%|███▉     | 430/991 [1:48:53<2:24:32, 15.46s/batch, batch_loss=9.29e+3, batch_index=430, batch_size=256]

Epoch 3/10:  43%|█████▏      | 430/991 [1:49:07<2:24:32, 15.46s/batch, batch_loss=24.1, batch_index=431, batch_size=256]

Epoch 3/10:  43%|█████▏      | 431/991 [1:49:07<2:21:40, 15.18s/batch, batch_loss=24.1, batch_index=431, batch_size=256]

Epoch 3/10:  43%|█████▏      | 431/991 [1:49:23<2:21:40, 15.18s/batch, batch_loss=20.9, batch_index=432, batch_size=256]

Epoch 3/10:  44%|█████▏      | 432/991 [1:49:23<2:23:05, 15.36s/batch, batch_loss=20.9, batch_index=432, batch_size=256]

Epoch 3/10:  44%|█████▏      | 432/991 [1:49:42<2:23:05, 15.36s/batch, batch_loss=10.3, batch_index=433, batch_size=256]

Epoch 3/10:  44%|█████▏      | 433/991 [1:49:42<2:32:50, 16.44s/batch, batch_loss=10.3, batch_index=433, batch_size=256]

Epoch 3/10:  44%|█████▏      | 433/991 [1:49:58<2:32:50, 16.44s/batch, batch_loss=17.2, batch_index=434, batch_size=256]

Epoch 3/10:  44%|█████▎      | 434/991 [1:49:58<2:30:42, 16.24s/batch, batch_loss=17.2, batch_index=434, batch_size=256]

Epoch 3/10:  44%|█████▎      | 434/991 [1:50:13<2:30:42, 16.24s/batch, batch_loss=12.6, batch_index=435, batch_size=256]

Epoch 3/10:  44%|█████▎      | 435/991 [1:50:13<2:25:47, 15.73s/batch, batch_loss=12.6, batch_index=435, batch_size=256]

Epoch 3/10:  44%|█████▎      | 435/991 [1:50:29<2:25:47, 15.73s/batch, batch_loss=13.9, batch_index=436, batch_size=256]

Epoch 3/10:  44%|█████▎      | 436/991 [1:50:29<2:27:26, 15.94s/batch, batch_loss=13.9, batch_index=436, batch_size=256]

Epoch 3/10:  44%|█████▎      | 436/991 [1:50:45<2:27:26, 15.94s/batch, batch_loss=17.9, batch_index=437, batch_size=256]

Epoch 3/10:  44%|█████▎      | 437/991 [1:50:45<2:26:51, 15.90s/batch, batch_loss=17.9, batch_index=437, batch_size=256]

Epoch 3/10:  44%|█████▎      | 437/991 [1:51:00<2:26:51, 15.90s/batch, batch_loss=20.4, batch_index=438, batch_size=256]

Epoch 3/10:  44%|█████▎      | 438/991 [1:51:00<2:25:35, 15.80s/batch, batch_loss=20.4, batch_index=438, batch_size=256]

Epoch 3/10:  44%|█████▎      | 438/991 [1:51:16<2:25:35, 15.80s/batch, batch_loss=12.6, batch_index=439, batch_size=256]

Epoch 3/10:  44%|█████▎      | 439/991 [1:51:16<2:24:11, 15.67s/batch, batch_loss=12.6, batch_index=439, batch_size=256]

Epoch 3/10:  44%|█████▎      | 439/991 [1:51:31<2:24:11, 15.67s/batch, batch_loss=21.5, batch_index=440, batch_size=256]

Epoch 3/10:  44%|█████▎      | 440/991 [1:51:31<2:22:42, 15.54s/batch, batch_loss=21.5, batch_index=440, batch_size=256]

Epoch 3/10:  44%|█████▎      | 440/991 [1:51:46<2:22:42, 15.54s/batch, batch_loss=23.4, batch_index=441, batch_size=256]

Epoch 3/10:  45%|█████▎      | 441/991 [1:51:46<2:21:29, 15.43s/batch, batch_loss=23.4, batch_index=441, batch_size=256]

Epoch 3/10:  45%|█████▎      | 441/991 [1:52:01<2:21:29, 15.43s/batch, batch_loss=13.8, batch_index=442, batch_size=256]

Epoch 3/10:  45%|█████▎      | 442/991 [1:52:01<2:20:15, 15.33s/batch, batch_loss=13.8, batch_index=442, batch_size=256]

Epoch 3/10:  45%|█████▎      | 442/991 [1:52:16<2:20:15, 15.33s/batch, batch_loss=21.4, batch_index=443, batch_size=256]

Epoch 3/10:  45%|█████▎      | 443/991 [1:52:16<2:18:49, 15.20s/batch, batch_loss=21.4, batch_index=443, batch_size=256]

Epoch 3/10:  45%|█████▎      | 443/991 [1:52:32<2:18:49, 15.20s/batch, batch_loss=12.6, batch_index=444, batch_size=256]

Epoch 3/10:  45%|█████▍      | 444/991 [1:52:32<2:20:28, 15.41s/batch, batch_loss=12.6, batch_index=444, batch_size=256]

Epoch 3/10:  45%|█████▍      | 444/991 [1:52:48<2:20:28, 15.41s/batch, batch_loss=19.5, batch_index=445, batch_size=256]

Epoch 3/10:  45%|█████▍      | 445/991 [1:52:48<2:21:40, 15.57s/batch, batch_loss=19.5, batch_index=445, batch_size=256]

Epoch 3/10:  45%|█████▍      | 445/991 [1:53:03<2:21:40, 15.57s/batch, batch_loss=27.9, batch_index=446, batch_size=256]

Epoch 3/10:  45%|█████▍      | 446/991 [1:53:03<2:20:13, 15.44s/batch, batch_loss=27.9, batch_index=446, batch_size=256]

Epoch 3/10:  45%|██████▎       | 446/991 [1:53:18<2:20:13, 15.44s/batch, batch_loss=14, batch_index=447, batch_size=256]

Epoch 3/10:  45%|██████▎       | 447/991 [1:53:18<2:18:50, 15.31s/batch, batch_loss=14, batch_index=447, batch_size=256]

Epoch 3/10:  45%|█████▍      | 447/991 [1:53:33<2:18:50, 15.31s/batch, batch_loss=18.8, batch_index=448, batch_size=256]

Epoch 3/10:  45%|█████▍      | 448/991 [1:53:33<2:17:07, 15.15s/batch, batch_loss=18.8, batch_index=448, batch_size=256]

Epoch 3/10:  45%|█████▍      | 448/991 [1:53:47<2:17:07, 15.15s/batch, batch_loss=16.8, batch_index=449, batch_size=256]

Epoch 3/10:  45%|█████▍      | 449/991 [1:53:47<2:14:53, 14.93s/batch, batch_loss=16.8, batch_index=449, batch_size=256]

Epoch 3/10:  45%|██████▎       | 449/991 [1:54:03<2:14:53, 14.93s/batch, batch_loss=23, batch_index=450, batch_size=256]

Epoch 3/10:  45%|██████▎       | 450/991 [1:54:03<2:16:42, 15.16s/batch, batch_loss=23, batch_index=450, batch_size=256]

Epoch 3/10:  45%|█████▍      | 450/991 [1:54:18<2:16:42, 15.16s/batch, batch_loss=21.3, batch_index=451, batch_size=256]

Epoch 3/10:  46%|█████▍      | 451/991 [1:54:18<2:15:52, 15.10s/batch, batch_loss=21.3, batch_index=451, batch_size=256]

Epoch 3/10:  46%|█████▍      | 451/991 [1:54:33<2:15:52, 15.10s/batch, batch_loss=17.3, batch_index=452, batch_size=256]

Epoch 3/10:  46%|█████▍      | 452/991 [1:54:33<2:15:36, 15.09s/batch, batch_loss=17.3, batch_index=452, batch_size=256]

Epoch 3/10:  46%|█████▍      | 452/991 [1:54:48<2:15:36, 15.09s/batch, batch_loss=19.4, batch_index=453, batch_size=256]

Epoch 3/10:  46%|█████▍      | 453/991 [1:54:48<2:14:18, 14.98s/batch, batch_loss=19.4, batch_index=453, batch_size=256]

Epoch 3/10:  46%|████     | 453/991 [1:55:03<2:14:18, 14.98s/batch, batch_loss=7.23e+3, batch_index=454, batch_size=256]

Epoch 3/10:  46%|████     | 454/991 [1:55:03<2:14:54, 15.07s/batch, batch_loss=7.23e+3, batch_index=454, batch_size=256]

Epoch 3/10:  46%|█████▍      | 454/991 [1:55:21<2:14:54, 15.07s/batch, batch_loss=26.2, batch_index=455, batch_size=256]

Epoch 3/10:  46%|█████▌      | 455/991 [1:55:21<2:22:18, 15.93s/batch, batch_loss=26.2, batch_index=455, batch_size=256]

Epoch 3/10:  46%|█████▌      | 455/991 [1:55:36<2:22:18, 15.93s/batch, batch_loss=24.6, batch_index=456, batch_size=256]

Epoch 3/10:  46%|█████▌      | 456/991 [1:55:36<2:19:32, 15.65s/batch, batch_loss=24.6, batch_index=456, batch_size=256]

Epoch 3/10:  46%|█████▌      | 456/991 [1:55:51<2:19:32, 15.65s/batch, batch_loss=13.7, batch_index=457, batch_size=256]

Epoch 3/10:  46%|█████▌      | 457/991 [1:55:51<2:17:24, 15.44s/batch, batch_loss=13.7, batch_index=457, batch_size=256]

Epoch 3/10:  46%|█████▌      | 457/991 [1:56:06<2:17:24, 15.44s/batch, batch_loss=16.7, batch_index=458, batch_size=256]

Epoch 3/10:  46%|█████▌      | 458/991 [1:56:06<2:15:34, 15.26s/batch, batch_loss=16.7, batch_index=458, batch_size=256]

Epoch 3/10:  46%|█████▌      | 458/991 [1:56:20<2:15:34, 15.26s/batch, batch_loss=24.8, batch_index=459, batch_size=256]

Epoch 3/10:  46%|█████▌      | 459/991 [1:56:20<2:11:56, 14.88s/batch, batch_loss=24.8, batch_index=459, batch_size=256]

Epoch 3/10:  46%|█████▌      | 459/991 [1:56:34<2:11:56, 14.88s/batch, batch_loss=21.9, batch_index=460, batch_size=256]

Epoch 3/10:  46%|█████▌      | 460/991 [1:56:34<2:11:05, 14.81s/batch, batch_loss=21.9, batch_index=460, batch_size=256]

Epoch 3/10:  46%|█████▌      | 460/991 [1:56:49<2:11:05, 14.81s/batch, batch_loss=59.6, batch_index=461, batch_size=256]

Epoch 3/10:  47%|█████▌      | 461/991 [1:56:49<2:11:09, 14.85s/batch, batch_loss=59.6, batch_index=461, batch_size=256]

Epoch 3/10:  47%|█████▌      | 461/991 [1:57:04<2:11:09, 14.85s/batch, batch_loss=15.6, batch_index=462, batch_size=256]

Epoch 3/10:  47%|█████▌      | 462/991 [1:57:04<2:09:59, 14.74s/batch, batch_loss=15.6, batch_index=462, batch_size=256]

Epoch 3/10:  47%|████▏    | 462/991 [1:57:21<2:09:59, 14.74s/batch, batch_loss=6.22e+4, batch_index=463, batch_size=256]

Epoch 3/10:  47%|████▏    | 463/991 [1:57:21<2:15:04, 15.35s/batch, batch_loss=6.22e+4, batch_index=463, batch_size=256]

Epoch 3/10:  47%|█████▌      | 463/991 [1:57:35<2:15:04, 15.35s/batch, batch_loss=14.2, batch_index=464, batch_size=256]

Epoch 3/10:  47%|█████▌      | 464/991 [1:57:35<2:13:18, 15.18s/batch, batch_loss=14.2, batch_index=464, batch_size=256]

Epoch 3/10:  47%|█████▌      | 464/991 [1:57:50<2:13:18, 15.18s/batch, batch_loss=13.6, batch_index=465, batch_size=256]

Epoch 3/10:  47%|█████▋      | 465/991 [1:57:50<2:11:37, 15.01s/batch, batch_loss=13.6, batch_index=465, batch_size=256]

Epoch 3/10:  47%|█████▋      | 465/991 [1:58:05<2:11:37, 15.01s/batch, batch_loss=17.2, batch_index=466, batch_size=256]

Epoch 3/10:  47%|█████▋      | 466/991 [1:58:05<2:10:47, 14.95s/batch, batch_loss=17.2, batch_index=466, batch_size=256]

Epoch 3/10:  47%|█████▋      | 466/991 [1:58:19<2:10:47, 14.95s/batch, batch_loss=14.5, batch_index=467, batch_size=256]

Epoch 3/10:  47%|█████▋      | 467/991 [1:58:19<2:08:09, 14.67s/batch, batch_loss=14.5, batch_index=467, batch_size=256]

Epoch 3/10:  47%|█████▋      | 467/991 [1:58:34<2:08:09, 14.67s/batch, batch_loss=17.3, batch_index=468, batch_size=256]

Epoch 3/10:  47%|█████▋      | 468/991 [1:58:34<2:08:50, 14.78s/batch, batch_loss=17.3, batch_index=468, batch_size=256]

Epoch 3/10:  47%|█████▋      | 468/991 [1:58:49<2:08:50, 14.78s/batch, batch_loss=17.3, batch_index=469, batch_size=256]

Epoch 3/10:  47%|█████▋      | 469/991 [1:58:49<2:09:13, 14.85s/batch, batch_loss=17.3, batch_index=469, batch_size=256]

Epoch 3/10:  47%|█████▋      | 469/991 [1:59:04<2:09:13, 14.85s/batch, batch_loss=16.5, batch_index=470, batch_size=256]

Epoch 3/10:  47%|█████▋      | 470/991 [1:59:04<2:09:18, 14.89s/batch, batch_loss=16.5, batch_index=470, batch_size=256]

Epoch 3/10:  47%|█████▋      | 470/991 [1:59:22<2:09:18, 14.89s/batch, batch_loss=20.2, batch_index=471, batch_size=256]

Epoch 3/10:  48%|█████▋      | 471/991 [1:59:22<2:16:36, 15.76s/batch, batch_loss=20.2, batch_index=471, batch_size=256]

Epoch 3/10:  48%|█████▋      | 471/991 [1:59:37<2:16:36, 15.76s/batch, batch_loss=21.6, batch_index=472, batch_size=256]

Epoch 3/10:  48%|█████▋      | 472/991 [1:59:37<2:15:25, 15.66s/batch, batch_loss=21.6, batch_index=472, batch_size=256]

Epoch 3/10:  48%|█████▋      | 472/991 [1:59:52<2:15:25, 15.66s/batch, batch_loss=19.8, batch_index=473, batch_size=256]

Epoch 3/10:  48%|█████▋      | 473/991 [1:59:52<2:12:33, 15.35s/batch, batch_loss=19.8, batch_index=473, batch_size=256]

Epoch 3/10:  48%|█████▋      | 473/991 [2:00:06<2:12:33, 15.35s/batch, batch_loss=17.1, batch_index=474, batch_size=256]

Epoch 3/10:  48%|█████▋      | 474/991 [2:00:06<2:10:19, 15.12s/batch, batch_loss=17.1, batch_index=474, batch_size=256]

Epoch 3/10:  48%|████▊     | 474/991 [2:00:21<2:10:19, 15.12s/batch, batch_loss=2.4e+3, batch_index=475, batch_size=256]

Epoch 3/10:  48%|████▊     | 475/991 [2:00:21<2:10:02, 15.12s/batch, batch_loss=2.4e+3, batch_index=475, batch_size=256]

Epoch 3/10:  48%|█████▊      | 475/991 [2:00:37<2:10:02, 15.12s/batch, batch_loss=19.2, batch_index=476, batch_size=256]

Epoch 3/10:  48%|█████▊      | 476/991 [2:00:37<2:10:44, 15.23s/batch, batch_loss=19.2, batch_index=476, batch_size=256]

Epoch 3/10:  48%|█████▊      | 476/991 [2:00:52<2:10:44, 15.23s/batch, batch_loss=18.2, batch_index=477, batch_size=256]

Epoch 3/10:  48%|█████▊      | 477/991 [2:00:52<2:10:25, 15.23s/batch, batch_loss=18.2, batch_index=477, batch_size=256]

Epoch 3/10:  48%|█████▊      | 477/991 [2:01:08<2:10:25, 15.23s/batch, batch_loss=17.2, batch_index=478, batch_size=256]

Epoch 3/10:  48%|█████▊      | 478/991 [2:01:08<2:12:11, 15.46s/batch, batch_loss=17.2, batch_index=478, batch_size=256]

Epoch 3/10:  48%|█████▊      | 478/991 [2:01:24<2:12:11, 15.46s/batch, batch_loss=20.4, batch_index=479, batch_size=256]

Epoch 3/10:  48%|█████▊      | 479/991 [2:01:24<2:12:26, 15.52s/batch, batch_loss=20.4, batch_index=479, batch_size=256]

Epoch 3/10:  48%|█████▊      | 479/991 [2:01:39<2:12:26, 15.52s/batch, batch_loss=18.7, batch_index=480, batch_size=256]

Epoch 3/10:  48%|█████▊      | 480/991 [2:01:39<2:11:29, 15.44s/batch, batch_loss=18.7, batch_index=480, batch_size=256]

Epoch 3/10:  48%|█████▊      | 480/991 [2:01:54<2:11:29, 15.44s/batch, batch_loss=26.9, batch_index=481, batch_size=256]

Epoch 3/10:  49%|█████▊      | 481/991 [2:01:54<2:09:57, 15.29s/batch, batch_loss=26.9, batch_index=481, batch_size=256]

Epoch 3/10:  49%|█████▊      | 481/991 [2:02:09<2:09:57, 15.29s/batch, batch_loss=20.3, batch_index=482, batch_size=256]

Epoch 3/10:  49%|█████▊      | 482/991 [2:02:09<2:07:46, 15.06s/batch, batch_loss=20.3, batch_index=482, batch_size=256]

Epoch 3/10:  49%|█████▊      | 482/991 [2:02:23<2:07:46, 15.06s/batch, batch_loss=14.5, batch_index=483, batch_size=256]

Epoch 3/10:  49%|█████▊      | 483/991 [2:02:23<2:07:06, 15.01s/batch, batch_loss=14.5, batch_index=483, batch_size=256]

Epoch 3/10:  49%|█████▊      | 483/991 [2:02:38<2:07:06, 15.01s/batch, batch_loss=22.4, batch_index=484, batch_size=256]

Epoch 3/10:  49%|█████▊      | 484/991 [2:02:38<2:06:05, 14.92s/batch, batch_loss=22.4, batch_index=484, batch_size=256]

Epoch 3/10:  49%|█████▊      | 484/991 [2:02:53<2:06:05, 14.92s/batch, batch_loss=10.1, batch_index=485, batch_size=256]

Epoch 3/10:  49%|█████▊      | 485/991 [2:02:53<2:04:41, 14.79s/batch, batch_loss=10.1, batch_index=485, batch_size=256]

Epoch 3/10:  49%|█████▊      | 485/991 [2:03:07<2:04:41, 14.79s/batch, batch_loss=26.1, batch_index=486, batch_size=256]

Epoch 3/10:  49%|█████▉      | 486/991 [2:03:07<2:04:30, 14.79s/batch, batch_loss=26.1, batch_index=486, batch_size=256]

Epoch 3/10:  49%|█████▉      | 486/991 [2:03:21<2:04:30, 14.79s/batch, batch_loss=15.5, batch_index=487, batch_size=256]

Epoch 3/10:  49%|█████▉      | 487/991 [2:03:21<2:02:18, 14.56s/batch, batch_loss=15.5, batch_index=487, batch_size=256]

Epoch 3/10:  49%|█████▉      | 487/991 [2:03:39<2:02:18, 14.56s/batch, batch_loss=10.5, batch_index=488, batch_size=256]

Epoch 3/10:  49%|█████▉      | 488/991 [2:03:39<2:09:53, 15.49s/batch, batch_loss=10.5, batch_index=488, batch_size=256]

Epoch 3/10:  49%|█████▉      | 488/991 [2:03:54<2:09:53, 15.49s/batch, batch_loss=11.4, batch_index=489, batch_size=256]

Epoch 3/10:  49%|█████▉      | 489/991 [2:03:54<2:07:58, 15.30s/batch, batch_loss=11.4, batch_index=489, batch_size=256]

Epoch 3/10:  49%|█████▉      | 489/991 [2:04:07<2:07:58, 15.30s/batch, batch_loss=8.35, batch_index=490, batch_size=256]

Epoch 3/10:  49%|█████▉      | 490/991 [2:04:07<2:02:28, 14.67s/batch, batch_loss=8.35, batch_index=490, batch_size=256]

Epoch 3/10:  49%|█████▉      | 490/991 [2:04:20<2:02:28, 14.67s/batch, batch_loss=22.2, batch_index=491, batch_size=256]

Epoch 3/10:  50%|█████▉      | 491/991 [2:04:20<1:58:24, 14.21s/batch, batch_loss=22.2, batch_index=491, batch_size=256]

Epoch 3/10:  50%|█████▉      | 491/991 [2:04:34<1:58:24, 14.21s/batch, batch_loss=19.8, batch_index=492, batch_size=256]

Epoch 3/10:  50%|█████▉      | 492/991 [2:04:34<1:56:25, 14.00s/batch, batch_loss=19.8, batch_index=492, batch_size=256]

Epoch 3/10:  50%|█████▉      | 492/991 [2:04:47<1:56:25, 14.00s/batch, batch_loss=21.1, batch_index=493, batch_size=256]

Epoch 3/10:  50%|█████▉      | 493/991 [2:04:47<1:54:55, 13.85s/batch, batch_loss=21.1, batch_index=493, batch_size=256]

Epoch 3/10:  50%|█████▉      | 493/991 [2:05:01<1:54:55, 13.85s/batch, batch_loss=8.67, batch_index=494, batch_size=256]

Epoch 3/10:  50%|█████▉      | 494/991 [2:05:01<1:55:08, 13.90s/batch, batch_loss=8.67, batch_index=494, batch_size=256]

Epoch 3/10:  50%|████▍    | 494/991 [2:05:15<1:55:08, 13.90s/batch, batch_loss=8.54e+4, batch_index=495, batch_size=256]

Epoch 3/10:  50%|████▍    | 495/991 [2:05:15<1:55:14, 13.94s/batch, batch_loss=8.54e+4, batch_index=495, batch_size=256]

Epoch 3/10:  50%|█████▉      | 495/991 [2:05:30<1:55:14, 13.94s/batch, batch_loss=16.3, batch_index=496, batch_size=256]

Epoch 3/10:  50%|██████      | 496/991 [2:05:30<1:57:59, 14.30s/batch, batch_loss=16.3, batch_index=496, batch_size=256]

Epoch 3/10:  50%|██████▌      | 496/991 [2:05:45<1:57:59, 14.30s/batch, batch_loss=174, batch_index=497, batch_size=256]

Epoch 3/10:  50%|██████▌      | 497/991 [2:05:45<1:58:58, 14.45s/batch, batch_loss=174, batch_index=497, batch_size=256]

Epoch 3/10:  50%|███████       | 497/991 [2:06:00<1:58:58, 14.45s/batch, batch_loss=18, batch_index=498, batch_size=256]

Epoch 3/10:  50%|███████       | 498/991 [2:06:00<1:58:17, 14.40s/batch, batch_loss=18, batch_index=498, batch_size=256]

Epoch 3/10:  50%|██████▌      | 498/991 [2:06:15<1:58:17, 14.40s/batch, batch_loss=403, batch_index=499, batch_size=256]

Epoch 3/10:  50%|██████▌      | 499/991 [2:06:15<2:00:21, 14.68s/batch, batch_loss=403, batch_index=499, batch_size=256]

Epoch 3/10:  50%|██████      | 499/991 [2:06:29<2:00:21, 14.68s/batch, batch_loss=18.1, batch_index=500, batch_size=256]

Epoch 3/10:  50%|██████      | 500/991 [2:06:29<1:59:58, 14.66s/batch, batch_loss=18.1, batch_index=500, batch_size=256]

Epoch 3/10:  50%|██████      | 500/991 [2:06:45<1:59:58, 14.66s/batch, batch_loss=9.27, batch_index=501, batch_size=256]

Epoch 3/10:  51%|██████      | 501/991 [2:06:45<2:01:54, 14.93s/batch, batch_loss=9.27, batch_index=501, batch_size=256]

Epoch 3/10:  51%|██████      | 501/991 [2:07:00<2:01:54, 14.93s/batch, batch_loss=10.5, batch_index=502, batch_size=256]

Epoch 3/10:  51%|██████      | 502/991 [2:07:00<2:02:03, 14.98s/batch, batch_loss=10.5, batch_index=502, batch_size=256]

Epoch 3/10:  51%|███████       | 502/991 [2:07:15<2:02:03, 14.98s/batch, batch_loss=30, batch_index=503, batch_size=256]

Epoch 3/10:  51%|███████       | 503/991 [2:07:15<2:00:21, 14.80s/batch, batch_loss=30, batch_index=503, batch_size=256]

Epoch 3/10:  51%|██████      | 503/991 [2:07:33<2:00:21, 14.80s/batch, batch_loss=12.6, batch_index=504, batch_size=256]

Epoch 3/10:  51%|██████      | 504/991 [2:07:33<2:07:58, 15.77s/batch, batch_loss=12.6, batch_index=504, batch_size=256]

Epoch 3/10:  51%|██████      | 504/991 [2:07:48<2:07:58, 15.77s/batch, batch_loss=7.36, batch_index=505, batch_size=256]

Epoch 3/10:  51%|██████      | 505/991 [2:07:48<2:07:25, 15.73s/batch, batch_loss=7.36, batch_index=505, batch_size=256]

Epoch 3/10:  51%|██████      | 505/991 [2:08:04<2:07:25, 15.73s/batch, batch_loss=17.1, batch_index=506, batch_size=256]

Epoch 3/10:  51%|██████▏     | 506/991 [2:08:04<2:07:05, 15.72s/batch, batch_loss=17.1, batch_index=506, batch_size=256]

Epoch 3/10:  51%|██████▏     | 506/991 [2:08:19<2:07:05, 15.72s/batch, batch_loss=11.1, batch_index=507, batch_size=256]

Epoch 3/10:  51%|██████▏     | 507/991 [2:08:19<2:05:39, 15.58s/batch, batch_loss=11.1, batch_index=507, batch_size=256]

Epoch 3/10:  51%|███████▏      | 507/991 [2:08:34<2:05:39, 15.58s/batch, batch_loss=16, batch_index=508, batch_size=256]

Epoch 3/10:  51%|███████▏      | 508/991 [2:08:34<2:03:09, 15.30s/batch, batch_loss=16, batch_index=508, batch_size=256]

Epoch 3/10:  51%|██████▏     | 508/991 [2:08:49<2:03:09, 15.30s/batch, batch_loss=19.6, batch_index=509, batch_size=256]

Epoch 3/10:  51%|██████▏     | 509/991 [2:08:49<2:02:43, 15.28s/batch, batch_loss=19.6, batch_index=509, batch_size=256]

Epoch 3/10:  51%|██████▏     | 509/991 [2:09:04<2:02:43, 15.28s/batch, batch_loss=15.1, batch_index=510, batch_size=256]

Epoch 3/10:  51%|██████▏     | 510/991 [2:09:04<2:02:26, 15.27s/batch, batch_loss=15.1, batch_index=510, batch_size=256]

Epoch 3/10:  51%|██████▏     | 510/991 [2:09:20<2:02:26, 15.27s/batch, batch_loss=16.3, batch_index=511, batch_size=256]

Epoch 3/10:  52%|██████▏     | 511/991 [2:09:20<2:03:05, 15.39s/batch, batch_loss=16.3, batch_index=511, batch_size=256]

Epoch 3/10:  52%|██████▏     | 511/991 [2:09:37<2:03:05, 15.39s/batch, batch_loss=10.9, batch_index=512, batch_size=256]

Epoch 3/10:  52%|██████▏     | 512/991 [2:09:37<2:06:15, 15.81s/batch, batch_loss=10.9, batch_index=512, batch_size=256]

Epoch 3/10:  52%|██████▏     | 512/991 [2:09:56<2:06:15, 15.81s/batch, batch_loss=11.1, batch_index=513, batch_size=256]

Epoch 3/10:  52%|██████▏     | 513/991 [2:09:56<2:13:15, 16.73s/batch, batch_loss=11.1, batch_index=513, batch_size=256]

Epoch 3/10:  52%|██████▏     | 513/991 [2:10:13<2:13:15, 16.73s/batch, batch_loss=22.4, batch_index=514, batch_size=256]

Epoch 3/10:  52%|██████▏     | 514/991 [2:10:13<2:14:04, 16.87s/batch, batch_loss=22.4, batch_index=514, batch_size=256]

Epoch 3/10:  52%|███████▎      | 514/991 [2:10:28<2:14:04, 16.87s/batch, batch_loss=24, batch_index=515, batch_size=256]

Epoch 3/10:  52%|███████▎      | 515/991 [2:10:28<2:09:19, 16.30s/batch, batch_loss=24, batch_index=515, batch_size=256]

Epoch 3/10:  52%|██████▏     | 515/991 [2:10:42<2:09:19, 16.30s/batch, batch_loss=23.8, batch_index=516, batch_size=256]

Epoch 3/10:  52%|██████▏     | 516/991 [2:10:42<2:04:52, 15.77s/batch, batch_loss=23.8, batch_index=516, batch_size=256]

Epoch 3/10:  52%|██████▏     | 516/991 [2:10:57<2:04:52, 15.77s/batch, batch_loss=10.9, batch_index=517, batch_size=256]

Epoch 3/10:  52%|██████▎     | 517/991 [2:10:57<2:02:36, 15.52s/batch, batch_loss=10.9, batch_index=517, batch_size=256]

Epoch 3/10:  52%|██████▎     | 517/991 [2:11:12<2:02:36, 15.52s/batch, batch_loss=27.5, batch_index=518, batch_size=256]

Epoch 3/10:  52%|██████▎     | 518/991 [2:11:12<2:01:27, 15.41s/batch, batch_loss=27.5, batch_index=518, batch_size=256]

Epoch 3/10:  52%|██████▎     | 518/991 [2:11:30<2:01:27, 15.41s/batch, batch_loss=15.6, batch_index=519, batch_size=256]

Epoch 3/10:  52%|██████▎     | 519/991 [2:11:30<2:05:37, 15.97s/batch, batch_loss=15.6, batch_index=519, batch_size=256]

Epoch 3/10:  52%|██████▎     | 519/991 [2:11:45<2:05:37, 15.97s/batch, batch_loss=18.6, batch_index=520, batch_size=256]

Epoch 3/10:  52%|██████▎     | 520/991 [2:11:45<2:03:06, 15.68s/batch, batch_loss=18.6, batch_index=520, batch_size=256]

Epoch 3/10:  52%|██████▊      | 520/991 [2:11:59<2:03:06, 15.68s/batch, batch_loss=8.9, batch_index=521, batch_size=256]

Epoch 3/10:  53%|██████▊      | 521/991 [2:11:59<2:00:38, 15.40s/batch, batch_loss=8.9, batch_index=521, batch_size=256]

Epoch 3/10:  53%|██████▎     | 521/991 [2:12:14<2:00:38, 15.40s/batch, batch_loss=9.58, batch_index=522, batch_size=256]

Epoch 3/10:  53%|██████▎     | 522/991 [2:12:14<1:58:18, 15.14s/batch, batch_loss=9.58, batch_index=522, batch_size=256]

Epoch 3/10:  53%|██████▎     | 522/991 [2:12:29<1:58:18, 15.14s/batch, batch_loss=3.18, batch_index=523, batch_size=256]

Epoch 3/10:  53%|██████▎     | 523/991 [2:12:29<1:56:56, 14.99s/batch, batch_loss=3.18, batch_index=523, batch_size=256]

Epoch 3/10:  53%|██████▎     | 523/991 [2:12:43<1:56:56, 14.99s/batch, batch_loss=8.62, batch_index=524, batch_size=256]

Epoch 3/10:  53%|██████▎     | 524/991 [2:12:43<1:54:27, 14.71s/batch, batch_loss=8.62, batch_index=524, batch_size=256]

Epoch 3/10:  53%|██████▎     | 524/991 [2:12:57<1:54:27, 14.71s/batch, batch_loss=6.44, batch_index=525, batch_size=256]

Epoch 3/10:  53%|██████▎     | 525/991 [2:12:57<1:54:21, 14.72s/batch, batch_loss=6.44, batch_index=525, batch_size=256]

Epoch 3/10:  53%|███████▉       | 525/991 [2:13:12<1:54:21, 14.72s/batch, batch_loss=8, batch_index=526, batch_size=256]

Epoch 3/10:  53%|███████▉       | 526/991 [2:13:12<1:54:24, 14.76s/batch, batch_loss=8, batch_index=526, batch_size=256]

Epoch 3/10:  53%|██████▎     | 526/991 [2:13:27<1:54:24, 14.76s/batch, batch_loss=17.9, batch_index=527, batch_size=256]

Epoch 3/10:  53%|██████▍     | 527/991 [2:13:27<1:54:27, 14.80s/batch, batch_loss=17.9, batch_index=527, batch_size=256]

Epoch 3/10:  53%|███████▍      | 527/991 [2:13:41<1:54:27, 14.80s/batch, batch_loss=14, batch_index=528, batch_size=256]

Epoch 3/10:  53%|███████▍      | 528/991 [2:13:41<1:52:37, 14.59s/batch, batch_loss=14, batch_index=528, batch_size=256]

Epoch 3/10:  53%|██████▍     | 528/991 [2:13:56<1:52:37, 14.59s/batch, batch_loss=8.89, batch_index=529, batch_size=256]

Epoch 3/10:  53%|██████▍     | 529/991 [2:13:56<1:53:17, 14.71s/batch, batch_loss=8.89, batch_index=529, batch_size=256]

Epoch 3/10:  53%|██████▍     | 529/991 [2:14:11<1:53:17, 14.71s/batch, batch_loss=14.4, batch_index=530, batch_size=256]

Epoch 3/10:  53%|██████▍     | 530/991 [2:14:11<1:54:06, 14.85s/batch, batch_loss=14.4, batch_index=530, batch_size=256]

Epoch 3/10:  53%|██████▍     | 530/991 [2:14:26<1:54:06, 14.85s/batch, batch_loss=12.8, batch_index=531, batch_size=256]

Epoch 3/10:  54%|██████▍     | 531/991 [2:14:26<1:52:41, 14.70s/batch, batch_loss=12.8, batch_index=531, batch_size=256]

Epoch 3/10:  54%|██████▍     | 531/991 [2:14:40<1:52:41, 14.70s/batch, batch_loss=13.7, batch_index=532, batch_size=256]

Epoch 3/10:  54%|██████▍     | 532/991 [2:14:40<1:52:21, 14.69s/batch, batch_loss=13.7, batch_index=532, batch_size=256]

Epoch 3/10:  54%|██████▍     | 532/991 [2:14:55<1:52:21, 14.69s/batch, batch_loss=13.2, batch_index=533, batch_size=256]

Epoch 3/10:  54%|██████▍     | 533/991 [2:14:55<1:52:39, 14.76s/batch, batch_loss=13.2, batch_index=533, batch_size=256]

Epoch 3/10:  54%|███████▌      | 533/991 [2:15:11<1:52:39, 14.76s/batch, batch_loss=14, batch_index=534, batch_size=256]

Epoch 3/10:  54%|███████▌      | 534/991 [2:15:11<1:53:35, 14.91s/batch, batch_loss=14, batch_index=534, batch_size=256]

Epoch 3/10:  54%|██████▍     | 534/991 [2:15:28<1:53:35, 14.91s/batch, batch_loss=23.4, batch_index=535, batch_size=256]

Epoch 3/10:  54%|██████▍     | 535/991 [2:15:28<1:59:04, 15.67s/batch, batch_loss=23.4, batch_index=535, batch_size=256]

Epoch 3/10:  54%|██████▍     | 535/991 [2:15:43<1:59:04, 15.67s/batch, batch_loss=20.1, batch_index=536, batch_size=256]

Epoch 3/10:  54%|██████▍     | 536/991 [2:15:43<1:56:53, 15.42s/batch, batch_loss=20.1, batch_index=536, batch_size=256]

Epoch 3/10:  54%|██████▍     | 536/991 [2:15:58<1:56:53, 15.42s/batch, batch_loss=10.4, batch_index=537, batch_size=256]

Epoch 3/10:  54%|██████▌     | 537/991 [2:15:58<1:56:11, 15.36s/batch, batch_loss=10.4, batch_index=537, batch_size=256]

Epoch 3/10:  54%|████▉    | 537/991 [2:16:13<1:56:11, 15.36s/batch, batch_loss=1.79e+3, batch_index=538, batch_size=256]

Epoch 3/10:  54%|████▉    | 538/991 [2:16:13<1:56:03, 15.37s/batch, batch_loss=1.79e+3, batch_index=538, batch_size=256]

Epoch 3/10:  54%|██████▌     | 538/991 [2:16:28<1:56:03, 15.37s/batch, batch_loss=29.5, batch_index=539, batch_size=256]

Epoch 3/10:  54%|██████▌     | 539/991 [2:16:28<1:54:57, 15.26s/batch, batch_loss=29.5, batch_index=539, batch_size=256]

Epoch 3/10:  54%|██████▌     | 539/991 [2:16:44<1:54:57, 15.26s/batch, batch_loss=27.3, batch_index=540, batch_size=256]

Epoch 3/10:  54%|██████▌     | 540/991 [2:16:44<1:55:01, 15.30s/batch, batch_loss=27.3, batch_index=540, batch_size=256]

Epoch 3/10:  54%|█████▍    | 540/991 [2:16:59<1:55:01, 15.30s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 3/10:  55%|█████▍    | 541/991 [2:16:59<1:54:05, 15.21s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 3/10:  55%|████▉    | 541/991 [2:17:14<1:54:05, 15.21s/batch, batch_loss=2.85e+3, batch_index=542, batch_size=256]

Epoch 3/10:  55%|████▉    | 542/991 [2:17:14<1:54:25, 15.29s/batch, batch_loss=2.85e+3, batch_index=542, batch_size=256]

Epoch 3/10:  55%|██████▌     | 542/991 [2:17:30<1:54:25, 15.29s/batch, batch_loss=42.7, batch_index=543, batch_size=256]

Epoch 3/10:  55%|██████▌     | 543/991 [2:17:30<1:53:56, 15.26s/batch, batch_loss=42.7, batch_index=543, batch_size=256]

Epoch 3/10:  55%|██████▌     | 543/991 [2:17:45<1:53:56, 15.26s/batch, batch_loss=21.6, batch_index=544, batch_size=256]

Epoch 3/10:  55%|██████▌     | 544/991 [2:17:45<1:53:00, 15.17s/batch, batch_loss=21.6, batch_index=544, batch_size=256]

Epoch 3/10:  55%|██████▌     | 544/991 [2:18:01<1:53:00, 15.17s/batch, batch_loss=16.7, batch_index=545, batch_size=256]

Epoch 3/10:  55%|██████▌     | 545/991 [2:18:01<1:55:02, 15.48s/batch, batch_loss=16.7, batch_index=545, batch_size=256]

Epoch 3/10:  55%|███████▏     | 545/991 [2:18:16<1:55:02, 15.48s/batch, batch_loss=299, batch_index=546, batch_size=256]

Epoch 3/10:  55%|███████▏     | 546/991 [2:18:16<1:54:32, 15.44s/batch, batch_loss=299, batch_index=546, batch_size=256]

Epoch 3/10:  55%|██████▌     | 546/991 [2:18:32<1:54:32, 15.44s/batch, batch_loss=15.6, batch_index=547, batch_size=256]

Epoch 3/10:  55%|██████▌     | 547/991 [2:18:32<1:55:12, 15.57s/batch, batch_loss=15.6, batch_index=547, batch_size=256]

Epoch 3/10:  55%|██████▌     | 547/991 [2:18:47<1:55:12, 15.57s/batch, batch_loss=16.9, batch_index=548, batch_size=256]

Epoch 3/10:  55%|██████▋     | 548/991 [2:18:47<1:53:41, 15.40s/batch, batch_loss=16.9, batch_index=548, batch_size=256]

Epoch 3/10:  55%|██████▋     | 548/991 [2:19:01<1:53:41, 15.40s/batch, batch_loss=9.63, batch_index=549, batch_size=256]

Epoch 3/10:  55%|██████▋     | 549/991 [2:19:01<1:50:54, 15.06s/batch, batch_loss=9.63, batch_index=549, batch_size=256]

Epoch 3/10:  55%|██████▋     | 549/991 [2:19:18<1:50:54, 15.06s/batch, batch_loss=21.5, batch_index=550, batch_size=256]

Epoch 3/10:  55%|██████▋     | 550/991 [2:19:18<1:54:34, 15.59s/batch, batch_loss=21.5, batch_index=550, batch_size=256]

Epoch 3/10:  55%|██████▋     | 550/991 [2:19:33<1:54:34, 15.59s/batch, batch_loss=17.7, batch_index=551, batch_size=256]

Epoch 3/10:  56%|██████▋     | 551/991 [2:19:33<1:53:06, 15.42s/batch, batch_loss=17.7, batch_index=551, batch_size=256]

Epoch 3/10:  56%|██████▋     | 551/991 [2:19:48<1:53:06, 15.42s/batch, batch_loss=14.3, batch_index=552, batch_size=256]

Epoch 3/10:  56%|██████▋     | 552/991 [2:19:48<1:50:57, 15.16s/batch, batch_loss=14.3, batch_index=552, batch_size=256]

Epoch 3/10:  56%|██████▋     | 552/991 [2:20:02<1:50:57, 15.16s/batch, batch_loss=16.3, batch_index=553, batch_size=256]

Epoch 3/10:  56%|██████▋     | 553/991 [2:20:02<1:49:18, 14.97s/batch, batch_loss=16.3, batch_index=553, batch_size=256]

Epoch 3/10:  56%|█████    | 553/991 [2:20:17<1:49:18, 14.97s/batch, batch_loss=5.73e+3, batch_index=554, batch_size=256]

Epoch 3/10:  56%|█████    | 554/991 [2:20:17<1:49:35, 15.05s/batch, batch_loss=5.73e+3, batch_index=554, batch_size=256]

Epoch 3/10:  56%|█████    | 554/991 [2:20:33<1:49:35, 15.05s/batch, batch_loss=2.58e+3, batch_index=555, batch_size=256]

Epoch 3/10:  56%|█████    | 555/991 [2:20:33<1:50:21, 15.19s/batch, batch_loss=2.58e+3, batch_index=555, batch_size=256]

Epoch 3/10:  56%|██████▋     | 555/991 [2:20:47<1:50:21, 15.19s/batch, batch_loss=17.2, batch_index=556, batch_size=256]

Epoch 3/10:  56%|██████▋     | 556/991 [2:20:47<1:48:40, 14.99s/batch, batch_loss=17.2, batch_index=556, batch_size=256]

Epoch 3/10:  56%|█████    | 556/991 [2:21:04<1:48:40, 14.99s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 3/10:  56%|█████    | 557/991 [2:21:04<1:52:20, 15.53s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 3/10:  56%|██████▋     | 557/991 [2:21:19<1:52:20, 15.53s/batch, batch_loss=9.44, batch_index=558, batch_size=256]

Epoch 3/10:  56%|██████▊     | 558/991 [2:21:19<1:50:03, 15.25s/batch, batch_loss=9.44, batch_index=558, batch_size=256]

Epoch 3/10:  56%|██████▊     | 558/991 [2:21:33<1:50:03, 15.25s/batch, batch_loss=17.2, batch_index=559, batch_size=256]

Epoch 3/10:  56%|██████▊     | 559/991 [2:21:33<1:47:47, 14.97s/batch, batch_loss=17.2, batch_index=559, batch_size=256]

Epoch 3/10:  56%|██████▊     | 559/991 [2:21:48<1:47:47, 14.97s/batch, batch_loss=7.16, batch_index=560, batch_size=256]

Epoch 3/10:  57%|██████▊     | 560/991 [2:21:48<1:48:01, 15.04s/batch, batch_loss=7.16, batch_index=560, batch_size=256]

Epoch 3/10:  57%|██████▊     | 560/991 [2:22:04<1:48:01, 15.04s/batch, batch_loss=8.85, batch_index=561, batch_size=256]

Epoch 3/10:  57%|██████▊     | 561/991 [2:22:04<1:48:15, 15.11s/batch, batch_loss=8.85, batch_index=561, batch_size=256]

Epoch 3/10:  57%|███████▉      | 561/991 [2:22:18<1:48:15, 15.11s/batch, batch_loss=15, batch_index=562, batch_size=256]

Epoch 3/10:  57%|███████▉      | 562/991 [2:22:18<1:46:22, 14.88s/batch, batch_loss=15, batch_index=562, batch_size=256]

Epoch 3/10:  57%|██████▊     | 562/991 [2:22:33<1:46:22, 14.88s/batch, batch_loss=6.93, batch_index=563, batch_size=256]

Epoch 3/10:  57%|██████▊     | 563/991 [2:22:33<1:46:07, 14.88s/batch, batch_loss=6.93, batch_index=563, batch_size=256]

Epoch 3/10:  57%|██████▊     | 563/991 [2:22:47<1:46:07, 14.88s/batch, batch_loss=11.2, batch_index=564, batch_size=256]

Epoch 3/10:  57%|██████▊     | 564/991 [2:22:47<1:45:12, 14.78s/batch, batch_loss=11.2, batch_index=564, batch_size=256]

Epoch 3/10:  57%|███████▍     | 564/991 [2:23:03<1:45:12, 14.78s/batch, batch_loss=496, batch_index=565, batch_size=256]

Epoch 3/10:  57%|███████▍     | 565/991 [2:23:03<1:45:56, 14.92s/batch, batch_loss=496, batch_index=565, batch_size=256]

Epoch 3/10:  57%|██████▊     | 565/991 [2:23:18<1:45:56, 14.92s/batch, batch_loss=13.4, batch_index=566, batch_size=256]

Epoch 3/10:  57%|██████▊     | 566/991 [2:23:18<1:46:07, 14.98s/batch, batch_loss=13.4, batch_index=566, batch_size=256]

Epoch 3/10:  57%|██████▊     | 566/991 [2:23:33<1:46:07, 14.98s/batch, batch_loss=20.6, batch_index=567, batch_size=256]

Epoch 3/10:  57%|██████▊     | 567/991 [2:23:33<1:46:24, 15.06s/batch, batch_loss=20.6, batch_index=567, batch_size=256]

Epoch 3/10:  57%|███████▍     | 567/991 [2:23:48<1:46:24, 15.06s/batch, batch_loss=299, batch_index=568, batch_size=256]

Epoch 3/10:  57%|███████▍     | 568/991 [2:23:48<1:46:05, 15.05s/batch, batch_loss=299, batch_index=568, batch_size=256]

Epoch 3/10:  57%|██████▉     | 568/991 [2:24:02<1:46:05, 15.05s/batch, batch_loss=32.9, batch_index=569, batch_size=256]

Epoch 3/10:  57%|██████▉     | 569/991 [2:24:02<1:42:58, 14.64s/batch, batch_loss=32.9, batch_index=569, batch_size=256]

Epoch 3/10:  57%|█████▏   | 569/991 [2:24:16<1:42:58, 14.64s/batch, batch_loss=8.48e+3, batch_index=570, batch_size=256]

Epoch 3/10:  58%|█████▏   | 570/991 [2:24:16<1:42:14, 14.57s/batch, batch_loss=8.48e+3, batch_index=570, batch_size=256]

Epoch 3/10:  58%|██████▉     | 570/991 [2:24:34<1:42:14, 14.57s/batch, batch_loss=9.19, batch_index=571, batch_size=256]

Epoch 3/10:  58%|██████▉     | 571/991 [2:24:34<1:48:25, 15.49s/batch, batch_loss=9.19, batch_index=571, batch_size=256]

Epoch 3/10:  58%|██████▉     | 571/991 [2:24:49<1:48:25, 15.49s/batch, batch_loss=10.8, batch_index=572, batch_size=256]

Epoch 3/10:  58%|██████▉     | 572/991 [2:24:49<1:47:33, 15.40s/batch, batch_loss=10.8, batch_index=572, batch_size=256]

Epoch 3/10:  58%|██████▉     | 572/991 [2:25:03<1:47:33, 15.40s/batch, batch_loss=7.08, batch_index=573, batch_size=256]

Epoch 3/10:  58%|██████▉     | 573/991 [2:25:03<1:44:43, 15.03s/batch, batch_loss=7.08, batch_index=573, batch_size=256]

Epoch 3/10:  58%|████████      | 573/991 [2:25:18<1:44:43, 15.03s/batch, batch_loss=11, batch_index=574, batch_size=256]

Epoch 3/10:  58%|████████      | 574/991 [2:25:18<1:44:19, 15.01s/batch, batch_loss=11, batch_index=574, batch_size=256]

Epoch 3/10:  58%|██████▉     | 574/991 [2:25:33<1:44:19, 15.01s/batch, batch_loss=19.4, batch_index=575, batch_size=256]

Epoch 3/10:  58%|██████▉     | 575/991 [2:25:33<1:44:10, 15.03s/batch, batch_loss=19.4, batch_index=575, batch_size=256]

Epoch 3/10:  58%|██████▉     | 575/991 [2:25:49<1:44:10, 15.03s/batch, batch_loss=28.2, batch_index=576, batch_size=256]

Epoch 3/10:  58%|██████▉     | 576/991 [2:25:49<1:46:01, 15.33s/batch, batch_loss=28.2, batch_index=576, batch_size=256]

Epoch 3/10:  58%|██████▉     | 576/991 [2:26:04<1:46:01, 15.33s/batch, batch_loss=10.9, batch_index=577, batch_size=256]

Epoch 3/10:  58%|██████▉     | 577/991 [2:26:04<1:45:07, 15.24s/batch, batch_loss=10.9, batch_index=577, batch_size=256]

Epoch 3/10:  58%|██████▉     | 577/991 [2:26:19<1:45:07, 15.24s/batch, batch_loss=8.75, batch_index=578, batch_size=256]

Epoch 3/10:  58%|██████▉     | 578/991 [2:26:19<1:44:00, 15.11s/batch, batch_loss=8.75, batch_index=578, batch_size=256]

Epoch 3/10:  58%|██████▉     | 578/991 [2:26:34<1:44:00, 15.11s/batch, batch_loss=11.2, batch_index=579, batch_size=256]

Epoch 3/10:  58%|███████     | 579/991 [2:26:34<1:43:43, 15.10s/batch, batch_loss=11.2, batch_index=579, batch_size=256]

Epoch 3/10:  58%|███████     | 579/991 [2:26:49<1:43:43, 15.10s/batch, batch_loss=16.9, batch_index=580, batch_size=256]

Epoch 3/10:  59%|███████     | 580/991 [2:26:49<1:42:43, 15.00s/batch, batch_loss=16.9, batch_index=580, batch_size=256]

Epoch 3/10:  59%|███████     | 580/991 [2:27:03<1:42:43, 15.00s/batch, batch_loss=6.28, batch_index=581, batch_size=256]

Epoch 3/10:  59%|███████     | 581/991 [2:27:03<1:41:03, 14.79s/batch, batch_loss=6.28, batch_index=581, batch_size=256]

Epoch 3/10:  59%|███████     | 581/991 [2:27:18<1:41:03, 14.79s/batch, batch_loss=0.21, batch_index=582, batch_size=256]

Epoch 3/10:  59%|███████     | 582/991 [2:27:18<1:40:16, 14.71s/batch, batch_loss=0.21, batch_index=582, batch_size=256]

Epoch 3/10:  59%|█████▎   | 582/991 [2:27:32<1:40:16, 14.71s/batch, batch_loss=6.61e+3, batch_index=583, batch_size=256]

Epoch 3/10:  59%|█████▎   | 583/991 [2:27:32<1:39:49, 14.68s/batch, batch_loss=6.61e+3, batch_index=583, batch_size=256]

Epoch 3/10:  59%|███████     | 583/991 [2:27:47<1:39:49, 14.68s/batch, batch_loss=12.1, batch_index=584, batch_size=256]

Epoch 3/10:  59%|███████     | 584/991 [2:27:47<1:39:04, 14.61s/batch, batch_loss=12.1, batch_index=584, batch_size=256]

Epoch 3/10:  59%|███████     | 584/991 [2:28:01<1:39:04, 14.61s/batch, batch_loss=10.6, batch_index=585, batch_size=256]

Epoch 3/10:  59%|███████     | 585/991 [2:28:01<1:37:18, 14.38s/batch, batch_loss=10.6, batch_index=585, batch_size=256]

Epoch 3/10:  59%|████████▎     | 585/991 [2:28:16<1:37:18, 14.38s/batch, batch_loss=25, batch_index=586, batch_size=256]

Epoch 3/10:  59%|████████▎     | 586/991 [2:28:16<1:38:59, 14.66s/batch, batch_loss=25, batch_index=586, batch_size=256]

Epoch 3/10:  59%|███████     | 586/991 [2:28:30<1:38:59, 14.66s/batch, batch_loss=22.7, batch_index=587, batch_size=256]

Epoch 3/10:  59%|███████     | 587/991 [2:28:30<1:38:36, 14.65s/batch, batch_loss=22.7, batch_index=587, batch_size=256]

Epoch 3/10:  59%|███████     | 587/991 [2:28:45<1:38:36, 14.65s/batch, batch_loss=17.2, batch_index=588, batch_size=256]

Epoch 3/10:  59%|███████     | 588/991 [2:28:45<1:37:52, 14.57s/batch, batch_loss=17.2, batch_index=588, batch_size=256]

Epoch 3/10:  59%|███████     | 588/991 [2:29:00<1:37:52, 14.57s/batch, batch_loss=8.17, batch_index=589, batch_size=256]

Epoch 3/10:  59%|███████▏    | 589/991 [2:29:00<1:38:43, 14.73s/batch, batch_loss=8.17, batch_index=589, batch_size=256]

Epoch 3/10:  59%|███████▏    | 589/991 [2:29:15<1:38:43, 14.73s/batch, batch_loss=16.2, batch_index=590, batch_size=256]

Epoch 3/10:  60%|███████▏    | 590/991 [2:29:15<1:38:58, 14.81s/batch, batch_loss=16.2, batch_index=590, batch_size=256]

Epoch 3/10:  60%|███████▏    | 590/991 [2:29:30<1:38:58, 14.81s/batch, batch_loss=15.6, batch_index=591, batch_size=256]

Epoch 3/10:  60%|███████▏    | 591/991 [2:29:30<1:38:23, 14.76s/batch, batch_loss=15.6, batch_index=591, batch_size=256]

Epoch 3/10:  60%|███████▏    | 591/991 [2:29:44<1:38:23, 14.76s/batch, batch_loss=8.15, batch_index=592, batch_size=256]

Epoch 3/10:  60%|███████▏    | 592/991 [2:29:44<1:36:42, 14.54s/batch, batch_loss=8.15, batch_index=592, batch_size=256]

Epoch 3/10:  60%|███████▏    | 592/991 [2:29:59<1:36:42, 14.54s/batch, batch_loss=11.5, batch_index=593, batch_size=256]

Epoch 3/10:  60%|███████▏    | 593/991 [2:29:59<1:37:05, 14.64s/batch, batch_loss=11.5, batch_index=593, batch_size=256]

Epoch 3/10:  60%|███████▏    | 593/991 [2:30:13<1:37:05, 14.64s/batch, batch_loss=12.1, batch_index=594, batch_size=256]

Epoch 3/10:  60%|███████▏    | 594/991 [2:30:13<1:36:52, 14.64s/batch, batch_loss=12.1, batch_index=594, batch_size=256]

Epoch 3/10:  60%|███████▏    | 594/991 [2:30:27<1:36:52, 14.64s/batch, batch_loss=7.35, batch_index=595, batch_size=256]

Epoch 3/10:  60%|███████▏    | 595/991 [2:30:27<1:35:44, 14.51s/batch, batch_loss=7.35, batch_index=595, batch_size=256]

Epoch 3/10:  60%|███████▊     | 595/991 [2:30:43<1:35:44, 14.51s/batch, batch_loss=7.8, batch_index=596, batch_size=256]

Epoch 3/10:  60%|███████▊     | 596/991 [2:30:43<1:37:25, 14.80s/batch, batch_loss=7.8, batch_index=596, batch_size=256]

Epoch 3/10:  60%|███████▏    | 596/991 [2:30:58<1:37:25, 14.80s/batch, batch_loss=23.7, batch_index=597, batch_size=256]

Epoch 3/10:  60%|███████▏    | 597/991 [2:30:58<1:38:46, 15.04s/batch, batch_loss=23.7, batch_index=597, batch_size=256]

Epoch 3/10:  60%|███████▏    | 597/991 [2:31:14<1:38:46, 15.04s/batch, batch_loss=8.29, batch_index=598, batch_size=256]

Epoch 3/10:  60%|███████▏    | 598/991 [2:31:14<1:39:10, 15.14s/batch, batch_loss=8.29, batch_index=598, batch_size=256]

Epoch 3/10:  60%|███████▏    | 598/991 [2:31:29<1:39:10, 15.14s/batch, batch_loss=18.3, batch_index=599, batch_size=256]

Epoch 3/10:  60%|███████▎    | 599/991 [2:31:29<1:39:36, 15.25s/batch, batch_loss=18.3, batch_index=599, batch_size=256]

Epoch 3/10:  60%|███████▎    | 599/991 [2:31:44<1:39:36, 15.25s/batch, batch_loss=12.9, batch_index=600, batch_size=256]

Epoch 3/10:  61%|███████▎    | 600/991 [2:31:44<1:37:52, 15.02s/batch, batch_loss=12.9, batch_index=600, batch_size=256]

Epoch 3/10:  61%|███████▎    | 600/991 [2:32:00<1:37:52, 15.02s/batch, batch_loss=15.6, batch_index=601, batch_size=256]

Epoch 3/10:  61%|███████▎    | 601/991 [2:32:00<1:39:07, 15.25s/batch, batch_loss=15.6, batch_index=601, batch_size=256]

Epoch 3/10:  61%|███████▎    | 601/991 [2:32:15<1:39:07, 15.25s/batch, batch_loss=11.7, batch_index=602, batch_size=256]

Epoch 3/10:  61%|███████▎    | 602/991 [2:32:15<1:38:33, 15.20s/batch, batch_loss=11.7, batch_index=602, batch_size=256]

Epoch 3/10:  61%|███████▎    | 602/991 [2:32:29<1:38:33, 15.20s/batch, batch_loss=6.76, batch_index=603, batch_size=256]

Epoch 3/10:  61%|███████▎    | 603/991 [2:32:29<1:37:04, 15.01s/batch, batch_loss=6.76, batch_index=603, batch_size=256]

Epoch 3/10:  61%|█████▍   | 603/991 [2:32:44<1:37:04, 15.01s/batch, batch_loss=1.01e+4, batch_index=604, batch_size=256]

Epoch 3/10:  61%|█████▍   | 604/991 [2:32:44<1:35:32, 14.81s/batch, batch_loss=1.01e+4, batch_index=604, batch_size=256]

Epoch 3/10:  61%|███████▎    | 604/991 [2:33:02<1:35:32, 14.81s/batch, batch_loss=10.1, batch_index=605, batch_size=256]

Epoch 3/10:  61%|███████▎    | 605/991 [2:33:02<1:42:21, 15.91s/batch, batch_loss=10.1, batch_index=605, batch_size=256]

Epoch 3/10:  61%|███████▎    | 605/991 [2:33:18<1:42:21, 15.91s/batch, batch_loss=12.3, batch_index=606, batch_size=256]

Epoch 3/10:  61%|███████▎    | 606/991 [2:33:18<1:41:51, 15.87s/batch, batch_loss=12.3, batch_index=606, batch_size=256]

Epoch 3/10:  61%|███████▎    | 606/991 [2:33:33<1:41:51, 15.87s/batch, batch_loss=13.5, batch_index=607, batch_size=256]

Epoch 3/10:  61%|███████▎    | 607/991 [2:33:33<1:39:16, 15.51s/batch, batch_loss=13.5, batch_index=607, batch_size=256]

Epoch 3/10:  61%|███████▎    | 607/991 [2:33:48<1:39:16, 15.51s/batch, batch_loss=15.6, batch_index=608, batch_size=256]

Epoch 3/10:  61%|███████▎    | 608/991 [2:33:48<1:39:13, 15.54s/batch, batch_loss=15.6, batch_index=608, batch_size=256]

Epoch 3/10:  61%|███████▎    | 608/991 [2:34:04<1:39:13, 15.54s/batch, batch_loss=16.8, batch_index=609, batch_size=256]

Epoch 3/10:  61%|███████▎    | 609/991 [2:34:04<1:39:57, 15.70s/batch, batch_loss=16.8, batch_index=609, batch_size=256]

Epoch 3/10:  61%|███████▎    | 609/991 [2:34:18<1:39:57, 15.70s/batch, batch_loss=20.4, batch_index=610, batch_size=256]

Epoch 3/10:  62%|███████▍    | 610/991 [2:34:18<1:35:17, 15.01s/batch, batch_loss=20.4, batch_index=610, batch_size=256]

Epoch 3/10:  62%|████████▌     | 610/991 [2:34:33<1:35:17, 15.01s/batch, batch_loss=30, batch_index=611, batch_size=256]

Epoch 3/10:  62%|████████▋     | 611/991 [2:34:33<1:35:15, 15.04s/batch, batch_loss=30, batch_index=611, batch_size=256]

Epoch 3/10:  62%|███████▍    | 611/991 [2:34:48<1:35:15, 15.04s/batch, batch_loss=8.66, batch_index=612, batch_size=256]

Epoch 3/10:  62%|███████▍    | 612/991 [2:34:48<1:35:00, 15.04s/batch, batch_loss=8.66, batch_index=612, batch_size=256]

Epoch 3/10:  62%|███████▍    | 612/991 [2:35:05<1:35:00, 15.04s/batch, batch_loss=16.6, batch_index=613, batch_size=256]

Epoch 3/10:  62%|███████▍    | 613/991 [2:35:05<1:39:22, 15.77s/batch, batch_loss=16.6, batch_index=613, batch_size=256]

Epoch 3/10:  62%|█████▌   | 613/991 [2:35:20<1:39:22, 15.77s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 3/10:  62%|█████▌   | 614/991 [2:35:20<1:37:50, 15.57s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 3/10:  62%|████████     | 614/991 [2:35:36<1:37:50, 15.57s/batch, batch_loss=995, batch_index=615, batch_size=256]

Epoch 3/10:  62%|████████     | 615/991 [2:35:36<1:37:03, 15.49s/batch, batch_loss=995, batch_index=615, batch_size=256]

Epoch 3/10:  62%|███████▍    | 615/991 [2:35:51<1:37:03, 15.49s/batch, batch_loss=9.13, batch_index=616, batch_size=256]

Epoch 3/10:  62%|███████▍    | 616/991 [2:35:51<1:35:38, 15.30s/batch, batch_loss=9.13, batch_index=616, batch_size=256]

Epoch 3/10:  62%|████████▋     | 616/991 [2:36:05<1:35:38, 15.30s/batch, batch_loss=19, batch_index=617, batch_size=256]

Epoch 3/10:  62%|████████▋     | 617/991 [2:36:05<1:34:01, 15.08s/batch, batch_loss=19, batch_index=617, batch_size=256]

Epoch 3/10:  62%|███████▍    | 617/991 [2:36:20<1:34:01, 15.08s/batch, batch_loss=12.6, batch_index=618, batch_size=256]

Epoch 3/10:  62%|███████▍    | 618/991 [2:36:20<1:33:09, 14.99s/batch, batch_loss=12.6, batch_index=618, batch_size=256]

Epoch 3/10:  62%|████████▋     | 618/991 [2:36:34<1:33:09, 14.99s/batch, batch_loss=24, batch_index=619, batch_size=256]

Epoch 3/10:  62%|████████▋     | 619/991 [2:36:34<1:32:18, 14.89s/batch, batch_loss=24, batch_index=619, batch_size=256]

Epoch 3/10:  62%|███████▍    | 619/991 [2:36:49<1:32:18, 14.89s/batch, batch_loss=12.9, batch_index=620, batch_size=256]

Epoch 3/10:  63%|███████▌    | 620/991 [2:36:49<1:30:56, 14.71s/batch, batch_loss=12.9, batch_index=620, batch_size=256]

Epoch 3/10:  63%|███████▌    | 620/991 [2:37:05<1:30:56, 14.71s/batch, batch_loss=8.78, batch_index=621, batch_size=256]

Epoch 3/10:  63%|███████▌    | 621/991 [2:37:05<1:34:20, 15.30s/batch, batch_loss=8.78, batch_index=621, batch_size=256]

Epoch 3/10:  63%|█████▋   | 621/991 [2:37:20<1:34:20, 15.30s/batch, batch_loss=5.48e+3, batch_index=622, batch_size=256]

Epoch 3/10:  63%|█████▋   | 622/991 [2:37:20<1:32:23, 15.02s/batch, batch_loss=5.48e+3, batch_index=622, batch_size=256]

Epoch 3/10:  63%|███████▌    | 622/991 [2:37:35<1:32:23, 15.02s/batch, batch_loss=28.9, batch_index=623, batch_size=256]

Epoch 3/10:  63%|███████▌    | 623/991 [2:37:35<1:31:48, 14.97s/batch, batch_loss=28.9, batch_index=623, batch_size=256]

Epoch 3/10:  63%|██████▎   | 623/991 [2:37:49<1:31:48, 14.97s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 3/10:  63%|██████▎   | 624/991 [2:37:49<1:31:15, 14.92s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 3/10:  63%|███████▌    | 624/991 [2:38:04<1:31:15, 14.92s/batch, batch_loss=9.39, batch_index=625, batch_size=256]

Epoch 3/10:  63%|███████▌    | 625/991 [2:38:04<1:30:59, 14.92s/batch, batch_loss=9.39, batch_index=625, batch_size=256]

Epoch 3/10:  63%|████████▏    | 625/991 [2:38:19<1:30:59, 14.92s/batch, batch_loss=6.4, batch_index=626, batch_size=256]

Epoch 3/10:  63%|████████▏    | 626/991 [2:38:19<1:29:51, 14.77s/batch, batch_loss=6.4, batch_index=626, batch_size=256]

Epoch 3/10:  63%|█████▋   | 626/991 [2:38:34<1:29:51, 14.77s/batch, batch_loss=4.32e+3, batch_index=627, batch_size=256]

Epoch 3/10:  63%|█████▋   | 627/991 [2:38:34<1:29:29, 14.75s/batch, batch_loss=4.32e+3, batch_index=627, batch_size=256]

Epoch 3/10:  63%|█████▋   | 627/991 [2:38:48<1:29:29, 14.75s/batch, batch_loss=1.05e+3, batch_index=628, batch_size=256]

Epoch 3/10:  63%|█████▋   | 628/991 [2:38:48<1:28:43, 14.66s/batch, batch_loss=1.05e+3, batch_index=628, batch_size=256]

Epoch 3/10:  63%|███████▌    | 628/991 [2:39:03<1:28:43, 14.66s/batch, batch_loss=13.1, batch_index=629, batch_size=256]

Epoch 3/10:  63%|███████▌    | 629/991 [2:39:03<1:28:20, 14.64s/batch, batch_loss=13.1, batch_index=629, batch_size=256]

Epoch 3/10:  63%|███████▌    | 629/991 [2:39:20<1:28:20, 14.64s/batch, batch_loss=21.8, batch_index=630, batch_size=256]

Epoch 3/10:  64%|███████▋    | 630/991 [2:39:20<1:33:23, 15.52s/batch, batch_loss=21.8, batch_index=630, batch_size=256]

Epoch 3/10:  64%|███████▋    | 630/991 [2:39:35<1:33:23, 15.52s/batch, batch_loss=17.3, batch_index=631, batch_size=256]

Epoch 3/10:  64%|███████▋    | 631/991 [2:39:35<1:32:19, 15.39s/batch, batch_loss=17.3, batch_index=631, batch_size=256]

Epoch 3/10:  64%|████████▎    | 631/991 [2:39:50<1:32:19, 15.39s/batch, batch_loss=5.2, batch_index=632, batch_size=256]

Epoch 3/10:  64%|████████▎    | 632/991 [2:39:50<1:31:45, 15.34s/batch, batch_loss=5.2, batch_index=632, batch_size=256]

Epoch 3/10:  64%|███████▋    | 632/991 [2:40:07<1:31:45, 15.34s/batch, batch_loss=26.4, batch_index=633, batch_size=256]

Epoch 3/10:  64%|███████▋    | 633/991 [2:40:07<1:33:19, 15.64s/batch, batch_loss=26.4, batch_index=633, batch_size=256]

Epoch 3/10:  64%|███████▋    | 633/991 [2:40:22<1:33:19, 15.64s/batch, batch_loss=27.1, batch_index=634, batch_size=256]

Epoch 3/10:  64%|███████▋    | 634/991 [2:40:22<1:32:44, 15.59s/batch, batch_loss=27.1, batch_index=634, batch_size=256]

Epoch 3/10:  64%|███████▋    | 634/991 [2:40:38<1:32:44, 15.59s/batch, batch_loss=24.9, batch_index=635, batch_size=256]

Epoch 3/10:  64%|███████▋    | 635/991 [2:40:38<1:32:42, 15.62s/batch, batch_loss=24.9, batch_index=635, batch_size=256]

Epoch 3/10:  64%|███████▋    | 635/991 [2:40:53<1:32:42, 15.62s/batch, batch_loss=17.2, batch_index=636, batch_size=256]

Epoch 3/10:  64%|███████▋    | 636/991 [2:40:53<1:31:43, 15.50s/batch, batch_loss=17.2, batch_index=636, batch_size=256]

Epoch 3/10:  64%|███████▋    | 636/991 [2:41:09<1:31:43, 15.50s/batch, batch_loss=19.7, batch_index=637, batch_size=256]

Epoch 3/10:  64%|███████▋    | 637/991 [2:41:09<1:31:18, 15.48s/batch, batch_loss=19.7, batch_index=637, batch_size=256]

Epoch 3/10:  64%|███████▋    | 637/991 [2:41:24<1:31:18, 15.48s/batch, batch_loss=18.6, batch_index=638, batch_size=256]

Epoch 3/10:  64%|███████▋    | 638/991 [2:41:24<1:30:06, 15.32s/batch, batch_loss=18.6, batch_index=638, batch_size=256]

Epoch 3/10:  64%|███████▋    | 638/991 [2:41:39<1:30:06, 15.32s/batch, batch_loss=11.9, batch_index=639, batch_size=256]

Epoch 3/10:  64%|███████▋    | 639/991 [2:41:39<1:30:11, 15.37s/batch, batch_loss=11.9, batch_index=639, batch_size=256]

Epoch 3/10:  64%|████████▍    | 639/991 [2:41:58<1:30:11, 15.37s/batch, batch_loss=673, batch_index=640, batch_size=256]

Epoch 3/10:  65%|████████▍    | 640/991 [2:41:58<1:35:58, 16.40s/batch, batch_loss=673, batch_index=640, batch_size=256]

Epoch 3/10:  65%|███████▋    | 640/991 [2:42:13<1:35:58, 16.40s/batch, batch_loss=12.9, batch_index=641, batch_size=256]

Epoch 3/10:  65%|███████▊    | 641/991 [2:42:13<1:33:25, 16.02s/batch, batch_loss=12.9, batch_index=641, batch_size=256]

Epoch 3/10:  65%|███████▊    | 641/991 [2:42:29<1:33:25, 16.02s/batch, batch_loss=8.58, batch_index=642, batch_size=256]

Epoch 3/10:  65%|███████▊    | 642/991 [2:42:29<1:32:58, 15.98s/batch, batch_loss=8.58, batch_index=642, batch_size=256]

Epoch 3/10:  65%|█████▊   | 642/991 [2:42:44<1:32:58, 15.98s/batch, batch_loss=2.12e+4, batch_index=643, batch_size=256]

Epoch 3/10:  65%|█████▊   | 643/991 [2:42:44<1:31:35, 15.79s/batch, batch_loss=2.12e+4, batch_index=643, batch_size=256]

Epoch 3/10:  65%|█████▊   | 643/991 [2:42:59<1:31:35, 15.79s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 3/10:  65%|█████▊   | 644/991 [2:42:59<1:29:33, 15.48s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 3/10:  65%|█████▊   | 644/991 [2:43:14<1:29:33, 15.48s/batch, batch_loss=2.19e+3, batch_index=645, batch_size=256]

Epoch 3/10:  65%|█████▊   | 645/991 [2:43:14<1:28:27, 15.34s/batch, batch_loss=2.19e+3, batch_index=645, batch_size=256]

Epoch 3/10:  65%|███████▊    | 645/991 [2:43:29<1:28:27, 15.34s/batch, batch_loss=10.2, batch_index=646, batch_size=256]

Epoch 3/10:  65%|███████▊    | 646/991 [2:43:29<1:27:13, 15.17s/batch, batch_loss=10.2, batch_index=646, batch_size=256]

Epoch 3/10:  65%|███████▊    | 646/991 [2:43:46<1:27:13, 15.17s/batch, batch_loss=14.6, batch_index=647, batch_size=256]

Epoch 3/10:  65%|███████▊    | 647/991 [2:43:46<1:31:00, 15.87s/batch, batch_loss=14.6, batch_index=647, batch_size=256]

Epoch 3/10:  65%|███████▊    | 647/991 [2:44:01<1:31:00, 15.87s/batch, batch_loss=14.4, batch_index=648, batch_size=256]

Epoch 3/10:  65%|███████▊    | 648/991 [2:44:01<1:29:28, 15.65s/batch, batch_loss=14.4, batch_index=648, batch_size=256]

Epoch 3/10:  65%|███████▊    | 648/991 [2:44:15<1:29:28, 15.65s/batch, batch_loss=13.6, batch_index=649, batch_size=256]

Epoch 3/10:  65%|███████▊    | 649/991 [2:44:15<1:26:17, 15.14s/batch, batch_loss=13.6, batch_index=649, batch_size=256]

Epoch 3/10:  65%|█████▉   | 649/991 [2:44:30<1:26:17, 15.14s/batch, batch_loss=1.34e+4, batch_index=650, batch_size=256]

Epoch 3/10:  66%|█████▉   | 650/991 [2:44:30<1:25:27, 15.04s/batch, batch_loss=1.34e+4, batch_index=650, batch_size=256]

Epoch 3/10:  66%|███████▊    | 650/991 [2:44:45<1:25:27, 15.04s/batch, batch_loss=10.1, batch_index=651, batch_size=256]

Epoch 3/10:  66%|███████▉    | 651/991 [2:44:45<1:24:11, 14.86s/batch, batch_loss=10.1, batch_index=651, batch_size=256]

Epoch 3/10:  66%|███████▉    | 651/991 [2:44:59<1:24:11, 14.86s/batch, batch_loss=12.7, batch_index=652, batch_size=256]

Epoch 3/10:  66%|███████▉    | 652/991 [2:44:59<1:23:55, 14.85s/batch, batch_loss=12.7, batch_index=652, batch_size=256]

Epoch 3/10:  66%|███████▉    | 652/991 [2:45:14<1:23:55, 14.85s/batch, batch_loss=22.6, batch_index=653, batch_size=256]

Epoch 3/10:  66%|███████▉    | 653/991 [2:45:14<1:22:23, 14.62s/batch, batch_loss=22.6, batch_index=653, batch_size=256]

Epoch 3/10:  66%|█████████▏    | 653/991 [2:45:29<1:22:23, 14.62s/batch, batch_loss=18, batch_index=654, batch_size=256]

Epoch 3/10:  66%|█████████▏    | 654/991 [2:45:29<1:22:50, 14.75s/batch, batch_loss=18, batch_index=654, batch_size=256]

Epoch 3/10:  66%|█████▉   | 654/991 [2:45:44<1:22:50, 14.75s/batch, batch_loss=3.84e+3, batch_index=655, batch_size=256]

Epoch 3/10:  66%|█████▉   | 655/991 [2:45:44<1:23:29, 14.91s/batch, batch_loss=3.84e+3, batch_index=655, batch_size=256]

Epoch 3/10:  66%|█████▉   | 655/991 [2:45:59<1:23:29, 14.91s/batch, batch_loss=5.21e+3, batch_index=656, batch_size=256]

Epoch 3/10:  66%|█████▉   | 656/991 [2:45:59<1:23:42, 14.99s/batch, batch_loss=5.21e+3, batch_index=656, batch_size=256]

Epoch 3/10:  66%|█████▉   | 656/991 [2:46:14<1:23:42, 14.99s/batch, batch_loss=4.22e+3, batch_index=657, batch_size=256]

Epoch 3/10:  66%|█████▉   | 657/991 [2:46:14<1:22:40, 14.85s/batch, batch_loss=4.22e+3, batch_index=657, batch_size=256]

Epoch 3/10:  66%|██████▋   | 657/991 [2:46:28<1:22:40, 14.85s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 3/10:  66%|██████▋   | 658/991 [2:46:28<1:21:52, 14.75s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 3/10:  66%|███████▉    | 658/991 [2:46:43<1:21:52, 14.75s/batch, batch_loss=18.8, batch_index=659, batch_size=256]

Epoch 3/10:  66%|███████▉    | 659/991 [2:46:43<1:22:26, 14.90s/batch, batch_loss=18.8, batch_index=659, batch_size=256]

Epoch 3/10:  66%|███████▉    | 659/991 [2:46:59<1:22:26, 14.90s/batch, batch_loss=5.29, batch_index=660, batch_size=256]

Epoch 3/10:  67%|███████▉    | 660/991 [2:46:59<1:23:35, 15.15s/batch, batch_loss=5.29, batch_index=660, batch_size=256]

Epoch 3/10:  67%|███████▉    | 660/991 [2:47:17<1:23:35, 15.15s/batch, batch_loss=15.7, batch_index=661, batch_size=256]

Epoch 3/10:  67%|████████    | 661/991 [2:47:17<1:28:11, 16.04s/batch, batch_loss=15.7, batch_index=661, batch_size=256]

Epoch 3/10:  67%|████████    | 661/991 [2:47:33<1:28:11, 16.04s/batch, batch_loss=21.6, batch_index=662, batch_size=256]

Epoch 3/10:  67%|████████    | 662/991 [2:47:33<1:27:53, 16.03s/batch, batch_loss=21.6, batch_index=662, batch_size=256]

Epoch 3/10:  67%|████████    | 662/991 [2:47:48<1:27:53, 16.03s/batch, batch_loss=17.7, batch_index=663, batch_size=256]

Epoch 3/10:  67%|████████    | 663/991 [2:47:48<1:26:23, 15.80s/batch, batch_loss=17.7, batch_index=663, batch_size=256]

Epoch 3/10:  67%|██████   | 663/991 [2:48:04<1:26:23, 15.80s/batch, batch_loss=3.05e+3, batch_index=664, batch_size=256]

Epoch 3/10:  67%|██████   | 664/991 [2:48:04<1:26:00, 15.78s/batch, batch_loss=3.05e+3, batch_index=664, batch_size=256]

Epoch 3/10:  67%|████████    | 664/991 [2:48:20<1:26:00, 15.78s/batch, batch_loss=17.1, batch_index=665, batch_size=256]

Epoch 3/10:  67%|████████    | 665/991 [2:48:20<1:25:39, 15.77s/batch, batch_loss=17.1, batch_index=665, batch_size=256]

Epoch 3/10:  67%|██████   | 665/991 [2:48:35<1:25:39, 15.77s/batch, batch_loss=3.07e+3, batch_index=666, batch_size=256]

Epoch 3/10:  67%|██████   | 666/991 [2:48:35<1:24:59, 15.69s/batch, batch_loss=3.07e+3, batch_index=666, batch_size=256]

Epoch 3/10:  67%|████████    | 666/991 [2:48:51<1:24:59, 15.69s/batch, batch_loss=20.2, batch_index=667, batch_size=256]

Epoch 3/10:  67%|████████    | 667/991 [2:48:51<1:25:10, 15.77s/batch, batch_loss=20.2, batch_index=667, batch_size=256]

Epoch 3/10:  67%|████████▋    | 667/991 [2:49:07<1:25:10, 15.77s/batch, batch_loss=382, batch_index=668, batch_size=256]

Epoch 3/10:  67%|████████▊    | 668/991 [2:49:07<1:24:26, 15.68s/batch, batch_loss=382, batch_index=668, batch_size=256]

Epoch 3/10:  67%|██████   | 668/991 [2:49:22<1:24:26, 15.68s/batch, batch_loss=2.96e+3, batch_index=669, batch_size=256]

Epoch 3/10:  68%|██████   | 669/991 [2:49:22<1:23:27, 15.55s/batch, batch_loss=2.96e+3, batch_index=669, batch_size=256]

Epoch 3/10:  68%|██████   | 669/991 [2:49:40<1:23:27, 15.55s/batch, batch_loss=1.02e+3, batch_index=670, batch_size=256]

Epoch 3/10:  68%|██████   | 670/991 [2:49:40<1:26:46, 16.22s/batch, batch_loss=1.02e+3, batch_index=670, batch_size=256]

Epoch 3/10:  68%|████████    | 670/991 [2:49:55<1:26:46, 16.22s/batch, batch_loss=10.2, batch_index=671, batch_size=256]

Epoch 3/10:  68%|████████▏   | 671/991 [2:49:55<1:24:41, 15.88s/batch, batch_loss=10.2, batch_index=671, batch_size=256]

Epoch 3/10:  68%|████████▏   | 671/991 [2:50:11<1:24:41, 15.88s/batch, batch_loss=13.9, batch_index=672, batch_size=256]

Epoch 3/10:  68%|████████▏   | 672/991 [2:50:11<1:24:10, 15.83s/batch, batch_loss=13.9, batch_index=672, batch_size=256]

Epoch 3/10:  68%|████████▏   | 672/991 [2:50:26<1:24:10, 15.83s/batch, batch_loss=18.9, batch_index=673, batch_size=256]

Epoch 3/10:  68%|████████▏   | 673/991 [2:50:26<1:23:01, 15.67s/batch, batch_loss=18.9, batch_index=673, batch_size=256]

Epoch 3/10:  68%|████████▏   | 673/991 [2:50:42<1:23:01, 15.67s/batch, batch_loss=15.1, batch_index=674, batch_size=256]

Epoch 3/10:  68%|████████▏   | 674/991 [2:50:42<1:23:00, 15.71s/batch, batch_loss=15.1, batch_index=674, batch_size=256]

Epoch 3/10:  68%|████████▏   | 674/991 [2:50:56<1:23:00, 15.71s/batch, batch_loss=4.97, batch_index=675, batch_size=256]

Epoch 3/10:  68%|████████▏   | 675/991 [2:50:56<1:20:37, 15.31s/batch, batch_loss=4.97, batch_index=675, batch_size=256]

Epoch 3/10:  68%|████████▏   | 675/991 [2:51:12<1:20:37, 15.31s/batch, batch_loss=9.29, batch_index=676, batch_size=256]

Epoch 3/10:  68%|████████▏   | 676/991 [2:51:12<1:20:35, 15.35s/batch, batch_loss=9.29, batch_index=676, batch_size=256]

Epoch 3/10:  68%|████████▏   | 676/991 [2:51:26<1:20:35, 15.35s/batch, batch_loss=18.4, batch_index=677, batch_size=256]

Epoch 3/10:  68%|████████▏   | 677/991 [2:51:26<1:18:32, 15.01s/batch, batch_loss=18.4, batch_index=677, batch_size=256]

Epoch 3/10:  68%|████████▏   | 677/991 [2:51:41<1:18:32, 15.01s/batch, batch_loss=7.65, batch_index=678, batch_size=256]

Epoch 3/10:  68%|████████▏   | 678/991 [2:51:41<1:18:21, 15.02s/batch, batch_loss=7.65, batch_index=678, batch_size=256]

Epoch 3/10:  68%|██████▏  | 678/991 [2:51:56<1:18:21, 15.02s/batch, batch_loss=3.81e+3, batch_index=679, batch_size=256]

Epoch 3/10:  69%|██████▏  | 679/991 [2:51:56<1:17:29, 14.90s/batch, batch_loss=3.81e+3, batch_index=679, batch_size=256]

Epoch 3/10:  69%|██████▏  | 679/991 [2:52:10<1:17:29, 14.90s/batch, batch_loss=6.09e+3, batch_index=680, batch_size=256]

Epoch 3/10:  69%|██████▏  | 680/991 [2:52:10<1:16:53, 14.83s/batch, batch_loss=6.09e+3, batch_index=680, batch_size=256]

Epoch 3/10:  69%|██████▏  | 680/991 [2:52:26<1:16:53, 14.83s/batch, batch_loss=7.24e+4, batch_index=681, batch_size=256]

Epoch 3/10:  69%|██████▏  | 681/991 [2:52:26<1:17:51, 15.07s/batch, batch_loss=7.24e+4, batch_index=681, batch_size=256]

Epoch 3/10:  69%|████████▏   | 681/991 [2:52:40<1:17:51, 15.07s/batch, batch_loss=14.3, batch_index=682, batch_size=256]

Epoch 3/10:  69%|████████▎   | 682/991 [2:52:40<1:16:50, 14.92s/batch, batch_loss=14.3, batch_index=682, batch_size=256]

Epoch 3/10:  69%|████████▉    | 682/991 [2:52:56<1:16:50, 14.92s/batch, batch_loss=378, batch_index=683, batch_size=256]

Epoch 3/10:  69%|████████▉    | 683/991 [2:52:56<1:17:21, 15.07s/batch, batch_loss=378, batch_index=683, batch_size=256]

Epoch 3/10:  69%|████████▎   | 683/991 [2:53:11<1:17:21, 15.07s/batch, batch_loss=6.83, batch_index=684, batch_size=256]

Epoch 3/10:  69%|████████▎   | 684/991 [2:53:11<1:17:56, 15.23s/batch, batch_loss=6.83, batch_index=684, batch_size=256]

Epoch 3/10:  69%|████████▎   | 684/991 [2:53:27<1:17:56, 15.23s/batch, batch_loss=14.9, batch_index=685, batch_size=256]

Epoch 3/10:  69%|████████▎   | 685/991 [2:53:27<1:17:40, 15.23s/batch, batch_loss=14.9, batch_index=685, batch_size=256]

Epoch 3/10:  69%|████████▎   | 685/991 [2:53:42<1:17:40, 15.23s/batch, batch_loss=14.9, batch_index=686, batch_size=256]

Epoch 3/10:  69%|████████▎   | 686/991 [2:53:42<1:17:07, 15.17s/batch, batch_loss=14.9, batch_index=686, batch_size=256]

Epoch 3/10:  69%|████████▉    | 686/991 [2:53:57<1:17:07, 15.17s/batch, batch_loss=538, batch_index=687, batch_size=256]

Epoch 3/10:  69%|█████████    | 687/991 [2:53:57<1:16:59, 15.19s/batch, batch_loss=538, batch_index=687, batch_size=256]

Epoch 3/10:  69%|████████▎   | 687/991 [2:54:12<1:16:59, 15.19s/batch, batch_loss=6.58, batch_index=688, batch_size=256]

Epoch 3/10:  69%|████████▎   | 688/991 [2:54:12<1:16:44, 15.20s/batch, batch_loss=6.58, batch_index=688, batch_size=256]

Epoch 3/10:  69%|████████▎   | 688/991 [2:54:27<1:16:44, 15.20s/batch, batch_loss=7.73, batch_index=689, batch_size=256]

Epoch 3/10:  70%|████████▎   | 689/991 [2:54:27<1:16:09, 15.13s/batch, batch_loss=7.73, batch_index=689, batch_size=256]

Epoch 3/10:  70%|████████▎   | 689/991 [2:54:42<1:16:09, 15.13s/batch, batch_loss=12.4, batch_index=690, batch_size=256]

Epoch 3/10:  70%|████████▎   | 690/991 [2:54:42<1:16:16, 15.20s/batch, batch_loss=12.4, batch_index=690, batch_size=256]

Epoch 3/10:  70%|████████▎   | 690/991 [2:54:58<1:16:16, 15.20s/batch, batch_loss=15.9, batch_index=691, batch_size=256]

Epoch 3/10:  70%|████████▎   | 691/991 [2:54:58<1:16:27, 15.29s/batch, batch_loss=15.9, batch_index=691, batch_size=256]

Epoch 3/10:  70%|████████▎   | 691/991 [2:55:17<1:16:27, 15.29s/batch, batch_loss=7.82, batch_index=692, batch_size=256]

Epoch 3/10:  70%|████████▍   | 692/991 [2:55:17<1:21:15, 16.30s/batch, batch_loss=7.82, batch_index=692, batch_size=256]

Epoch 3/10:  70%|██████▎  | 692/991 [2:55:33<1:21:15, 16.30s/batch, batch_loss=4.65e+3, batch_index=693, batch_size=256]

Epoch 3/10:  70%|██████▎  | 693/991 [2:55:33<1:20:27, 16.20s/batch, batch_loss=4.65e+3, batch_index=693, batch_size=256]

Epoch 3/10:  70%|█████████    | 693/991 [2:55:49<1:20:27, 16.20s/batch, batch_loss=450, batch_index=694, batch_size=256]

Epoch 3/10:  70%|█████████    | 694/991 [2:55:49<1:20:13, 16.21s/batch, batch_loss=450, batch_index=694, batch_size=256]

Epoch 3/10:  70%|█████████    | 694/991 [2:56:05<1:20:13, 16.21s/batch, batch_loss=777, batch_index=695, batch_size=256]

Epoch 3/10:  70%|█████████    | 695/991 [2:56:05<1:19:29, 16.11s/batch, batch_loss=777, batch_index=695, batch_size=256]

Epoch 3/10:  70%|█████████    | 695/991 [2:56:20<1:19:29, 16.11s/batch, batch_loss=9.1, batch_index=696, batch_size=256]

Epoch 3/10:  70%|█████████▏   | 696/991 [2:56:20<1:18:01, 15.87s/batch, batch_loss=9.1, batch_index=696, batch_size=256]

Epoch 3/10:  70%|███████   | 696/991 [2:56:36<1:18:01, 15.87s/batch, batch_loss=6.8e+3, batch_index=697, batch_size=256]

Epoch 3/10:  70%|███████   | 697/991 [2:56:36<1:17:35, 15.83s/batch, batch_loss=6.8e+3, batch_index=697, batch_size=256]

Epoch 3/10:  70%|████████▍   | 697/991 [2:56:52<1:17:35, 15.83s/batch, batch_loss=13.2, batch_index=698, batch_size=256]

Epoch 3/10:  70%|████████▍   | 698/991 [2:56:52<1:17:26, 15.86s/batch, batch_loss=13.2, batch_index=698, batch_size=256]

Epoch 3/10:  70%|████████▍   | 698/991 [2:57:08<1:17:26, 15.86s/batch, batch_loss=9.63, batch_index=699, batch_size=256]

Epoch 3/10:  71%|████████▍   | 699/991 [2:57:08<1:18:00, 16.03s/batch, batch_loss=9.63, batch_index=699, batch_size=256]

Epoch 3/10:  71%|████████▍   | 699/991 [2:57:27<1:18:00, 16.03s/batch, batch_loss=8.54, batch_index=700, batch_size=256]

Epoch 3/10:  71%|████████▍   | 700/991 [2:57:27<1:21:54, 16.89s/batch, batch_loss=8.54, batch_index=700, batch_size=256]

Epoch 3/10:  71%|█████████▏   | 700/991 [2:57:43<1:21:54, 16.89s/batch, batch_loss=215, batch_index=701, batch_size=256]

Epoch 3/10:  71%|█████████▏   | 701/991 [2:57:43<1:20:38, 16.68s/batch, batch_loss=215, batch_index=701, batch_size=256]

Epoch 3/10:  71%|████████▍   | 701/991 [2:57:59<1:20:38, 16.68s/batch, batch_loss=22.2, batch_index=702, batch_size=256]

Epoch 3/10:  71%|████████▌   | 702/991 [2:57:59<1:19:18, 16.46s/batch, batch_loss=22.2, batch_index=702, batch_size=256]

Epoch 3/10:  71%|█████████▏   | 702/991 [2:58:16<1:19:18, 16.46s/batch, batch_loss=275, batch_index=703, batch_size=256]

Epoch 3/10:  71%|█████████▏   | 703/991 [2:58:16<1:18:58, 16.45s/batch, batch_loss=275, batch_index=703, batch_size=256]

Epoch 3/10:  71%|████████▌   | 703/991 [2:58:31<1:18:58, 16.45s/batch, batch_loss=8.62, batch_index=704, batch_size=256]

Epoch 3/10:  71%|████████▌   | 704/991 [2:58:31<1:17:40, 16.24s/batch, batch_loss=8.62, batch_index=704, batch_size=256]

Epoch 3/10:  71%|████████▌   | 704/991 [2:58:47<1:17:40, 16.24s/batch, batch_loss=10.8, batch_index=705, batch_size=256]

Epoch 3/10:  71%|████████▌   | 705/991 [2:58:47<1:16:40, 16.09s/batch, batch_loss=10.8, batch_index=705, batch_size=256]

Epoch 3/10:  71%|████████▌   | 705/991 [2:59:01<1:16:40, 16.09s/batch, batch_loss=19.3, batch_index=706, batch_size=256]

Epoch 3/10:  71%|████████▌   | 706/991 [2:59:01<1:13:57, 15.57s/batch, batch_loss=19.3, batch_index=706, batch_size=256]

Epoch 3/10:  71%|████████▌   | 706/991 [2:59:19<1:13:57, 15.57s/batch, batch_loss=19.2, batch_index=707, batch_size=256]

Epoch 3/10:  71%|████████▌   | 707/991 [2:59:19<1:16:10, 16.09s/batch, batch_loss=19.2, batch_index=707, batch_size=256]

Epoch 3/10:  71%|████████▌   | 707/991 [2:59:34<1:16:10, 16.09s/batch, batch_loss=9.12, batch_index=708, batch_size=256]

Epoch 3/10:  71%|████████▌   | 708/991 [2:59:34<1:14:24, 15.78s/batch, batch_loss=9.12, batch_index=708, batch_size=256]

Epoch 3/10:  71%|████████▌   | 708/991 [2:59:49<1:14:24, 15.78s/batch, batch_loss=8.12, batch_index=709, batch_size=256]

Epoch 3/10:  72%|████████▌   | 709/991 [2:59:49<1:13:02, 15.54s/batch, batch_loss=8.12, batch_index=709, batch_size=256]

Epoch 3/10:  72%|██████████    | 709/991 [3:00:04<1:13:02, 15.54s/batch, batch_loss=30, batch_index=710, batch_size=256]

Epoch 3/10:  72%|██████████    | 710/991 [3:00:04<1:12:17, 15.44s/batch, batch_loss=30, batch_index=710, batch_size=256]

Epoch 3/10:  72%|██████████    | 710/991 [3:00:19<1:12:17, 15.44s/batch, batch_loss=98, batch_index=711, batch_size=256]

Epoch 3/10:  72%|██████████    | 711/991 [3:00:19<1:11:38, 15.35s/batch, batch_loss=98, batch_index=711, batch_size=256]

Epoch 3/10:  72%|████████▌   | 711/991 [3:00:34<1:11:38, 15.35s/batch, batch_loss=13.4, batch_index=712, batch_size=256]

Epoch 3/10:  72%|████████▌   | 712/991 [3:00:34<1:10:34, 15.18s/batch, batch_loss=13.4, batch_index=712, batch_size=256]

Epoch 3/10:  72%|████████▌   | 712/991 [3:00:49<1:10:34, 15.18s/batch, batch_loss=78.4, batch_index=713, batch_size=256]

Epoch 3/10:  72%|████████▋   | 713/991 [3:00:49<1:10:52, 15.30s/batch, batch_loss=78.4, batch_index=713, batch_size=256]

Epoch 3/10:  72%|████████▋   | 713/991 [3:01:04<1:10:52, 15.30s/batch, batch_loss=24.9, batch_index=714, batch_size=256]

Epoch 3/10:  72%|████████▋   | 714/991 [3:01:04<1:10:15, 15.22s/batch, batch_loss=24.9, batch_index=714, batch_size=256]

Epoch 3/10:  72%|████████▋   | 714/991 [3:01:20<1:10:15, 15.22s/batch, batch_loss=18.6, batch_index=715, batch_size=256]

Epoch 3/10:  72%|████████▋   | 715/991 [3:01:20<1:10:22, 15.30s/batch, batch_loss=18.6, batch_index=715, batch_size=256]

Epoch 3/10:  72%|████████▋   | 715/991 [3:01:36<1:10:22, 15.30s/batch, batch_loss=16.2, batch_index=716, batch_size=256]

Epoch 3/10:  72%|████████▋   | 716/991 [3:01:36<1:11:02, 15.50s/batch, batch_loss=16.2, batch_index=716, batch_size=256]

Epoch 3/10:  72%|████████▋   | 716/991 [3:01:51<1:11:02, 15.50s/batch, batch_loss=17.1, batch_index=717, batch_size=256]

Epoch 3/10:  72%|████████▋   | 717/991 [3:01:51<1:10:17, 15.39s/batch, batch_loss=17.1, batch_index=717, batch_size=256]

Epoch 3/10:  72%|████████▋   | 717/991 [3:02:06<1:10:17, 15.39s/batch, batch_loss=18.4, batch_index=718, batch_size=256]

Epoch 3/10:  72%|████████▋   | 718/991 [3:02:06<1:09:15, 15.22s/batch, batch_loss=18.4, batch_index=718, batch_size=256]

Epoch 3/10:  72%|████████▋   | 718/991 [3:02:22<1:09:15, 15.22s/batch, batch_loss=11.2, batch_index=719, batch_size=256]

Epoch 3/10:  73%|████████▋   | 719/991 [3:02:22<1:09:52, 15.41s/batch, batch_loss=11.2, batch_index=719, batch_size=256]

Epoch 3/10:  73%|████████▋   | 719/991 [3:02:37<1:09:52, 15.41s/batch, batch_loss=12.9, batch_index=720, batch_size=256]

Epoch 3/10:  73%|████████▋   | 720/991 [3:02:37<1:09:02, 15.29s/batch, batch_loss=12.9, batch_index=720, batch_size=256]

Epoch 3/10:  73%|████████▋   | 720/991 [3:02:55<1:09:02, 15.29s/batch, batch_loss=20.4, batch_index=721, batch_size=256]

Epoch 3/10:  73%|████████▋   | 721/991 [3:02:55<1:12:57, 16.21s/batch, batch_loss=20.4, batch_index=721, batch_size=256]

Epoch 3/10:  73%|████████▋   | 721/991 [3:03:09<1:12:57, 16.21s/batch, batch_loss=20.3, batch_index=722, batch_size=256]

Epoch 3/10:  73%|████████▋   | 722/991 [3:03:09<1:10:07, 15.64s/batch, batch_loss=20.3, batch_index=722, batch_size=256]

Epoch 3/10:  73%|██████▌  | 722/991 [3:03:25<1:10:07, 15.64s/batch, batch_loss=7.22e+3, batch_index=723, batch_size=256]

Epoch 3/10:  73%|██████▌  | 723/991 [3:03:25<1:09:22, 15.53s/batch, batch_loss=7.22e+3, batch_index=723, batch_size=256]

Epoch 3/10:  73%|████████▊   | 723/991 [3:03:41<1:09:22, 15.53s/batch, batch_loss=4.65, batch_index=724, batch_size=256]

Epoch 3/10:  73%|████████▊   | 724/991 [3:03:41<1:09:32, 15.63s/batch, batch_loss=4.65, batch_index=724, batch_size=256]

Epoch 3/10:  73%|████████▊   | 724/991 [3:03:55<1:09:32, 15.63s/batch, batch_loss=13.5, batch_index=725, batch_size=256]

Epoch 3/10:  73%|████████▊   | 725/991 [3:03:55<1:07:22, 15.20s/batch, batch_loss=13.5, batch_index=725, batch_size=256]

Epoch 3/10:  73%|████████▊   | 725/991 [3:04:09<1:07:22, 15.20s/batch, batch_loss=12.3, batch_index=726, batch_size=256]

Epoch 3/10:  73%|████████▊   | 726/991 [3:04:09<1:05:40, 14.87s/batch, batch_loss=12.3, batch_index=726, batch_size=256]

Epoch 3/10:  73%|███████▎  | 726/991 [3:04:22<1:05:40, 14.87s/batch, batch_loss=1.3e+4, batch_index=727, batch_size=256]

Epoch 3/10:  73%|███████▎  | 727/991 [3:04:22<1:03:44, 14.49s/batch, batch_loss=1.3e+4, batch_index=727, batch_size=256]

Epoch 3/10:  73%|████████▊   | 727/991 [3:04:37<1:03:44, 14.49s/batch, batch_loss=13.6, batch_index=728, batch_size=256]

Epoch 3/10:  73%|████████▊   | 728/991 [3:04:37<1:03:48, 14.56s/batch, batch_loss=13.6, batch_index=728, batch_size=256]

Epoch 3/10:  73%|█████████▌   | 728/991 [3:04:53<1:03:48, 14.56s/batch, batch_loss=128, batch_index=729, batch_size=256]

Epoch 3/10:  74%|█████████▌   | 729/991 [3:04:53<1:05:28, 15.00s/batch, batch_loss=128, batch_index=729, batch_size=256]

Epoch 3/10:  74%|████████▊   | 729/991 [3:05:11<1:05:28, 15.00s/batch, batch_loss=11.2, batch_index=730, batch_size=256]

Epoch 3/10:  74%|████████▊   | 730/991 [3:05:11<1:09:30, 15.98s/batch, batch_loss=11.2, batch_index=730, batch_size=256]

Epoch 3/10:  74%|█████████▌   | 730/991 [3:05:26<1:09:30, 15.98s/batch, batch_loss=107, batch_index=731, batch_size=256]

Epoch 3/10:  74%|█████████▌   | 731/991 [3:05:26<1:07:42, 15.63s/batch, batch_loss=107, batch_index=731, batch_size=256]

Epoch 3/10:  74%|██████▋  | 731/991 [3:05:42<1:07:42, 15.63s/batch, batch_loss=1.39e+4, batch_index=732, batch_size=256]

Epoch 3/10:  74%|██████▋  | 732/991 [3:05:42<1:07:31, 15.64s/batch, batch_loss=1.39e+4, batch_index=732, batch_size=256]

Epoch 3/10:  74%|████████▊   | 732/991 [3:05:58<1:07:31, 15.64s/batch, batch_loss=15.7, batch_index=733, batch_size=256]

Epoch 3/10:  74%|████████▉   | 733/991 [3:05:58<1:07:33, 15.71s/batch, batch_loss=15.7, batch_index=733, batch_size=256]

Epoch 3/10:  74%|██████▋  | 733/991 [3:06:14<1:07:33, 15.71s/batch, batch_loss=6.81e+3, batch_index=734, batch_size=256]

Epoch 3/10:  74%|██████▋  | 734/991 [3:06:14<1:08:29, 15.99s/batch, batch_loss=6.81e+3, batch_index=734, batch_size=256]

Epoch 3/10:  74%|██████████▎   | 734/991 [3:06:30<1:08:29, 15.99s/batch, batch_loss=15, batch_index=735, batch_size=256]

Epoch 3/10:  74%|██████████▍   | 735/991 [3:06:30<1:07:44, 15.88s/batch, batch_loss=15, batch_index=735, batch_size=256]

Epoch 3/10:  74%|██████████▍   | 735/991 [3:06:46<1:07:44, 15.88s/batch, batch_loss=13, batch_index=736, batch_size=256]

Epoch 3/10:  74%|██████████▍   | 736/991 [3:06:46<1:07:18, 15.84s/batch, batch_loss=13, batch_index=736, batch_size=256]

Epoch 3/10:  74%|████████▉   | 736/991 [3:07:01<1:07:18, 15.84s/batch, batch_loss=9.49, batch_index=737, batch_size=256]

Epoch 3/10:  74%|████████▉   | 737/991 [3:07:01<1:05:34, 15.49s/batch, batch_loss=9.49, batch_index=737, batch_size=256]

Epoch 3/10:  74%|██████▋  | 737/991 [3:07:15<1:05:34, 15.49s/batch, batch_loss=1.47e+3, batch_index=738, batch_size=256]

Epoch 3/10:  74%|██████▋  | 738/991 [3:07:15<1:04:27, 15.28s/batch, batch_loss=1.47e+3, batch_index=738, batch_size=256]

Epoch 3/10:  74%|████████▉   | 738/991 [3:07:33<1:04:27, 15.28s/batch, batch_loss=27.3, batch_index=739, batch_size=256]

Epoch 3/10:  75%|████████▉   | 739/991 [3:07:33<1:06:52, 15.92s/batch, batch_loss=27.3, batch_index=739, batch_size=256]

Epoch 3/10:  75%|████████▉   | 739/991 [3:07:48<1:06:52, 15.92s/batch, batch_loss=9.83, batch_index=740, batch_size=256]

Epoch 3/10:  75%|████████▉   | 740/991 [3:07:48<1:05:15, 15.60s/batch, batch_loss=9.83, batch_index=740, batch_size=256]

Epoch 3/10:  75%|██████▋  | 740/991 [3:08:03<1:05:15, 15.60s/batch, batch_loss=1.81e+4, batch_index=741, batch_size=256]

Epoch 3/10:  75%|██████▋  | 741/991 [3:08:03<1:04:19, 15.44s/batch, batch_loss=1.81e+4, batch_index=741, batch_size=256]

Epoch 3/10:  75%|██████▋  | 741/991 [3:08:18<1:04:19, 15.44s/batch, batch_loss=2.28e+3, batch_index=742, batch_size=256]

Epoch 3/10:  75%|██████▋  | 742/991 [3:08:18<1:04:09, 15.46s/batch, batch_loss=2.28e+3, batch_index=742, batch_size=256]

Epoch 3/10:  75%|████████▉   | 742/991 [3:08:34<1:04:09, 15.46s/batch, batch_loss=11.2, batch_index=743, batch_size=256]

Epoch 3/10:  75%|████████▉   | 743/991 [3:08:34<1:03:55, 15.47s/batch, batch_loss=11.2, batch_index=743, batch_size=256]

Epoch 3/10:  75%|████████▉   | 743/991 [3:08:49<1:03:55, 15.47s/batch, batch_loss=12.4, batch_index=744, batch_size=256]

Epoch 3/10:  75%|█████████   | 744/991 [3:08:49<1:03:37, 15.45s/batch, batch_loss=12.4, batch_index=744, batch_size=256]

Epoch 3/10:  75%|█████████   | 744/991 [3:09:04<1:03:37, 15.45s/batch, batch_loss=15.1, batch_index=745, batch_size=256]

Epoch 3/10:  75%|█████████   | 745/991 [3:09:04<1:02:34, 15.26s/batch, batch_loss=15.1, batch_index=745, batch_size=256]

Epoch 3/10:  75%|██████▊  | 745/991 [3:09:20<1:02:34, 15.26s/batch, batch_loss=1.15e+3, batch_index=746, batch_size=256]

Epoch 3/10:  75%|██████▊  | 746/991 [3:09:20<1:03:55, 15.65s/batch, batch_loss=1.15e+3, batch_index=746, batch_size=256]

Epoch 3/10:  75%|██████▊  | 746/991 [3:09:35<1:03:55, 15.65s/batch, batch_loss=3.87e+3, batch_index=747, batch_size=256]

Epoch 3/10:  75%|██████▊  | 747/991 [3:09:35<1:02:05, 15.27s/batch, batch_loss=3.87e+3, batch_index=747, batch_size=256]

Epoch 3/10:  75%|█████████   | 747/991 [3:09:50<1:02:05, 15.27s/batch, batch_loss=14.6, batch_index=748, batch_size=256]

Epoch 3/10:  75%|█████████   | 748/991 [3:09:50<1:02:10, 15.35s/batch, batch_loss=14.6, batch_index=748, batch_size=256]

Epoch 3/10:  75%|█████████   | 748/991 [3:10:04<1:02:10, 15.35s/batch, batch_loss=13.5, batch_index=749, batch_size=256]

Epoch 3/10:  76%|█████████   | 749/991 [3:10:04<1:00:15, 14.94s/batch, batch_loss=13.5, batch_index=749, batch_size=256]

Epoch 3/10:  76%|█████████   | 749/991 [3:10:19<1:00:15, 14.94s/batch, batch_loss=13.5, batch_index=750, batch_size=256]

Epoch 3/10:  76%|██████████▌   | 750/991 [3:10:19<59:36, 14.84s/batch, batch_loss=13.5, batch_index=750, batch_size=256]

Epoch 3/10:  76%|██████████▌   | 750/991 [3:10:34<59:36, 14.84s/batch, batch_loss=12.8, batch_index=751, batch_size=256]

Epoch 3/10:  76%|██████████▌   | 751/991 [3:10:34<59:14, 14.81s/batch, batch_loss=12.8, batch_index=751, batch_size=256]

Epoch 3/10:  76%|██████████▌   | 751/991 [3:10:49<59:14, 14.81s/batch, batch_loss=7.24, batch_index=752, batch_size=256]

Epoch 3/10:  76%|██████████▌   | 752/991 [3:10:49<59:05, 14.84s/batch, batch_loss=7.24, batch_index=752, batch_size=256]

Epoch 3/10:  76%|██████████▌   | 752/991 [3:11:03<59:05, 14.84s/batch, batch_loss=9.77, batch_index=753, batch_size=256]

Epoch 3/10:  76%|██████████▋   | 753/991 [3:11:03<58:46, 14.82s/batch, batch_loss=9.77, batch_index=753, batch_size=256]

Epoch 3/10:  76%|██████████▋   | 753/991 [3:11:18<58:46, 14.82s/batch, batch_loss=6.34, batch_index=754, batch_size=256]

Epoch 3/10:  76%|██████████▋   | 754/991 [3:11:18<58:06, 14.71s/batch, batch_loss=6.34, batch_index=754, batch_size=256]

Epoch 3/10:  76%|██████████▋   | 754/991 [3:11:36<58:06, 14.71s/batch, batch_loss=13.9, batch_index=755, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 755/991 [3:11:36<1:02:32, 15.90s/batch, batch_loss=13.9, batch_index=755, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 755/991 [3:11:52<1:02:32, 15.90s/batch, batch_loss=12.8, batch_index=756, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 756/991 [3:11:52<1:01:38, 15.74s/batch, batch_loss=12.8, batch_index=756, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 756/991 [3:12:08<1:01:38, 15.74s/batch, batch_loss=6.62, batch_index=757, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 757/991 [3:12:08<1:01:29, 15.76s/batch, batch_loss=6.62, batch_index=757, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 757/991 [3:12:24<1:01:29, 15.76s/batch, batch_loss=15.7, batch_index=758, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 758/991 [3:12:24<1:01:42, 15.89s/batch, batch_loss=15.7, batch_index=758, batch_size=256]

Epoch 3/10:  76%|█████████▏  | 758/991 [3:12:39<1:01:42, 15.89s/batch, batch_loss=17.2, batch_index=759, batch_size=256]

Epoch 3/10:  77%|█████████▏  | 759/991 [3:12:39<1:00:53, 15.75s/batch, batch_loss=17.2, batch_index=759, batch_size=256]

Epoch 3/10:  77%|█████████▏  | 759/991 [3:12:54<1:00:53, 15.75s/batch, batch_loss=16.3, batch_index=760, batch_size=256]

Epoch 3/10:  77%|██████████▋   | 760/991 [3:12:54<59:55, 15.56s/batch, batch_loss=16.3, batch_index=760, batch_size=256]

Epoch 3/10:  77%|██████████▋   | 760/991 [3:13:09<59:55, 15.56s/batch, batch_loss=16.1, batch_index=761, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 761/991 [3:13:09<59:02, 15.40s/batch, batch_loss=16.1, batch_index=761, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 761/991 [3:13:24<59:02, 15.40s/batch, batch_loss=25.8, batch_index=762, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 762/991 [3:13:24<58:09, 15.24s/batch, batch_loss=25.8, batch_index=762, batch_size=256]

Epoch 3/10:  77%|███████████▌   | 762/991 [3:13:40<58:09, 15.24s/batch, batch_loss=513, batch_index=763, batch_size=256]

Epoch 3/10:  77%|███████████▌   | 763/991 [3:13:40<58:33, 15.41s/batch, batch_loss=513, batch_index=763, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 763/991 [3:13:56<58:33, 15.41s/batch, batch_loss=10.5, batch_index=764, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 764/991 [3:13:56<58:39, 15.50s/batch, batch_loss=10.5, batch_index=764, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 764/991 [3:14:11<58:39, 15.50s/batch, batch_loss=3.67, batch_index=765, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 765/991 [3:14:11<58:15, 15.47s/batch, batch_loss=3.67, batch_index=765, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 765/991 [3:14:27<58:15, 15.47s/batch, batch_loss=12.4, batch_index=766, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 766/991 [3:14:27<57:52, 15.43s/batch, batch_loss=12.4, batch_index=766, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 766/991 [3:14:42<57:52, 15.43s/batch, batch_loss=14.9, batch_index=767, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 767/991 [3:14:42<58:05, 15.56s/batch, batch_loss=14.9, batch_index=767, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 767/991 [3:14:59<58:05, 15.56s/batch, batch_loss=4.93, batch_index=768, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 768/991 [3:14:59<58:53, 15.85s/batch, batch_loss=4.93, batch_index=768, batch_size=256]

Epoch 3/10:  77%|██████████▊   | 768/991 [3:15:15<58:53, 15.85s/batch, batch_loss=2.66, batch_index=769, batch_size=256]

Epoch 3/10:  78%|██████████▊   | 769/991 [3:15:15<59:25, 16.06s/batch, batch_loss=2.66, batch_index=769, batch_size=256]

Epoch 3/10:  78%|██████████▊   | 769/991 [3:15:35<59:25, 16.06s/batch, batch_loss=12.6, batch_index=770, batch_size=256]

Epoch 3/10:  78%|█████████▎  | 770/991 [3:15:35<1:02:45, 17.04s/batch, batch_loss=12.6, batch_index=770, batch_size=256]

Epoch 3/10:  78%|██████▉  | 770/991 [3:15:51<1:02:45, 17.04s/batch, batch_loss=2.74e+3, batch_index=771, batch_size=256]

Epoch 3/10:  78%|███████  | 771/991 [3:15:51<1:02:02, 16.92s/batch, batch_loss=2.74e+3, batch_index=771, batch_size=256]

Epoch 3/10:  78%|█████████▎  | 771/991 [3:16:07<1:02:02, 16.92s/batch, batch_loss=5.48, batch_index=772, batch_size=256]

Epoch 3/10:  78%|█████████▎  | 772/991 [3:16:07<1:00:10, 16.49s/batch, batch_loss=5.48, batch_index=772, batch_size=256]

Epoch 3/10:  78%|█████████▎  | 772/991 [3:16:21<1:00:10, 16.49s/batch, batch_loss=1.36, batch_index=773, batch_size=256]

Epoch 3/10:  78%|██████████▉   | 773/991 [3:16:21<57:28, 15.82s/batch, batch_loss=1.36, batch_index=773, batch_size=256]

Epoch 3/10:  78%|██████████▉   | 773/991 [3:16:37<57:28, 15.82s/batch, batch_loss=8.16, batch_index=774, batch_size=256]

Epoch 3/10:  78%|██████████▉   | 774/991 [3:16:37<57:00, 15.76s/batch, batch_loss=8.16, batch_index=774, batch_size=256]

Epoch 3/10:  78%|██████████▉   | 774/991 [3:16:52<57:00, 15.76s/batch, batch_loss=8.91, batch_index=775, batch_size=256]

Epoch 3/10:  78%|██████████▉   | 775/991 [3:16:52<56:33, 15.71s/batch, batch_loss=8.91, batch_index=775, batch_size=256]

Epoch 3/10:  78%|███████████▋   | 775/991 [3:17:08<56:33, 15.71s/batch, batch_loss=255, batch_index=776, batch_size=256]

Epoch 3/10:  78%|███████████▋   | 776/991 [3:17:08<56:38, 15.80s/batch, batch_loss=255, batch_index=776, batch_size=256]

Epoch 3/10:  78%|██████████▏  | 776/991 [3:17:24<56:38, 15.80s/batch, batch_loss=0.581, batch_index=777, batch_size=256]

Epoch 3/10:  78%|██████████▏  | 777/991 [3:17:24<55:53, 15.67s/batch, batch_loss=0.581, batch_index=777, batch_size=256]

Epoch 3/10:  78%|██████████▏  | 777/991 [3:17:40<55:53, 15.67s/batch, batch_loss=0.791, batch_index=778, batch_size=256]

Epoch 3/10:  79%|██████████▏  | 778/991 [3:17:40<55:55, 15.75s/batch, batch_loss=0.791, batch_index=778, batch_size=256]

Epoch 3/10:  79%|██████████▉   | 778/991 [3:17:56<55:55, 15.75s/batch, batch_loss=7.16, batch_index=779, batch_size=256]

Epoch 3/10:  79%|███████████   | 779/991 [3:17:56<56:37, 16.03s/batch, batch_loss=7.16, batch_index=779, batch_size=256]

Epoch 3/10:  79%|███████████   | 779/991 [3:18:13<56:37, 16.03s/batch, batch_loss=4.28, batch_index=780, batch_size=256]

Epoch 3/10:  79%|███████████   | 780/991 [3:18:13<57:04, 16.23s/batch, batch_loss=4.28, batch_index=780, batch_size=256]

Epoch 3/10:  79%|███████████   | 780/991 [3:18:30<57:04, 16.23s/batch, batch_loss=3.83, batch_index=781, batch_size=256]

Epoch 3/10:  79%|███████████   | 781/991 [3:18:30<57:18, 16.37s/batch, batch_loss=3.83, batch_index=781, batch_size=256]

Epoch 3/10:  79%|████████▋  | 781/991 [3:18:46<57:18, 16.37s/batch, batch_loss=2.51e+4, batch_index=782, batch_size=256]

Epoch 3/10:  79%|████████▋  | 782/991 [3:18:46<56:22, 16.19s/batch, batch_loss=2.51e+4, batch_index=782, batch_size=256]

Epoch 3/10:  79%|███████████   | 782/991 [3:19:01<56:22, 16.19s/batch, batch_loss=19.1, batch_index=783, batch_size=256]

Epoch 3/10:  79%|███████████   | 783/991 [3:19:01<55:17, 15.95s/batch, batch_loss=19.1, batch_index=783, batch_size=256]

Epoch 3/10:  79%|███████████   | 783/991 [3:19:15<55:17, 15.95s/batch, batch_loss=15.3, batch_index=784, batch_size=256]

Epoch 3/10:  79%|███████████   | 784/991 [3:19:15<53:33, 15.52s/batch, batch_loss=15.3, batch_index=784, batch_size=256]

Epoch 3/10:  79%|███████████   | 784/991 [3:19:34<53:33, 15.52s/batch, batch_loss=15.5, batch_index=785, batch_size=256]

Epoch 3/10:  79%|███████████   | 785/991 [3:19:34<56:02, 16.32s/batch, batch_loss=15.5, batch_index=785, batch_size=256]

Epoch 3/10:  79%|████████████▋   | 785/991 [3:19:50<56:02, 16.32s/batch, batch_loss=10, batch_index=786, batch_size=256]

Epoch 3/10:  79%|████████████▋   | 786/991 [3:19:50<56:02, 16.40s/batch, batch_loss=10, batch_index=786, batch_size=256]

Epoch 3/10:  79%|████████▋  | 786/991 [3:20:05<56:02, 16.40s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 3/10:  79%|████████▋  | 787/991 [3:20:05<54:12, 15.95s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 3/10:  79%|███████████▉   | 787/991 [3:20:21<54:12, 15.95s/batch, batch_loss=685, batch_index=788, batch_size=256]

Epoch 3/10:  80%|███████████▉   | 788/991 [3:20:21<53:28, 15.81s/batch, batch_loss=685, batch_index=788, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 788/991 [3:20:36<53:28, 15.81s/batch, batch_loss=19.2, batch_index=789, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 789/991 [3:20:36<52:35, 15.62s/batch, batch_loss=19.2, batch_index=789, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 789/991 [3:20:51<52:35, 15.62s/batch, batch_loss=14.4, batch_index=790, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 790/991 [3:20:51<52:18, 15.62s/batch, batch_loss=14.4, batch_index=790, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 790/991 [3:21:08<52:18, 15.62s/batch, batch_loss=13.4, batch_index=791, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 791/991 [3:21:08<53:13, 15.97s/batch, batch_loss=13.4, batch_index=791, batch_size=256]

Epoch 3/10:  80%|████████▊  | 791/991 [3:21:24<53:13, 15.97s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 3/10:  80%|████████▊  | 792/991 [3:21:24<52:40, 15.88s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 3/10:  80%|███████████▉   | 792/991 [3:21:39<52:40, 15.88s/batch, batch_loss=8.3, batch_index=793, batch_size=256]

Epoch 3/10:  80%|████████████   | 793/991 [3:21:39<51:23, 15.57s/batch, batch_loss=8.3, batch_index=793, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 793/991 [3:21:54<51:23, 15.57s/batch, batch_loss=2.04, batch_index=794, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 794/991 [3:21:54<50:37, 15.42s/batch, batch_loss=2.04, batch_index=794, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 794/991 [3:22:10<50:37, 15.42s/batch, batch_loss=7.89, batch_index=795, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 795/991 [3:22:10<50:56, 15.60s/batch, batch_loss=7.89, batch_index=795, batch_size=256]

Epoch 3/10:  80%|████████████▊   | 795/991 [3:22:25<50:56, 15.60s/batch, batch_loss=11, batch_index=796, batch_size=256]

Epoch 3/10:  80%|████████████▊   | 796/991 [3:22:25<50:45, 15.62s/batch, batch_loss=11, batch_index=796, batch_size=256]

Epoch 3/10:  80%|███████████▏  | 796/991 [3:22:41<50:45, 15.62s/batch, batch_loss=20.2, batch_index=797, batch_size=256]

Epoch 3/10:  80%|███████████▎  | 797/991 [3:22:41<50:05, 15.49s/batch, batch_loss=20.2, batch_index=797, batch_size=256]

Epoch 3/10:  80%|████████████   | 797/991 [3:22:55<50:05, 15.49s/batch, batch_loss=337, batch_index=798, batch_size=256]

Epoch 3/10:  81%|████████████   | 798/991 [3:22:55<48:25, 15.05s/batch, batch_loss=337, batch_index=798, batch_size=256]

Epoch 3/10:  81%|████████████▉   | 798/991 [3:23:10<48:25, 15.05s/batch, batch_loss=10, batch_index=799, batch_size=256]

Epoch 3/10:  81%|████████████▉   | 799/991 [3:23:10<48:06, 15.03s/batch, batch_loss=10, batch_index=799, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 799/991 [3:23:24<48:06, 15.03s/batch, batch_loss=16.6, batch_index=800, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 800/991 [3:23:24<47:32, 14.93s/batch, batch_loss=16.6, batch_index=800, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 800/991 [3:23:40<47:32, 14.93s/batch, batch_loss=12.7, batch_index=801, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 801/991 [3:23:40<47:58, 15.15s/batch, batch_loss=12.7, batch_index=801, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 801/991 [3:23:55<47:58, 15.15s/batch, batch_loss=15.9, batch_index=802, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 802/991 [3:23:55<47:29, 15.08s/batch, batch_loss=15.9, batch_index=802, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 802/991 [3:24:09<47:29, 15.08s/batch, batch_loss=6.67, batch_index=803, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 803/991 [3:24:09<46:00, 14.69s/batch, batch_loss=6.67, batch_index=803, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 803/991 [3:24:25<46:00, 14.69s/batch, batch_loss=13.4, batch_index=804, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 804/991 [3:24:25<46:54, 15.05s/batch, batch_loss=13.4, batch_index=804, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 804/991 [3:24:40<46:54, 15.05s/batch, batch_loss=6.56, batch_index=805, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 805/991 [3:24:40<47:22, 15.28s/batch, batch_loss=6.56, batch_index=805, batch_size=256]

Epoch 3/10:  81%|███████████▎  | 805/991 [3:24:56<47:22, 15.28s/batch, batch_loss=9.96, batch_index=806, batch_size=256]

Epoch 3/10:  81%|███████████▍  | 806/991 [3:24:56<47:37, 15.45s/batch, batch_loss=9.96, batch_index=806, batch_size=256]

Epoch 3/10:  81%|███████████▍  | 806/991 [3:25:12<47:37, 15.45s/batch, batch_loss=8.66, batch_index=807, batch_size=256]

Epoch 3/10:  81%|███████████▍  | 807/991 [3:25:12<47:27, 15.48s/batch, batch_loss=8.66, batch_index=807, batch_size=256]

Epoch 3/10:  81%|███████████▍  | 807/991 [3:25:27<47:27, 15.48s/batch, batch_loss=17.4, batch_index=808, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 808/991 [3:25:27<47:05, 15.44s/batch, batch_loss=17.4, batch_index=808, batch_size=256]

Epoch 3/10:  82%|████████▉  | 808/991 [3:25:43<47:05, 15.44s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 3/10:  82%|████████▉  | 809/991 [3:25:43<46:48, 15.43s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 809/991 [3:25:58<46:48, 15.43s/batch, batch_loss=14.3, batch_index=810, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 810/991 [3:25:58<46:36, 15.45s/batch, batch_loss=14.3, batch_index=810, batch_size=256]

Epoch 3/10:  82%|████████████▎  | 810/991 [3:26:14<46:36, 15.45s/batch, batch_loss=7.7, batch_index=811, batch_size=256]

Epoch 3/10:  82%|████████████▎  | 811/991 [3:26:14<46:41, 15.56s/batch, batch_loss=7.7, batch_index=811, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 811/991 [3:26:31<46:41, 15.56s/batch, batch_loss=7.11, batch_index=812, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 812/991 [3:26:31<47:23, 15.89s/batch, batch_loss=7.11, batch_index=812, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 812/991 [3:26:46<47:23, 15.89s/batch, batch_loss=7.58, batch_index=813, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 813/991 [3:26:46<46:31, 15.69s/batch, batch_loss=7.58, batch_index=813, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 813/991 [3:27:01<46:31, 15.69s/batch, batch_loss=11.8, batch_index=814, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 814/991 [3:27:01<46:02, 15.61s/batch, batch_loss=11.8, batch_index=814, batch_size=256]

Epoch 3/10:  82%|███████████▍  | 814/991 [3:27:17<46:02, 15.61s/batch, batch_loss=7.34, batch_index=815, batch_size=256]

Epoch 3/10:  82%|███████████▌  | 815/991 [3:27:17<45:53, 15.65s/batch, batch_loss=7.34, batch_index=815, batch_size=256]

Epoch 3/10:  82%|███████████▌  | 815/991 [3:27:33<45:53, 15.65s/batch, batch_loss=90.4, batch_index=816, batch_size=256]

Epoch 3/10:  82%|███████████▌  | 816/991 [3:27:33<46:12, 15.85s/batch, batch_loss=90.4, batch_index=816, batch_size=256]

Epoch 3/10:  82%|████████████▎  | 816/991 [3:27:48<46:12, 15.85s/batch, batch_loss=354, batch_index=817, batch_size=256]

Epoch 3/10:  82%|████████████▎  | 817/991 [3:27:48<45:22, 15.65s/batch, batch_loss=354, batch_index=817, batch_size=256]

Epoch 3/10:  82%|████████████▎  | 817/991 [3:28:05<45:22, 15.65s/batch, batch_loss=362, batch_index=818, batch_size=256]

Epoch 3/10:  83%|████████████▍  | 818/991 [3:28:05<45:32, 15.79s/batch, batch_loss=362, batch_index=818, batch_size=256]

Epoch 3/10:  83%|███████████▌  | 818/991 [3:28:20<45:32, 15.79s/batch, batch_loss=13.1, batch_index=819, batch_size=256]

Epoch 3/10:  83%|███████████▌  | 819/991 [3:28:20<44:55, 15.67s/batch, batch_loss=13.1, batch_index=819, batch_size=256]

Epoch 3/10:  83%|███████████▌  | 819/991 [3:28:36<44:55, 15.67s/batch, batch_loss=7.62, batch_index=820, batch_size=256]

Epoch 3/10:  83%|███████████▌  | 820/991 [3:28:36<44:41, 15.68s/batch, batch_loss=7.62, batch_index=820, batch_size=256]

Epoch 3/10:  83%|████████████▍  | 820/991 [3:28:52<44:41, 15.68s/batch, batch_loss=7.8, batch_index=821, batch_size=256]

Epoch 3/10:  83%|████████████▍  | 821/991 [3:28:52<45:16, 15.98s/batch, batch_loss=7.8, batch_index=821, batch_size=256]

Epoch 3/10:  83%|███████████▌  | 821/991 [3:29:08<45:16, 15.98s/batch, batch_loss=9.47, batch_index=822, batch_size=256]

Epoch 3/10:  83%|███████████▌  | 822/991 [3:29:08<44:27, 15.78s/batch, batch_loss=9.47, batch_index=822, batch_size=256]

Epoch 3/10:  83%|████████████▍  | 822/991 [3:29:24<44:27, 15.78s/batch, batch_loss=155, batch_index=823, batch_size=256]

Epoch 3/10:  83%|████████████▍  | 823/991 [3:29:24<44:52, 16.03s/batch, batch_loss=155, batch_index=823, batch_size=256]

Epoch 3/10:  83%|███████████▋  | 823/991 [3:29:40<44:52, 16.03s/batch, batch_loss=7.19, batch_index=824, batch_size=256]

Epoch 3/10:  83%|███████████▋  | 824/991 [3:29:40<44:19, 15.92s/batch, batch_loss=7.19, batch_index=824, batch_size=256]

Epoch 3/10:  83%|███████████▋  | 824/991 [3:29:56<44:19, 15.92s/batch, batch_loss=13.3, batch_index=825, batch_size=256]

Epoch 3/10:  83%|███████████▋  | 825/991 [3:29:56<44:21, 16.04s/batch, batch_loss=13.3, batch_index=825, batch_size=256]

Epoch 3/10:  83%|█████████▉  | 825/991 [3:30:13<44:21, 16.04s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 3/10:  83%|██████████  | 826/991 [3:30:13<44:19, 16.12s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 3/10:  83%|███████████▋  | 826/991 [3:30:28<44:19, 16.12s/batch, batch_loss=21.5, batch_index=827, batch_size=256]

Epoch 3/10:  83%|███████████▋  | 827/991 [3:30:28<43:40, 15.98s/batch, batch_loss=21.5, batch_index=827, batch_size=256]

Epoch 3/10:  83%|███████████▋  | 827/991 [3:30:43<43:40, 15.98s/batch, batch_loss=21.7, batch_index=828, batch_size=256]

Epoch 3/10:  84%|███████████▋  | 828/991 [3:30:43<42:11, 15.53s/batch, batch_loss=21.7, batch_index=828, batch_size=256]

Epoch 3/10:  84%|█████████████▎  | 828/991 [3:30:58<42:11, 15.53s/batch, batch_loss=10, batch_index=829, batch_size=256]

Epoch 3/10:  84%|█████████████▍  | 829/991 [3:30:58<41:25, 15.34s/batch, batch_loss=10, batch_index=829, batch_size=256]

Epoch 3/10:  84%|███████████▋  | 829/991 [3:31:12<41:25, 15.34s/batch, batch_loss=13.8, batch_index=830, batch_size=256]

Epoch 3/10:  84%|███████████▋  | 830/991 [3:31:12<40:27, 15.08s/batch, batch_loss=13.8, batch_index=830, batch_size=256]

Epoch 3/10:  84%|███████████▋  | 830/991 [3:31:26<40:27, 15.08s/batch, batch_loss=10.1, batch_index=831, batch_size=256]

Epoch 3/10:  84%|███████████▋  | 831/991 [3:31:26<39:42, 14.89s/batch, batch_loss=10.1, batch_index=831, batch_size=256]

Epoch 3/10:  84%|███████████▋  | 831/991 [3:31:41<39:42, 14.89s/batch, batch_loss=15.7, batch_index=832, batch_size=256]

Epoch 3/10:  84%|███████████▊  | 832/991 [3:31:41<39:11, 14.79s/batch, batch_loss=15.7, batch_index=832, batch_size=256]

Epoch 3/10:  84%|████████████▌  | 832/991 [3:31:56<39:11, 14.79s/batch, batch_loss=220, batch_index=833, batch_size=256]

Epoch 3/10:  84%|████████████▌  | 833/991 [3:31:56<39:22, 14.95s/batch, batch_loss=220, batch_index=833, batch_size=256]

Epoch 3/10:  84%|███████████▊  | 833/991 [3:32:11<39:22, 14.95s/batch, batch_loss=19.5, batch_index=834, batch_size=256]

Epoch 3/10:  84%|███████████▊  | 834/991 [3:32:11<39:01, 14.92s/batch, batch_loss=19.5, batch_index=834, batch_size=256]

Epoch 3/10:  84%|███████████▊  | 834/991 [3:32:26<39:01, 14.92s/batch, batch_loss=13.1, batch_index=835, batch_size=256]

Epoch 3/10:  84%|███████████▊  | 835/991 [3:32:26<39:02, 15.02s/batch, batch_loss=13.1, batch_index=835, batch_size=256]

Epoch 3/10:  84%|█████████▎ | 835/991 [3:32:42<39:02, 15.02s/batch, batch_loss=3.27e+3, batch_index=836, batch_size=256]

Epoch 3/10:  84%|█████████▎ | 836/991 [3:32:42<39:01, 15.10s/batch, batch_loss=3.27e+3, batch_index=836, batch_size=256]

Epoch 3/10:  84%|██████████  | 836/991 [3:33:00<39:01, 15.10s/batch, batch_loss=4.9e+3, batch_index=837, batch_size=256]

Epoch 3/10:  84%|██████████▏ | 837/991 [3:33:00<41:21, 16.12s/batch, batch_loss=4.9e+3, batch_index=837, batch_size=256]

Epoch 3/10:  84%|███████████▊  | 837/991 [3:33:17<41:21, 16.12s/batch, batch_loss=17.1, batch_index=838, batch_size=256]

Epoch 3/10:  85%|███████████▊  | 838/991 [3:33:17<41:17, 16.19s/batch, batch_loss=17.1, batch_index=838, batch_size=256]

Epoch 3/10:  85%|███████████▊  | 838/991 [3:33:33<41:17, 16.19s/batch, batch_loss=4.65, batch_index=839, batch_size=256]

Epoch 3/10:  85%|███████████▊  | 839/991 [3:33:33<40:49, 16.11s/batch, batch_loss=4.65, batch_index=839, batch_size=256]

Epoch 3/10:  85%|███████████▊  | 839/991 [3:33:48<40:49, 16.11s/batch, batch_loss=4.65, batch_index=840, batch_size=256]

Epoch 3/10:  85%|███████████▊  | 840/991 [3:33:48<40:24, 16.05s/batch, batch_loss=4.65, batch_index=840, batch_size=256]

Epoch 3/10:  85%|███████████▊  | 840/991 [3:34:04<40:24, 16.05s/batch, batch_loss=15.1, batch_index=841, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 841/991 [3:34:04<39:53, 15.96s/batch, batch_loss=15.1, batch_index=841, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 841/991 [3:34:18<39:53, 15.96s/batch, batch_loss=14.1, batch_index=842, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 842/991 [3:34:18<38:08, 15.36s/batch, batch_loss=14.1, batch_index=842, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 842/991 [3:34:32<38:08, 15.36s/batch, batch_loss=7.92, batch_index=843, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 843/991 [3:34:32<36:37, 14.85s/batch, batch_loss=7.92, batch_index=843, batch_size=256]

Epoch 3/10:  85%|█████████▎ | 843/991 [3:34:47<36:37, 14.85s/batch, batch_loss=1.69e+3, batch_index=844, batch_size=256]

Epoch 3/10:  85%|█████████▎ | 844/991 [3:34:47<36:17, 14.81s/batch, batch_loss=1.69e+3, batch_index=844, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 844/991 [3:35:05<36:17, 14.81s/batch, batch_loss=18.4, batch_index=845, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 845/991 [3:35:05<38:36, 15.86s/batch, batch_loss=18.4, batch_index=845, batch_size=256]

Epoch 3/10:  85%|█████████▍ | 845/991 [3:35:20<38:36, 15.86s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 3/10:  85%|█████████▍ | 846/991 [3:35:20<37:59, 15.72s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 846/991 [3:35:35<37:59, 15.72s/batch, batch_loss=25.2, batch_index=847, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 847/991 [3:35:35<37:08, 15.48s/batch, batch_loss=25.2, batch_index=847, batch_size=256]

Epoch 3/10:  85%|███████████▉  | 847/991 [3:35:52<37:08, 15.48s/batch, batch_loss=23.9, batch_index=848, batch_size=256]

Epoch 3/10:  86%|███████████▉  | 848/991 [3:35:52<37:33, 15.76s/batch, batch_loss=23.9, batch_index=848, batch_size=256]

Epoch 3/10:  86%|███████████▉  | 848/991 [3:36:07<37:33, 15.76s/batch, batch_loss=1e+3, batch_index=849, batch_size=256]

Epoch 3/10:  86%|███████████▉  | 849/991 [3:36:07<37:04, 15.67s/batch, batch_loss=1e+3, batch_index=849, batch_size=256]

Epoch 3/10:  86%|███████████▉  | 849/991 [3:36:23<37:04, 15.67s/batch, batch_loss=8.02, batch_index=850, batch_size=256]

Epoch 3/10:  86%|████████████  | 850/991 [3:36:23<36:49, 15.67s/batch, batch_loss=8.02, batch_index=850, batch_size=256]

Epoch 3/10:  86%|████████████  | 850/991 [3:36:37<36:49, 15.67s/batch, batch_loss=16.1, batch_index=851, batch_size=256]

Epoch 3/10:  86%|████████████  | 851/991 [3:36:37<35:51, 15.37s/batch, batch_loss=16.1, batch_index=851, batch_size=256]

Epoch 3/10:  86%|████████████  | 851/991 [3:36:55<35:51, 15.37s/batch, batch_loss=14.2, batch_index=852, batch_size=256]

Epoch 3/10:  86%|████████████  | 852/991 [3:36:55<37:04, 16.00s/batch, batch_loss=14.2, batch_index=852, batch_size=256]

Epoch 3/10:  86%|█████████▍ | 852/991 [3:37:11<37:04, 16.00s/batch, batch_loss=7.64e+3, batch_index=853, batch_size=256]

Epoch 3/10:  86%|█████████▍ | 853/991 [3:37:11<36:38, 15.93s/batch, batch_loss=7.64e+3, batch_index=853, batch_size=256]

Epoch 3/10:  86%|████████████  | 853/991 [3:37:27<36:38, 15.93s/batch, batch_loss=19.5, batch_index=854, batch_size=256]

Epoch 3/10:  86%|████████████  | 854/991 [3:37:27<36:36, 16.03s/batch, batch_loss=19.5, batch_index=854, batch_size=256]

Epoch 3/10:  86%|████████████  | 854/991 [3:37:43<36:36, 16.03s/batch, batch_loss=7.22, batch_index=855, batch_size=256]

Epoch 3/10:  86%|████████████  | 855/991 [3:37:43<36:05, 15.92s/batch, batch_loss=7.22, batch_index=855, batch_size=256]

Epoch 3/10:  86%|████████████  | 855/991 [3:37:57<36:05, 15.92s/batch, batch_loss=8.45, batch_index=856, batch_size=256]

Epoch 3/10:  86%|████████████  | 856/991 [3:37:57<34:48, 15.47s/batch, batch_loss=8.45, batch_index=856, batch_size=256]

Epoch 3/10:  86%|████████████  | 856/991 [3:38:12<34:48, 15.47s/batch, batch_loss=8.12, batch_index=857, batch_size=256]

Epoch 3/10:  86%|████████████  | 857/991 [3:38:12<34:33, 15.47s/batch, batch_loss=8.12, batch_index=857, batch_size=256]

Epoch 3/10:  86%|████████████  | 857/991 [3:38:27<34:33, 15.47s/batch, batch_loss=24.3, batch_index=858, batch_size=256]

Epoch 3/10:  87%|████████████  | 858/991 [3:38:27<33:57, 15.32s/batch, batch_loss=24.3, batch_index=858, batch_size=256]

Epoch 3/10:  87%|████████████  | 858/991 [3:38:42<33:57, 15.32s/batch, batch_loss=11.9, batch_index=859, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 859/991 [3:38:42<33:10, 15.08s/batch, batch_loss=11.9, batch_index=859, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 859/991 [3:38:58<33:10, 15.08s/batch, batch_loss=19.8, batch_index=860, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 860/991 [3:38:58<33:32, 15.36s/batch, batch_loss=19.8, batch_index=860, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 860/991 [3:39:16<33:32, 15.36s/batch, batch_loss=8.27, batch_index=861, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 861/991 [3:39:16<35:16, 16.28s/batch, batch_loss=8.27, batch_index=861, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 861/991 [3:39:32<35:16, 16.28s/batch, batch_loss=18.1, batch_index=862, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 862/991 [3:39:32<34:19, 15.97s/batch, batch_loss=18.1, batch_index=862, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 862/991 [3:39:47<34:19, 15.97s/batch, batch_loss=28.2, batch_index=863, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 863/991 [3:39:47<33:27, 15.68s/batch, batch_loss=28.2, batch_index=863, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 863/991 [3:40:02<33:27, 15.68s/batch, batch_loss=8.72, batch_index=864, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 864/991 [3:40:02<33:18, 15.74s/batch, batch_loss=8.72, batch_index=864, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 864/991 [3:40:18<33:18, 15.74s/batch, batch_loss=16.9, batch_index=865, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 865/991 [3:40:18<32:39, 15.55s/batch, batch_loss=16.9, batch_index=865, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 865/991 [3:40:32<32:39, 15.55s/batch, batch_loss=22.1, batch_index=866, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 866/991 [3:40:32<31:47, 15.26s/batch, batch_loss=22.1, batch_index=866, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 866/991 [3:40:48<31:47, 15.26s/batch, batch_loss=20.5, batch_index=867, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 867/991 [3:40:48<31:46, 15.38s/batch, batch_loss=20.5, batch_index=867, batch_size=256]

Epoch 3/10:  87%|████████████▏ | 867/991 [3:41:03<31:46, 15.38s/batch, batch_loss=19.3, batch_index=868, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 868/991 [3:41:03<31:29, 15.36s/batch, batch_loss=19.3, batch_index=868, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 868/991 [3:41:19<31:29, 15.36s/batch, batch_loss=10.4, batch_index=869, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 869/991 [3:41:19<31:20, 15.41s/batch, batch_loss=10.4, batch_index=869, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 869/991 [3:41:34<31:20, 15.41s/batch, batch_loss=12.5, batch_index=870, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 870/991 [3:41:34<31:00, 15.38s/batch, batch_loss=12.5, batch_index=870, batch_size=256]

Epoch 3/10:  88%|█████████████▏ | 870/991 [3:41:50<31:00, 15.38s/batch, batch_loss=8.3, batch_index=871, batch_size=256]

Epoch 3/10:  88%|█████████████▏ | 871/991 [3:41:50<30:57, 15.48s/batch, batch_loss=8.3, batch_index=871, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 871/991 [3:42:05<30:57, 15.48s/batch, batch_loss=17.9, batch_index=872, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 872/991 [3:42:05<30:28, 15.36s/batch, batch_loss=17.9, batch_index=872, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 872/991 [3:42:20<30:28, 15.36s/batch, batch_loss=14.3, batch_index=873, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 873/991 [3:42:20<30:12, 15.36s/batch, batch_loss=14.3, batch_index=873, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 873/991 [3:42:36<30:12, 15.36s/batch, batch_loss=9.03, batch_index=874, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 874/991 [3:42:36<30:08, 15.46s/batch, batch_loss=9.03, batch_index=874, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 874/991 [3:42:51<30:08, 15.46s/batch, batch_loss=11.6, batch_index=875, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 875/991 [3:42:51<29:36, 15.32s/batch, batch_loss=11.6, batch_index=875, batch_size=256]

Epoch 3/10:  88%|████████████▎ | 875/991 [3:43:06<29:36, 15.32s/batch, batch_loss=23.8, batch_index=876, batch_size=256]

Epoch 3/10:  88%|████████████▍ | 876/991 [3:43:06<29:04, 15.17s/batch, batch_loss=23.8, batch_index=876, batch_size=256]

Epoch 3/10:  88%|████████████▍ | 876/991 [3:43:23<29:04, 15.17s/batch, batch_loss=19.1, batch_index=877, batch_size=256]

Epoch 3/10:  88%|████████████▍ | 877/991 [3:43:23<30:11, 15.89s/batch, batch_loss=19.1, batch_index=877, batch_size=256]

Epoch 3/10:  88%|████████████▍ | 877/991 [3:43:38<30:11, 15.89s/batch, batch_loss=25.5, batch_index=878, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 878/991 [3:43:38<29:27, 15.64s/batch, batch_loss=25.5, batch_index=878, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 878/991 [3:43:53<29:27, 15.64s/batch, batch_loss=17.2, batch_index=879, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 879/991 [3:43:53<28:40, 15.36s/batch, batch_loss=17.2, batch_index=879, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 879/991 [3:44:08<28:40, 15.36s/batch, batch_loss=10.1, batch_index=880, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 880/991 [3:44:08<28:28, 15.39s/batch, batch_loss=10.1, batch_index=880, batch_size=256]

Epoch 3/10:  89%|█████████▊ | 880/991 [3:44:24<28:28, 15.39s/batch, batch_loss=5.12e+3, batch_index=881, batch_size=256]

Epoch 3/10:  89%|█████████▊ | 881/991 [3:44:24<28:13, 15.39s/batch, batch_loss=5.12e+3, batch_index=881, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 881/991 [3:44:39<28:13, 15.39s/batch, batch_loss=15.4, batch_index=882, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 882/991 [3:44:39<28:05, 15.47s/batch, batch_loss=15.4, batch_index=882, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 882/991 [3:44:55<28:05, 15.47s/batch, batch_loss=14.9, batch_index=883, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 883/991 [3:44:55<27:49, 15.46s/batch, batch_loss=14.9, batch_index=883, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 883/991 [3:45:10<27:49, 15.46s/batch, batch_loss=9.02, batch_index=884, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 884/991 [3:45:10<27:27, 15.40s/batch, batch_loss=9.02, batch_index=884, batch_size=256]

Epoch 3/10:  89%|████████████▍ | 884/991 [3:45:25<27:27, 15.40s/batch, batch_loss=12.9, batch_index=885, batch_size=256]

Epoch 3/10:  89%|████████████▌ | 885/991 [3:45:25<26:48, 15.18s/batch, batch_loss=12.9, batch_index=885, batch_size=256]

Epoch 3/10:  89%|████████████▌ | 885/991 [3:45:40<26:48, 15.18s/batch, batch_loss=15.6, batch_index=886, batch_size=256]

Epoch 3/10:  89%|████████████▌ | 886/991 [3:45:40<26:24, 15.09s/batch, batch_loss=15.6, batch_index=886, batch_size=256]

Epoch 3/10:  89%|█████████▊ | 886/991 [3:45:55<26:24, 15.09s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 3/10:  90%|█████████▊ | 887/991 [3:45:55<26:06, 15.06s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 887/991 [3:46:10<26:06, 15.06s/batch, batch_loss=15.4, batch_index=888, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 888/991 [3:46:10<25:59, 15.14s/batch, batch_loss=15.4, batch_index=888, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 888/991 [3:46:25<25:59, 15.14s/batch, batch_loss=18.7, batch_index=889, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 889/991 [3:46:25<25:43, 15.13s/batch, batch_loss=18.7, batch_index=889, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 889/991 [3:46:40<25:43, 15.13s/batch, batch_loss=12.4, batch_index=890, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 890/991 [3:46:40<25:16, 15.01s/batch, batch_loss=12.4, batch_index=890, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 890/991 [3:46:58<25:16, 15.01s/batch, batch_loss=14.2, batch_index=891, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 891/991 [3:46:58<26:29, 15.89s/batch, batch_loss=14.2, batch_index=891, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 891/991 [3:47:14<26:29, 15.89s/batch, batch_loss=17.1, batch_index=892, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 892/991 [3:47:14<26:30, 16.06s/batch, batch_loss=17.1, batch_index=892, batch_size=256]

Epoch 3/10:  90%|██████████▊ | 892/991 [3:47:30<26:30, 16.06s/batch, batch_loss=3.7e+3, batch_index=893, batch_size=256]

Epoch 3/10:  90%|██████████▊ | 893/991 [3:47:30<26:09, 16.01s/batch, batch_loss=3.7e+3, batch_index=893, batch_size=256]

Epoch 3/10:  90%|████████████▌ | 893/991 [3:47:46<26:09, 16.01s/batch, batch_loss=9.33, batch_index=894, batch_size=256]

Epoch 3/10:  90%|████████████▋ | 894/991 [3:47:46<25:46, 15.95s/batch, batch_loss=9.33, batch_index=894, batch_size=256]

Epoch 3/10:  90%|████████████▋ | 894/991 [3:48:02<25:46, 15.95s/batch, batch_loss=14.1, batch_index=895, batch_size=256]

Epoch 3/10:  90%|████████████▋ | 895/991 [3:48:02<25:21, 15.85s/batch, batch_loss=14.1, batch_index=895, batch_size=256]

Epoch 3/10:  90%|██████████████▍ | 895/991 [3:48:16<25:21, 15.85s/batch, batch_loss=11, batch_index=896, batch_size=256]

Epoch 3/10:  90%|██████████████▍ | 896/991 [3:48:16<24:38, 15.56s/batch, batch_loss=11, batch_index=896, batch_size=256]

Epoch 3/10:  90%|████████████▋ | 896/991 [3:48:31<24:38, 15.56s/batch, batch_loss=16.7, batch_index=897, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 897/991 [3:48:31<23:49, 15.21s/batch, batch_loss=16.7, batch_index=897, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 897/991 [3:48:46<23:49, 15.21s/batch, batch_loss=20.8, batch_index=898, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 898/991 [3:48:46<23:40, 15.28s/batch, batch_loss=20.8, batch_index=898, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 898/991 [3:49:02<23:40, 15.28s/batch, batch_loss=15.2, batch_index=899, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 899/991 [3:49:02<23:30, 15.33s/batch, batch_loss=15.2, batch_index=899, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 899/991 [3:49:17<23:30, 15.33s/batch, batch_loss=17.7, batch_index=900, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 900/991 [3:49:17<23:22, 15.41s/batch, batch_loss=17.7, batch_index=900, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 900/991 [3:49:33<23:22, 15.41s/batch, batch_loss=13.5, batch_index=901, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 901/991 [3:49:33<23:13, 15.49s/batch, batch_loss=13.5, batch_index=901, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 901/991 [3:49:52<23:13, 15.49s/batch, batch_loss=12.8, batch_index=902, batch_size=256]

Epoch 3/10:  91%|████████████▋ | 902/991 [3:49:52<24:19, 16.40s/batch, batch_loss=12.8, batch_index=902, batch_size=256]

Epoch 3/10:  91%|█████████████▋ | 902/991 [3:50:07<24:19, 16.40s/batch, batch_loss=5.6, batch_index=903, batch_size=256]

Epoch 3/10:  91%|█████████████▋ | 903/991 [3:50:07<23:39, 16.13s/batch, batch_loss=5.6, batch_index=903, batch_size=256]

Epoch 3/10:  91%|████████████▊ | 903/991 [3:50:23<23:39, 16.13s/batch, batch_loss=8.38, batch_index=904, batch_size=256]

Epoch 3/10:  91%|████████████▊ | 904/991 [3:50:23<23:08, 15.96s/batch, batch_loss=8.38, batch_index=904, batch_size=256]

Epoch 3/10:  91%|████████████▊ | 904/991 [3:50:37<23:08, 15.96s/batch, batch_loss=24.9, batch_index=905, batch_size=256]

Epoch 3/10:  91%|████████████▊ | 905/991 [3:50:37<22:24, 15.63s/batch, batch_loss=24.9, batch_index=905, batch_size=256]

Epoch 3/10:  91%|████████████▊ | 905/991 [3:50:53<22:24, 15.63s/batch, batch_loss=16.4, batch_index=906, batch_size=256]

Epoch 3/10:  91%|████████████▊ | 906/991 [3:50:53<22:03, 15.57s/batch, batch_loss=16.4, batch_index=906, batch_size=256]

Epoch 3/10:  91%|████████████▊ | 906/991 [3:51:08<22:03, 15.57s/batch, batch_loss=16.5, batch_index=907, batch_size=256]

Epoch 3/10:  92%|████████████▊ | 907/991 [3:51:08<21:38, 15.46s/batch, batch_loss=16.5, batch_index=907, batch_size=256]

Epoch 3/10:  92%|████████████▊ | 907/991 [3:51:24<21:38, 15.46s/batch, batch_loss=13.3, batch_index=908, batch_size=256]

Epoch 3/10:  92%|████████████▊ | 908/991 [3:51:24<21:22, 15.45s/batch, batch_loss=13.3, batch_index=908, batch_size=256]

Epoch 3/10:  92%|████████████▊ | 908/991 [3:51:38<21:22, 15.45s/batch, batch_loss=5.47, batch_index=909, batch_size=256]

Epoch 3/10:  92%|████████████▊ | 909/991 [3:51:38<20:45, 15.18s/batch, batch_loss=5.47, batch_index=909, batch_size=256]

Epoch 3/10:  92%|█████████████▊ | 909/991 [3:51:53<20:45, 15.18s/batch, batch_loss=683, batch_index=910, batch_size=256]

Epoch 3/10:  92%|█████████████▊ | 910/991 [3:51:53<20:24, 15.12s/batch, batch_loss=683, batch_index=910, batch_size=256]

Epoch 3/10:  92%|██████████ | 910/991 [3:52:08<20:24, 15.12s/batch, batch_loss=1.02e+3, batch_index=911, batch_size=256]

Epoch 3/10:  92%|██████████ | 911/991 [3:52:08<19:55, 14.94s/batch, batch_loss=1.02e+3, batch_index=911, batch_size=256]

Epoch 3/10:  92%|██████████████▋ | 911/991 [3:52:23<19:55, 14.94s/batch, batch_loss=24, batch_index=912, batch_size=256]

Epoch 3/10:  92%|██████████████▋ | 912/991 [3:52:23<19:47, 15.03s/batch, batch_loss=24, batch_index=912, batch_size=256]

Epoch 3/10:  92%|████████████▉ | 912/991 [3:52:38<19:47, 15.03s/batch, batch_loss=20.7, batch_index=913, batch_size=256]

Epoch 3/10:  92%|████████████▉ | 913/991 [3:52:38<19:34, 15.06s/batch, batch_loss=20.7, batch_index=913, batch_size=256]

Epoch 3/10:  92%|████████████▉ | 913/991 [3:52:53<19:34, 15.06s/batch, batch_loss=18.8, batch_index=914, batch_size=256]

Epoch 3/10:  92%|████████████▉ | 914/991 [3:52:53<19:14, 14.99s/batch, batch_loss=18.8, batch_index=914, batch_size=256]

Epoch 3/10:  92%|████████████▉ | 914/991 [3:53:08<19:14, 14.99s/batch, batch_loss=18.5, batch_index=915, batch_size=256]

Epoch 3/10:  92%|████████████▉ | 915/991 [3:53:08<18:58, 14.98s/batch, batch_loss=18.5, batch_index=915, batch_size=256]

Epoch 3/10:  92%|██████████████▊ | 915/991 [3:53:23<18:58, 14.98s/batch, batch_loss=15, batch_index=916, batch_size=256]

Epoch 3/10:  92%|██████████████▊ | 916/991 [3:53:23<18:47, 15.04s/batch, batch_loss=15, batch_index=916, batch_size=256]

Epoch 3/10:  92%|████████████▉ | 916/991 [3:53:38<18:47, 15.04s/batch, batch_loss=7.72, batch_index=917, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 917/991 [3:53:38<18:29, 14.99s/batch, batch_loss=7.72, batch_index=917, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 917/991 [3:53:52<18:29, 14.99s/batch, batch_loss=12.4, batch_index=918, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 918/991 [3:53:52<17:56, 14.74s/batch, batch_loss=12.4, batch_index=918, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 918/991 [3:54:07<17:56, 14.74s/batch, batch_loss=12.8, batch_index=919, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 919/991 [3:54:07<17:50, 14.87s/batch, batch_loss=12.8, batch_index=919, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 919/991 [3:54:22<17:50, 14.87s/batch, batch_loss=13.2, batch_index=920, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 920/991 [3:54:22<17:39, 14.93s/batch, batch_loss=13.2, batch_index=920, batch_size=256]

Epoch 3/10:  93%|████████████▉ | 920/991 [3:54:37<17:39, 14.93s/batch, batch_loss=17.6, batch_index=921, batch_size=256]

Epoch 3/10:  93%|█████████████ | 921/991 [3:54:37<17:31, 15.02s/batch, batch_loss=17.6, batch_index=921, batch_size=256]

Epoch 3/10:  93%|█████████████ | 921/991 [3:54:54<17:31, 15.02s/batch, batch_loss=23.9, batch_index=922, batch_size=256]

Epoch 3/10:  93%|█████████████ | 922/991 [3:54:54<17:39, 15.36s/batch, batch_loss=23.9, batch_index=922, batch_size=256]

Epoch 3/10:  93%|█████████████ | 922/991 [3:55:12<17:39, 15.36s/batch, batch_loss=6.18, batch_index=923, batch_size=256]

Epoch 3/10:  93%|█████████████ | 923/991 [3:55:12<18:24, 16.24s/batch, batch_loss=6.18, batch_index=923, batch_size=256]

Epoch 3/10:  93%|█████████████ | 923/991 [3:55:27<18:24, 16.24s/batch, batch_loss=11.2, batch_index=924, batch_size=256]

Epoch 3/10:  93%|█████████████ | 924/991 [3:55:27<17:41, 15.84s/batch, batch_loss=11.2, batch_index=924, batch_size=256]

Epoch 3/10:  93%|█████████████ | 924/991 [3:55:43<17:41, 15.84s/batch, batch_loss=10.1, batch_index=925, batch_size=256]

Epoch 3/10:  93%|█████████████ | 925/991 [3:55:43<17:35, 15.99s/batch, batch_loss=10.1, batch_index=925, batch_size=256]

Epoch 3/10:  93%|█████████████ | 925/991 [3:55:59<17:35, 15.99s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 3/10:  93%|█████████████ | 926/991 [3:55:59<17:14, 15.91s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 3/10:  93%|█████████████ | 926/991 [3:56:14<17:14, 15.91s/batch, batch_loss=6.36, batch_index=927, batch_size=256]

Epoch 3/10:  94%|█████████████ | 927/991 [3:56:14<16:40, 15.63s/batch, batch_loss=6.36, batch_index=927, batch_size=256]

Epoch 3/10:  94%|██████████████ | 927/991 [3:56:29<16:40, 15.63s/batch, batch_loss=853, batch_index=928, batch_size=256]

Epoch 3/10:  94%|██████████████ | 928/991 [3:56:29<16:13, 15.46s/batch, batch_loss=853, batch_index=928, batch_size=256]

Epoch 3/10:  94%|█████████████ | 928/991 [3:56:44<16:13, 15.46s/batch, batch_loss=10.4, batch_index=929, batch_size=256]

Epoch 3/10:  94%|█████████████ | 929/991 [3:56:44<15:48, 15.30s/batch, batch_loss=10.4, batch_index=929, batch_size=256]

Epoch 3/10:  94%|██████████████ | 929/991 [3:56:59<15:48, 15.30s/batch, batch_loss=8.4, batch_index=930, batch_size=256]

Epoch 3/10:  94%|██████████████ | 930/991 [3:56:59<15:32, 15.28s/batch, batch_loss=8.4, batch_index=930, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 930/991 [3:57:17<15:32, 15.28s/batch, batch_loss=12.3, batch_index=931, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 931/991 [3:57:17<16:06, 16.12s/batch, batch_loss=12.3, batch_index=931, batch_size=256]

Epoch 3/10:  94%|███████████████ | 931/991 [3:57:32<16:06, 16.12s/batch, batch_loss=12, batch_index=932, batch_size=256]

Epoch 3/10:  94%|███████████████ | 932/991 [3:57:32<15:24, 15.67s/batch, batch_loss=12, batch_index=932, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 932/991 [3:57:47<15:24, 15.67s/batch, batch_loss=11.4, batch_index=933, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 933/991 [3:57:47<14:54, 15.43s/batch, batch_loss=11.4, batch_index=933, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 933/991 [3:58:02<14:54, 15.43s/batch, batch_loss=1.88, batch_index=934, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 934/991 [3:58:02<14:40, 15.44s/batch, batch_loss=1.88, batch_index=934, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 934/991 [3:58:17<14:40, 15.44s/batch, batch_loss=1.92, batch_index=935, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 935/991 [3:58:17<14:20, 15.36s/batch, batch_loss=1.92, batch_index=935, batch_size=256]

Epoch 3/10:  94%|██████████████▏| 935/991 [3:58:33<14:20, 15.36s/batch, batch_loss=164, batch_index=936, batch_size=256]

Epoch 3/10:  94%|██████████████▏| 936/991 [3:58:33<14:14, 15.54s/batch, batch_loss=164, batch_index=936, batch_size=256]

Epoch 3/10:  94%|█████████████▏| 936/991 [3:58:48<14:14, 15.54s/batch, batch_loss=36.5, batch_index=937, batch_size=256]

Epoch 3/10:  95%|█████████████▏| 937/991 [3:58:48<13:47, 15.32s/batch, batch_loss=36.5, batch_index=937, batch_size=256]

Epoch 3/10:  95%|█████████████▏| 937/991 [3:59:03<13:47, 15.32s/batch, batch_loss=9.67, batch_index=938, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 938/991 [3:59:03<13:30, 15.29s/batch, batch_loss=9.67, batch_index=938, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 938/991 [3:59:21<13:30, 15.29s/batch, batch_loss=8.45, batch_index=939, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 939/991 [3:59:21<13:54, 16.05s/batch, batch_loss=8.45, batch_index=939, batch_size=256]

Epoch 3/10:  95%|██████████████▏| 939/991 [3:59:36<13:54, 16.05s/batch, batch_loss=430, batch_index=940, batch_size=256]

Epoch 3/10:  95%|██████████████▏| 940/991 [3:59:36<13:21, 15.71s/batch, batch_loss=430, batch_index=940, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 940/991 [3:59:51<13:21, 15.71s/batch, batch_loss=17.9, batch_index=941, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 941/991 [3:59:51<12:53, 15.47s/batch, batch_loss=17.9, batch_index=941, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 941/991 [4:00:07<12:53, 15.47s/batch, batch_loss=13.9, batch_index=942, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 942/991 [4:00:07<12:43, 15.57s/batch, batch_loss=13.9, batch_index=942, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 942/991 [4:00:23<12:43, 15.57s/batch, batch_loss=10.8, batch_index=943, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 943/991 [4:00:23<12:32, 15.68s/batch, batch_loss=10.8, batch_index=943, batch_size=256]

Epoch 3/10:  95%|███████████████▏| 943/991 [4:00:39<12:32, 15.68s/batch, batch_loss=15, batch_index=944, batch_size=256]

Epoch 3/10:  95%|███████████████▏| 944/991 [4:00:39<12:28, 15.92s/batch, batch_loss=15, batch_index=944, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 944/991 [4:00:55<12:28, 15.92s/batch, batch_loss=2.01, batch_index=945, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 945/991 [4:00:55<12:08, 15.83s/batch, batch_loss=2.01, batch_index=945, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 945/991 [4:01:10<12:08, 15.83s/batch, batch_loss=11.2, batch_index=946, batch_size=256]

Epoch 3/10:  95%|█████████████▎| 946/991 [4:01:10<11:50, 15.78s/batch, batch_loss=11.2, batch_index=946, batch_size=256]

Epoch 3/10:  95%|███████████████▎| 946/991 [4:01:27<11:50, 15.78s/batch, batch_loss=12, batch_index=947, batch_size=256]

Epoch 3/10:  96%|███████████████▎| 947/991 [4:01:27<11:42, 15.96s/batch, batch_loss=12, batch_index=947, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 947/991 [4:01:43<11:42, 15.96s/batch, batch_loss=10.2, batch_index=948, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 948/991 [4:01:43<11:34, 16.16s/batch, batch_loss=10.2, batch_index=948, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 948/991 [4:02:00<11:34, 16.16s/batch, batch_loss=5.97, batch_index=949, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 949/991 [4:02:00<11:19, 16.18s/batch, batch_loss=5.97, batch_index=949, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 949/991 [4:02:16<11:19, 16.18s/batch, batch_loss=7.83, batch_index=950, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 950/991 [4:02:16<11:03, 16.19s/batch, batch_loss=7.83, batch_index=950, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 950/991 [4:02:32<11:03, 16.19s/batch, batch_loss=16.2, batch_index=951, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 951/991 [4:02:32<10:50, 16.26s/batch, batch_loss=16.2, batch_index=951, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 951/991 [4:02:47<10:50, 16.26s/batch, batch_loss=15.6, batch_index=952, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 952/991 [4:02:47<10:20, 15.90s/batch, batch_loss=15.6, batch_index=952, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 952/991 [4:03:06<10:20, 15.90s/batch, batch_loss=7.33, batch_index=953, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 953/991 [4:03:06<10:32, 16.66s/batch, batch_loss=7.33, batch_index=953, batch_size=256]

Epoch 3/10:  96%|██████████████▍| 953/991 [4:03:20<10:32, 16.66s/batch, batch_loss=331, batch_index=954, batch_size=256]

Epoch 3/10:  96%|██████████████▍| 954/991 [4:03:20<09:54, 16.08s/batch, batch_loss=331, batch_index=954, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 954/991 [4:03:36<09:54, 16.08s/batch, batch_loss=14.3, batch_index=955, batch_size=256]

Epoch 3/10:  96%|█████████████▍| 955/991 [4:03:36<09:29, 15.83s/batch, batch_loss=14.3, batch_index=955, batch_size=256]

Epoch 3/10:  96%|███████████████▍| 955/991 [4:03:51<09:29, 15.83s/batch, batch_loss=16, batch_index=956, batch_size=256]

Epoch 3/10:  96%|███████████████▍| 956/991 [4:03:51<09:11, 15.75s/batch, batch_loss=16, batch_index=956, batch_size=256]

Epoch 3/10:  96%|█████████████▌| 956/991 [4:04:04<09:11, 15.75s/batch, batch_loss=15.2, batch_index=957, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 957/991 [4:04:04<08:27, 14.92s/batch, batch_loss=15.2, batch_index=957, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 957/991 [4:04:18<08:27, 14.92s/batch, batch_loss=13.8, batch_index=958, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 958/991 [4:04:18<07:58, 14.50s/batch, batch_loss=13.8, batch_index=958, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 958/991 [4:04:31<07:58, 14.50s/batch, batch_loss=7.24, batch_index=959, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 959/991 [4:04:31<07:36, 14.26s/batch, batch_loss=7.24, batch_index=959, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 959/991 [4:04:50<07:36, 14.26s/batch, batch_loss=10.7, batch_index=960, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 960/991 [4:04:50<08:00, 15.48s/batch, batch_loss=10.7, batch_index=960, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 960/991 [4:05:06<08:00, 15.48s/batch, batch_loss=14.3, batch_index=961, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 961/991 [4:05:06<07:47, 15.59s/batch, batch_loss=14.3, batch_index=961, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 961/991 [4:05:22<07:47, 15.59s/batch, batch_loss=5.45, batch_index=962, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 962/991 [4:05:22<07:35, 15.69s/batch, batch_loss=5.45, batch_index=962, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 962/991 [4:05:37<07:35, 15.69s/batch, batch_loss=6.83, batch_index=963, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 963/991 [4:05:37<07:19, 15.71s/batch, batch_loss=6.83, batch_index=963, batch_size=256]

Epoch 3/10:  97%|██████████▋| 963/991 [4:05:52<07:19, 15.71s/batch, batch_loss=9.44e+3, batch_index=964, batch_size=256]

Epoch 3/10:  97%|██████████▋| 964/991 [4:05:52<06:52, 15.30s/batch, batch_loss=9.44e+3, batch_index=964, batch_size=256]

Epoch 3/10:  97%|█████████████▌| 964/991 [4:06:08<06:52, 15.30s/batch, batch_loss=18.6, batch_index=965, batch_size=256]

Epoch 3/10:  97%|█████████████▋| 965/991 [4:06:08<06:43, 15.51s/batch, batch_loss=18.6, batch_index=965, batch_size=256]

Epoch 3/10:  97%|█████████████▋| 965/991 [4:06:22<06:43, 15.51s/batch, batch_loss=15.6, batch_index=966, batch_size=256]

Epoch 3/10:  97%|█████████████▋| 966/991 [4:06:22<06:22, 15.29s/batch, batch_loss=15.6, batch_index=966, batch_size=256]

Epoch 3/10:  97%|██████████▋| 966/991 [4:06:38<06:22, 15.29s/batch, batch_loss=2.41e+4, batch_index=967, batch_size=256]

Epoch 3/10:  98%|██████████▋| 967/991 [4:06:38<06:11, 15.48s/batch, batch_loss=2.41e+4, batch_index=967, batch_size=256]

Epoch 3/10:  98%|██████████████▋| 967/991 [4:06:53<06:11, 15.48s/batch, batch_loss=410, batch_index=968, batch_size=256]

Epoch 3/10:  98%|██████████████▋| 968/991 [4:06:53<05:49, 15.19s/batch, batch_loss=410, batch_index=968, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 968/991 [4:07:08<05:49, 15.19s/batch, batch_loss=19.3, batch_index=969, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 969/991 [4:07:08<05:34, 15.22s/batch, batch_loss=19.3, batch_index=969, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 969/991 [4:07:27<05:34, 15.22s/batch, batch_loss=1.19, batch_index=970, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 970/991 [4:07:27<05:39, 16.17s/batch, batch_loss=1.19, batch_index=970, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 970/991 [4:07:41<05:39, 16.17s/batch, batch_loss=10.2, batch_index=971, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 971/991 [4:07:41<05:13, 15.70s/batch, batch_loss=10.2, batch_index=971, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 971/991 [4:07:57<05:13, 15.70s/batch, batch_loss=27.2, batch_index=972, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 972/991 [4:07:57<04:57, 15.66s/batch, batch_loss=27.2, batch_index=972, batch_size=256]

Epoch 3/10:  98%|███████████████▋| 972/991 [4:08:11<04:57, 15.66s/batch, batch_loss=19, batch_index=973, batch_size=256]

Epoch 3/10:  98%|███████████████▋| 973/991 [4:08:11<04:33, 15.22s/batch, batch_loss=19, batch_index=973, batch_size=256]

Epoch 3/10:  98%|█████████████▋| 973/991 [4:08:26<04:33, 15.22s/batch, batch_loss=17.9, batch_index=974, batch_size=256]

Epoch 3/10:  98%|█████████████▊| 974/991 [4:08:26<04:17, 15.12s/batch, batch_loss=17.9, batch_index=974, batch_size=256]

Epoch 3/10:  98%|█████████████▊| 974/991 [4:08:41<04:17, 15.12s/batch, batch_loss=9.97, batch_index=975, batch_size=256]

Epoch 3/10:  98%|█████████████▊| 975/991 [4:08:41<04:01, 15.12s/batch, batch_loss=9.97, batch_index=975, batch_size=256]

Epoch 3/10:  98%|█████████████▊| 975/991 [4:08:58<04:01, 15.12s/batch, batch_loss=29.6, batch_index=976, batch_size=256]

Epoch 3/10:  98%|█████████████▊| 976/991 [4:08:58<03:54, 15.64s/batch, batch_loss=29.6, batch_index=976, batch_size=256]

Epoch 3/10:  98%|█████████████▊| 976/991 [4:09:11<03:54, 15.64s/batch, batch_loss=2.07, batch_index=977, batch_size=256]

Epoch 3/10:  99%|█████████████▊| 977/991 [4:09:11<03:27, 14.83s/batch, batch_loss=2.07, batch_index=977, batch_size=256]

Epoch 3/10:  99%|█████████████▊| 977/991 [4:09:25<03:27, 14.83s/batch, batch_loss=1.33, batch_index=978, batch_size=256]

Epoch 3/10:  99%|█████████████▊| 978/991 [4:09:25<03:10, 14.64s/batch, batch_loss=1.33, batch_index=978, batch_size=256]

Epoch 3/10:  99%|████████████▊| 978/991 [4:09:38<03:10, 14.64s/batch, batch_loss=0.749, batch_index=979, batch_size=256]

Epoch 3/10:  99%|████████████▊| 979/991 [4:09:38<02:51, 14.26s/batch, batch_loss=0.749, batch_index=979, batch_size=256]

Epoch 3/10:  99%|████████████▊| 979/991 [4:09:52<02:51, 14.26s/batch, batch_loss=0.277, batch_index=980, batch_size=256]

Epoch 3/10:  99%|████████████▊| 980/991 [4:09:52<02:33, 13.96s/batch, batch_loss=0.277, batch_index=980, batch_size=256]

Epoch 3/10:  99%|███████████▊| 980/991 [4:10:04<02:33, 13.96s/batch, batch_loss=0.0892, batch_index=981, batch_size=256]

Epoch 3/10:  99%|███████████▉| 981/991 [4:10:04<02:16, 13.64s/batch, batch_loss=0.0892, batch_index=981, batch_size=256]

Epoch 3/10:  99%|████████████▊| 981/991 [4:10:18<02:16, 13.64s/batch, batch_loss=0.025, batch_index=982, batch_size=256]

Epoch 3/10:  99%|████████████▉| 982/991 [4:10:18<02:02, 13.61s/batch, batch_loss=0.025, batch_index=982, batch_size=256]

Epoch 3/10:  99%|███████████▉| 982/991 [4:10:31<02:02, 13.61s/batch, batch_loss=0.0144, batch_index=983, batch_size=256]

Epoch 3/10:  99%|███████████▉| 983/991 [4:10:31<01:47, 13.44s/batch, batch_loss=0.0144, batch_index=983, batch_size=256]

Epoch 3/10:  99%|███████████▉| 983/991 [4:10:44<01:47, 13.44s/batch, batch_loss=0.0281, batch_index=984, batch_size=256]

Epoch 3/10:  99%|███████████▉| 984/991 [4:10:44<01:33, 13.42s/batch, batch_loss=0.0281, batch_index=984, batch_size=256]

Epoch 3/10:  99%|███████████▉| 984/991 [4:10:58<01:33, 13.42s/batch, batch_loss=0.0543, batch_index=985, batch_size=256]

Epoch 3/10:  99%|███████████▉| 985/991 [4:10:58<01:21, 13.59s/batch, batch_loss=0.0543, batch_index=985, batch_size=256]

Epoch 3/10:  99%|███████████▉| 985/991 [4:11:14<01:21, 13.59s/batch, batch_loss=0.0842, batch_index=986, batch_size=256]

Epoch 3/10:  99%|███████████▉| 986/991 [4:11:14<01:11, 14.28s/batch, batch_loss=0.0842, batch_index=986, batch_size=256]

Epoch 3/10:  99%|████████████▉| 986/991 [4:11:28<01:11, 14.28s/batch, batch_loss=0.111, batch_index=987, batch_size=256]

Epoch 3/10: 100%|████████████▉| 987/991 [4:11:28<00:56, 14.09s/batch, batch_loss=0.111, batch_index=987, batch_size=256]

Epoch 3/10: 100%|████████████▉| 987/991 [4:11:42<00:56, 14.09s/batch, batch_loss=0.131, batch_index=988, batch_size=256]

Epoch 3/10: 100%|████████████▉| 988/991 [4:11:42<00:42, 14.10s/batch, batch_loss=0.131, batch_index=988, batch_size=256]

Epoch 3/10: 100%|████████████▉| 988/991 [4:11:56<00:42, 14.10s/batch, batch_loss=0.141, batch_index=989, batch_size=256]

Epoch 3/10: 100%|████████████▉| 989/991 [4:11:56<00:28, 14.12s/batch, batch_loss=0.141, batch_index=989, batch_size=256]

Epoch 3/10: 100%|████████████▉| 989/991 [4:12:10<00:28, 14.12s/batch, batch_loss=0.142, batch_index=990, batch_size=256]

Epoch 3/10: 100%|████████████▉| 990/991 [4:12:10<00:13, 13.88s/batch, batch_loss=0.142, batch_index=990, batch_size=256]

Epoch 3/10: 100%|████████████▉| 990/991 [4:12:22<00:13, 13.88s/batch, batch_loss=0.135, batch_index=991, batch_size=220]

Epoch 3/10: 100%|█████████████| 991/991 [4:12:22<00:00, 13.36s/batch, batch_loss=0.135, batch_index=991, batch_size=220]

Epoch 3/10: 100%|█████████████| 991/991 [4:12:22<00:00, 15.28s/batch, batch_loss=0.135, batch_index=991, batch_size=220]




Epoch 3, Loss: 987.3113


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:14<?, ?batch/s, batch_loss=16.9, batch_index=1, ba

Validation:   0%| | 1/743 [00:14<2:58:30, 14.43s/batch, batch_loss=16.9, batch_i

Validation:   0%| | 1/743 [00:29<2:58:30, 14.43s/batch, batch_loss=16.5, batch_i

Validation:   0%| | 2/743 [00:29<3:03:55, 14.89s/batch, batch_loss=16.5, batch_i

Validation:   0%| | 2/743 [00:44<3:03:55, 14.89s/batch, batch_loss=15.2, batch_i

Validation:   0%| | 3/743 [00:44<3:04:57, 15.00s/batch, batch_loss=15.2, batch_i

Validation:   0%| | 3/743 [00:58<3:04:57, 15.00s/batch, batch_loss=9.23, batch_i

Validation:   1%| | 4/743 [00:58<3:00:13, 14.63s/batch, batch_loss=9.23, batch_i

Validation:   1%| | 4/743 [01:13<3:00:13, 14.63s/batch, batch_loss=19.7, batch_i

Validation:   1%| | 5/743 [01:13<3:01:14, 14.74s/batch, batch_loss=19.7, batch_i

Validation:   1%| | 5/743 [01:29<3:01:14, 14.74s/batch, batch_loss=20.3, batch_i

Validation:   1%| | 6/743 [01:29<3:04:43, 15.04s/batch, batch_loss=20.3, batch_i

Validation:   1%| | 6/743 [01:44<3:04:43, 15.04s/batch, batch_loss=557, batch_in

Validation:   1%| | 7/743 [01:44<3:03:48, 14.98s/batch, batch_loss=557, batch_in

Validation:   1%| | 7/743 [01:59<3:03:48, 14.98s/batch, batch_loss=17.7, batch_i

Validation:   1%| | 8/743 [01:59<3:03:17, 14.96s/batch, batch_loss=17.7, batch_i

Validation:   1%| | 8/743 [02:14<3:03:17, 14.96s/batch, batch_loss=13.9, batch_i

Validation:   1%| | 9/743 [02:14<3:02:38, 14.93s/batch, batch_loss=13.9, batch_i

Validation:   1%| | 9/743 [02:28<3:02:38, 14.93s/batch, batch_loss=12, batch_ind

Validation:   1%| | 10/743 [02:28<3:02:21, 14.93s/batch, batch_loss=12, batch_in

Validation:   1%| | 10/743 [02:43<3:02:21, 14.93s/batch, batch_loss=9.79, batch_

Validation:   1%| | 11/743 [02:43<2:59:41, 14.73s/batch, batch_loss=9.79, batch_

Validation:   1%| | 11/743 [02:59<2:59:41, 14.73s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [02:59<3:05:47, 15.25s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:14<3:05:47, 15.25s/batch, batch_loss=13.7, batch_

Validation:   2%| | 13/743 [03:14<3:04:46, 15.19s/batch, batch_loss=13.7, batch_

Validation:   2%| | 13/743 [03:29<3:04:46, 15.19s/batch, batch_loss=9.55, batch_

Validation:   2%| | 14/743 [03:29<3:01:48, 14.96s/batch, batch_loss=9.55, batch_

Validation:   2%| | 14/743 [03:43<3:01:48, 14.96s/batch, batch_loss=16.6, batch_

Validation:   2%| | 15/743 [03:43<3:00:12, 14.85s/batch, batch_loss=16.6, batch_

Validation:   2%| | 15/743 [03:58<3:00:12, 14.85s/batch, batch_loss=13.3, batch_

Validation:   2%| | 16/743 [03:58<2:59:48, 14.84s/batch, batch_loss=13.3, batch_

Validation:   2%| | 16/743 [04:14<2:59:48, 14.84s/batch, batch_loss=11.8, batch_

Validation:   2%| | 17/743 [04:14<3:02:33, 15.09s/batch, batch_loss=11.8, batch_

Validation:   2%| | 17/743 [04:29<3:02:33, 15.09s/batch, batch_loss=4.52e+3, bat

Validation:   2%| | 18/743 [04:29<3:01:19, 15.01s/batch, batch_loss=4.52e+3, bat

Validation:   2%| | 18/743 [04:43<3:01:19, 15.01s/batch, batch_loss=10.4, batch_

Validation:   3%| | 19/743 [04:43<2:59:49, 14.90s/batch, batch_loss=10.4, batch_

Validation:   3%| | 19/743 [04:58<2:59:49, 14.90s/batch, batch_loss=14.1, batch_

Validation:   3%| | 20/743 [04:58<2:58:04, 14.78s/batch, batch_loss=14.1, batch_

Validation:   3%| | 20/743 [05:11<2:58:04, 14.78s/batch, batch_loss=964, batch_i

Validation:   3%| | 21/743 [05:11<2:53:33, 14.42s/batch, batch_loss=964, batch_i

Validation:   3%| | 21/743 [05:27<2:53:33, 14.42s/batch, batch_loss=13.6, batch_

Validation:   3%| | 22/743 [05:27<2:56:22, 14.68s/batch, batch_loss=13.6, batch_

Validation:   3%| | 22/743 [05:42<2:56:22, 14.68s/batch, batch_loss=5.88, batch_

Validation:   3%| | 23/743 [05:42<2:59:34, 14.96s/batch, batch_loss=5.88, batch_

Validation:   3%| | 23/743 [05:57<2:59:34, 14.96s/batch, batch_loss=15.6, batch_

Validation:   3%| | 24/743 [05:57<2:57:27, 14.81s/batch, batch_loss=15.6, batch_

Validation:   3%| | 24/743 [06:12<2:57:27, 14.81s/batch, batch_loss=13.9, batch_

Validation:   3%| | 25/743 [06:12<2:57:34, 14.84s/batch, batch_loss=13.9, batch_

Validation:   3%| | 25/743 [06:26<2:57:34, 14.84s/batch, batch_loss=19.9, batch_

Validation:   3%| | 26/743 [06:26<2:56:27, 14.77s/batch, batch_loss=19.9, batch_

Validation:   3%| | 26/743 [06:41<2:56:27, 14.77s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [06:41<2:58:14, 14.94s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [06:58<2:58:14, 14.94s/batch, batch_loss=16.2, batch_

Validation:   4%| | 28/743 [06:58<3:04:51, 15.51s/batch, batch_loss=16.2, batch_

Validation:   4%| | 28/743 [07:14<3:04:51, 15.51s/batch, batch_loss=13.8, batch_

Validation:   4%| | 29/743 [07:14<3:03:21, 15.41s/batch, batch_loss=13.8, batch_

Validation:   4%| | 29/743 [07:29<3:03:21, 15.41s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:29<3:02:05, 15.32s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:44<3:02:05, 15.32s/batch, batch_loss=19.3, batch_

Validation:   4%| | 31/743 [07:44<3:03:11, 15.44s/batch, batch_loss=19.3, batch_

Validation:   4%| | 31/743 [08:00<3:03:11, 15.44s/batch, batch_loss=12.7, batch_

Validation:   4%| | 32/743 [08:00<3:03:58, 15.52s/batch, batch_loss=12.7, batch_

Validation:   4%| | 32/743 [08:16<3:03:58, 15.52s/batch, batch_loss=16, batch_in

Validation:   4%| | 33/743 [08:16<3:04:13, 15.57s/batch, batch_loss=16, batch_in

Validation:   4%| | 33/743 [08:34<3:04:13, 15.57s/batch, batch_loss=17.2, batch_

Validation:   5%| | 34/743 [08:34<3:13:21, 16.36s/batch, batch_loss=17.2, batch_

Validation:   5%| | 34/743 [08:49<3:13:21, 16.36s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [08:49<3:09:15, 16.04s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [09:03<3:09:15, 16.04s/batch, batch_loss=13.3, batch_

Validation:   5%| | 36/743 [09:03<3:02:20, 15.47s/batch, batch_loss=13.3, batch_

Validation:   5%| | 36/743 [09:19<3:02:20, 15.47s/batch, batch_loss=162, batch_i

Validation:   5%| | 37/743 [09:19<3:02:06, 15.48s/batch, batch_loss=162, batch_i

Validation:   5%| | 37/743 [09:34<3:02:06, 15.48s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:34<3:00:54, 15.40s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:49<3:00:54, 15.40s/batch, batch_loss=12.6, batch_

Validation:   5%| | 39/743 [09:49<2:58:06, 15.18s/batch, batch_loss=12.6, batch_

Validation:   5%| | 39/743 [10:04<2:58:06, 15.18s/batch, batch_loss=18.5, batch_

Validation:   5%| | 40/743 [10:04<2:56:23, 15.05s/batch, batch_loss=18.5, batch_

Validation:   5%| | 40/743 [10:19<2:56:23, 15.05s/batch, batch_loss=13.3, batch_

Validation:   6%| | 41/743 [10:19<2:59:08, 15.31s/batch, batch_loss=13.3, batch_

Validation:   6%| | 41/743 [10:34<2:59:08, 15.31s/batch, batch_loss=15, batch_in

Validation:   6%| | 42/743 [10:34<2:55:21, 15.01s/batch, batch_loss=15, batch_in

Validation:   6%| | 42/743 [10:48<2:55:21, 15.01s/batch, batch_loss=10.3, batch_

Validation:   6%| | 43/743 [10:48<2:53:52, 14.90s/batch, batch_loss=10.3, batch_

Validation:   6%| | 43/743 [11:03<2:53:52, 14.90s/batch, batch_loss=14.4, batch_

Validation:   6%| | 44/743 [11:03<2:52:50, 14.84s/batch, batch_loss=14.4, batch_

Validation:   6%| | 44/743 [11:17<2:52:50, 14.84s/batch, batch_loss=17.8, batch_

Validation:   6%| | 45/743 [11:17<2:49:46, 14.59s/batch, batch_loss=17.8, batch_

Validation:   6%| | 45/743 [11:32<2:49:46, 14.59s/batch, batch_loss=8.26, batch_

Validation:   6%| | 46/743 [11:32<2:49:28, 14.59s/batch, batch_loss=8.26, batch_

Validation:   6%| | 46/743 [11:46<2:49:28, 14.59s/batch, batch_loss=16.1, batch_

Validation:   6%| | 47/743 [11:46<2:48:13, 14.50s/batch, batch_loss=16.1, batch_

Validation:   6%| | 47/743 [12:01<2:48:13, 14.50s/batch, batch_loss=17.1, batch_

Validation:   6%| | 48/743 [12:01<2:48:34, 14.55s/batch, batch_loss=17.1, batch_

Validation:   6%| | 48/743 [12:17<2:48:34, 14.55s/batch, batch_loss=19.2, batch_

Validation:   7%| | 49/743 [12:17<2:56:06, 15.23s/batch, batch_loss=19.2, batch_

Validation:   7%| | 49/743 [12:32<2:56:06, 15.23s/batch, batch_loss=12.9, batch_

Validation:   7%| | 50/743 [12:32<2:54:21, 15.10s/batch, batch_loss=12.9, batch_

Validation:   7%| | 50/743 [12:47<2:54:21, 15.10s/batch, batch_loss=13.9, batch_

Validation:   7%| | 51/743 [12:47<2:54:03, 15.09s/batch, batch_loss=13.9, batch_

Validation:   7%| | 51/743 [13:03<2:54:03, 15.09s/batch, batch_loss=15.4, batch_

Validation:   7%| | 52/743 [13:03<2:54:55, 15.19s/batch, batch_loss=15.4, batch_

Validation:   7%| | 52/743 [13:17<2:54:55, 15.19s/batch, batch_loss=20, batch_in

Validation:   7%| | 53/743 [13:17<2:50:24, 14.82s/batch, batch_loss=20, batch_in

Validation:   7%| | 53/743 [13:31<2:50:24, 14.82s/batch, batch_loss=12.7, batch_

Validation:   7%| | 54/743 [13:31<2:48:22, 14.66s/batch, batch_loss=12.7, batch_

Validation:   7%| | 54/743 [13:46<2:48:22, 14.66s/batch, batch_loss=19.5, batch_

Validation:   7%| | 55/743 [13:46<2:48:50, 14.72s/batch, batch_loss=19.5, batch_

Validation:   7%| | 55/743 [14:01<2:48:50, 14.72s/batch, batch_loss=16, batch_in

Validation:   8%| | 56/743 [14:01<2:50:30, 14.89s/batch, batch_loss=16, batch_in

Validation:   8%| | 56/743 [14:16<2:50:30, 14.89s/batch, batch_loss=11.4, batch_

Validation:   8%| | 57/743 [14:16<2:51:10, 14.97s/batch, batch_loss=11.4, batch_

Validation:   8%| | 57/743 [14:32<2:51:10, 14.97s/batch, batch_loss=15.9, batch_

Validation:   8%| | 58/743 [14:32<2:52:51, 15.14s/batch, batch_loss=15.9, batch_

Validation:   8%| | 58/743 [14:47<2:52:51, 15.14s/batch, batch_loss=106, batch_i

Validation:   8%| | 59/743 [14:47<2:54:17, 15.29s/batch, batch_loss=106, batch_i

Validation:   8%| | 59/743 [15:02<2:54:17, 15.29s/batch, batch_loss=6.14e+3, bat

Validation:   8%| | 60/743 [15:02<2:51:37, 15.08s/batch, batch_loss=6.14e+3, bat

Validation:   8%| | 60/743 [15:18<2:51:37, 15.08s/batch, batch_loss=7.33, batch_

Validation:   8%| | 61/743 [15:18<2:52:48, 15.20s/batch, batch_loss=7.33, batch_

Validation:   8%| | 61/743 [15:32<2:52:48, 15.20s/batch, batch_loss=8.93, batch_

Validation:   8%| | 62/743 [15:32<2:51:11, 15.08s/batch, batch_loss=8.93, batch_

Validation:   8%| | 62/743 [15:47<2:51:11, 15.08s/batch, batch_loss=20.5, batch_

Validation:   8%| | 63/743 [15:47<2:51:03, 15.09s/batch, batch_loss=20.5, batch_

Validation:   8%| | 63/743 [16:02<2:51:03, 15.09s/batch, batch_loss=11.8, batch_

Validation:   9%| | 64/743 [16:02<2:50:19, 15.05s/batch, batch_loss=11.8, batch_

Validation:   9%| | 64/743 [16:18<2:50:19, 15.05s/batch, batch_loss=16.8, batch_

Validation:   9%| | 65/743 [16:18<2:53:15, 15.33s/batch, batch_loss=16.8, batch_

Validation:   9%| | 65/743 [16:34<2:53:15, 15.33s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:34<2:53:04, 15.34s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:48<2:53:04, 15.34s/batch, batch_loss=13.6, batch_

Validation:   9%| | 67/743 [16:48<2:50:25, 15.13s/batch, batch_loss=13.6, batch_

Validation:   9%| | 67/743 [17:03<2:50:25, 15.13s/batch, batch_loss=14.8, batch_

Validation:   9%| | 68/743 [17:03<2:47:38, 14.90s/batch, batch_loss=14.8, batch_

Validation:   9%| | 68/743 [17:18<2:47:38, 14.90s/batch, batch_loss=9.47, batch_

Validation:   9%| | 69/743 [17:18<2:49:00, 15.05s/batch, batch_loss=9.47, batch_

Validation:   9%| | 69/743 [17:33<2:49:00, 15.05s/batch, batch_loss=12.5, batch_

Validation:   9%| | 70/743 [17:33<2:47:02, 14.89s/batch, batch_loss=12.5, batch_

Validation:   9%| | 70/743 [17:47<2:47:02, 14.89s/batch, batch_loss=8.82, batch_

Validation:  10%| | 71/743 [17:47<2:45:53, 14.81s/batch, batch_loss=8.82, batch_

Validation:  10%| | 71/743 [18:03<2:45:53, 14.81s/batch, batch_loss=12.8, batch_

Validation:  10%| | 72/743 [18:03<2:47:39, 14.99s/batch, batch_loss=12.8, batch_

Validation:  10%| | 72/743 [18:17<2:47:39, 14.99s/batch, batch_loss=13.6, batch_

Validation:  10%| | 73/743 [18:17<2:45:56, 14.86s/batch, batch_loss=13.6, batch_

Validation:  10%| | 73/743 [18:32<2:45:56, 14.86s/batch, batch_loss=16.4, batch_

Validation:  10%| | 74/743 [18:32<2:46:55, 14.97s/batch, batch_loss=16.4, batch_

Validation:  10%| | 74/743 [18:47<2:46:55, 14.97s/batch, batch_loss=11.4, batch_

Validation:  10%| | 75/743 [18:47<2:44:37, 14.79s/batch, batch_loss=11.4, batch_

Validation:  10%| | 75/743 [19:01<2:44:37, 14.79s/batch, batch_loss=13.7, batch_

Validation:  10%| | 76/743 [19:01<2:41:33, 14.53s/batch, batch_loss=13.7, batch_

Validation:  10%| | 76/743 [19:16<2:41:33, 14.53s/batch, batch_loss=11.9, batch_

Validation:  10%| | 77/743 [19:16<2:45:05, 14.87s/batch, batch_loss=11.9, batch_

Validation:  10%| | 77/743 [19:31<2:45:05, 14.87s/batch, batch_loss=14.6, batch_

Validation:  10%| | 78/743 [19:31<2:43:01, 14.71s/batch, batch_loss=14.6, batch_

Validation:  10%| | 78/743 [19:46<2:43:01, 14.71s/batch, batch_loss=8.24, batch_

Validation:  11%| | 79/743 [19:46<2:45:18, 14.94s/batch, batch_loss=8.24, batch_

Validation:  11%| | 79/743 [20:01<2:45:18, 14.94s/batch, batch_loss=7.51, batch_

Validation:  11%| | 80/743 [20:01<2:44:18, 14.87s/batch, batch_loss=7.51, batch_

Validation:  11%| | 80/743 [20:15<2:44:18, 14.87s/batch, batch_loss=150, batch_i

Validation:  11%| | 81/743 [20:15<2:40:35, 14.56s/batch, batch_loss=150, batch_i

Validation:  11%| | 81/743 [20:29<2:40:35, 14.56s/batch, batch_loss=1.5e+3, batc

Validation:  11%| | 82/743 [20:29<2:38:48, 14.42s/batch, batch_loss=1.5e+3, batc

Validation:  11%| | 82/743 [20:45<2:38:48, 14.42s/batch, batch_loss=31.2, batch_

Validation:  11%| | 83/743 [20:45<2:44:23, 14.94s/batch, batch_loss=31.2, batch_

Validation:  11%| | 83/743 [21:01<2:44:23, 14.94s/batch, batch_loss=15.2, batch_

Validation:  11%| | 84/743 [21:01<2:46:20, 15.14s/batch, batch_loss=15.2, batch_

Validation:  11%| | 84/743 [21:15<2:46:20, 15.14s/batch, batch_loss=18.7, batch_

Validation:  11%| | 85/743 [21:15<2:42:38, 14.83s/batch, batch_loss=18.7, batch_

Validation:  11%| | 85/743 [21:30<2:42:38, 14.83s/batch, batch_loss=22.8, batch_

Validation:  12%| | 86/743 [21:30<2:42:31, 14.84s/batch, batch_loss=22.8, batch_

Validation:  12%| | 86/743 [21:44<2:42:31, 14.84s/batch, batch_loss=31.3, batch_

Validation:  12%| | 87/743 [21:44<2:40:47, 14.71s/batch, batch_loss=31.3, batch_

Validation:  12%| | 87/743 [21:58<2:40:47, 14.71s/batch, batch_loss=21.4, batch_

Validation:  12%| | 88/743 [21:58<2:36:50, 14.37s/batch, batch_loss=21.4, batch_

Validation:  12%| | 88/743 [22:12<2:36:50, 14.37s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [22:12<2:35:33, 14.27s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [22:26<2:35:33, 14.27s/batch, batch_loss=4.08, batch_

Validation:  12%| | 90/743 [22:26<2:35:53, 14.32s/batch, batch_loss=4.08, batch_

Validation:  12%| | 90/743 [22:45<2:35:53, 14.32s/batch, batch_loss=31.6, batch_

Validation:  12%| | 91/743 [22:45<2:51:36, 15.79s/batch, batch_loss=31.6, batch_

Validation:  12%| | 91/743 [23:01<2:51:36, 15.79s/batch, batch_loss=31.7, batch_

Validation:  12%| | 92/743 [23:01<2:51:42, 15.83s/batch, batch_loss=31.7, batch_

Validation:  12%| | 92/743 [23:17<2:51:42, 15.83s/batch, batch_loss=22.2, batch_

Validation:  13%|▏| 93/743 [23:17<2:49:51, 15.68s/batch, batch_loss=22.2, batch_

Validation:  13%|▏| 93/743 [23:33<2:49:51, 15.68s/batch, batch_loss=30, batch_in

Validation:  13%|▏| 94/743 [23:33<2:50:39, 15.78s/batch, batch_loss=30, batch_in

Validation:  13%|▏| 94/743 [23:48<2:50:39, 15.78s/batch, batch_loss=11.5, batch_

Validation:  13%|▏| 95/743 [23:48<2:49:13, 15.67s/batch, batch_loss=11.5, batch_

Validation:  13%|▏| 95/743 [24:04<2:49:13, 15.67s/batch, batch_loss=17.2, batch_

Validation:  13%|▏| 96/743 [24:04<2:50:24, 15.80s/batch, batch_loss=17.2, batch_

Validation:  13%|▏| 96/743 [24:20<2:50:24, 15.80s/batch, batch_loss=27.5, batch_

Validation:  13%|▏| 97/743 [24:20<2:51:40, 15.94s/batch, batch_loss=27.5, batch_

Validation:  13%|▏| 97/743 [24:38<2:51:40, 15.94s/batch, batch_loss=15.5, batch_

Validation:  13%|▏| 98/743 [24:38<2:58:09, 16.57s/batch, batch_loss=15.5, batch_

Validation:  13%|▏| 98/743 [24:54<2:58:09, 16.57s/batch, batch_loss=18.2, batch_

Validation:  13%|▏| 99/743 [24:54<2:53:26, 16.16s/batch, batch_loss=18.2, batch_

Validation:  13%|▏| 99/743 [25:08<2:53:26, 16.16s/batch, batch_loss=10.2, batch_

Validation:  13%|▏| 100/743 [25:08<2:47:58, 15.67s/batch, batch_loss=10.2, batch

Validation:  13%|▏| 100/743 [25:23<2:47:58, 15.67s/batch, batch_loss=15.1, batch

Validation:  14%|▏| 101/743 [25:23<2:46:23, 15.55s/batch, batch_loss=15.1, batch

Validation:  14%|▏| 101/743 [25:39<2:46:23, 15.55s/batch, batch_loss=11.6, batch

Validation:  14%|▏| 102/743 [25:39<2:46:38, 15.60s/batch, batch_loss=11.6, batch

Validation:  14%|▏| 102/743 [25:54<2:46:38, 15.60s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [25:54<2:45:39, 15.53s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [26:09<2:45:39, 15.53s/batch, batch_loss=15.5, batch

Validation:  14%|▏| 104/743 [26:09<2:43:21, 15.34s/batch, batch_loss=15.5, batch

Validation:  14%|▏| 104/743 [26:23<2:43:21, 15.34s/batch, batch_loss=7.02, batch

Validation:  14%|▏| 105/743 [26:23<2:39:03, 14.96s/batch, batch_loss=7.02, batch

Validation:  14%|▏| 105/743 [26:41<2:39:03, 14.96s/batch, batch_loss=14.7, batch

Validation:  14%|▏| 106/743 [26:41<2:46:03, 15.64s/batch, batch_loss=14.7, batch

Validation:  14%|▏| 106/743 [26:56<2:46:03, 15.64s/batch, batch_loss=723, batch_

Validation:  14%|▏| 107/743 [26:56<2:44:46, 15.55s/batch, batch_loss=723, batch_

Validation:  14%|▏| 107/743 [27:11<2:44:46, 15.55s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [27:11<2:41:15, 15.24s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [27:25<2:41:15, 15.24s/batch, batch_loss=200, batch_

Validation:  15%|▏| 109/743 [27:25<2:39:45, 15.12s/batch, batch_loss=200, batch_

Validation:  15%|▏| 109/743 [27:40<2:39:45, 15.12s/batch, batch_loss=26.9, batch

Validation:  15%|▏| 110/743 [27:40<2:36:32, 14.84s/batch, batch_loss=26.9, batch

Validation:  15%|▏| 110/743 [27:54<2:36:32, 14.84s/batch, batch_loss=12.3, batch

Validation:  15%|▏| 111/743 [27:54<2:35:56, 14.81s/batch, batch_loss=12.3, batch

Validation:  15%|▏| 111/743 [28:09<2:35:56, 14.81s/batch, batch_loss=24.5, batch

Validation:  15%|▏| 112/743 [28:09<2:34:22, 14.68s/batch, batch_loss=24.5, batch

Validation:  15%|▏| 112/743 [28:23<2:34:22, 14.68s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [28:23<2:32:45, 14.55s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [28:37<2:32:45, 14.55s/batch, batch_loss=18.9, batch

Validation:  15%|▏| 114/743 [28:37<2:29:56, 14.30s/batch, batch_loss=18.9, batch

Validation:  15%|▏| 114/743 [28:53<2:29:56, 14.30s/batch, batch_loss=18.9, batch

Validation:  15%|▏| 115/743 [28:53<2:36:28, 14.95s/batch, batch_loss=18.9, batch

Validation:  15%|▏| 115/743 [29:08<2:36:28, 14.95s/batch, batch_loss=14.4, batch

Validation:  16%|▏| 116/743 [29:08<2:36:37, 14.99s/batch, batch_loss=14.4, batch

Validation:  16%|▏| 116/743 [29:23<2:36:37, 14.99s/batch, batch_loss=22.2, batch

Validation:  16%|▏| 117/743 [29:23<2:37:12, 15.07s/batch, batch_loss=22.2, batch

Validation:  16%|▏| 117/743 [29:38<2:37:12, 15.07s/batch, batch_loss=22.9, batch

Validation:  16%|▏| 118/743 [29:38<2:35:31, 14.93s/batch, batch_loss=22.9, batch

Validation:  16%|▏| 118/743 [29:53<2:35:31, 14.93s/batch, batch_loss=14.9, batch

Validation:  16%|▏| 119/743 [29:53<2:35:30, 14.95s/batch, batch_loss=14.9, batch

Validation:  16%|▏| 119/743 [30:07<2:35:30, 14.95s/batch, batch_loss=18.5, batch

Validation:  16%|▏| 120/743 [30:07<2:32:39, 14.70s/batch, batch_loss=18.5, batch

Validation:  16%|▏| 120/743 [30:22<2:32:39, 14.70s/batch, batch_loss=12.7, batch

Validation:  16%|▏| 121/743 [30:22<2:31:50, 14.65s/batch, batch_loss=12.7, batch

Validation:  16%|▏| 121/743 [30:37<2:31:50, 14.65s/batch, batch_loss=4.08, batch

Validation:  16%|▏| 122/743 [30:37<2:34:42, 14.95s/batch, batch_loss=4.08, batch

Validation:  16%|▏| 122/743 [30:55<2:34:42, 14.95s/batch, batch_loss=7.34, batch

Validation:  17%|▏| 123/743 [30:55<2:42:16, 15.70s/batch, batch_loss=7.34, batch

Validation:  17%|▏| 123/743 [31:11<2:42:16, 15.70s/batch, batch_loss=9.86, batch

Validation:  17%|▏| 124/743 [31:11<2:42:49, 15.78s/batch, batch_loss=9.86, batch

Validation:  17%|▏| 124/743 [31:26<2:42:49, 15.78s/batch, batch_loss=24, batch_i

Validation:  17%|▏| 125/743 [31:26<2:39:54, 15.52s/batch, batch_loss=24, batch_i

Validation:  17%|▏| 125/743 [31:41<2:39:54, 15.52s/batch, batch_loss=12.4, batch

Validation:  17%|▏| 126/743 [31:41<2:40:01, 15.56s/batch, batch_loss=12.4, batch

Validation:  17%|▏| 126/743 [31:57<2:40:01, 15.56s/batch, batch_loss=11, batch_i

Validation:  17%|▏| 127/743 [31:57<2:41:23, 15.72s/batch, batch_loss=11, batch_i

Validation:  17%|▏| 127/743 [32:13<2:41:23, 15.72s/batch, batch_loss=22.2, batch

Validation:  17%|▏| 128/743 [32:13<2:40:50, 15.69s/batch, batch_loss=22.2, batch

Validation:  17%|▏| 128/743 [32:29<2:40:50, 15.69s/batch, batch_loss=12.9, batch

Validation:  17%|▏| 129/743 [32:29<2:40:52, 15.72s/batch, batch_loss=12.9, batch

Validation:  17%|▏| 129/743 [32:45<2:40:52, 15.72s/batch, batch_loss=18.2, batch

Validation:  17%|▏| 130/743 [32:45<2:41:36, 15.82s/batch, batch_loss=18.2, batch

Validation:  17%|▏| 130/743 [33:01<2:41:36, 15.82s/batch, batch_loss=22.9, batch

Validation:  18%|▏| 131/743 [33:01<2:41:18, 15.82s/batch, batch_loss=22.9, batch

Validation:  18%|▏| 131/743 [33:16<2:41:18, 15.82s/batch, batch_loss=24.3, batch

Validation:  18%|▏| 132/743 [33:16<2:39:20, 15.65s/batch, batch_loss=24.3, batch

Validation:  18%|▏| 132/743 [33:31<2:39:20, 15.65s/batch, batch_loss=36.6, batch

Validation:  18%|▏| 133/743 [33:31<2:36:43, 15.42s/batch, batch_loss=36.6, batch

Validation:  18%|▏| 133/743 [33:46<2:36:43, 15.42s/batch, batch_loss=17.4, batch

Validation:  18%|▏| 134/743 [33:46<2:36:13, 15.39s/batch, batch_loss=17.4, batch

Validation:  18%|▏| 134/743 [34:01<2:36:13, 15.39s/batch, batch_loss=33.6, batch

Validation:  18%|▏| 135/743 [34:01<2:34:13, 15.22s/batch, batch_loss=33.6, batch

Validation:  18%|▏| 135/743 [34:16<2:34:13, 15.22s/batch, batch_loss=16.3, batch

Validation:  18%|▏| 136/743 [34:16<2:33:05, 15.13s/batch, batch_loss=16.3, batch

Validation:  18%|▏| 136/743 [34:33<2:33:05, 15.13s/batch, batch_loss=24.7, batch

Validation:  18%|▏| 137/743 [34:33<2:38:43, 15.72s/batch, batch_loss=24.7, batch

Validation:  18%|▏| 137/743 [34:47<2:38:43, 15.72s/batch, batch_loss=7.26, batch

Validation:  19%|▏| 138/743 [34:47<2:34:19, 15.30s/batch, batch_loss=7.26, batch

Validation:  19%|▏| 138/743 [35:01<2:34:19, 15.30s/batch, batch_loss=252, batch_

Validation:  19%|▏| 139/743 [35:01<2:30:33, 14.96s/batch, batch_loss=252, batch_

Validation:  19%|▏| 139/743 [35:16<2:30:33, 14.96s/batch, batch_loss=16.3, batch

Validation:  19%|▏| 140/743 [35:16<2:30:07, 14.94s/batch, batch_loss=16.3, batch

Validation:  19%|▏| 140/743 [35:31<2:30:07, 14.94s/batch, batch_loss=12.2, batch

Validation:  19%|▏| 141/743 [35:31<2:28:14, 14.78s/batch, batch_loss=12.2, batch

Validation:  19%|▏| 141/743 [35:44<2:28:14, 14.78s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 142/743 [35:44<2:23:45, 14.35s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 142/743 [35:58<2:23:45, 14.35s/batch, batch_loss=12.2, batch

Validation:  19%|▏| 143/743 [35:58<2:22:43, 14.27s/batch, batch_loss=12.2, batch

Validation:  19%|▏| 143/743 [36:13<2:22:43, 14.27s/batch, batch_loss=16.4, batch

Validation:  19%|▏| 144/743 [36:13<2:24:03, 14.43s/batch, batch_loss=16.4, batch

Validation:  19%|▏| 144/743 [36:28<2:24:03, 14.43s/batch, batch_loss=13.7, batch

Validation:  20%|▏| 145/743 [36:28<2:25:01, 14.55s/batch, batch_loss=13.7, batch

Validation:  20%|▏| 145/743 [36:42<2:25:01, 14.55s/batch, batch_loss=14.8, batch

Validation:  20%|▏| 146/743 [36:42<2:24:22, 14.51s/batch, batch_loss=14.8, batch

Validation:  20%|▏| 146/743 [36:57<2:24:22, 14.51s/batch, batch_loss=16.9, batch

Validation:  20%|▏| 147/743 [36:57<2:26:03, 14.70s/batch, batch_loss=16.9, batch

Validation:  20%|▏| 147/743 [37:14<2:26:03, 14.70s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [37:14<2:32:05, 15.34s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [37:29<2:32:05, 15.34s/batch, batch_loss=21, batch_i

Validation:  20%|▏| 149/743 [37:29<2:30:07, 15.16s/batch, batch_loss=21, batch_i

Validation:  20%|▏| 149/743 [37:43<2:30:07, 15.16s/batch, batch_loss=23.4, batch

Validation:  20%|▏| 150/743 [37:43<2:27:39, 14.94s/batch, batch_loss=23.4, batch

Validation:  20%|▏| 150/743 [37:58<2:27:39, 14.94s/batch, batch_loss=13.7, batch

Validation:  20%|▏| 151/743 [37:58<2:26:53, 14.89s/batch, batch_loss=13.7, batch

Validation:  20%|▏| 151/743 [38:13<2:26:53, 14.89s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [38:13<2:27:35, 14.98s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [38:28<2:27:35, 14.98s/batch, batch_loss=15.6, batch

Validation:  21%|▏| 153/743 [38:28<2:26:48, 14.93s/batch, batch_loss=15.6, batch

Validation:  21%|▏| 153/743 [38:43<2:26:48, 14.93s/batch, batch_loss=15.1, batch

Validation:  21%|▏| 154/743 [38:43<2:27:09, 14.99s/batch, batch_loss=15.1, batch

Validation:  21%|▏| 154/743 [38:58<2:27:09, 14.99s/batch, batch_loss=18, batch_i

Validation:  21%|▏| 155/743 [38:58<2:24:48, 14.78s/batch, batch_loss=18, batch_i

Validation:  21%|▏| 155/743 [39:12<2:24:48, 14.78s/batch, batch_loss=16.2, batch

Validation:  21%|▏| 156/743 [39:12<2:23:46, 14.70s/batch, batch_loss=16.2, batch

Validation:  21%|▏| 156/743 [39:26<2:23:46, 14.70s/batch, batch_loss=17.6, batch

Validation:  21%|▏| 157/743 [39:26<2:22:02, 14.54s/batch, batch_loss=17.6, batch

Validation:  21%|▏| 157/743 [39:40<2:22:02, 14.54s/batch, batch_loss=21.6, batch

Validation:  21%|▏| 158/743 [39:40<2:19:34, 14.31s/batch, batch_loss=21.6, batch

Validation:  21%|▏| 158/743 [39:55<2:19:34, 14.31s/batch, batch_loss=22, batch_i

Validation:  21%|▏| 159/743 [39:55<2:21:19, 14.52s/batch, batch_loss=22, batch_i

Validation:  21%|▏| 159/743 [40:10<2:21:19, 14.52s/batch, batch_loss=14.5, batch

Validation:  22%|▏| 160/743 [40:10<2:22:12, 14.64s/batch, batch_loss=14.5, batch

Validation:  22%|▏| 160/743 [40:25<2:22:12, 14.64s/batch, batch_loss=15.7, batch

Validation:  22%|▏| 161/743 [40:25<2:23:00, 14.74s/batch, batch_loss=15.7, batch

Validation:  22%|▏| 161/743 [40:40<2:23:00, 14.74s/batch, batch_loss=19.5, batch

Validation:  22%|▏| 162/743 [40:40<2:24:39, 14.94s/batch, batch_loss=19.5, batch

Validation:  22%|▏| 162/743 [40:56<2:24:39, 14.94s/batch, batch_loss=12.3, batch

Validation:  22%|▏| 163/743 [40:56<2:25:26, 15.05s/batch, batch_loss=12.3, batch

Validation:  22%|▏| 163/743 [41:11<2:25:26, 15.05s/batch, batch_loss=9.48, batch

Validation:  22%|▏| 164/743 [41:11<2:26:32, 15.19s/batch, batch_loss=9.48, batch

Validation:  22%|▏| 164/743 [41:27<2:26:32, 15.19s/batch, batch_loss=13.6, batch

Validation:  22%|▏| 165/743 [41:27<2:27:36, 15.32s/batch, batch_loss=13.6, batch

Validation:  22%|▏| 165/743 [41:43<2:27:36, 15.32s/batch, batch_loss=10, batch_i

Validation:  22%|▏| 166/743 [41:43<2:29:21, 15.53s/batch, batch_loss=10, batch_i

Validation:  22%|▏| 166/743 [41:58<2:29:21, 15.53s/batch, batch_loss=11.8, batch

Validation:  22%|▏| 167/743 [41:58<2:28:47, 15.50s/batch, batch_loss=11.8, batch

Validation:  22%|▏| 167/743 [42:14<2:28:47, 15.50s/batch, batch_loss=20.7, batch

Validation:  23%|▏| 168/743 [42:14<2:28:16, 15.47s/batch, batch_loss=20.7, batch

Validation:  23%|▏| 168/743 [42:30<2:28:16, 15.47s/batch, batch_loss=21.3, batch

Validation:  23%|▏| 169/743 [42:30<2:30:37, 15.75s/batch, batch_loss=21.3, batch

Validation:  23%|▏| 169/743 [42:47<2:30:37, 15.75s/batch, batch_loss=18.8, batch

Validation:  23%|▏| 170/743 [42:47<2:33:26, 16.07s/batch, batch_loss=18.8, batch

Validation:  23%|▏| 170/743 [43:01<2:33:26, 16.07s/batch, batch_loss=18.3, batch

Validation:  23%|▏| 171/743 [43:01<2:27:22, 15.46s/batch, batch_loss=18.3, batch

Validation:  23%|▏| 171/743 [43:15<2:27:22, 15.46s/batch, batch_loss=16.3, batch

Validation:  23%|▏| 172/743 [43:15<2:22:41, 14.99s/batch, batch_loss=16.3, batch

Validation:  23%|▏| 172/743 [43:30<2:22:41, 14.99s/batch, batch_loss=20.1, batch

Validation:  23%|▏| 173/743 [43:30<2:23:46, 15.13s/batch, batch_loss=20.1, batch

Validation:  23%|▏| 173/743 [43:45<2:23:46, 15.13s/batch, batch_loss=13.8, batch

Validation:  23%|▏| 174/743 [43:45<2:21:03, 14.87s/batch, batch_loss=13.8, batch

Validation:  23%|▏| 174/743 [44:00<2:21:03, 14.87s/batch, batch_loss=19, batch_i

Validation:  24%|▏| 175/743 [44:00<2:21:46, 14.98s/batch, batch_loss=19, batch_i

Validation:  24%|▏| 175/743 [44:14<2:21:46, 14.98s/batch, batch_loss=13.2, batch

Validation:  24%|▏| 176/743 [44:14<2:19:43, 14.79s/batch, batch_loss=13.2, batch

Validation:  24%|▏| 176/743 [44:28<2:19:43, 14.79s/batch, batch_loss=16.9, batch

Validation:  24%|▏| 177/743 [44:28<2:15:50, 14.40s/batch, batch_loss=16.9, batch

Validation:  24%|▏| 177/743 [44:43<2:15:50, 14.40s/batch, batch_loss=22.4, batch

Validation:  24%|▏| 178/743 [44:43<2:17:45, 14.63s/batch, batch_loss=22.4, batch

Validation:  24%|▏| 178/743 [45:00<2:17:45, 14.63s/batch, batch_loss=17.2, batch

Validation:  24%|▏| 179/743 [45:00<2:25:18, 15.46s/batch, batch_loss=17.2, batch

Validation:  24%|▏| 179/743 [45:15<2:25:18, 15.46s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [45:15<2:22:16, 15.16s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [45:30<2:22:16, 15.16s/batch, batch_loss=16.2, batch

Validation:  24%|▏| 181/743 [45:30<2:22:30, 15.21s/batch, batch_loss=16.2, batch

Validation:  24%|▏| 181/743 [45:45<2:22:30, 15.21s/batch, batch_loss=18.2, batch

Validation:  24%|▏| 182/743 [45:45<2:21:52, 15.17s/batch, batch_loss=18.2, batch

Validation:  24%|▏| 182/743 [46:00<2:21:52, 15.17s/batch, batch_loss=15.8, batch

Validation:  25%|▏| 183/743 [46:00<2:19:56, 14.99s/batch, batch_loss=15.8, batch

Validation:  25%|▏| 183/743 [46:14<2:19:56, 14.99s/batch, batch_loss=9.76, batch

Validation:  25%|▏| 184/743 [46:14<2:17:22, 14.74s/batch, batch_loss=9.76, batch

Validation:  25%|▏| 184/743 [46:28<2:17:22, 14.74s/batch, batch_loss=16.2, batch

Validation:  25%|▏| 185/743 [46:28<2:16:30, 14.68s/batch, batch_loss=16.2, batch

Validation:  25%|▏| 185/743 [46:44<2:16:30, 14.68s/batch, batch_loss=23.6, batch

Validation:  25%|▎| 186/743 [46:44<2:18:42, 14.94s/batch, batch_loss=23.6, batch

Validation:  25%|▎| 186/743 [47:01<2:18:42, 14.94s/batch, batch_loss=28.7, batch

Validation:  25%|▎| 187/743 [47:01<2:25:25, 15.69s/batch, batch_loss=28.7, batch

Validation:  25%|▎| 187/743 [47:17<2:25:25, 15.69s/batch, batch_loss=14.2, batch

Validation:  25%|▎| 188/743 [47:17<2:23:53, 15.56s/batch, batch_loss=14.2, batch

Validation:  25%|▎| 188/743 [47:32<2:23:53, 15.56s/batch, batch_loss=15.7, batch

Validation:  25%|▎| 189/743 [47:32<2:22:42, 15.46s/batch, batch_loss=15.7, batch

Validation:  25%|▎| 189/743 [47:46<2:22:42, 15.46s/batch, batch_loss=973, batch_

Validation:  26%|▎| 190/743 [47:46<2:19:41, 15.16s/batch, batch_loss=973, batch_

Validation:  26%|▎| 190/743 [48:01<2:19:41, 15.16s/batch, batch_loss=21.5, batch

Validation:  26%|▎| 191/743 [48:01<2:17:50, 14.98s/batch, batch_loss=21.5, batch

Validation:  26%|▎| 191/743 [48:16<2:17:50, 14.98s/batch, batch_loss=10.9, batch

Validation:  26%|▎| 192/743 [48:16<2:18:31, 15.08s/batch, batch_loss=10.9, batch

Validation:  26%|▎| 192/743 [48:30<2:18:31, 15.08s/batch, batch_loss=16.6, batch

Validation:  26%|▎| 193/743 [48:30<2:16:14, 14.86s/batch, batch_loss=16.6, batch

Validation:  26%|▎| 193/743 [48:45<2:16:14, 14.86s/batch, batch_loss=16.9, batch

Validation:  26%|▎| 194/743 [48:45<2:14:04, 14.65s/batch, batch_loss=16.9, batch

Validation:  26%|▎| 194/743 [49:01<2:14:04, 14.65s/batch, batch_loss=9.09, batch

Validation:  26%|▎| 195/743 [49:01<2:17:50, 15.09s/batch, batch_loss=9.09, batch

Validation:  26%|▎| 195/743 [49:16<2:17:50, 15.09s/batch, batch_loss=15.2, batch

Validation:  26%|▎| 196/743 [49:16<2:16:54, 15.02s/batch, batch_loss=15.2, batch

Validation:  26%|▎| 196/743 [49:31<2:16:54, 15.02s/batch, batch_loss=8.99, batch

Validation:  27%|▎| 197/743 [49:31<2:17:30, 15.11s/batch, batch_loss=8.99, batch

Validation:  27%|▎| 197/743 [49:46<2:17:30, 15.11s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 198/743 [49:46<2:16:39, 15.05s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 198/743 [50:00<2:16:39, 15.05s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 199/743 [50:00<2:15:20, 14.93s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 199/743 [50:15<2:15:20, 14.93s/batch, batch_loss=277, batch_

Validation:  27%|▎| 200/743 [50:15<2:14:08, 14.82s/batch, batch_loss=277, batch_

Validation:  27%|▎| 200/743 [50:29<2:14:08, 14.82s/batch, batch_loss=32.6, batch

Validation:  27%|▎| 201/743 [50:29<2:12:01, 14.62s/batch, batch_loss=32.6, batch

Validation:  27%|▎| 201/743 [50:47<2:12:01, 14.62s/batch, batch_loss=22.4, batch

Validation:  27%|▎| 202/743 [50:47<2:19:11, 15.44s/batch, batch_loss=22.4, batch

Validation:  27%|▎| 202/743 [51:02<2:19:11, 15.44s/batch, batch_loss=13.4, batch

Validation:  27%|▎| 203/743 [51:02<2:19:23, 15.49s/batch, batch_loss=13.4, batch

Validation:  27%|▎| 203/743 [51:17<2:19:23, 15.49s/batch, batch_loss=19.3, batch

Validation:  27%|▎| 204/743 [51:17<2:16:22, 15.18s/batch, batch_loss=19.3, batch

Validation:  27%|▎| 204/743 [51:31<2:16:22, 15.18s/batch, batch_loss=19.7, batch

Validation:  28%|▎| 205/743 [51:31<2:14:12, 14.97s/batch, batch_loss=19.7, batch

Validation:  28%|▎| 205/743 [51:44<2:14:12, 14.97s/batch, batch_loss=11.9, batch

Validation:  28%|▎| 206/743 [51:44<2:08:53, 14.40s/batch, batch_loss=11.9, batch

Validation:  28%|▎| 206/743 [51:57<2:08:53, 14.40s/batch, batch_loss=18.5, batch

Validation:  28%|▎| 207/743 [51:57<2:05:26, 14.04s/batch, batch_loss=18.5, batch

Validation:  28%|▎| 207/743 [52:11<2:05:26, 14.04s/batch, batch_loss=17.4, batch

Validation:  28%|▎| 208/743 [52:11<2:03:25, 13.84s/batch, batch_loss=17.4, batch

Validation:  28%|▎| 208/743 [52:25<2:03:25, 13.84s/batch, batch_loss=6.97, batch

Validation:  28%|▎| 209/743 [52:25<2:03:47, 13.91s/batch, batch_loss=6.97, batch

Validation:  28%|▎| 209/743 [52:39<2:03:47, 13.91s/batch, batch_loss=9.26, batch

Validation:  28%|▎| 210/743 [52:39<2:05:30, 14.13s/batch, batch_loss=9.26, batch

Validation:  28%|▎| 210/743 [52:54<2:05:30, 14.13s/batch, batch_loss=13.5, batch

Validation:  28%|▎| 211/743 [52:54<2:05:58, 14.21s/batch, batch_loss=13.5, batch

Validation:  28%|▎| 211/743 [53:09<2:05:58, 14.21s/batch, batch_loss=11.6, batch

Validation:  29%|▎| 212/743 [53:09<2:08:55, 14.57s/batch, batch_loss=11.6, batch

Validation:  29%|▎| 212/743 [53:24<2:08:55, 14.57s/batch, batch_loss=538, batch_

Validation:  29%|▎| 213/743 [53:24<2:09:59, 14.72s/batch, batch_loss=538, batch_

Validation:  29%|▎| 213/743 [53:39<2:09:59, 14.72s/batch, batch_loss=11.1, batch

Validation:  29%|▎| 214/743 [53:39<2:10:13, 14.77s/batch, batch_loss=11.1, batch

Validation:  29%|▎| 214/743 [53:54<2:10:13, 14.77s/batch, batch_loss=12, batch_i

Validation:  29%|▎| 215/743 [53:54<2:10:16, 14.80s/batch, batch_loss=12, batch_i

Validation:  29%|▎| 215/743 [54:07<2:10:16, 14.80s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [54:07<2:05:41, 14.31s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [54:23<2:05:41, 14.31s/batch, batch_loss=20.3, batch

Validation:  29%|▎| 217/743 [54:23<2:08:23, 14.65s/batch, batch_loss=20.3, batch

Validation:  29%|▎| 217/743 [54:38<2:08:23, 14.65s/batch, batch_loss=12.6, batch

Validation:  29%|▎| 218/743 [54:38<2:10:08, 14.87s/batch, batch_loss=12.6, batch

Validation:  29%|▎| 218/743 [54:55<2:10:08, 14.87s/batch, batch_loss=27, batch_i

Validation:  29%|▎| 219/743 [54:55<2:14:45, 15.43s/batch, batch_loss=27, batch_i

Validation:  29%|▎| 219/743 [55:11<2:14:45, 15.43s/batch, batch_loss=26.2, batch

Validation:  30%|▎| 220/743 [55:11<2:16:14, 15.63s/batch, batch_loss=26.2, batch

Validation:  30%|▎| 220/743 [55:29<2:16:14, 15.63s/batch, batch_loss=16.4, batch

Validation:  30%|▎| 221/743 [55:29<2:23:03, 16.44s/batch, batch_loss=16.4, batch

Validation:  30%|▎| 221/743 [55:44<2:23:03, 16.44s/batch, batch_loss=10.5, batch

Validation:  30%|▎| 222/743 [55:44<2:18:55, 16.00s/batch, batch_loss=10.5, batch

Validation:  30%|▎| 222/743 [55:59<2:18:55, 16.00s/batch, batch_loss=11, batch_i

Validation:  30%|▎| 223/743 [55:59<2:16:09, 15.71s/batch, batch_loss=11, batch_i

Validation:  30%|▎| 223/743 [56:14<2:16:09, 15.71s/batch, batch_loss=9.96, batch

Validation:  30%|▎| 224/743 [56:14<2:13:35, 15.44s/batch, batch_loss=9.96, batch

Validation:  30%|▎| 224/743 [56:29<2:13:35, 15.44s/batch, batch_loss=4.94e+3, ba

Validation:  30%|▎| 225/743 [56:29<2:12:24, 15.34s/batch, batch_loss=4.94e+3, ba

Validation:  30%|▎| 225/743 [56:44<2:12:24, 15.34s/batch, batch_loss=16.9, batch

Validation:  30%|▎| 226/743 [56:44<2:10:33, 15.15s/batch, batch_loss=16.9, batch

Validation:  30%|▎| 226/743 [56:59<2:10:33, 15.15s/batch, batch_loss=16, batch_i

Validation:  31%|▎| 227/743 [56:59<2:10:38, 15.19s/batch, batch_loss=16, batch_i

Validation:  31%|▎| 227/743 [57:17<2:10:38, 15.19s/batch, batch_loss=16.9, batch

Validation:  31%|▎| 228/743 [57:17<2:17:35, 16.03s/batch, batch_loss=16.9, batch

Validation:  31%|▎| 228/743 [57:33<2:17:35, 16.03s/batch, batch_loss=16.6, batch

Validation:  31%|▎| 229/743 [57:33<2:16:36, 15.95s/batch, batch_loss=16.6, batch

Validation:  31%|▎| 229/743 [57:47<2:16:36, 15.95s/batch, batch_loss=19.2, batch

Validation:  31%|▎| 230/743 [57:47<2:11:04, 15.33s/batch, batch_loss=19.2, batch

Validation:  31%|▎| 230/743 [58:02<2:11:04, 15.33s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [58:02<2:10:02, 15.24s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [58:17<2:10:02, 15.24s/batch, batch_loss=15.2, batch

Validation:  31%|▎| 232/743 [58:17<2:09:24, 15.19s/batch, batch_loss=15.2, batch

Validation:  31%|▎| 232/743 [58:31<2:09:24, 15.19s/batch, batch_loss=8.22, batch

Validation:  31%|▎| 233/743 [58:31<2:06:45, 14.91s/batch, batch_loss=8.22, batch

Validation:  31%|▎| 233/743 [58:49<2:06:45, 14.91s/batch, batch_loss=12.9, batch

Validation:  31%|▎| 234/743 [58:49<2:13:18, 15.71s/batch, batch_loss=12.9, batch

Validation:  31%|▎| 234/743 [59:03<2:13:18, 15.71s/batch, batch_loss=15.4, batch

Validation:  32%|▎| 235/743 [59:03<2:10:24, 15.40s/batch, batch_loss=15.4, batch

Validation:  32%|▎| 235/743 [59:18<2:10:24, 15.40s/batch, batch_loss=2.89, batch

Validation:  32%|▎| 236/743 [59:18<2:08:39, 15.23s/batch, batch_loss=2.89, batch

Validation:  32%|▎| 236/743 [59:33<2:08:39, 15.23s/batch, batch_loss=19, batch_i

Validation:  32%|▎| 237/743 [59:33<2:06:35, 15.01s/batch, batch_loss=19, batch_i

Validation:  32%|▎| 237/743 [59:47<2:06:35, 15.01s/batch, batch_loss=14.2, batch

Validation:  32%|▎| 238/743 [59:47<2:03:32, 14.68s/batch, batch_loss=14.2, batch

Validation:  32%|▎| 238/743 [1:00:01<2:03:32, 14.68s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:00:01<2:03:09, 14.66s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:00:15<2:03:09, 14.66s/batch, batch_loss=18.3, bat

Validation:  32%|▎| 240/743 [1:00:15<2:00:29, 14.37s/batch, batch_loss=18.3, bat

Validation:  32%|▎| 240/743 [1:00:28<2:00:29, 14.37s/batch, batch_loss=16.3, bat

Validation:  32%|▎| 241/743 [1:00:28<1:58:10, 14.12s/batch, batch_loss=16.3, bat

Validation:  32%|▎| 241/743 [1:00:44<1:58:10, 14.12s/batch, batch_loss=230, batc

Validation:  33%|▎| 242/743 [1:00:44<2:00:37, 14.45s/batch, batch_loss=230, batc

Validation:  33%|▎| 242/743 [1:00:58<2:00:37, 14.45s/batch, batch_loss=9.18, bat

Validation:  33%|▎| 243/743 [1:00:58<1:59:55, 14.39s/batch, batch_loss=9.18, bat

Validation:  33%|▎| 243/743 [1:01:12<1:59:55, 14.39s/batch, batch_loss=13.2, bat

Validation:  33%|▎| 244/743 [1:01:12<1:58:44, 14.28s/batch, batch_loss=13.2, bat

Validation:  33%|▎| 244/743 [1:01:27<1:58:44, 14.28s/batch, batch_loss=21.5, bat

Validation:  33%|▎| 245/743 [1:01:27<1:59:48, 14.43s/batch, batch_loss=21.5, bat

Validation:  33%|▎| 245/743 [1:01:41<1:59:48, 14.43s/batch, batch_loss=5.82, bat

Validation:  33%|▎| 246/743 [1:01:41<1:58:44, 14.34s/batch, batch_loss=5.82, bat

Validation:  33%|▎| 246/743 [1:01:56<1:58:44, 14.34s/batch, batch_loss=14.1, bat

Validation:  33%|▎| 247/743 [1:01:56<2:00:12, 14.54s/batch, batch_loss=14.1, bat

Validation:  33%|▎| 247/743 [1:02:10<2:00:12, 14.54s/batch, batch_loss=37.5, bat

Validation:  33%|▎| 248/743 [1:02:10<1:59:54, 14.53s/batch, batch_loss=37.5, bat

Validation:  33%|▎| 248/743 [1:02:25<1:59:54, 14.53s/batch, batch_loss=12, batch

Validation:  34%|▎| 249/743 [1:02:25<2:00:19, 14.61s/batch, batch_loss=12, batch

Validation:  34%|▎| 249/743 [1:02:40<2:00:19, 14.61s/batch, batch_loss=18.6, bat

Validation:  34%|▎| 250/743 [1:02:40<2:01:16, 14.76s/batch, batch_loss=18.6, bat

Validation:  34%|▎| 250/743 [1:02:58<2:01:16, 14.76s/batch, batch_loss=18.8, bat

Validation:  34%|▎| 251/743 [1:02:58<2:09:11, 15.76s/batch, batch_loss=18.8, bat

Validation:  34%|▎| 251/743 [1:03:14<2:09:11, 15.76s/batch, batch_loss=20.5, bat

Validation:  34%|▎| 252/743 [1:03:14<2:07:38, 15.60s/batch, batch_loss=20.5, bat

Validation:  34%|▎| 252/743 [1:03:28<2:07:38, 15.60s/batch, batch_loss=18.3, bat

Validation:  34%|▎| 253/743 [1:03:28<2:05:26, 15.36s/batch, batch_loss=18.3, bat

Validation:  34%|▎| 253/743 [1:03:44<2:05:26, 15.36s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:03:44<2:04:51, 15.32s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:03:59<2:04:51, 15.32s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:03:59<2:03:31, 15.19s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:04:15<2:03:31, 15.19s/batch, batch_loss=19.7, bat

Validation:  34%|▎| 256/743 [1:04:15<2:05:10, 15.42s/batch, batch_loss=19.7, bat

Validation:  34%|▎| 256/743 [1:04:29<2:05:10, 15.42s/batch, batch_loss=21, batch

Validation:  35%|▎| 257/743 [1:04:29<2:03:40, 15.27s/batch, batch_loss=21, batch

Validation:  35%|▎| 257/743 [1:04:44<2:03:40, 15.27s/batch, batch_loss=12.1, bat

Validation:  35%|▎| 258/743 [1:04:44<2:01:19, 15.01s/batch, batch_loss=12.1, bat

Validation:  35%|▎| 258/743 [1:04:58<2:01:19, 15.01s/batch, batch_loss=2.89, bat

Validation:  35%|▎| 259/743 [1:04:58<2:00:05, 14.89s/batch, batch_loss=2.89, bat

Validation:  35%|▎| 259/743 [1:05:13<2:00:05, 14.89s/batch, batch_loss=1.61, bat

Validation:  35%|▎| 260/743 [1:05:13<1:59:12, 14.81s/batch, batch_loss=1.61, bat

Validation:  35%|▎| 260/743 [1:05:28<1:59:12, 14.81s/batch, batch_loss=7.38, bat

Validation:  35%|▎| 261/743 [1:05:28<1:59:40, 14.90s/batch, batch_loss=7.38, bat

Validation:  35%|▎| 261/743 [1:05:44<1:59:40, 14.90s/batch, batch_loss=26.8, bat

Validation:  35%|▎| 262/743 [1:05:44<2:01:27, 15.15s/batch, batch_loss=26.8, bat

Validation:  35%|▎| 262/743 [1:06:01<2:01:27, 15.15s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:06:01<2:04:46, 15.60s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:06:17<2:04:46, 15.60s/batch, batch_loss=9.92, bat

Validation:  36%|▎| 264/743 [1:06:17<2:06:45, 15.88s/batch, batch_loss=9.92, bat

Validation:  36%|▎| 264/743 [1:06:31<2:06:45, 15.88s/batch, batch_loss=19.6, bat

Validation:  36%|▎| 265/743 [1:06:31<2:02:19, 15.35s/batch, batch_loss=19.6, bat

Validation:  36%|▎| 265/743 [1:06:48<2:02:19, 15.35s/batch, batch_loss=23.7, bat

Validation:  36%|▎| 266/743 [1:06:48<2:05:44, 15.82s/batch, batch_loss=23.7, bat

Validation:  36%|▎| 266/743 [1:07:02<2:05:44, 15.82s/batch, batch_loss=18.3, bat

Validation:  36%|▎| 267/743 [1:07:02<2:01:54, 15.37s/batch, batch_loss=18.3, bat

Validation:  36%|▎| 267/743 [1:07:17<2:01:54, 15.37s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:07:17<2:00:33, 15.23s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:07:32<2:00:33, 15.23s/batch, batch_loss=38.6, bat

Validation:  36%|▎| 269/743 [1:07:32<1:59:42, 15.15s/batch, batch_loss=38.6, bat

Validation:  36%|▎| 269/743 [1:07:46<1:59:42, 15.15s/batch, batch_loss=31.8, bat

Validation:  36%|▎| 270/743 [1:07:46<1:56:16, 14.75s/batch, batch_loss=31.8, bat

Validation:  36%|▎| 270/743 [1:08:01<1:56:16, 14.75s/batch, batch_loss=24.6, bat

Validation:  36%|▎| 271/743 [1:08:01<1:56:16, 14.78s/batch, batch_loss=24.6, bat

Validation:  36%|▎| 271/743 [1:08:15<1:56:16, 14.78s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:08:15<1:55:02, 14.65s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:08:32<1:55:02, 14.65s/batch, batch_loss=16.5, bat

Validation:  37%|▎| 273/743 [1:08:32<1:59:41, 15.28s/batch, batch_loss=16.5, bat

Validation:  37%|▎| 273/743 [1:08:47<1:59:41, 15.28s/batch, batch_loss=21, batch

Validation:  37%|▎| 274/743 [1:08:47<1:58:09, 15.12s/batch, batch_loss=21, batch

Validation:  37%|▎| 274/743 [1:09:01<1:58:09, 15.12s/batch, batch_loss=17.6, bat

Validation:  37%|▎| 275/743 [1:09:01<1:54:53, 14.73s/batch, batch_loss=17.6, bat

Validation:  37%|▎| 275/743 [1:09:16<1:54:53, 14.73s/batch, batch_loss=12.6, bat

Validation:  37%|▎| 276/743 [1:09:16<1:55:39, 14.86s/batch, batch_loss=12.6, bat

Validation:  37%|▎| 276/743 [1:09:30<1:55:39, 14.86s/batch, batch_loss=24.8, bat

Validation:  37%|▎| 277/743 [1:09:30<1:54:19, 14.72s/batch, batch_loss=24.8, bat

Validation:  37%|▎| 277/743 [1:09:45<1:54:19, 14.72s/batch, batch_loss=19.2, bat

Validation:  37%|▎| 278/743 [1:09:45<1:54:20, 14.75s/batch, batch_loss=19.2, bat

Validation:  37%|▎| 278/743 [1:09:59<1:54:20, 14.75s/batch, batch_loss=8.7, batc

Validation:  38%|▍| 279/743 [1:09:59<1:52:45, 14.58s/batch, batch_loss=8.7, batc

Validation:  38%|▍| 279/743 [1:10:13<1:52:45, 14.58s/batch, batch_loss=13.9, bat

Validation:  38%|▍| 280/743 [1:10:13<1:50:45, 14.35s/batch, batch_loss=13.9, bat

Validation:  38%|▍| 280/743 [1:10:27<1:50:45, 14.35s/batch, batch_loss=17.9, bat

Validation:  38%|▍| 281/743 [1:10:27<1:50:08, 14.30s/batch, batch_loss=17.9, bat

Validation:  38%|▍| 281/743 [1:10:41<1:50:08, 14.30s/batch, batch_loss=20.8, bat

Validation:  38%|▍| 282/743 [1:10:41<1:49:21, 14.23s/batch, batch_loss=20.8, bat

Validation:  38%|▍| 282/743 [1:10:56<1:49:21, 14.23s/batch, batch_loss=16, batch

Validation:  38%|▍| 283/743 [1:10:56<1:50:51, 14.46s/batch, batch_loss=16, batch

Validation:  38%|▍| 283/743 [1:11:12<1:50:51, 14.46s/batch, batch_loss=14.4, bat

Validation:  38%|▍| 284/743 [1:11:12<1:52:55, 14.76s/batch, batch_loss=14.4, bat

Validation:  38%|▍| 284/743 [1:11:26<1:52:55, 14.76s/batch, batch_loss=13.3, bat

Validation:  38%|▍| 285/743 [1:11:26<1:52:17, 14.71s/batch, batch_loss=13.3, bat

Validation:  38%|▍| 285/743 [1:11:42<1:52:17, 14.71s/batch, batch_loss=15.1, bat

Validation:  38%|▍| 286/743 [1:11:42<1:53:39, 14.92s/batch, batch_loss=15.1, bat

Validation:  38%|▍| 286/743 [1:11:57<1:53:39, 14.92s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:11:57<1:53:15, 14.90s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:12:12<1:53:15, 14.90s/batch, batch_loss=21.4, bat

Validation:  39%|▍| 288/743 [1:12:12<1:53:19, 14.94s/batch, batch_loss=21.4, bat

Validation:  39%|▍| 288/743 [1:12:27<1:53:19, 14.94s/batch, batch_loss=21.5, bat

Validation:  39%|▍| 289/743 [1:12:27<1:54:44, 15.16s/batch, batch_loss=21.5, bat

Validation:  39%|▍| 289/743 [1:12:42<1:54:44, 15.16s/batch, batch_loss=483, batc

Validation:  39%|▍| 290/743 [1:12:42<1:52:49, 14.94s/batch, batch_loss=483, batc

Validation:  39%|▍| 290/743 [1:12:59<1:52:49, 14.94s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:12:59<1:58:47, 15.77s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:13:15<1:58:47, 15.77s/batch, batch_loss=1.2e+3, b

Validation:  39%|▍| 292/743 [1:13:15<1:58:01, 15.70s/batch, batch_loss=1.2e+3, b

Validation:  39%|▍| 292/743 [1:13:31<1:58:01, 15.70s/batch, batch_loss=26.2, bat

Validation:  39%|▍| 293/743 [1:13:31<1:57:46, 15.70s/batch, batch_loss=26.2, bat

Validation:  39%|▍| 293/743 [1:13:47<1:57:46, 15.70s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:13:47<1:58:11, 15.79s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:14:00<1:58:11, 15.79s/batch, batch_loss=15.8, bat

Validation:  40%|▍| 295/743 [1:14:00<1:53:29, 15.20s/batch, batch_loss=15.8, bat

Validation:  40%|▍| 295/743 [1:14:15<1:53:29, 15.20s/batch, batch_loss=17.2, bat

Validation:  40%|▍| 296/743 [1:14:15<1:51:34, 14.98s/batch, batch_loss=17.2, bat

Validation:  40%|▍| 296/743 [1:14:29<1:51:34, 14.98s/batch, batch_loss=11.1, bat

Validation:  40%|▍| 297/743 [1:14:29<1:49:23, 14.72s/batch, batch_loss=11.1, bat

Validation:  40%|▍| 297/743 [1:14:44<1:49:23, 14.72s/batch, batch_loss=22.4, bat

Validation:  40%|▍| 298/743 [1:14:44<1:49:18, 14.74s/batch, batch_loss=22.4, bat

Validation:  40%|▍| 298/743 [1:14:57<1:49:18, 14.74s/batch, batch_loss=30.2, bat

Validation:  40%|▍| 299/743 [1:14:57<1:46:03, 14.33s/batch, batch_loss=30.2, bat

Validation:  40%|▍| 299/743 [1:15:11<1:46:03, 14.33s/batch, batch_loss=34.5, bat

Validation:  40%|▍| 300/743 [1:15:11<1:43:29, 14.02s/batch, batch_loss=34.5, bat

Validation:  40%|▍| 300/743 [1:15:24<1:43:29, 14.02s/batch, batch_loss=833, batc

Validation:  41%|▍| 301/743 [1:15:24<1:42:51, 13.96s/batch, batch_loss=833, batc

Validation:  41%|▍| 301/743 [1:15:39<1:42:51, 13.96s/batch, batch_loss=9.69, bat

Validation:  41%|▍| 302/743 [1:15:39<1:45:14, 14.32s/batch, batch_loss=9.69, bat

Validation:  41%|▍| 302/743 [1:15:54<1:45:14, 14.32s/batch, batch_loss=13, batch

Validation:  41%|▍| 303/743 [1:15:54<1:45:13, 14.35s/batch, batch_loss=13, batch

Validation:  41%|▍| 303/743 [1:16:08<1:45:13, 14.35s/batch, batch_loss=15.9, bat

Validation:  41%|▍| 304/743 [1:16:08<1:45:18, 14.39s/batch, batch_loss=15.9, bat

Validation:  41%|▍| 304/743 [1:16:23<1:45:18, 14.39s/batch, batch_loss=11.4, bat

Validation:  41%|▍| 305/743 [1:16:23<1:46:07, 14.54s/batch, batch_loss=11.4, bat

Validation:  41%|▍| 305/743 [1:16:37<1:46:07, 14.54s/batch, batch_loss=18.8, bat

Validation:  41%|▍| 306/743 [1:16:37<1:44:56, 14.41s/batch, batch_loss=18.8, bat

Validation:  41%|▍| 306/743 [1:16:51<1:44:56, 14.41s/batch, batch_loss=17.8, bat

Validation:  41%|▍| 307/743 [1:16:51<1:42:25, 14.10s/batch, batch_loss=17.8, bat

Validation:  41%|▍| 307/743 [1:17:05<1:42:25, 14.10s/batch, batch_loss=878, batc

Validation:  41%|▍| 308/743 [1:17:05<1:42:54, 14.19s/batch, batch_loss=878, batc

Validation:  41%|▍| 308/743 [1:17:20<1:42:54, 14.19s/batch, batch_loss=24.6, bat

Validation:  42%|▍| 309/743 [1:17:20<1:43:02, 14.24s/batch, batch_loss=24.6, bat

Validation:  42%|▍| 309/743 [1:17:34<1:43:02, 14.24s/batch, batch_loss=16.8, bat

Validation:  42%|▍| 310/743 [1:17:34<1:42:13, 14.16s/batch, batch_loss=16.8, bat

Validation:  42%|▍| 310/743 [1:17:48<1:42:13, 14.16s/batch, batch_loss=18, batch

Validation:  42%|▍| 311/743 [1:17:48<1:41:41, 14.12s/batch, batch_loss=18, batch

Validation:  42%|▍| 311/743 [1:18:02<1:41:41, 14.12s/batch, batch_loss=15, batch

Validation:  42%|▍| 312/743 [1:18:02<1:42:04, 14.21s/batch, batch_loss=15, batch

Validation:  42%|▍| 312/743 [1:18:17<1:42:04, 14.21s/batch, batch_loss=6.7, batc

Validation:  42%|▍| 313/743 [1:18:17<1:43:47, 14.48s/batch, batch_loss=6.7, batc

Validation:  42%|▍| 313/743 [1:18:32<1:43:47, 14.48s/batch, batch_loss=11.9, bat

Validation:  42%|▍| 314/743 [1:18:32<1:45:26, 14.75s/batch, batch_loss=11.9, bat

Validation:  42%|▍| 314/743 [1:18:47<1:45:26, 14.75s/batch, batch_loss=19.9, bat

Validation:  42%|▍| 315/743 [1:18:47<1:45:30, 14.79s/batch, batch_loss=19.9, bat

Validation:  42%|▍| 315/743 [1:19:03<1:45:30, 14.79s/batch, batch_loss=20, batch

Validation:  43%|▍| 316/743 [1:19:03<1:46:45, 15.00s/batch, batch_loss=20, batch

Validation:  43%|▍| 316/743 [1:19:18<1:46:45, 15.00s/batch, batch_loss=20, batch

Validation:  43%|▍| 317/743 [1:19:18<1:46:00, 14.93s/batch, batch_loss=20, batch

Validation:  43%|▍| 317/743 [1:19:32<1:46:00, 14.93s/batch, batch_loss=14.1, bat

Validation:  43%|▍| 318/743 [1:19:32<1:45:03, 14.83s/batch, batch_loss=14.1, bat

Validation:  43%|▍| 318/743 [1:19:46<1:45:03, 14.83s/batch, batch_loss=18.9, bat

Validation:  43%|▍| 319/743 [1:19:46<1:43:31, 14.65s/batch, batch_loss=18.9, bat

Validation:  43%|▍| 319/743 [1:20:03<1:43:31, 14.65s/batch, batch_loss=16, batch

Validation:  43%|▍| 320/743 [1:20:03<1:46:42, 15.14s/batch, batch_loss=16, batch

Validation:  43%|▍| 320/743 [1:20:18<1:46:42, 15.14s/batch, batch_loss=15, batch

Validation:  43%|▍| 321/743 [1:20:18<1:45:53, 15.06s/batch, batch_loss=15, batch

Validation:  43%|▍| 321/743 [1:20:34<1:45:53, 15.06s/batch, batch_loss=14, batch

Validation:  43%|▍| 322/743 [1:20:34<1:49:14, 15.57s/batch, batch_loss=14, batch

Validation:  43%|▍| 322/743 [1:20:49<1:49:14, 15.57s/batch, batch_loss=17.4, bat

Validation:  43%|▍| 323/743 [1:20:49<1:48:07, 15.45s/batch, batch_loss=17.4, bat

Validation:  43%|▍| 323/743 [1:21:04<1:48:07, 15.45s/batch, batch_loss=296, batc

Validation:  44%|▍| 324/743 [1:21:04<1:46:52, 15.30s/batch, batch_loss=296, batc

Validation:  44%|▍| 324/743 [1:21:20<1:46:52, 15.30s/batch, batch_loss=16.9, bat

Validation:  44%|▍| 325/743 [1:21:20<1:46:29, 15.29s/batch, batch_loss=16.9, bat

Validation:  44%|▍| 325/743 [1:21:35<1:46:29, 15.29s/batch, batch_loss=16.7, bat

Validation:  44%|▍| 326/743 [1:21:35<1:46:03, 15.26s/batch, batch_loss=16.7, bat

Validation:  44%|▍| 326/743 [1:21:50<1:46:03, 15.26s/batch, batch_loss=18.5, bat

Validation:  44%|▍| 327/743 [1:21:50<1:45:13, 15.18s/batch, batch_loss=18.5, bat

Validation:  44%|▍| 327/743 [1:22:05<1:45:13, 15.18s/batch, batch_loss=17.5, bat

Validation:  44%|▍| 328/743 [1:22:05<1:45:18, 15.22s/batch, batch_loss=17.5, bat

Validation:  44%|▍| 328/743 [1:22:21<1:45:18, 15.22s/batch, batch_loss=6.31, bat

Validation:  44%|▍| 329/743 [1:22:21<1:45:18, 15.26s/batch, batch_loss=6.31, bat

Validation:  44%|▍| 329/743 [1:22:37<1:45:18, 15.26s/batch, batch_loss=13.8, bat

Validation:  44%|▍| 330/743 [1:22:37<1:47:11, 15.57s/batch, batch_loss=13.8, bat

Validation:  44%|▍| 330/743 [1:22:53<1:47:11, 15.57s/batch, batch_loss=19.3, bat

Validation:  45%|▍| 331/743 [1:22:53<1:47:12, 15.61s/batch, batch_loss=19.3, bat

Validation:  45%|▍| 331/743 [1:23:08<1:47:12, 15.61s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:23:08<1:46:11, 15.50s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:23:23<1:46:11, 15.50s/batch, batch_loss=31.2, bat

Validation:  45%|▍| 333/743 [1:23:23<1:45:51, 15.49s/batch, batch_loss=31.2, bat

Validation:  45%|▍| 333/743 [1:23:38<1:45:51, 15.49s/batch, batch_loss=22, batch

Validation:  45%|▍| 334/743 [1:23:38<1:44:14, 15.29s/batch, batch_loss=22, batch

Validation:  45%|▍| 334/743 [1:23:54<1:44:14, 15.29s/batch, batch_loss=34.4, bat

Validation:  45%|▍| 335/743 [1:23:54<1:44:30, 15.37s/batch, batch_loss=34.4, bat

Validation:  45%|▍| 335/743 [1:24:10<1:44:30, 15.37s/batch, batch_loss=11.9, bat

Validation:  45%|▍| 336/743 [1:24:10<1:45:23, 15.54s/batch, batch_loss=11.9, bat

Validation:  45%|▍| 336/743 [1:24:25<1:45:23, 15.54s/batch, batch_loss=23.6, bat

Validation:  45%|▍| 337/743 [1:24:25<1:45:09, 15.54s/batch, batch_loss=23.6, bat

Validation:  45%|▍| 337/743 [1:24:44<1:45:09, 15.54s/batch, batch_loss=33.6, bat

Validation:  45%|▍| 338/743 [1:24:44<1:51:53, 16.58s/batch, batch_loss=33.6, bat

Validation:  45%|▍| 338/743 [1:25:00<1:51:53, 16.58s/batch, batch_loss=31.2, bat

Validation:  46%|▍| 339/743 [1:25:00<1:50:08, 16.36s/batch, batch_loss=31.2, bat

Validation:  46%|▍| 339/743 [1:25:15<1:50:08, 16.36s/batch, batch_loss=30.4, bat

Validation:  46%|▍| 340/743 [1:25:15<1:46:57, 15.92s/batch, batch_loss=30.4, bat

Validation:  46%|▍| 340/743 [1:25:30<1:46:57, 15.92s/batch, batch_loss=15.3, bat

Validation:  46%|▍| 341/743 [1:25:30<1:46:01, 15.83s/batch, batch_loss=15.3, bat

Validation:  46%|▍| 341/743 [1:25:46<1:46:01, 15.83s/batch, batch_loss=21.4, bat

Validation:  46%|▍| 342/743 [1:25:46<1:45:40, 15.81s/batch, batch_loss=21.4, bat

Validation:  46%|▍| 342/743 [1:26:01<1:45:40, 15.81s/batch, batch_loss=20.7, bat

Validation:  46%|▍| 343/743 [1:26:01<1:43:49, 15.57s/batch, batch_loss=20.7, bat

Validation:  46%|▍| 343/743 [1:26:16<1:43:49, 15.57s/batch, batch_loss=23, batch

Validation:  46%|▍| 344/743 [1:26:16<1:41:55, 15.33s/batch, batch_loss=23, batch

Validation:  46%|▍| 344/743 [1:26:31<1:41:55, 15.33s/batch, batch_loss=20.2, bat

Validation:  46%|▍| 345/743 [1:26:31<1:41:09, 15.25s/batch, batch_loss=20.2, bat

Validation:  46%|▍| 345/743 [1:26:48<1:41:09, 15.25s/batch, batch_loss=30.7, bat

Validation:  47%|▍| 346/743 [1:26:48<1:44:06, 15.73s/batch, batch_loss=30.7, bat

Validation:  47%|▍| 346/743 [1:27:08<1:44:06, 15.73s/batch, batch_loss=22.7, bat

Validation:  47%|▍| 347/743 [1:27:08<1:51:29, 16.89s/batch, batch_loss=22.7, bat

Validation:  47%|▍| 347/743 [1:27:23<1:51:29, 16.89s/batch, batch_loss=30.1, bat

Validation:  47%|▍| 348/743 [1:27:23<1:47:22, 16.31s/batch, batch_loss=30.1, bat

Validation:  47%|▍| 348/743 [1:27:38<1:47:22, 16.31s/batch, batch_loss=24.8, bat

Validation:  47%|▍| 349/743 [1:27:38<1:44:40, 15.94s/batch, batch_loss=24.8, bat

Validation:  47%|▍| 349/743 [1:27:54<1:44:40, 15.94s/batch, batch_loss=19.3, bat

Validation:  47%|▍| 350/743 [1:27:54<1:45:25, 16.10s/batch, batch_loss=19.3, bat

Validation:  47%|▍| 350/743 [1:28:10<1:45:25, 16.10s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:28:10<1:44:35, 16.01s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:28:26<1:44:35, 16.01s/batch, batch_loss=30, batch

Validation:  47%|▍| 352/743 [1:28:26<1:43:49, 15.93s/batch, batch_loss=30, batch

Validation:  47%|▍| 352/743 [1:28:42<1:43:49, 15.93s/batch, batch_loss=18.6, bat

Validation:  48%|▍| 353/743 [1:28:42<1:43:35, 15.94s/batch, batch_loss=18.6, bat

Validation:  48%|▍| 353/743 [1:29:00<1:43:35, 15.94s/batch, batch_loss=22.7, bat

Validation:  48%|▍| 354/743 [1:29:00<1:47:22, 16.56s/batch, batch_loss=22.7, bat

Validation:  48%|▍| 354/743 [1:29:14<1:47:22, 16.56s/batch, batch_loss=25.4, bat

Validation:  48%|▍| 355/743 [1:29:14<1:43:13, 15.96s/batch, batch_loss=25.4, bat

Validation:  48%|▍| 355/743 [1:29:30<1:43:13, 15.96s/batch, batch_loss=37.4, bat

Validation:  48%|▍| 356/743 [1:29:30<1:42:54, 15.95s/batch, batch_loss=37.4, bat

Validation:  48%|▍| 356/743 [1:29:45<1:42:54, 15.95s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:29:45<1:40:10, 15.57s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:30:01<1:40:10, 15.57s/batch, batch_loss=14, batch

Validation:  48%|▍| 358/743 [1:30:01<1:41:58, 15.89s/batch, batch_loss=14, batch

Validation:  48%|▍| 358/743 [1:30:18<1:41:58, 15.89s/batch, batch_loss=10.4, bat

Validation:  48%|▍| 359/743 [1:30:18<1:43:26, 16.16s/batch, batch_loss=10.4, bat

Validation:  48%|▍| 359/743 [1:30:33<1:43:26, 16.16s/batch, batch_loss=22.5, bat

Validation:  48%|▍| 360/743 [1:30:33<1:40:56, 15.81s/batch, batch_loss=22.5, bat

Validation:  48%|▍| 360/743 [1:30:48<1:40:56, 15.81s/batch, batch_loss=14.2, bat

Validation:  49%|▍| 361/743 [1:30:48<1:38:42, 15.50s/batch, batch_loss=14.2, bat

Validation:  49%|▍| 361/743 [1:31:04<1:38:42, 15.50s/batch, batch_loss=25.1, bat

Validation:  49%|▍| 362/743 [1:31:04<1:39:38, 15.69s/batch, batch_loss=25.1, bat

Validation:  49%|▍| 362/743 [1:31:19<1:39:38, 15.69s/batch, batch_loss=24.5, bat

Validation:  49%|▍| 363/743 [1:31:19<1:38:06, 15.49s/batch, batch_loss=24.5, bat

Validation:  49%|▍| 363/743 [1:31:34<1:38:06, 15.49s/batch, batch_loss=22, batch

Validation:  49%|▍| 364/743 [1:31:34<1:36:04, 15.21s/batch, batch_loss=22, batch

Validation:  49%|▍| 364/743 [1:31:48<1:36:04, 15.21s/batch, batch_loss=17.1, bat

Validation:  49%|▍| 365/743 [1:31:48<1:34:44, 15.04s/batch, batch_loss=17.1, bat

Validation:  49%|▍| 365/743 [1:32:03<1:34:44, 15.04s/batch, batch_loss=14.4, bat

Validation:  49%|▍| 366/743 [1:32:03<1:33:00, 14.80s/batch, batch_loss=14.4, bat

Validation:  49%|▍| 366/743 [1:32:18<1:33:00, 14.80s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 367/743 [1:32:18<1:34:49, 15.13s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 367/743 [1:32:34<1:34:49, 15.13s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:32:34<1:35:23, 15.26s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:32:51<1:35:23, 15.26s/batch, batch_loss=16.3, bat

Validation:  50%|▍| 369/743 [1:32:51<1:37:35, 15.66s/batch, batch_loss=16.3, bat

Validation:  50%|▍| 369/743 [1:33:07<1:37:35, 15.66s/batch, batch_loss=26.7, bat

Validation:  50%|▍| 370/743 [1:33:07<1:37:47, 15.73s/batch, batch_loss=26.7, bat

Validation:  50%|▍| 370/743 [1:33:22<1:37:47, 15.73s/batch, batch_loss=19.7, bat

Validation:  50%|▍| 371/743 [1:33:22<1:37:21, 15.70s/batch, batch_loss=19.7, bat

Validation:  50%|▍| 371/743 [1:33:38<1:37:21, 15.70s/batch, batch_loss=18.8, bat

Validation:  50%|▌| 372/743 [1:33:38<1:36:41, 15.64s/batch, batch_loss=18.8, bat

Validation:  50%|▌| 372/743 [1:33:52<1:36:41, 15.64s/batch, batch_loss=22.5, bat

Validation:  50%|▌| 373/743 [1:33:52<1:34:42, 15.36s/batch, batch_loss=22.5, bat

Validation:  50%|▌| 373/743 [1:34:07<1:34:42, 15.36s/batch, batch_loss=14.8, bat

Validation:  50%|▌| 374/743 [1:34:07<1:33:21, 15.18s/batch, batch_loss=14.8, bat

Validation:  50%|▌| 374/743 [1:34:22<1:33:21, 15.18s/batch, batch_loss=8.12, bat

Validation:  50%|▌| 375/743 [1:34:22<1:32:26, 15.07s/batch, batch_loss=8.12, bat

Validation:  50%|▌| 375/743 [1:34:40<1:32:26, 15.07s/batch, batch_loss=30.7, bat

Validation:  51%|▌| 376/743 [1:34:40<1:38:23, 16.09s/batch, batch_loss=30.7, bat

Validation:  51%|▌| 376/743 [1:34:55<1:38:23, 16.09s/batch, batch_loss=11.1, bat

Validation:  51%|▌| 377/743 [1:34:55<1:36:21, 15.80s/batch, batch_loss=11.1, bat

Validation:  51%|▌| 377/743 [1:35:10<1:36:21, 15.80s/batch, batch_loss=19.4, bat

Validation:  51%|▌| 378/743 [1:35:10<1:34:05, 15.47s/batch, batch_loss=19.4, bat

Validation:  51%|▌| 378/743 [1:35:25<1:34:05, 15.47s/batch, batch_loss=7.51, bat

Validation:  51%|▌| 379/743 [1:35:25<1:32:58, 15.32s/batch, batch_loss=7.51, bat

Validation:  51%|▌| 379/743 [1:35:41<1:32:58, 15.32s/batch, batch_loss=7.5, batc

Validation:  51%|▌| 380/743 [1:35:41<1:33:55, 15.53s/batch, batch_loss=7.5, batc

Validation:  51%|▌| 380/743 [1:35:56<1:33:55, 15.53s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:35:56<1:32:30, 15.33s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:36:11<1:32:30, 15.33s/batch, batch_loss=917, batc

Validation:  51%|▌| 382/743 [1:36:11<1:32:14, 15.33s/batch, batch_loss=917, batc

Validation:  51%|▌| 382/743 [1:36:26<1:32:14, 15.33s/batch, batch_loss=211, batc

Validation:  52%|▌| 383/743 [1:36:26<1:31:26, 15.24s/batch, batch_loss=211, batc

Validation:  52%|▌| 383/743 [1:36:42<1:31:26, 15.24s/batch, batch_loss=280, batc

Validation:  52%|▌| 384/743 [1:36:42<1:31:35, 15.31s/batch, batch_loss=280, batc

Validation:  52%|▌| 384/743 [1:36:58<1:31:35, 15.31s/batch, batch_loss=20.2, bat

Validation:  52%|▌| 385/743 [1:36:58<1:32:18, 15.47s/batch, batch_loss=20.2, bat

Validation:  52%|▌| 385/743 [1:37:12<1:32:18, 15.47s/batch, batch_loss=10.2, bat

Validation:  52%|▌| 386/743 [1:37:12<1:30:25, 15.20s/batch, batch_loss=10.2, bat

Validation:  52%|▌| 386/743 [1:37:31<1:30:25, 15.20s/batch, batch_loss=7.67, bat

Validation:  52%|▌| 387/743 [1:37:31<1:36:06, 16.20s/batch, batch_loss=7.67, bat

Validation:  52%|▌| 387/743 [1:37:47<1:36:06, 16.20s/batch, batch_loss=14.7, bat

Validation:  52%|▌| 388/743 [1:37:47<1:35:06, 16.07s/batch, batch_loss=14.7, bat

Validation:  52%|▌| 388/743 [1:38:02<1:35:06, 16.07s/batch, batch_loss=12.2, bat

Validation:  52%|▌| 389/743 [1:38:02<1:32:58, 15.76s/batch, batch_loss=12.2, bat

Validation:  52%|▌| 389/743 [1:38:17<1:32:58, 15.76s/batch, batch_loss=16.2, bat

Validation:  52%|▌| 390/743 [1:38:17<1:31:26, 15.54s/batch, batch_loss=16.2, bat

Validation:  52%|▌| 390/743 [1:38:31<1:31:26, 15.54s/batch, batch_loss=14.2, bat

Validation:  53%|▌| 391/743 [1:38:31<1:28:33, 15.10s/batch, batch_loss=14.2, bat

Validation:  53%|▌| 391/743 [1:38:46<1:28:33, 15.10s/batch, batch_loss=15, batch

Validation:  53%|▌| 392/743 [1:38:46<1:27:51, 15.02s/batch, batch_loss=15, batch

Validation:  53%|▌| 392/743 [1:39:01<1:27:51, 15.02s/batch, batch_loss=15.9, bat

Validation:  53%|▌| 393/743 [1:39:01<1:28:30, 15.17s/batch, batch_loss=15.9, bat

Validation:  53%|▌| 393/743 [1:39:16<1:28:30, 15.17s/batch, batch_loss=15.6, bat

Validation:  53%|▌| 394/743 [1:39:16<1:27:39, 15.07s/batch, batch_loss=15.6, bat

Validation:  53%|▌| 394/743 [1:39:31<1:27:39, 15.07s/batch, batch_loss=10.1, bat

Validation:  53%|▌| 395/743 [1:39:31<1:26:55, 14.99s/batch, batch_loss=10.1, bat

Validation:  53%|▌| 395/743 [1:39:47<1:26:55, 14.99s/batch, batch_loss=14, batch

Validation:  53%|▌| 396/743 [1:39:47<1:28:25, 15.29s/batch, batch_loss=14, batch

Validation:  53%|▌| 396/743 [1:40:02<1:28:25, 15.29s/batch, batch_loss=10.2, bat

Validation:  53%|▌| 397/743 [1:40:02<1:28:12, 15.30s/batch, batch_loss=10.2, bat

Validation:  53%|▌| 397/743 [1:40:18<1:28:12, 15.30s/batch, batch_loss=17.9, bat

Validation:  54%|▌| 398/743 [1:40:18<1:28:32, 15.40s/batch, batch_loss=17.9, bat

Validation:  54%|▌| 398/743 [1:40:32<1:28:32, 15.40s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 399/743 [1:40:32<1:27:10, 15.21s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 399/743 [1:40:48<1:27:10, 15.21s/batch, batch_loss=20.8, bat

Validation:  54%|▌| 400/743 [1:40:48<1:26:48, 15.19s/batch, batch_loss=20.8, bat

Validation:  54%|▌| 400/743 [1:41:03<1:26:48, 15.19s/batch, batch_loss=16.7, bat

Validation:  54%|▌| 401/743 [1:41:03<1:27:07, 15.28s/batch, batch_loss=16.7, bat

Validation:  54%|▌| 401/743 [1:41:18<1:27:07, 15.28s/batch, batch_loss=6.56, bat

Validation:  54%|▌| 402/743 [1:41:18<1:25:37, 15.07s/batch, batch_loss=6.56, bat

Validation:  54%|▌| 402/743 [1:41:32<1:25:37, 15.07s/batch, batch_loss=14.8, bat

Validation:  54%|▌| 403/743 [1:41:32<1:24:04, 14.84s/batch, batch_loss=14.8, bat

Validation:  54%|▌| 403/743 [1:41:46<1:24:04, 14.84s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 404/743 [1:41:46<1:22:44, 14.65s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 404/743 [1:42:01<1:22:44, 14.65s/batch, batch_loss=9.92, bat

Validation:  55%|▌| 405/743 [1:42:01<1:22:42, 14.68s/batch, batch_loss=9.92, bat

Validation:  55%|▌| 405/743 [1:42:16<1:22:42, 14.68s/batch, batch_loss=11.3, bat

Validation:  55%|▌| 406/743 [1:42:16<1:23:34, 14.88s/batch, batch_loss=11.3, bat

Validation:  55%|▌| 406/743 [1:42:32<1:23:34, 14.88s/batch, batch_loss=16.8, bat

Validation:  55%|▌| 407/743 [1:42:32<1:24:02, 15.01s/batch, batch_loss=16.8, bat

Validation:  55%|▌| 407/743 [1:42:50<1:24:02, 15.01s/batch, batch_loss=22.8, bat

Validation:  55%|▌| 408/743 [1:42:50<1:29:38, 16.06s/batch, batch_loss=22.8, bat

Validation:  55%|▌| 408/743 [1:43:06<1:29:38, 16.06s/batch, batch_loss=10.9, bat

Validation:  55%|▌| 409/743 [1:43:06<1:28:53, 15.97s/batch, batch_loss=10.9, bat

Validation:  55%|▌| 409/743 [1:43:20<1:28:53, 15.97s/batch, batch_loss=16.8, bat

Validation:  55%|▌| 410/743 [1:43:20<1:25:51, 15.47s/batch, batch_loss=16.8, bat

Validation:  55%|▌| 410/743 [1:43:35<1:25:51, 15.47s/batch, batch_loss=19.1, bat

Validation:  55%|▌| 411/743 [1:43:35<1:25:14, 15.40s/batch, batch_loss=19.1, bat

Validation:  55%|▌| 411/743 [1:43:48<1:25:14, 15.40s/batch, batch_loss=16.5, bat

Validation:  55%|▌| 412/743 [1:43:48<1:20:48, 14.65s/batch, batch_loss=16.5, bat

Validation:  55%|▌| 412/743 [1:44:01<1:20:48, 14.65s/batch, batch_loss=1.92e+3, 

Validation:  56%|▌| 413/743 [1:44:01<1:16:51, 13.97s/batch, batch_loss=1.92e+3, 

Validation:  56%|▌| 413/743 [1:44:13<1:16:51, 13.97s/batch, batch_loss=25.4, bat

Validation:  56%|▌| 414/743 [1:44:13<1:14:23, 13.57s/batch, batch_loss=25.4, bat

Validation:  56%|▌| 414/743 [1:44:26<1:14:23, 13.57s/batch, batch_loss=26.1, bat

Validation:  56%|▌| 415/743 [1:44:26<1:12:28, 13.26s/batch, batch_loss=26.1, bat

Validation:  56%|▌| 415/743 [1:44:38<1:12:28, 13.26s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [1:44:38<1:10:12, 12.88s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [1:44:54<1:10:12, 12.88s/batch, batch_loss=16.2, bat

Validation:  56%|▌| 417/743 [1:44:54<1:15:43, 13.94s/batch, batch_loss=16.2, bat

Validation:  56%|▌| 417/743 [1:45:10<1:15:43, 13.94s/batch, batch_loss=14.7, bat

Validation:  56%|▌| 418/743 [1:45:10<1:17:52, 14.38s/batch, batch_loss=14.7, bat

Validation:  56%|▌| 418/743 [1:45:26<1:17:52, 14.38s/batch, batch_loss=16.7, bat

Validation:  56%|▌| 419/743 [1:45:26<1:21:06, 15.02s/batch, batch_loss=16.7, bat

Validation:  56%|▌| 419/743 [1:45:41<1:21:06, 15.02s/batch, batch_loss=13.4, bat

Validation:  57%|▌| 420/743 [1:45:41<1:20:43, 14.99s/batch, batch_loss=13.4, bat

Validation:  57%|▌| 420/743 [1:45:55<1:20:43, 14.99s/batch, batch_loss=29.1, bat

Validation:  57%|▌| 421/743 [1:45:55<1:19:21, 14.79s/batch, batch_loss=29.1, bat

Validation:  57%|▌| 421/743 [1:46:11<1:19:21, 14.79s/batch, batch_loss=8.74, bat

Validation:  57%|▌| 422/743 [1:46:11<1:20:03, 14.96s/batch, batch_loss=8.74, bat

Validation:  57%|▌| 422/743 [1:46:26<1:20:03, 14.96s/batch, batch_loss=21.7, bat

Validation:  57%|▌| 423/743 [1:46:26<1:19:36, 14.93s/batch, batch_loss=21.7, bat

Validation:  57%|▌| 423/743 [1:46:40<1:19:36, 14.93s/batch, batch_loss=321, batc

Validation:  57%|▌| 424/743 [1:46:40<1:19:07, 14.88s/batch, batch_loss=321, batc

Validation:  57%|▌| 424/743 [1:46:58<1:19:07, 14.88s/batch, batch_loss=22.2, bat

Validation:  57%|▌| 425/743 [1:46:58<1:22:53, 15.64s/batch, batch_loss=22.2, bat

Validation:  57%|▌| 425/743 [1:47:13<1:22:53, 15.64s/batch, batch_loss=23, batch

Validation:  57%|▌| 426/743 [1:47:13<1:21:40, 15.46s/batch, batch_loss=23, batch

Validation:  57%|▌| 426/743 [1:47:28<1:21:40, 15.46s/batch, batch_loss=20.2, bat

Validation:  57%|▌| 427/743 [1:47:28<1:20:16, 15.24s/batch, batch_loss=20.2, bat

Validation:  57%|▌| 427/743 [1:47:43<1:20:16, 15.24s/batch, batch_loss=5.28e+3, 

Validation:  58%|▌| 428/743 [1:47:43<1:20:38, 15.36s/batch, batch_loss=5.28e+3, 

Validation:  58%|▌| 428/743 [1:47:58<1:20:38, 15.36s/batch, batch_loss=16.6, bat

Validation:  58%|▌| 429/743 [1:47:58<1:20:03, 15.30s/batch, batch_loss=16.6, bat

Validation:  58%|▌| 429/743 [1:48:13<1:20:03, 15.30s/batch, batch_loss=5.4e+3, b

Validation:  58%|▌| 430/743 [1:48:13<1:18:42, 15.09s/batch, batch_loss=5.4e+3, b

Validation:  58%|▌| 430/743 [1:48:28<1:18:42, 15.09s/batch, batch_loss=1.39e+4, 

Validation:  58%|▌| 431/743 [1:48:28<1:18:03, 15.01s/batch, batch_loss=1.39e+4, 

Validation:  58%|▌| 431/743 [1:48:42<1:18:03, 15.01s/batch, batch_loss=958, batc

Validation:  58%|▌| 432/743 [1:48:42<1:17:13, 14.90s/batch, batch_loss=958, batc

Validation:  58%|▌| 432/743 [1:48:56<1:17:13, 14.90s/batch, batch_loss=15, batch

Validation:  58%|▌| 433/743 [1:48:56<1:15:23, 14.59s/batch, batch_loss=15, batch

Validation:  58%|▌| 433/743 [1:49:12<1:15:23, 14.59s/batch, batch_loss=10.4, bat

Validation:  58%|▌| 434/743 [1:49:12<1:16:12, 14.80s/batch, batch_loss=10.4, bat

Validation:  58%|▌| 434/743 [1:49:26<1:16:12, 14.80s/batch, batch_loss=14.1, bat

Validation:  59%|▌| 435/743 [1:49:26<1:14:57, 14.60s/batch, batch_loss=14.1, bat

Validation:  59%|▌| 435/743 [1:49:40<1:14:57, 14.60s/batch, batch_loss=13.2, bat

Validation:  59%|▌| 436/743 [1:49:40<1:14:35, 14.58s/batch, batch_loss=13.2, bat

Validation:  59%|▌| 436/743 [1:49:55<1:14:35, 14.58s/batch, batch_loss=21.6, bat

Validation:  59%|▌| 437/743 [1:49:55<1:14:41, 14.65s/batch, batch_loss=21.6, bat

Validation:  59%|▌| 437/743 [1:50:09<1:14:41, 14.65s/batch, batch_loss=972, batc

Validation:  59%|▌| 438/743 [1:50:09<1:13:23, 14.44s/batch, batch_loss=972, batc

Validation:  59%|▌| 438/743 [1:50:21<1:13:23, 14.44s/batch, batch_loss=901, batc

Validation:  59%|▌| 439/743 [1:50:21<1:09:59, 13.81s/batch, batch_loss=901, batc

Validation:  59%|▌| 439/743 [1:50:37<1:09:59, 13.81s/batch, batch_loss=18, batch

Validation:  59%|▌| 440/743 [1:50:37<1:12:03, 14.27s/batch, batch_loss=18, batch

Validation:  59%|▌| 440/743 [1:50:50<1:12:03, 14.27s/batch, batch_loss=13.6, bat

Validation:  59%|▌| 441/743 [1:50:50<1:10:51, 14.08s/batch, batch_loss=13.6, bat

Validation:  59%|▌| 441/743 [1:51:05<1:10:51, 14.08s/batch, batch_loss=15.1, bat

Validation:  59%|▌| 442/743 [1:51:05<1:11:12, 14.19s/batch, batch_loss=15.1, bat

Validation:  59%|▌| 442/743 [1:51:19<1:11:12, 14.19s/batch, batch_loss=10.4, bat

Validation:  60%|▌| 443/743 [1:51:19<1:11:11, 14.24s/batch, batch_loss=10.4, bat

Validation:  60%|▌| 443/743 [1:51:33<1:11:11, 14.24s/batch, batch_loss=15.2, bat

Validation:  60%|▌| 444/743 [1:51:33<1:11:01, 14.25s/batch, batch_loss=15.2, bat

Validation:  60%|▌| 444/743 [1:51:47<1:11:01, 14.25s/batch, batch_loss=7.92, bat

Validation:  60%|▌| 445/743 [1:51:47<1:10:05, 14.11s/batch, batch_loss=7.92, bat

Validation:  60%|▌| 445/743 [1:52:02<1:10:05, 14.11s/batch, batch_loss=15, batch

Validation:  60%|▌| 446/743 [1:52:02<1:10:34, 14.26s/batch, batch_loss=15, batch

Validation:  60%|▌| 446/743 [1:52:17<1:10:34, 14.26s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:52:17<1:11:36, 14.51s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:52:32<1:11:36, 14.51s/batch, batch_loss=5.43, bat

Validation:  60%|▌| 448/743 [1:52:32<1:12:21, 14.72s/batch, batch_loss=5.43, bat

Validation:  60%|▌| 448/743 [1:52:46<1:12:21, 14.72s/batch, batch_loss=11.1, bat

Validation:  60%|▌| 449/743 [1:52:46<1:11:26, 14.58s/batch, batch_loss=11.1, bat

Validation:  60%|▌| 449/743 [1:53:02<1:11:26, 14.58s/batch, batch_loss=15.9, bat

Validation:  61%|▌| 450/743 [1:53:02<1:12:52, 14.92s/batch, batch_loss=15.9, bat

Validation:  61%|▌| 450/743 [1:53:17<1:12:52, 14.92s/batch, batch_loss=12.2, bat

Validation:  61%|▌| 451/743 [1:53:17<1:12:35, 14.92s/batch, batch_loss=12.2, bat

Validation:  61%|▌| 451/743 [1:53:32<1:12:35, 14.92s/batch, batch_loss=20, batch

Validation:  61%|▌| 452/743 [1:53:32<1:11:52, 14.82s/batch, batch_loss=20, batch

Validation:  61%|▌| 452/743 [1:53:47<1:11:52, 14.82s/batch, batch_loss=13, batch

Validation:  61%|▌| 453/743 [1:53:47<1:12:14, 14.95s/batch, batch_loss=13, batch

Validation:  61%|▌| 453/743 [1:54:02<1:12:14, 14.95s/batch, batch_loss=6.52, bat

Validation:  61%|▌| 454/743 [1:54:02<1:12:22, 15.03s/batch, batch_loss=6.52, bat

Validation:  61%|▌| 454/743 [1:54:17<1:12:22, 15.03s/batch, batch_loss=9.87, bat

Validation:  61%|▌| 455/743 [1:54:17<1:12:28, 15.10s/batch, batch_loss=9.87, bat

Validation:  61%|▌| 455/743 [1:54:32<1:12:28, 15.10s/batch, batch_loss=8.01, bat

Validation:  61%|▌| 456/743 [1:54:32<1:11:23, 14.92s/batch, batch_loss=8.01, bat

Validation:  61%|▌| 456/743 [1:54:47<1:11:23, 14.92s/batch, batch_loss=14.1, bat

Validation:  62%|▌| 457/743 [1:54:47<1:10:49, 14.86s/batch, batch_loss=14.1, bat

Validation:  62%|▌| 457/743 [1:55:03<1:10:49, 14.86s/batch, batch_loss=26.5, bat

Validation:  62%|▌| 458/743 [1:55:03<1:13:05, 15.39s/batch, batch_loss=26.5, bat

Validation:  62%|▌| 458/743 [1:55:17<1:13:05, 15.39s/batch, batch_loss=13.5, bat

Validation:  62%|▌| 459/743 [1:55:17<1:10:46, 14.95s/batch, batch_loss=13.5, bat

Validation:  62%|▌| 459/743 [1:55:30<1:10:46, 14.95s/batch, batch_loss=17.5, bat

Validation:  62%|▌| 460/743 [1:55:30<1:07:43, 14.36s/batch, batch_loss=17.5, bat

Validation:  62%|▌| 460/743 [1:55:43<1:07:43, 14.36s/batch, batch_loss=14.8, bat

Validation:  62%|▌| 461/743 [1:55:43<1:06:09, 14.07s/batch, batch_loss=14.8, bat

Validation:  62%|▌| 461/743 [1:55:56<1:06:09, 14.07s/batch, batch_loss=14, batch

Validation:  62%|▌| 462/743 [1:55:56<1:04:05, 13.69s/batch, batch_loss=14, batch

Validation:  62%|▌| 462/743 [1:56:09<1:04:05, 13.69s/batch, batch_loss=11.9, bat

Validation:  62%|▌| 463/743 [1:56:09<1:02:43, 13.44s/batch, batch_loss=11.9, bat

Validation:  62%|▌| 463/743 [1:56:22<1:02:43, 13.44s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:56:22<1:01:59, 13.33s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:56:35<1:01:59, 13.33s/batch, batch_loss=19.1, bat

Validation:  63%|▋| 465/743 [1:56:35<1:01:13, 13.21s/batch, batch_loss=19.1, bat

Validation:  63%|▋| 465/743 [1:56:48<1:01:13, 13.21s/batch, batch_loss=14.2, bat

Validation:  63%|▋| 466/743 [1:56:48<1:00:15, 13.05s/batch, batch_loss=14.2, bat

Validation:  63%|▋| 466/743 [1:57:04<1:00:15, 13.05s/batch, batch_loss=24.9, bat

Validation:  63%|▋| 467/743 [1:57:04<1:03:42, 13.85s/batch, batch_loss=24.9, bat

Validation:  63%|▋| 467/743 [1:57:17<1:03:42, 13.85s/batch, batch_loss=12.7, bat

Validation:  63%|▋| 468/743 [1:57:17<1:02:35, 13.66s/batch, batch_loss=12.7, bat

Validation:  63%|▋| 468/743 [1:57:30<1:02:35, 13.66s/batch, batch_loss=19.7, bat

Validation:  63%|▋| 469/743 [1:57:30<1:01:24, 13.45s/batch, batch_loss=19.7, bat

Validation:  63%|▋| 469/743 [1:57:43<1:01:24, 13.45s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [1:57:43<1:00:42, 13.34s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [1:57:56<1:00:42, 13.34s/batch, batch_loss=14.2, bat

Validation:  63%|▋| 471/743 [1:57:56<59:54, 13.22s/batch, batch_loss=14.2, batch

Validation:  63%|▋| 471/743 [1:58:09<59:54, 13.22s/batch, batch_loss=20.4, batch

Validation:  64%|▋| 472/743 [1:58:09<59:31, 13.18s/batch, batch_loss=20.4, batch

Validation:  64%|▋| 472/743 [1:58:22<59:31, 13.18s/batch, batch_loss=586, batch_

Validation:  64%|▋| 473/743 [1:58:22<59:02, 13.12s/batch, batch_loss=586, batch_

Validation:  64%|▋| 473/743 [1:58:35<59:02, 13.12s/batch, batch_loss=16.5, batch

Validation:  64%|▋| 474/743 [1:58:35<59:27, 13.26s/batch, batch_loss=16.5, batch

Validation:  64%|▋| 474/743 [1:58:50<59:27, 13.26s/batch, batch_loss=18.7, batch

Validation:  64%|▋| 475/743 [1:58:50<1:00:45, 13.60s/batch, batch_loss=18.7, bat

Validation:  64%|▋| 475/743 [1:59:05<1:00:45, 13.60s/batch, batch_loss=9.36, bat

Validation:  64%|▋| 476/743 [1:59:05<1:02:51, 14.12s/batch, batch_loss=9.36, bat

Validation:  64%|▋| 476/743 [1:59:18<1:02:51, 14.12s/batch, batch_loss=13.7, bat

Validation:  64%|▋| 477/743 [1:59:18<1:01:26, 13.86s/batch, batch_loss=13.7, bat

Validation:  64%|▋| 477/743 [1:59:31<1:01:26, 13.86s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [1:59:31<1:00:08, 13.62s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [1:59:44<1:00:08, 13.62s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [1:59:44<59:06, 13.43s/batch, batch_loss=2.08e+4, ba

Validation:  64%|▋| 479/743 [1:59:58<59:06, 13.43s/batch, batch_loss=10.4, batch

Validation:  65%|▋| 480/743 [1:59:58<58:35, 13.37s/batch, batch_loss=10.4, batch

Validation:  65%|▋| 480/743 [2:00:11<58:35, 13.37s/batch, batch_loss=12.3, batch

Validation:  65%|▋| 481/743 [2:00:11<57:47, 13.23s/batch, batch_loss=12.3, batch

Validation:  65%|▋| 481/743 [2:00:23<57:47, 13.23s/batch, batch_loss=6.96e+3, ba

Validation:  65%|▋| 482/743 [2:00:23<57:08, 13.14s/batch, batch_loss=6.96e+3, ba

Validation:  65%|▋| 482/743 [2:00:37<57:08, 13.14s/batch, batch_loss=19.3, batch

Validation:  65%|▋| 483/743 [2:00:37<57:19, 13.23s/batch, batch_loss=19.3, batch

Validation:  65%|▋| 483/743 [2:00:50<57:19, 13.23s/batch, batch_loss=2.31e+4, ba

Validation:  65%|▋| 484/743 [2:00:50<56:52, 13.17s/batch, batch_loss=2.31e+4, ba

Validation:  65%|▋| 484/743 [2:01:04<56:52, 13.17s/batch, batch_loss=3.13e+4, ba

Validation:  65%|▋| 485/743 [2:01:04<57:21, 13.34s/batch, batch_loss=3.13e+4, ba

Validation:  65%|▋| 485/743 [2:01:17<57:21, 13.34s/batch, batch_loss=15, batch_i

Validation:  65%|▋| 486/743 [2:01:17<57:09, 13.34s/batch, batch_loss=15, batch_i

Validation:  65%|▋| 486/743 [2:01:31<57:09, 13.34s/batch, batch_loss=35.8, batch

Validation:  66%|▋| 487/743 [2:01:31<58:12, 13.64s/batch, batch_loss=35.8, batch

Validation:  66%|▋| 487/743 [2:01:45<58:12, 13.64s/batch, batch_loss=23.9, batch

Validation:  66%|▋| 488/743 [2:01:45<58:18, 13.72s/batch, batch_loss=23.9, batch

Validation:  66%|▋| 488/743 [2:01:59<58:18, 13.72s/batch, batch_loss=11, batch_i

Validation:  66%|▋| 489/743 [2:01:59<57:50, 13.66s/batch, batch_loss=11, batch_i

Validation:  66%|▋| 489/743 [2:02:12<57:50, 13.66s/batch, batch_loss=17.8, batch

Validation:  66%|▋| 490/743 [2:02:12<57:36, 13.66s/batch, batch_loss=17.8, batch

Validation:  66%|▋| 490/743 [2:02:26<57:36, 13.66s/batch, batch_loss=17.9, batch

Validation:  66%|▋| 491/743 [2:02:26<56:42, 13.50s/batch, batch_loss=17.9, batch

Validation:  66%|▋| 491/743 [2:02:39<56:42, 13.50s/batch, batch_loss=1.04e+3, ba

Validation:  66%|▋| 492/743 [2:02:39<56:28, 13.50s/batch, batch_loss=1.04e+3, ba

Validation:  66%|▋| 492/743 [2:02:55<56:28, 13.50s/batch, batch_loss=1.43e+4, ba

Validation:  66%|▋| 493/743 [2:02:55<58:45, 14.10s/batch, batch_loss=1.43e+4, ba

Validation:  66%|▋| 493/743 [2:03:08<58:45, 14.10s/batch, batch_loss=8.6, batch_

Validation:  66%|▋| 494/743 [2:03:08<57:19, 13.81s/batch, batch_loss=8.6, batch_

Validation:  66%|▋| 494/743 [2:03:21<57:19, 13.81s/batch, batch_loss=1.18e+4, ba

Validation:  67%|▋| 495/743 [2:03:21<56:34, 13.69s/batch, batch_loss=1.18e+4, ba

Validation:  67%|▋| 495/743 [2:03:34<56:34, 13.69s/batch, batch_loss=16.7, batch

Validation:  67%|▋| 496/743 [2:03:34<55:25, 13.46s/batch, batch_loss=16.7, batch

Validation:  67%|▋| 496/743 [2:03:47<55:25, 13.46s/batch, batch_loss=12.7, batch

Validation:  67%|▋| 497/743 [2:03:47<54:45, 13.36s/batch, batch_loss=12.7, batch

Validation:  67%|▋| 497/743 [2:04:00<54:45, 13.36s/batch, batch_loss=15, batch_i

Validation:  67%|▋| 498/743 [2:04:00<54:15, 13.29s/batch, batch_loss=15, batch_i

Validation:  67%|▋| 498/743 [2:04:14<54:15, 13.29s/batch, batch_loss=4.31, batch

Validation:  67%|▋| 499/743 [2:04:14<54:18, 13.36s/batch, batch_loss=4.31, batch

Validation:  67%|▋| 499/743 [2:04:27<54:18, 13.36s/batch, batch_loss=2.51e+4, ba

Validation:  67%|▋| 500/743 [2:04:27<53:54, 13.31s/batch, batch_loss=2.51e+4, ba

Validation:  67%|▋| 500/743 [2:04:40<53:54, 13.31s/batch, batch_loss=19.8, batch

Validation:  67%|▋| 501/743 [2:04:40<53:43, 13.32s/batch, batch_loss=19.8, batch

Validation:  67%|▋| 501/743 [2:04:54<53:43, 13.32s/batch, batch_loss=3.15e+3, ba

Validation:  68%|▋| 502/743 [2:04:54<53:17, 13.27s/batch, batch_loss=3.15e+3, ba

Validation:  68%|▋| 502/743 [2:05:07<53:17, 13.27s/batch, batch_loss=15.3, batch

Validation:  68%|▋| 503/743 [2:05:07<53:29, 13.37s/batch, batch_loss=15.3, batch

Validation:  68%|▋| 503/743 [2:05:22<53:29, 13.37s/batch, batch_loss=10.9, batch

Validation:  68%|▋| 504/743 [2:05:22<55:07, 13.84s/batch, batch_loss=10.9, batch

Validation:  68%|▋| 504/743 [2:05:37<55:07, 13.84s/batch, batch_loss=19.5, batch

Validation:  68%|▋| 505/743 [2:05:37<56:41, 14.29s/batch, batch_loss=19.5, batch

Validation:  68%|▋| 505/743 [2:05:52<56:41, 14.29s/batch, batch_loss=2.83e+3, ba

Validation:  68%|▋| 506/743 [2:05:52<57:10, 14.47s/batch, batch_loss=2.83e+3, ba

Validation:  68%|▋| 506/743 [2:06:07<57:10, 14.47s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:06:07<57:01, 14.50s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:06:22<57:01, 14.50s/batch, batch_loss=8.37e+3, ba

Validation:  68%|▋| 508/743 [2:06:22<57:15, 14.62s/batch, batch_loss=8.37e+3, ba

Validation:  68%|▋| 508/743 [2:06:38<57:15, 14.62s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:06:38<58:21, 14.97s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:06:56<58:21, 14.97s/batch, batch_loss=13.1, batch

Validation:  69%|▋| 510/743 [2:06:56<1:01:51, 15.93s/batch, batch_loss=13.1, bat

Validation:  69%|▋| 510/743 [2:07:12<1:01:51, 15.93s/batch, batch_loss=17.5, bat

Validation:  69%|▋| 511/743 [2:07:12<1:01:42, 15.96s/batch, batch_loss=17.5, bat

Validation:  69%|▋| 511/743 [2:07:27<1:01:42, 15.96s/batch, batch_loss=16.6, bat

Validation:  69%|▋| 512/743 [2:07:27<1:01:03, 15.86s/batch, batch_loss=16.6, bat

Validation:  69%|▋| 512/743 [2:07:42<1:01:03, 15.86s/batch, batch_loss=16.9, bat

Validation:  69%|▋| 513/743 [2:07:42<59:51, 15.62s/batch, batch_loss=16.9, batch

Validation:  69%|▋| 513/743 [2:07:59<59:51, 15.62s/batch, batch_loss=13.6, batch

Validation:  69%|▋| 514/743 [2:07:59<1:00:33, 15.87s/batch, batch_loss=13.6, bat

Validation:  69%|▋| 514/743 [2:08:14<1:00:33, 15.87s/batch, batch_loss=12.6, bat

Validation:  69%|▋| 515/743 [2:08:14<59:06, 15.55s/batch, batch_loss=12.6, batch

Validation:  69%|▋| 515/743 [2:08:29<59:06, 15.55s/batch, batch_loss=13.2, batch

Validation:  69%|▋| 516/743 [2:08:29<58:38, 15.50s/batch, batch_loss=13.2, batch

Validation:  69%|▋| 516/743 [2:08:48<58:38, 15.50s/batch, batch_loss=6.16e+4, ba

Validation:  70%|▋| 517/743 [2:08:48<1:02:00, 16.46s/batch, batch_loss=6.16e+4, 

Validation:  70%|▋| 517/743 [2:09:03<1:02:00, 16.46s/batch, batch_loss=497, batc

Validation:  70%|▋| 518/743 [2:09:03<1:00:31, 16.14s/batch, batch_loss=497, batc

Validation:  70%|▋| 518/743 [2:09:19<1:00:31, 16.14s/batch, batch_loss=10.5, bat

Validation:  70%|▋| 519/743 [2:09:19<1:00:02, 16.08s/batch, batch_loss=10.5, bat

Validation:  70%|▋| 519/743 [2:09:35<1:00:02, 16.08s/batch, batch_loss=17.6, bat

Validation:  70%|▋| 520/743 [2:09:35<59:22, 15.97s/batch, batch_loss=17.6, batch

Validation:  70%|▋| 520/743 [2:09:50<59:22, 15.97s/batch, batch_loss=13.3, batch

Validation:  70%|▋| 521/743 [2:09:50<58:29, 15.81s/batch, batch_loss=13.3, batch

Validation:  70%|▋| 521/743 [2:10:06<58:29, 15.81s/batch, batch_loss=12.6, batch

Validation:  70%|▋| 522/743 [2:10:06<57:43, 15.67s/batch, batch_loss=12.6, batch

Validation:  70%|▋| 522/743 [2:10:21<57:43, 15.67s/batch, batch_loss=427, batch_

Validation:  70%|▋| 523/743 [2:10:21<57:31, 15.69s/batch, batch_loss=427, batch_

Validation:  70%|▋| 523/743 [2:10:41<57:31, 15.69s/batch, batch_loss=13.6, batch

Validation:  71%|▋| 524/743 [2:10:41<1:01:41, 16.90s/batch, batch_loss=13.6, bat

Validation:  71%|▋| 524/743 [2:10:56<1:01:41, 16.90s/batch, batch_loss=23.5, bat

Validation:  71%|▋| 525/743 [2:10:56<59:42, 16.43s/batch, batch_loss=23.5, batch

Validation:  71%|▋| 525/743 [2:11:12<59:42, 16.43s/batch, batch_loss=9.77, batch

Validation:  71%|▋| 526/743 [2:11:12<58:11, 16.09s/batch, batch_loss=9.77, batch

Validation:  71%|▋| 526/743 [2:11:27<58:11, 16.09s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:11:27<57:18, 15.92s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:11:44<57:18, 15.92s/batch, batch_loss=508, batch_

Validation:  71%|▋| 528/743 [2:11:44<57:34, 16.07s/batch, batch_loss=508, batch_

Validation:  71%|▋| 528/743 [2:12:00<57:34, 16.07s/batch, batch_loss=6.51e+3, ba

Validation:  71%|▋| 529/743 [2:12:00<57:44, 16.19s/batch, batch_loss=6.51e+3, ba

Validation:  71%|▋| 529/743 [2:12:17<57:44, 16.19s/batch, batch_loss=208, batch_

Validation:  71%|▋| 530/743 [2:12:17<58:19, 16.43s/batch, batch_loss=208, batch_

Validation:  71%|▋| 530/743 [2:12:34<58:19, 16.43s/batch, batch_loss=38.6, batch

Validation:  71%|▋| 531/743 [2:12:34<58:30, 16.56s/batch, batch_loss=38.6, batch

Validation:  71%|▋| 531/743 [2:12:50<58:30, 16.56s/batch, batch_loss=254, batch_

Validation:  72%|▋| 532/743 [2:12:50<57:22, 16.32s/batch, batch_loss=254, batch_

Validation:  72%|▋| 532/743 [2:13:05<57:22, 16.32s/batch, batch_loss=8.43, batch

Validation:  72%|▋| 533/743 [2:13:05<56:07, 16.04s/batch, batch_loss=8.43, batch

Validation:  72%|▋| 533/743 [2:13:21<56:07, 16.04s/batch, batch_loss=11, batch_i

Validation:  72%|▋| 534/743 [2:13:21<55:44, 16.00s/batch, batch_loss=11, batch_i

Validation:  72%|▋| 534/743 [2:13:36<55:44, 16.00s/batch, batch_loss=18.1, batch

Validation:  72%|▋| 535/743 [2:13:36<54:40, 15.77s/batch, batch_loss=18.1, batch

Validation:  72%|▋| 535/743 [2:13:52<54:40, 15.77s/batch, batch_loss=18, batch_i

Validation:  72%|▋| 536/743 [2:13:52<54:07, 15.69s/batch, batch_loss=18, batch_i

Validation:  72%|▋| 536/743 [2:14:08<54:07, 15.69s/batch, batch_loss=11.9, batch

Validation:  72%|▋| 537/743 [2:14:08<54:16, 15.81s/batch, batch_loss=11.9, batch

Validation:  72%|▋| 537/743 [2:14:24<54:16, 15.81s/batch, batch_loss=15.6, batch

Validation:  72%|▋| 538/743 [2:14:24<53:58, 15.80s/batch, batch_loss=15.6, batch

Validation:  72%|▋| 538/743 [2:14:40<53:58, 15.80s/batch, batch_loss=249, batch_

Validation:  73%|▋| 539/743 [2:14:40<54:39, 16.07s/batch, batch_loss=249, batch_

Validation:  73%|▋| 539/743 [2:14:56<54:39, 16.07s/batch, batch_loss=16.4, batch

Validation:  73%|▋| 540/743 [2:14:56<54:04, 15.98s/batch, batch_loss=16.4, batch

Validation:  73%|▋| 540/743 [2:15:13<54:04, 15.98s/batch, batch_loss=27.1, batch

Validation:  73%|▋| 541/743 [2:15:13<54:29, 16.19s/batch, batch_loss=27.1, batch

Validation:  73%|▋| 541/743 [2:15:28<54:29, 16.19s/batch, batch_loss=1.94e+3, ba

Validation:  73%|▋| 542/743 [2:15:28<53:32, 15.98s/batch, batch_loss=1.94e+3, ba

Validation:  73%|▋| 542/743 [2:15:44<53:32, 15.98s/batch, batch_loss=16, batch_i

Validation:  73%|▋| 543/743 [2:15:44<53:01, 15.91s/batch, batch_loss=16, batch_i

Validation:  73%|▋| 543/743 [2:16:00<53:01, 15.91s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:16:00<52:23, 15.80s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:16:15<52:23, 15.80s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:16:15<52:12, 15.82s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:16:31<52:12, 15.82s/batch, batch_loss=7.31, batch

Validation:  73%|▋| 546/743 [2:16:31<52:06, 15.87s/batch, batch_loss=7.31, batch

Validation:  73%|▋| 546/743 [2:16:47<52:06, 15.87s/batch, batch_loss=259, batch_

Validation:  74%|▋| 547/743 [2:16:47<51:07, 15.65s/batch, batch_loss=259, batch_

Validation:  74%|▋| 547/743 [2:17:01<51:07, 15.65s/batch, batch_loss=24.3, batch

Validation:  74%|▋| 548/743 [2:17:01<50:05, 15.41s/batch, batch_loss=24.3, batch

Validation:  74%|▋| 548/743 [2:17:15<50:05, 15.41s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:17:15<48:21, 14.96s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:17:30<48:21, 14.96s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:17:30<48:00, 14.93s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:17:46<48:00, 14.93s/batch, batch_loss=14.1, batch

Validation:  74%|▋| 551/743 [2:17:46<48:38, 15.20s/batch, batch_loss=14.1, batch

Validation:  74%|▋| 551/743 [2:18:01<48:38, 15.20s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:18:01<48:12, 15.14s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:18:17<48:12, 15.14s/batch, batch_loss=24.1, batch

Validation:  74%|▋| 553/743 [2:18:17<48:31, 15.33s/batch, batch_loss=24.1, batch

Validation:  74%|▋| 553/743 [2:18:31<48:31, 15.33s/batch, batch_loss=19.8, batch

Validation:  75%|▋| 554/743 [2:18:31<47:42, 15.15s/batch, batch_loss=19.8, batch

Validation:  75%|▋| 554/743 [2:18:45<47:42, 15.15s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:18:45<46:11, 14.74s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:18:59<46:11, 14.74s/batch, batch_loss=32.5, batch

Validation:  75%|▋| 556/743 [2:18:59<44:55, 14.41s/batch, batch_loss=32.5, batch

Validation:  75%|▋| 556/743 [2:19:13<44:55, 14.41s/batch, batch_loss=8.44, batch

Validation:  75%|▋| 557/743 [2:19:13<44:32, 14.37s/batch, batch_loss=8.44, batch

Validation:  75%|▋| 557/743 [2:19:29<44:32, 14.37s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:19:29<45:22, 14.72s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:19:43<45:22, 14.72s/batch, batch_loss=3.59e+3, ba

Validation:  75%|▊| 559/743 [2:19:43<44:59, 14.67s/batch, batch_loss=3.59e+3, ba

Validation:  75%|▊| 559/743 [2:19:58<44:59, 14.67s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:19:58<44:44, 14.67s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:20:12<44:44, 14.67s/batch, batch_loss=9.75, batch

Validation:  76%|▊| 561/743 [2:20:12<44:16, 14.60s/batch, batch_loss=9.75, batch

Validation:  76%|▊| 561/743 [2:20:26<44:16, 14.60s/batch, batch_loss=15.4, batch

Validation:  76%|▊| 562/743 [2:20:26<43:02, 14.27s/batch, batch_loss=15.4, batch

Validation:  76%|▊| 562/743 [2:20:43<43:02, 14.27s/batch, batch_loss=17.4, batch

Validation:  76%|▊| 563/743 [2:20:43<45:33, 15.19s/batch, batch_loss=17.4, batch

Validation:  76%|▊| 563/743 [2:20:58<45:33, 15.19s/batch, batch_loss=1.08e+3, ba

Validation:  76%|▊| 564/743 [2:20:58<44:52, 15.04s/batch, batch_loss=1.08e+3, ba

Validation:  76%|▊| 564/743 [2:21:13<44:52, 15.04s/batch, batch_loss=3.68e+3, ba

Validation:  76%|▊| 565/743 [2:21:13<44:37, 15.04s/batch, batch_loss=3.68e+3, ba

Validation:  76%|▊| 565/743 [2:21:28<44:37, 15.04s/batch, batch_loss=12.7, batch

Validation:  76%|▊| 566/743 [2:21:28<44:19, 15.03s/batch, batch_loss=12.7, batch

Validation:  76%|▊| 566/743 [2:21:44<44:19, 15.03s/batch, batch_loss=14.4, batch

Validation:  76%|▊| 567/743 [2:21:44<44:58, 15.33s/batch, batch_loss=14.4, batch

Validation:  76%|▊| 567/743 [2:21:58<44:58, 15.33s/batch, batch_loss=11.5, batch

Validation:  76%|▊| 568/743 [2:21:58<43:57, 15.07s/batch, batch_loss=11.5, batch

Validation:  76%|▊| 568/743 [2:22:14<43:57, 15.07s/batch, batch_loss=15.9, batch

Validation:  77%|▊| 569/743 [2:22:14<43:57, 15.16s/batch, batch_loss=15.9, batch

Validation:  77%|▊| 569/743 [2:22:29<43:57, 15.16s/batch, batch_loss=19, batch_i

Validation:  77%|▊| 570/743 [2:22:29<44:10, 15.32s/batch, batch_loss=19, batch_i

Validation:  77%|▊| 570/743 [2:22:50<44:10, 15.32s/batch, batch_loss=10, batch_i

Validation:  77%|▊| 571/743 [2:22:50<48:06, 16.78s/batch, batch_loss=10, batch_i

Validation:  77%|▊| 571/743 [2:23:06<48:06, 16.78s/batch, batch_loss=22.1, batch

Validation:  77%|▊| 572/743 [2:23:06<47:26, 16.65s/batch, batch_loss=22.1, batch

Validation:  77%|▊| 572/743 [2:23:22<47:26, 16.65s/batch, batch_loss=13, batch_i

Validation:  77%|▊| 573/743 [2:23:22<46:37, 16.45s/batch, batch_loss=13, batch_i

Validation:  77%|▊| 573/743 [2:23:37<46:37, 16.45s/batch, batch_loss=15.3, batch

Validation:  77%|▊| 574/743 [2:23:37<45:19, 16.09s/batch, batch_loss=15.3, batch

Validation:  77%|▊| 574/743 [2:23:53<45:19, 16.09s/batch, batch_loss=13.7, batch

Validation:  77%|▊| 575/743 [2:23:53<44:51, 16.02s/batch, batch_loss=13.7, batch

Validation:  77%|▊| 575/743 [2:24:09<44:51, 16.02s/batch, batch_loss=22.4, batch

Validation:  78%|▊| 576/743 [2:24:09<44:28, 15.98s/batch, batch_loss=22.4, batch

Validation:  78%|▊| 576/743 [2:24:26<44:28, 15.98s/batch, batch_loss=19.2, batch

Validation:  78%|▊| 577/743 [2:24:26<45:00, 16.27s/batch, batch_loss=19.2, batch

Validation:  78%|▊| 577/743 [2:24:44<45:00, 16.27s/batch, batch_loss=24.9, batch

Validation:  78%|▊| 578/743 [2:24:44<46:28, 16.90s/batch, batch_loss=24.9, batch

Validation:  78%|▊| 578/743 [2:24:58<46:28, 16.90s/batch, batch_loss=315, batch_

Validation:  78%|▊| 579/743 [2:24:58<43:43, 16.00s/batch, batch_loss=315, batch_

Validation:  78%|▊| 579/743 [2:25:12<43:43, 16.00s/batch, batch_loss=5.97, batch

Validation:  78%|▊| 580/743 [2:25:12<41:46, 15.38s/batch, batch_loss=5.97, batch

Validation:  78%|▊| 580/743 [2:25:27<41:46, 15.38s/batch, batch_loss=8.7, batch_

Validation:  78%|▊| 581/743 [2:25:27<40:58, 15.17s/batch, batch_loss=8.7, batch_

Validation:  78%|▊| 581/743 [2:25:40<40:58, 15.17s/batch, batch_loss=15.8, batch

Validation:  78%|▊| 582/743 [2:25:40<39:22, 14.68s/batch, batch_loss=15.8, batch

Validation:  78%|▊| 582/743 [2:25:54<39:22, 14.68s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:25:54<38:34, 14.47s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:26:08<38:34, 14.47s/batch, batch_loss=1.95, batch

Validation:  79%|▊| 584/743 [2:26:08<37:37, 14.20s/batch, batch_loss=1.95, batch

Validation:  79%|▊| 584/743 [2:26:21<37:37, 14.20s/batch, batch_loss=19, batch_i

Validation:  79%|▊| 585/743 [2:26:21<36:43, 13.95s/batch, batch_loss=19, batch_i

Validation:  79%|▊| 585/743 [2:26:34<36:43, 13.95s/batch, batch_loss=551, batch_

Validation:  79%|▊| 586/743 [2:26:34<35:51, 13.70s/batch, batch_loss=551, batch_

Validation:  79%|▊| 586/743 [2:26:50<35:51, 13.70s/batch, batch_loss=8.53, batch

Validation:  79%|▊| 587/743 [2:26:50<37:26, 14.40s/batch, batch_loss=8.53, batch

Validation:  79%|▊| 587/743 [2:27:03<37:26, 14.40s/batch, batch_loss=402, batch_

Validation:  79%|▊| 588/743 [2:27:03<36:00, 13.94s/batch, batch_loss=402, batch_

Validation:  79%|▊| 588/743 [2:27:16<36:00, 13.94s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:27:16<34:48, 13.56s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:27:29<34:48, 13.56s/batch, batch_loss=17.9, batch

Validation:  79%|▊| 590/743 [2:27:29<34:11, 13.41s/batch, batch_loss=17.9, batch

Validation:  79%|▊| 590/743 [2:27:42<34:11, 13.41s/batch, batch_loss=13.4, batch

Validation:  80%|▊| 591/743 [2:27:42<33:37, 13.28s/batch, batch_loss=13.4, batch

Validation:  80%|▊| 591/743 [2:27:55<33:37, 13.28s/batch, batch_loss=10.9, batch

Validation:  80%|▊| 592/743 [2:27:55<32:54, 13.07s/batch, batch_loss=10.9, batch

Validation:  80%|▊| 592/743 [2:28:08<32:54, 13.07s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:28:08<32:50, 13.13s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:28:21<32:50, 13.13s/batch, batch_loss=3.67, batch

Validation:  80%|▊| 594/743 [2:28:21<32:35, 13.12s/batch, batch_loss=3.67, batch

Validation:  80%|▊| 594/743 [2:28:34<32:35, 13.12s/batch, batch_loss=5.01, batch

Validation:  80%|▊| 595/743 [2:28:34<32:15, 13.08s/batch, batch_loss=5.01, batch

Validation:  80%|▊| 595/743 [2:28:47<32:15, 13.08s/batch, batch_loss=5.85, batch

Validation:  80%|▊| 596/743 [2:28:47<31:48, 12.98s/batch, batch_loss=5.85, batch

Validation:  80%|▊| 596/743 [2:29:00<31:48, 12.98s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:29:00<31:39, 13.01s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:29:13<31:39, 13.01s/batch, batch_loss=13.5, batch

Validation:  80%|▊| 598/743 [2:29:13<31:19, 12.96s/batch, batch_loss=13.5, batch

Validation:  80%|▊| 598/743 [2:29:26<31:19, 12.96s/batch, batch_loss=12.8, batch

Validation:  81%|▊| 599/743 [2:29:26<31:09, 12.98s/batch, batch_loss=12.8, batch

Validation:  81%|▊| 599/743 [2:29:39<31:09, 12.98s/batch, batch_loss=18.8, batch

Validation:  81%|▊| 600/743 [2:29:39<31:11, 13.09s/batch, batch_loss=18.8, batch

Validation:  81%|▊| 600/743 [2:29:52<31:11, 13.09s/batch, batch_loss=12.7, batch

Validation:  81%|▊| 601/743 [2:29:52<30:51, 13.04s/batch, batch_loss=12.7, batch

Validation:  81%|▊| 601/743 [2:30:05<30:51, 13.04s/batch, batch_loss=17, batch_i

Validation:  81%|▊| 602/743 [2:30:05<30:42, 13.07s/batch, batch_loss=17, batch_i

Validation:  81%|▊| 602/743 [2:30:18<30:42, 13.07s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:30:18<30:07, 12.91s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:30:31<30:07, 12.91s/batch, batch_loss=20.2, batch

Validation:  81%|▊| 604/743 [2:30:31<29:59, 12.94s/batch, batch_loss=20.2, batch

Validation:  81%|▊| 604/743 [2:30:44<29:59, 12.94s/batch, batch_loss=24.3, batch

Validation:  81%|▊| 605/743 [2:30:44<29:58, 13.04s/batch, batch_loss=24.3, batch

Validation:  81%|▊| 605/743 [2:31:00<29:58, 13.04s/batch, batch_loss=252, batch_

Validation:  82%|▊| 606/743 [2:31:00<32:10, 14.09s/batch, batch_loss=252, batch_

Validation:  82%|▊| 606/743 [2:31:14<32:10, 14.09s/batch, batch_loss=27.4, batch

Validation:  82%|▊| 607/743 [2:31:14<31:38, 13.96s/batch, batch_loss=27.4, batch

Validation:  82%|▊| 607/743 [2:31:28<31:38, 13.96s/batch, batch_loss=18.8, batch

Validation:  82%|▊| 608/743 [2:31:28<31:17, 13.91s/batch, batch_loss=18.8, batch

Validation:  82%|▊| 608/743 [2:31:42<31:17, 13.91s/batch, batch_loss=15.2, batch

Validation:  82%|▊| 609/743 [2:31:42<31:03, 13.91s/batch, batch_loss=15.2, batch

Validation:  82%|▊| 609/743 [2:31:55<31:03, 13.91s/batch, batch_loss=17.6, batch

Validation:  82%|▊| 610/743 [2:31:55<30:35, 13.80s/batch, batch_loss=17.6, batch

Validation:  82%|▊| 610/743 [2:32:09<30:35, 13.80s/batch, batch_loss=20.8, batch

Validation:  82%|▊| 611/743 [2:32:09<30:10, 13.72s/batch, batch_loss=20.8, batch

Validation:  82%|▊| 611/743 [2:32:23<30:10, 13.72s/batch, batch_loss=10, batch_i

Validation:  82%|▊| 612/743 [2:32:23<30:06, 13.79s/batch, batch_loss=10, batch_i

Validation:  82%|▊| 612/743 [2:32:37<30:06, 13.79s/batch, batch_loss=13.9, batch

Validation:  83%|▊| 613/743 [2:32:37<29:56, 13.82s/batch, batch_loss=13.9, batch

Validation:  83%|▊| 613/743 [2:32:51<29:56, 13.82s/batch, batch_loss=5.63e+3, ba

Validation:  83%|▊| 614/743 [2:32:51<29:49, 13.87s/batch, batch_loss=5.63e+3, ba

Validation:  83%|▊| 614/743 [2:33:05<29:49, 13.87s/batch, batch_loss=13.1, batch

Validation:  83%|▊| 615/743 [2:33:05<29:43, 13.93s/batch, batch_loss=13.1, batch

Validation:  83%|▊| 615/743 [2:33:18<29:43, 13.93s/batch, batch_loss=12, batch_i

Validation:  83%|▊| 616/743 [2:33:18<29:11, 13.79s/batch, batch_loss=12, batch_i

Validation:  83%|▊| 616/743 [2:33:32<29:11, 13.79s/batch, batch_loss=5.86, batch

Validation:  83%|▊| 617/743 [2:33:32<29:04, 13.85s/batch, batch_loss=5.86, batch

Validation:  83%|▊| 617/743 [2:33:46<29:04, 13.85s/batch, batch_loss=8.61, batch

Validation:  83%|▊| 618/743 [2:33:46<28:53, 13.87s/batch, batch_loss=8.61, batch

Validation:  83%|▊| 618/743 [2:34:00<28:53, 13.87s/batch, batch_loss=342, batch_

Validation:  83%|▊| 619/743 [2:34:00<28:45, 13.91s/batch, batch_loss=342, batch_

Validation:  83%|▊| 619/743 [2:34:14<28:45, 13.91s/batch, batch_loss=14.8, batch

Validation:  83%|▊| 620/743 [2:34:14<28:28, 13.89s/batch, batch_loss=14.8, batch

Validation:  83%|▊| 620/743 [2:34:27<28:28, 13.89s/batch, batch_loss=7.62, batch

Validation:  84%|▊| 621/743 [2:34:27<27:58, 13.76s/batch, batch_loss=7.62, batch

Validation:  84%|▊| 621/743 [2:34:43<27:58, 13.76s/batch, batch_loss=12.7, batch

Validation:  84%|▊| 622/743 [2:34:43<28:47, 14.28s/batch, batch_loss=12.7, batch

Validation:  84%|▊| 622/743 [2:34:56<28:47, 14.28s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:34:56<27:57, 13.98s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:35:10<27:57, 13.98s/batch, batch_loss=11.3, batch

Validation:  84%|▊| 624/743 [2:35:10<27:39, 13.94s/batch, batch_loss=11.3, batch

Validation:  84%|▊| 624/743 [2:35:23<27:39, 13.94s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:35:23<26:43, 13.59s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:35:36<26:43, 13.59s/batch, batch_loss=18.3, batch

Validation:  84%|▊| 626/743 [2:35:36<26:07, 13.40s/batch, batch_loss=18.3, batch

Validation:  84%|▊| 626/743 [2:35:49<26:07, 13.40s/batch, batch_loss=17.3, batch

Validation:  84%|▊| 627/743 [2:35:49<25:42, 13.29s/batch, batch_loss=17.3, batch

Validation:  84%|▊| 627/743 [2:36:02<25:42, 13.29s/batch, batch_loss=16, batch_i

Validation:  85%|▊| 628/743 [2:36:02<25:30, 13.31s/batch, batch_loss=16, batch_i

Validation:  85%|▊| 628/743 [2:36:15<25:30, 13.31s/batch, batch_loss=12.4, batch

Validation:  85%|▊| 629/743 [2:36:15<25:03, 13.19s/batch, batch_loss=12.4, batch

Validation:  85%|▊| 629/743 [2:36:28<25:03, 13.19s/batch, batch_loss=15.3, batch

Validation:  85%|▊| 630/743 [2:36:28<24:30, 13.02s/batch, batch_loss=15.3, batch

Validation:  85%|▊| 630/743 [2:36:40<24:30, 13.02s/batch, batch_loss=241, batch_

Validation:  85%|▊| 631/743 [2:36:40<24:04, 12.90s/batch, batch_loss=241, batch_

Validation:  85%|▊| 631/743 [2:36:53<24:04, 12.90s/batch, batch_loss=18.9, batch

Validation:  85%|▊| 632/743 [2:36:53<23:58, 12.96s/batch, batch_loss=18.9, batch

Validation:  85%|▊| 632/743 [2:37:06<23:58, 12.96s/batch, batch_loss=14, batch_i

Validation:  85%|▊| 633/743 [2:37:06<23:41, 12.93s/batch, batch_loss=14, batch_i

Validation:  85%|▊| 633/743 [2:37:22<23:41, 12.93s/batch, batch_loss=10.8, batch

Validation:  85%|▊| 634/743 [2:37:22<24:54, 13.71s/batch, batch_loss=10.8, batch

Validation:  85%|▊| 634/743 [2:37:35<24:54, 13.71s/batch, batch_loss=7.57, batch

Validation:  85%|▊| 635/743 [2:37:35<24:28, 13.60s/batch, batch_loss=7.57, batch

Validation:  85%|▊| 635/743 [2:37:49<24:28, 13.60s/batch, batch_loss=803, batch_

Validation:  86%|▊| 636/743 [2:37:49<24:09, 13.54s/batch, batch_loss=803, batch_

Validation:  86%|▊| 636/743 [2:38:02<24:09, 13.54s/batch, batch_loss=711, batch_

Validation:  86%|▊| 637/743 [2:38:02<23:37, 13.38s/batch, batch_loss=711, batch_

Validation:  86%|▊| 637/743 [2:38:15<23:37, 13.38s/batch, batch_loss=20.4, batch

Validation:  86%|▊| 638/743 [2:38:15<23:28, 13.41s/batch, batch_loss=20.4, batch

Validation:  86%|▊| 638/743 [2:38:29<23:28, 13.41s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:38:29<23:27, 13.53s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:38:43<23:27, 13.53s/batch, batch_loss=21.3, batch

Validation:  86%|▊| 640/743 [2:38:43<23:17, 13.57s/batch, batch_loss=21.3, batch

Validation:  86%|▊| 640/743 [2:38:56<23:17, 13.57s/batch, batch_loss=30.9, batch

Validation:  86%|▊| 641/743 [2:38:56<23:14, 13.67s/batch, batch_loss=30.9, batch

Validation:  86%|▊| 641/743 [2:39:10<23:14, 13.67s/batch, batch_loss=30, batch_i

Validation:  86%|▊| 642/743 [2:39:10<22:59, 13.66s/batch, batch_loss=30, batch_i

Validation:  86%|▊| 642/743 [2:39:23<22:59, 13.66s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [2:39:23<22:33, 13.53s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [2:39:37<22:33, 13.53s/batch, batch_loss=17.8, batch

Validation:  87%|▊| 644/743 [2:39:37<22:21, 13.55s/batch, batch_loss=17.8, batch

Validation:  87%|▊| 644/743 [2:39:50<22:21, 13.55s/batch, batch_loss=17.9, batch

Validation:  87%|▊| 645/743 [2:39:50<22:04, 13.52s/batch, batch_loss=17.9, batch

Validation:  87%|▊| 645/743 [2:40:04<22:04, 13.52s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [2:40:04<22:00, 13.62s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [2:40:18<22:00, 13.62s/batch, batch_loss=15.7, batch

Validation:  87%|▊| 647/743 [2:40:18<21:57, 13.72s/batch, batch_loss=15.7, batch

Validation:  87%|▊| 647/743 [2:40:32<21:57, 13.72s/batch, batch_loss=5.38, batch

Validation:  87%|▊| 648/743 [2:40:32<21:44, 13.73s/batch, batch_loss=5.38, batch

Validation:  87%|▊| 648/743 [2:40:46<21:44, 13.73s/batch, batch_loss=9.45, batch

Validation:  87%|▊| 649/743 [2:40:46<21:27, 13.70s/batch, batch_loss=9.45, batch

Validation:  87%|▊| 649/743 [2:40:59<21:27, 13.70s/batch, batch_loss=12.2, batch

Validation:  87%|▊| 650/743 [2:40:59<21:18, 13.74s/batch, batch_loss=12.2, batch

Validation:  87%|▊| 650/743 [2:41:13<21:18, 13.74s/batch, batch_loss=21.6, batch

Validation:  88%|▉| 651/743 [2:41:13<21:04, 13.75s/batch, batch_loss=21.6, batch

Validation:  88%|▉| 651/743 [2:41:29<21:04, 13.75s/batch, batch_loss=23.2, batch

Validation:  88%|▉| 652/743 [2:41:29<21:57, 14.48s/batch, batch_loss=23.2, batch

Validation:  88%|▉| 652/743 [2:41:43<21:57, 14.48s/batch, batch_loss=14.2, batch

Validation:  88%|▉| 653/743 [2:41:43<21:18, 14.20s/batch, batch_loss=14.2, batch

Validation:  88%|▉| 653/743 [2:41:56<21:18, 14.20s/batch, batch_loss=20.7, batch

Validation:  88%|▉| 654/743 [2:41:56<20:45, 14.00s/batch, batch_loss=20.7, batch

Validation:  88%|▉| 654/743 [2:42:10<20:45, 14.00s/batch, batch_loss=27.3, batch

Validation:  88%|▉| 655/743 [2:42:10<20:20, 13.87s/batch, batch_loss=27.3, batch

Validation:  88%|▉| 655/743 [2:42:23<20:20, 13.87s/batch, batch_loss=18.7, batch

Validation:  88%|▉| 656/743 [2:42:23<19:55, 13.74s/batch, batch_loss=18.7, batch

Validation:  88%|▉| 656/743 [2:42:36<19:55, 13.74s/batch, batch_loss=14.2, batch

Validation:  88%|▉| 657/743 [2:42:36<19:13, 13.41s/batch, batch_loss=14.2, batch

Validation:  88%|▉| 657/743 [2:42:49<19:13, 13.41s/batch, batch_loss=14.4, batch

Validation:  89%|▉| 658/743 [2:42:49<18:47, 13.27s/batch, batch_loss=14.4, batch

Validation:  89%|▉| 658/743 [2:43:02<18:47, 13.27s/batch, batch_loss=23.5, batch

Validation:  89%|▉| 659/743 [2:43:02<18:28, 13.20s/batch, batch_loss=23.5, batch

Validation:  89%|▉| 659/743 [2:43:17<18:28, 13.20s/batch, batch_loss=21, batch_i

Validation:  89%|▉| 660/743 [2:43:17<18:57, 13.70s/batch, batch_loss=21, batch_i

Validation:  89%|▉| 660/743 [2:43:30<18:57, 13.70s/batch, batch_loss=16.1, batch

Validation:  89%|▉| 661/743 [2:43:30<18:35, 13.60s/batch, batch_loss=16.1, batch

Validation:  89%|▉| 661/743 [2:43:43<18:35, 13.60s/batch, batch_loss=6.4, batch_

Validation:  89%|▉| 662/743 [2:43:43<18:08, 13.44s/batch, batch_loss=6.4, batch_

Validation:  89%|▉| 662/743 [2:43:56<18:08, 13.44s/batch, batch_loss=3.58e+3, ba

Validation:  89%|▉| 663/743 [2:43:56<17:45, 13.31s/batch, batch_loss=3.58e+3, ba

Validation:  89%|▉| 663/743 [2:44:10<17:45, 13.31s/batch, batch_loss=13, batch_i

Validation:  89%|▉| 664/743 [2:44:10<17:29, 13.29s/batch, batch_loss=13, batch_i

Validation:  89%|▉| 664/743 [2:44:23<17:29, 13.29s/batch, batch_loss=19.2, batch

Validation:  90%|▉| 665/743 [2:44:23<17:16, 13.29s/batch, batch_loss=19.2, batch

Validation:  90%|▉| 665/743 [2:44:36<17:16, 13.29s/batch, batch_loss=12.5, batch

Validation:  90%|▉| 666/743 [2:44:36<17:00, 13.25s/batch, batch_loss=12.5, batch

Validation:  90%|▉| 666/743 [2:44:49<17:00, 13.25s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:44:49<16:40, 13.16s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:45:05<16:40, 13.16s/batch, batch_loss=18.6, batch

Validation:  90%|▉| 668/743 [2:45:05<17:29, 14.00s/batch, batch_loss=18.6, batch

Validation:  90%|▉| 668/743 [2:45:18<17:29, 14.00s/batch, batch_loss=21.9, batch

Validation:  90%|▉| 669/743 [2:45:18<16:57, 13.75s/batch, batch_loss=21.9, batch

Validation:  90%|▉| 669/743 [2:45:31<16:57, 13.75s/batch, batch_loss=23.5, batch

Validation:  90%|▉| 670/743 [2:45:31<16:35, 13.63s/batch, batch_loss=23.5, batch

Validation:  90%|▉| 670/743 [2:45:44<16:35, 13.63s/batch, batch_loss=3.11e+3, ba

Validation:  90%|▉| 671/743 [2:45:44<16:08, 13.44s/batch, batch_loss=3.11e+3, ba

Validation:  90%|▉| 671/743 [2:45:57<16:08, 13.44s/batch, batch_loss=20.7, batch

Validation:  90%|▉| 672/743 [2:45:57<15:41, 13.26s/batch, batch_loss=20.7, batch

Validation:  90%|▉| 672/743 [2:46:10<15:41, 13.26s/batch, batch_loss=14.3, batch

Validation:  91%|▉| 673/743 [2:46:10<15:18, 13.13s/batch, batch_loss=14.3, batch

Validation:  91%|▉| 673/743 [2:46:24<15:18, 13.13s/batch, batch_loss=12, batch_i

Validation:  91%|▉| 674/743 [2:46:24<15:15, 13.26s/batch, batch_loss=12, batch_i

Validation:  91%|▉| 674/743 [2:46:38<15:15, 13.26s/batch, batch_loss=21.3, batch

Validation:  91%|▉| 675/743 [2:46:38<15:18, 13.51s/batch, batch_loss=21.3, batch

Validation:  91%|▉| 675/743 [2:46:51<15:18, 13.51s/batch, batch_loss=19, batch_i

Validation:  91%|▉| 676/743 [2:46:51<15:07, 13.54s/batch, batch_loss=19, batch_i

Validation:  91%|▉| 676/743 [2:47:08<15:07, 13.54s/batch, batch_loss=20.5, batch

Validation:  91%|▉| 677/743 [2:47:08<15:45, 14.33s/batch, batch_loss=20.5, batch

Validation:  91%|▉| 677/743 [2:47:21<15:45, 14.33s/batch, batch_loss=15.7, batch

Validation:  91%|▉| 678/743 [2:47:21<15:08, 13.97s/batch, batch_loss=15.7, batch

Validation:  91%|▉| 678/743 [2:47:34<15:08, 13.97s/batch, batch_loss=14.1, batch

Validation:  91%|▉| 679/743 [2:47:34<14:42, 13.79s/batch, batch_loss=14.1, batch

Validation:  91%|▉| 679/743 [2:47:48<14:42, 13.79s/batch, batch_loss=19.5, batch

Validation:  92%|▉| 680/743 [2:47:48<14:30, 13.81s/batch, batch_loss=19.5, batch

Validation:  92%|▉| 680/743 [2:48:02<14:30, 13.81s/batch, batch_loss=21.8, batch

Validation:  92%|▉| 681/743 [2:48:02<14:14, 13.78s/batch, batch_loss=21.8, batch

Validation:  92%|▉| 681/743 [2:48:15<14:14, 13.78s/batch, batch_loss=28, batch_i

Validation:  92%|▉| 682/743 [2:48:15<14:00, 13.77s/batch, batch_loss=28, batch_i

Validation:  92%|▉| 682/743 [2:48:29<14:00, 13.77s/batch, batch_loss=17.8, batch

Validation:  92%|▉| 683/743 [2:48:29<13:36, 13.61s/batch, batch_loss=17.8, batch

Validation:  92%|▉| 683/743 [2:48:42<13:36, 13.61s/batch, batch_loss=14.2, batch

Validation:  92%|▉| 684/743 [2:48:42<13:15, 13.49s/batch, batch_loss=14.2, batch

Validation:  92%|▉| 684/743 [2:48:55<13:15, 13.49s/batch, batch_loss=13.9, batch

Validation:  92%|▉| 685/743 [2:48:55<12:56, 13.38s/batch, batch_loss=13.9, batch

Validation:  92%|▉| 685/743 [2:49:08<12:56, 13.38s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:49:08<12:44, 13.41s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:49:22<12:44, 13.41s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 687/743 [2:49:22<12:27, 13.34s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 687/743 [2:49:35<12:27, 13.34s/batch, batch_loss=12.9, batch

Validation:  93%|▉| 688/743 [2:49:35<12:16, 13.38s/batch, batch_loss=12.9, batch

Validation:  93%|▉| 688/743 [2:49:48<12:16, 13.38s/batch, batch_loss=14.2, batch

Validation:  93%|▉| 689/743 [2:49:48<11:58, 13.31s/batch, batch_loss=14.2, batch

Validation:  93%|▉| 689/743 [2:50:02<11:58, 13.31s/batch, batch_loss=19.2, batch

Validation:  93%|▉| 690/743 [2:50:02<11:53, 13.47s/batch, batch_loss=19.2, batch

Validation:  93%|▉| 690/743 [2:50:15<11:53, 13.47s/batch, batch_loss=12.7, batch

Validation:  93%|▉| 691/743 [2:50:15<11:38, 13.43s/batch, batch_loss=12.7, batch

Validation:  93%|▉| 691/743 [2:50:29<11:38, 13.43s/batch, batch_loss=19, batch_i

Validation:  93%|▉| 692/743 [2:50:29<11:23, 13.40s/batch, batch_loss=19, batch_i

Validation:  93%|▉| 692/743 [2:50:43<11:23, 13.40s/batch, batch_loss=20.3, batch

Validation:  93%|▉| 693/743 [2:50:43<11:28, 13.77s/batch, batch_loss=20.3, batch

Validation:  93%|▉| 693/743 [2:50:56<11:28, 13.77s/batch, batch_loss=23, batch_i

Validation:  93%|▉| 694/743 [2:50:56<11:03, 13.55s/batch, batch_loss=23, batch_i

Validation:  93%|▉| 694/743 [2:51:10<11:03, 13.55s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [2:51:10<10:44, 13.43s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [2:51:23<10:44, 13.43s/batch, batch_loss=7.88, batch

Validation:  94%|▉| 696/743 [2:51:23<10:31, 13.43s/batch, batch_loss=7.88, batch

Validation:  94%|▉| 696/743 [2:51:39<10:31, 13.43s/batch, batch_loss=35, batch_i

Validation:  94%|▉| 697/743 [2:51:39<10:55, 14.25s/batch, batch_loss=35, batch_i

Validation:  94%|▉| 697/743 [2:51:58<10:55, 14.25s/batch, batch_loss=757, batch_

Validation:  94%|▉| 698/743 [2:51:58<11:49, 15.77s/batch, batch_loss=757, batch_

Validation:  94%|▉| 698/743 [2:52:16<11:49, 15.77s/batch, batch_loss=6.04, batch

Validation:  94%|▉| 699/743 [2:52:16<12:01, 16.40s/batch, batch_loss=6.04, batch

Validation:  94%|▉| 699/743 [2:52:29<12:01, 16.40s/batch, batch_loss=944, batch_

Validation:  94%|▉| 700/743 [2:52:29<11:01, 15.38s/batch, batch_loss=944, batch_

Validation:  94%|▉| 700/743 [2:52:42<11:01, 15.38s/batch, batch_loss=6.69, batch

Validation:  94%|▉| 701/743 [2:52:42<10:16, 14.68s/batch, batch_loss=6.69, batch

Validation:  94%|▉| 701/743 [2:52:55<10:16, 14.68s/batch, batch_loss=6.4, batch_

Validation:  94%|▉| 702/743 [2:52:55<09:37, 14.08s/batch, batch_loss=6.4, batch_

Validation:  94%|▉| 702/743 [2:53:08<09:37, 14.08s/batch, batch_loss=176, batch_

Validation:  95%|▉| 703/743 [2:53:08<09:13, 13.84s/batch, batch_loss=176, batch_

Validation:  95%|▉| 703/743 [2:53:21<09:13, 13.84s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [2:53:21<08:44, 13.45s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [2:53:34<08:44, 13.45s/batch, batch_loss=8.38, batch

Validation:  95%|▉| 705/743 [2:53:34<08:27, 13.34s/batch, batch_loss=8.38, batch

Validation:  95%|▉| 705/743 [2:53:47<08:27, 13.34s/batch, batch_loss=16.3, batch

Validation:  95%|▉| 706/743 [2:53:47<08:09, 13.24s/batch, batch_loss=16.3, batch

Validation:  95%|▉| 706/743 [2:54:00<08:09, 13.24s/batch, batch_loss=417, batch_

Validation:  95%|▉| 707/743 [2:54:00<07:51, 13.11s/batch, batch_loss=417, batch_

Validation:  95%|▉| 707/743 [2:54:13<07:51, 13.11s/batch, batch_loss=16, batch_i

Validation:  95%|▉| 708/743 [2:54:13<07:36, 13.03s/batch, batch_loss=16, batch_i

Validation:  95%|▉| 708/743 [2:54:26<07:36, 13.03s/batch, batch_loss=23.4, batch

Validation:  95%|▉| 709/743 [2:54:26<07:28, 13.19s/batch, batch_loss=23.4, batch

Validation:  95%|▉| 709/743 [2:54:40<07:28, 13.19s/batch, batch_loss=15.9, batch

Validation:  96%|▉| 710/743 [2:54:40<07:20, 13.35s/batch, batch_loss=15.9, batch

Validation:  96%|▉| 710/743 [2:54:54<07:20, 13.35s/batch, batch_loss=12.9, batch

Validation:  96%|▉| 711/743 [2:54:54<07:12, 13.50s/batch, batch_loss=12.9, batch

Validation:  96%|▉| 711/743 [2:55:11<07:12, 13.50s/batch, batch_loss=19.6, batch

Validation:  96%|▉| 712/743 [2:55:11<07:36, 14.72s/batch, batch_loss=19.6, batch

Validation:  96%|▉| 712/743 [2:55:25<07:36, 14.72s/batch, batch_loss=17, batch_i

Validation:  96%|▉| 713/743 [2:55:25<07:13, 14.46s/batch, batch_loss=17, batch_i

Validation:  96%|▉| 713/743 [2:55:39<07:13, 14.46s/batch, batch_loss=5.05, batch

Validation:  96%|▉| 714/743 [2:55:39<06:53, 14.26s/batch, batch_loss=5.05, batch

Validation:  96%|▉| 714/743 [2:55:53<06:53, 14.26s/batch, batch_loss=9.05, batch

Validation:  96%|▉| 715/743 [2:55:53<06:37, 14.20s/batch, batch_loss=9.05, batch

Validation:  96%|▉| 715/743 [2:56:07<06:37, 14.20s/batch, batch_loss=18, batch_i

Validation:  96%|▉| 716/743 [2:56:07<06:17, 13.99s/batch, batch_loss=18, batch_i

Validation:  96%|▉| 716/743 [2:56:20<06:17, 13.99s/batch, batch_loss=386, batch_

Validation:  97%|▉| 717/743 [2:56:20<06:00, 13.86s/batch, batch_loss=386, batch_

Validation:  97%|▉| 717/743 [2:56:33<06:00, 13.86s/batch, batch_loss=20.4, batch

Validation:  97%|▉| 718/743 [2:56:33<05:42, 13.70s/batch, batch_loss=20.4, batch

Validation:  97%|▉| 718/743 [2:56:47<05:42, 13.70s/batch, batch_loss=15.9, batch

Validation:  97%|▉| 719/743 [2:56:47<05:27, 13.63s/batch, batch_loss=15.9, batch

Validation:  97%|▉| 719/743 [2:57:00<05:27, 13.63s/batch, batch_loss=16.2, batch

Validation:  97%|▉| 720/743 [2:57:00<05:12, 13.58s/batch, batch_loss=16.2, batch

Validation:  97%|▉| 720/743 [2:57:16<05:12, 13.58s/batch, batch_loss=10.8, batch

Validation:  97%|▉| 721/743 [2:57:16<05:11, 14.15s/batch, batch_loss=10.8, batch

Validation:  97%|▉| 721/743 [2:57:29<05:11, 14.15s/batch, batch_loss=23.3, batch

Validation:  97%|▉| 722/743 [2:57:29<04:52, 13.94s/batch, batch_loss=23.3, batch

Validation:  97%|▉| 722/743 [2:57:43<04:52, 13.94s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [2:57:43<04:35, 13.78s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [2:57:56<04:35, 13.78s/batch, batch_loss=16.7, batch

Validation:  97%|▉| 724/743 [2:57:56<04:18, 13.62s/batch, batch_loss=16.7, batch

Validation:  97%|▉| 724/743 [2:58:09<04:18, 13.62s/batch, batch_loss=14.8, batch

Validation:  98%|▉| 725/743 [2:58:09<04:04, 13.56s/batch, batch_loss=14.8, batch

Validation:  98%|▉| 725/743 [2:58:23<04:04, 13.56s/batch, batch_loss=20.5, batch

Validation:  98%|▉| 726/743 [2:58:23<03:48, 13.46s/batch, batch_loss=20.5, batch

Validation:  98%|▉| 726/743 [2:58:36<03:48, 13.46s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:58:36<03:33, 13.32s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:58:48<03:33, 13.32s/batch, batch_loss=27.7, batch

Validation:  98%|▉| 728/743 [2:58:48<03:16, 13.07s/batch, batch_loss=27.7, batch

Validation:  98%|▉| 728/743 [2:59:04<03:16, 13.07s/batch, batch_loss=34.3, batch

Validation:  98%|▉| 729/743 [2:59:04<03:12, 13.78s/batch, batch_loss=34.3, batch

Validation:  98%|▉| 729/743 [2:59:16<03:12, 13.78s/batch, batch_loss=21.1, batch

Validation:  98%|▉| 730/743 [2:59:16<02:54, 13.44s/batch, batch_loss=21.1, batch

Validation:  98%|▉| 730/743 [2:59:29<02:54, 13.44s/batch, batch_loss=13.3, batch

Validation:  98%|▉| 731/743 [2:59:29<02:40, 13.35s/batch, batch_loss=13.3, batch

Validation:  98%|▉| 731/743 [2:59:42<02:40, 13.35s/batch, batch_loss=9.32, batch

Validation:  99%|▉| 732/743 [2:59:42<02:25, 13.26s/batch, batch_loss=9.32, batch

Validation:  99%|▉| 732/743 [2:59:55<02:25, 13.26s/batch, batch_loss=27.4, batch

Validation:  99%|▉| 733/743 [2:59:55<02:11, 13.12s/batch, batch_loss=27.4, batch

Validation:  99%|▉| 733/743 [3:00:08<02:11, 13.12s/batch, batch_loss=3.35, batch

Validation:  99%|▉| 734/743 [3:00:08<01:56, 12.95s/batch, batch_loss=3.35, batch

Validation:  99%|▉| 734/743 [3:00:20<01:56, 12.95s/batch, batch_loss=7.15, batch

Validation:  99%|▉| 735/743 [3:00:20<01:42, 12.85s/batch, batch_loss=7.15, batch

Validation:  99%|▉| 735/743 [3:00:33<01:42, 12.85s/batch, batch_loss=1.15, batch

Validation:  99%|▉| 736/743 [3:00:33<01:29, 12.74s/batch, batch_loss=1.15, batch

Validation:  99%|▉| 736/743 [3:00:44<01:29, 12.74s/batch, batch_loss=0.0779, bat

Validation:  99%|▉| 737/743 [3:00:44<01:14, 12.40s/batch, batch_loss=0.0779, bat

Validation:  99%|▉| 737/743 [3:00:56<01:14, 12.40s/batch, batch_loss=0.0779, bat

Validation:  99%|▉| 738/743 [3:00:56<01:00, 12.13s/batch, batch_loss=0.0779, bat

Validation:  99%|▉| 738/743 [3:01:07<01:00, 12.13s/batch, batch_loss=0.0779, bat

Validation:  99%|▉| 739/743 [3:01:07<00:47, 11.90s/batch, batch_loss=0.0779, bat

Validation:  99%|▉| 739/743 [3:01:19<00:47, 11.90s/batch, batch_loss=0.0779, bat

Validation: 100%|▉| 740/743 [3:01:19<00:35, 11.84s/batch, batch_loss=0.0779, bat

Validation: 100%|▉| 740/743 [3:01:31<00:35, 11.84s/batch, batch_loss=0.0779, bat

Validation: 100%|▉| 741/743 [3:01:31<00:23, 11.87s/batch, batch_loss=0.0779, bat

Validation: 100%|▉| 741/743 [3:01:42<00:23, 11.87s/batch, batch_loss=0.0779, bat

Validation: 100%|▉| 742/743 [3:01:42<00:11, 11.77s/batch, batch_loss=0.0779, bat

Validation: 100%|▉| 742/743 [3:01:53<00:11, 11.77s/batch, batch_loss=0.0771, bat

Validation: 100%|█| 743/743 [3:01:53<00:00, 11.42s/batch, batch_loss=0.0771, bat

Validation: 100%|█| 743/743 [3:01:53<00:00, 14.69s/batch, batch_loss=0.0771, bat




Val Loss: 1295.8918


Epoch 4/10:   0%|                                    | 0/991 [00:00<?, ?batch/s]

Epoch 4/10:   0%| | 0/991 [00:13<?, ?batch/s, batch_loss=16.2, batch_index=1, ba

Epoch 4/10:   0%| | 1/991 [00:13<3:43:01, 13.52s/batch, batch_loss=16.2, batch_i

Epoch 4/10:   0%| | 1/991 [00:27<3:43:01, 13.52s/batch, batch_loss=16.5, batch_i

Epoch 4/10:   0%| | 2/991 [00:27<3:42:37, 13.51s/batch, batch_loss=16.5, batch_i

Epoch 4/10:   0%| | 2/991 [00:41<3:42:37, 13.51s/batch, batch_loss=11.5, batch_i

Epoch 4/10:   0%| | 3/991 [00:41<3:49:23, 13.93s/batch, batch_loss=11.5, batch_i

Epoch 4/10:   0%| | 3/991 [00:55<3:49:23, 13.93s/batch, batch_loss=6.88, batch_i

Epoch 4/10:   0%| | 4/991 [00:55<3:49:55, 13.98s/batch, batch_loss=6.88, batch_i

Epoch 4/10:   0%| | 4/991 [01:12<3:49:55, 13.98s/batch, batch_loss=20.2, batch_i

Epoch 4/10:   1%| | 5/991 [01:12<4:04:51, 14.90s/batch, batch_loss=20.2, batch_i

Epoch 4/10:   1%| | 5/991 [01:26<4:04:51, 14.90s/batch, batch_loss=23.3, batch_i

Epoch 4/10:   1%| | 6/991 [01:26<4:01:37, 14.72s/batch, batch_loss=23.3, batch_i

Epoch 4/10:   1%| | 6/991 [01:40<4:01:37, 14.72s/batch, batch_loss=16.8, batch_i

Epoch 4/10:   1%| | 7/991 [01:40<3:56:33, 14.42s/batch, batch_loss=16.8, batch_i

Epoch 4/10:   1%| | 7/991 [01:54<3:56:33, 14.42s/batch, batch_loss=604, batch_in

Epoch 4/10:   1%| | 8/991 [01:54<3:53:41, 14.26s/batch, batch_loss=604, batch_in

Epoch 4/10:   1%| | 8/991 [02:08<3:53:41, 14.26s/batch, batch_loss=16.5, batch_i

Epoch 4/10:   1%| | 9/991 [02:08<3:52:27, 14.20s/batch, batch_loss=16.5, batch_i

Epoch 4/10:   1%| | 9/991 [02:22<3:52:27, 14.20s/batch, batch_loss=15.3, batch_i

Epoch 4/10:   1%| | 10/991 [02:22<3:53:11, 14.26s/batch, batch_loss=15.3, batch_

Epoch 4/10:   1%| | 10/991 [02:37<3:53:11, 14.26s/batch, batch_loss=11.3, batch_

Epoch 4/10:   1%| | 11/991 [02:37<3:53:49, 14.32s/batch, batch_loss=11.3, batch_

Epoch 4/10:   1%| | 11/991 [02:51<3:53:49, 14.32s/batch, batch_loss=1.99e+3, bat

Epoch 4/10:   1%| | 12/991 [02:51<3:52:45, 14.27s/batch, batch_loss=1.99e+3, bat

Epoch 4/10:   1%| | 12/991 [03:05<3:52:45, 14.27s/batch, batch_loss=19, batch_in

Epoch 4/10:   1%| | 13/991 [03:05<3:54:01, 14.36s/batch, batch_loss=19, batch_in

Epoch 4/10:   1%| | 13/991 [03:20<3:54:01, 14.36s/batch, batch_loss=11.6, batch_

Epoch 4/10:   1%| | 14/991 [03:20<3:54:15, 14.39s/batch, batch_loss=11.6, batch_

Epoch 4/10:   1%| | 14/991 [03:34<3:54:15, 14.39s/batch, batch_loss=10.1, batch_

Epoch 4/10:   2%| | 15/991 [03:34<3:54:40, 14.43s/batch, batch_loss=10.1, batch_

Epoch 4/10:   2%| | 15/991 [03:49<3:54:40, 14.43s/batch, batch_loss=13, batch_in

Epoch 4/10:   2%| | 16/991 [03:49<3:54:47, 14.45s/batch, batch_loss=13, batch_in

Epoch 4/10:   2%| | 16/991 [04:03<3:54:47, 14.45s/batch, batch_loss=13.8, batch_

Epoch 4/10:   2%| | 17/991 [04:03<3:54:14, 14.43s/batch, batch_loss=13.8, batch_

Epoch 4/10:   2%| | 17/991 [04:17<3:54:14, 14.43s/batch, batch_loss=9.69, batch_

Epoch 4/10:   2%| | 18/991 [04:17<3:53:44, 14.41s/batch, batch_loss=9.69, batch_

Epoch 4/10:   2%| | 18/991 [04:31<3:53:44, 14.41s/batch, batch_loss=8.95e+3, bat

Epoch 4/10:   2%| | 19/991 [04:31<3:51:06, 14.27s/batch, batch_loss=8.95e+3, bat

Epoch 4/10:   2%| | 19/991 [04:45<3:51:06, 14.27s/batch, batch_loss=11.1, batch_

Epoch 4/10:   2%| | 20/991 [04:45<3:46:32, 14.00s/batch, batch_loss=11.1, batch_

Epoch 4/10:   2%| | 20/991 [05:00<3:46:32, 14.00s/batch, batch_loss=16.8, batch_

Epoch 4/10:   2%| | 21/991 [05:00<3:51:33, 14.32s/batch, batch_loss=16.8, batch_

Epoch 4/10:   2%| | 21/991 [05:13<3:51:33, 14.32s/batch, batch_loss=1.08e+3, bat

Epoch 4/10:   2%| | 22/991 [05:13<3:47:22, 14.08s/batch, batch_loss=1.08e+3, bat

Epoch 4/10:   2%| | 22/991 [05:27<3:47:22, 14.08s/batch, batch_loss=7.91, batch_

Epoch 4/10:   2%| | 23/991 [05:27<3:46:40, 14.05s/batch, batch_loss=7.91, batch_

Epoch 4/10:   2%| | 23/991 [05:41<3:46:40, 14.05s/batch, batch_loss=12.4, batch_

Epoch 4/10:   2%| | 24/991 [05:41<3:45:11, 13.97s/batch, batch_loss=12.4, batch_

Epoch 4/10:   2%| | 24/991 [05:55<3:45:11, 13.97s/batch, batch_loss=13.2, batch_

Epoch 4/10:   3%| | 25/991 [05:55<3:44:48, 13.96s/batch, batch_loss=13.2, batch_

Epoch 4/10:   3%| | 25/991 [06:09<3:44:48, 13.96s/batch, batch_loss=15.9, batch_

Epoch 4/10:   3%| | 26/991 [06:09<3:43:46, 13.91s/batch, batch_loss=15.9, batch_

Epoch 4/10:   3%| | 26/991 [06:23<3:43:46, 13.91s/batch, batch_loss=15.7, batch_

Epoch 4/10:   3%| | 27/991 [06:23<3:44:07, 13.95s/batch, batch_loss=15.7, batch_

Epoch 4/10:   3%| | 27/991 [06:40<3:44:07, 13.95s/batch, batch_loss=1.14e+3, bat

Epoch 4/10:   3%| | 28/991 [06:40<3:57:28, 14.80s/batch, batch_loss=1.14e+3, bat

Epoch 4/10:   3%| | 28/991 [06:55<3:57:28, 14.80s/batch, batch_loss=11.3, batch_

Epoch 4/10:   3%| | 29/991 [06:55<3:57:55, 14.84s/batch, batch_loss=11.3, batch_

Epoch 4/10:   3%| | 29/991 [07:09<3:57:55, 14.84s/batch, batch_loss=10.6, batch_

Epoch 4/10:   3%| | 30/991 [07:09<3:55:07, 14.68s/batch, batch_loss=10.6, batch_

Epoch 4/10:   3%| | 30/991 [07:24<3:55:07, 14.68s/batch, batch_loss=9.52, batch_

Epoch 4/10:   3%| | 31/991 [07:24<3:57:50, 14.87s/batch, batch_loss=9.52, batch_

Epoch 4/10:   3%| | 31/991 [07:38<3:57:50, 14.87s/batch, batch_loss=1.3e+4, batc

Epoch 4/10:   3%| | 32/991 [07:38<3:51:34, 14.49s/batch, batch_loss=1.3e+4, batc

Epoch 4/10:   3%| | 32/991 [07:52<3:51:34, 14.49s/batch, batch_loss=13.4, batch_

Epoch 4/10:   3%| | 33/991 [07:52<3:47:56, 14.28s/batch, batch_loss=13.4, batch_

Epoch 4/10:   3%| | 33/991 [08:06<3:47:56, 14.28s/batch, batch_loss=8.1, batch_i

Epoch 4/10:   3%| | 34/991 [08:06<3:46:35, 14.21s/batch, batch_loss=8.1, batch_i

Epoch 4/10:   3%| | 34/991 [08:21<3:46:35, 14.21s/batch, batch_loss=12.5, batch_

Epoch 4/10:   4%| | 35/991 [08:21<3:51:51, 14.55s/batch, batch_loss=12.5, batch_

Epoch 4/10:   4%| | 35/991 [08:34<3:51:51, 14.55s/batch, batch_loss=10, batch_in

Epoch 4/10:   4%| | 36/991 [08:34<3:46:21, 14.22s/batch, batch_loss=10, batch_in

Epoch 4/10:   4%| | 36/991 [08:51<3:46:21, 14.22s/batch, batch_loss=10.5, batch_

Epoch 4/10:   4%| | 37/991 [08:51<3:55:52, 14.83s/batch, batch_loss=10.5, batch_

Epoch 4/10:   4%| | 37/991 [09:05<3:55:52, 14.83s/batch, batch_loss=6.76, batch_

Epoch 4/10:   4%| | 38/991 [09:05<3:51:45, 14.59s/batch, batch_loss=6.76, batch_

Epoch 4/10:   4%| | 38/991 [09:19<3:51:45, 14.59s/batch, batch_loss=1.72e+3, bat

Epoch 4/10:   4%| | 39/991 [09:19<3:48:27, 14.40s/batch, batch_loss=1.72e+3, bat

Epoch 4/10:   4%| | 39/991 [09:33<3:48:27, 14.40s/batch, batch_loss=14, batch_in

Epoch 4/10:   4%| | 40/991 [09:33<3:45:28, 14.23s/batch, batch_loss=14, batch_in

Epoch 4/10:   4%| | 40/991 [09:46<3:45:28, 14.23s/batch, batch_loss=6.2e+3, batc

Epoch 4/10:   4%| | 41/991 [09:46<3:43:11, 14.10s/batch, batch_loss=6.2e+3, batc

Epoch 4/10:   4%| | 41/991 [10:00<3:43:11, 14.10s/batch, batch_loss=15.4, batch_

Epoch 4/10:   4%| | 42/991 [10:00<3:40:20, 13.93s/batch, batch_loss=15.4, batch_

Epoch 4/10:   4%| | 42/991 [10:14<3:40:20, 13.93s/batch, batch_loss=9.38, batch_

Epoch 4/10:   4%| | 43/991 [10:14<3:39:40, 13.90s/batch, batch_loss=9.38, batch_

Epoch 4/10:   4%| | 43/991 [10:28<3:39:40, 13.90s/batch, batch_loss=14.4, batch_

Epoch 4/10:   4%| | 44/991 [10:28<3:40:00, 13.94s/batch, batch_loss=14.4, batch_

Epoch 4/10:   4%| | 44/991 [10:42<3:40:00, 13.94s/batch, batch_loss=15.1, batch_

Epoch 4/10:   5%| | 45/991 [10:42<3:39:53, 13.95s/batch, batch_loss=15.1, batch_

Epoch 4/10:   5%| | 45/991 [10:55<3:39:53, 13.95s/batch, batch_loss=12.3, batch_

Epoch 4/10:   5%| | 46/991 [10:55<3:38:13, 13.86s/batch, batch_loss=12.3, batch_

Epoch 4/10:   5%| | 46/991 [11:09<3:38:13, 13.86s/batch, batch_loss=6.95, batch_

Epoch 4/10:   5%| | 47/991 [11:09<3:36:42, 13.77s/batch, batch_loss=6.95, batch_

Epoch 4/10:   5%| | 47/991 [11:23<3:36:42, 13.77s/batch, batch_loss=12.6, batch_

Epoch 4/10:   5%| | 48/991 [11:23<3:36:09, 13.75s/batch, batch_loss=12.6, batch_

Epoch 4/10:   5%| | 48/991 [11:36<3:36:09, 13.75s/batch, batch_loss=12.9, batch_

Epoch 4/10:   5%| | 49/991 [11:36<3:35:25, 13.72s/batch, batch_loss=12.9, batch_

Epoch 4/10:   5%| | 49/991 [11:50<3:35:25, 13.72s/batch, batch_loss=13.8, batch_

Epoch 4/10:   5%| | 50/991 [11:50<3:35:10, 13.72s/batch, batch_loss=13.8, batch_

Epoch 4/10:   5%| | 50/991 [12:04<3:35:10, 13.72s/batch, batch_loss=8.24, batch_

Epoch 4/10:   5%| | 51/991 [12:04<3:33:57, 13.66s/batch, batch_loss=8.24, batch_

Epoch 4/10:   5%| | 51/991 [12:17<3:33:57, 13.66s/batch, batch_loss=13.7, batch_

Epoch 4/10:   5%| | 52/991 [12:17<3:33:44, 13.66s/batch, batch_loss=13.7, batch_

Epoch 4/10:   5%| | 52/991 [12:33<3:33:44, 13.66s/batch, batch_loss=13, batch_in

Epoch 4/10:   5%| | 53/991 [12:33<3:41:35, 14.17s/batch, batch_loss=13, batch_in

Epoch 4/10:   5%| | 53/991 [12:50<3:41:35, 14.17s/batch, batch_loss=8.89, batch_

Epoch 4/10:   5%| | 54/991 [12:50<3:58:59, 15.30s/batch, batch_loss=8.89, batch_

Epoch 4/10:   5%| | 54/991 [13:07<3:58:59, 15.30s/batch, batch_loss=9.5, batch_i

Epoch 4/10:   6%| | 55/991 [13:07<4:05:49, 15.76s/batch, batch_loss=9.5, batch_i

Epoch 4/10:   6%| | 55/991 [13:24<4:05:49, 15.76s/batch, batch_loss=12.1, batch_

Epoch 4/10:   6%| | 56/991 [13:24<4:09:18, 16.00s/batch, batch_loss=12.1, batch_

Epoch 4/10:   6%| | 56/991 [13:40<4:09:18, 16.00s/batch, batch_loss=6.86, batch_

Epoch 4/10:   6%| | 57/991 [13:40<4:11:48, 16.18s/batch, batch_loss=6.86, batch_

Epoch 4/10:   6%| | 57/991 [13:57<4:11:48, 16.18s/batch, batch_loss=14.3, batch_

Epoch 4/10:   6%| | 58/991 [13:57<4:12:13, 16.22s/batch, batch_loss=14.3, batch_

Epoch 4/10:   6%| | 58/991 [14:14<4:12:13, 16.22s/batch, batch_loss=9.96, batch_

Epoch 4/10:   6%| | 59/991 [14:14<4:16:41, 16.53s/batch, batch_loss=9.96, batch_

Epoch 4/10:   6%| | 59/991 [14:29<4:16:41, 16.53s/batch, batch_loss=16.4, batch_

Epoch 4/10:   6%| | 60/991 [14:29<4:10:13, 16.13s/batch, batch_loss=16.4, batch_

Epoch 4/10:   6%| | 60/991 [14:46<4:10:13, 16.13s/batch, batch_loss=11.4, batch_

Epoch 4/10:   6%| | 61/991 [14:46<4:11:11, 16.21s/batch, batch_loss=11.4, batch_

Epoch 4/10:   6%| | 61/991 [15:02<4:11:11, 16.21s/batch, batch_loss=11, batch_in

Epoch 4/10:   6%| | 62/991 [15:02<4:12:14, 16.29s/batch, batch_loss=11, batch_in

Epoch 4/10:   6%| | 62/991 [15:19<4:12:14, 16.29s/batch, batch_loss=420, batch_i

Epoch 4/10:   6%| | 63/991 [15:19<4:14:52, 16.48s/batch, batch_loss=420, batch_i

Epoch 4/10:   6%| | 63/991 [15:36<4:14:52, 16.48s/batch, batch_loss=798, batch_i

Epoch 4/10:   6%| | 64/991 [15:36<4:16:38, 16.61s/batch, batch_loss=798, batch_i

Epoch 4/10:   6%| | 64/991 [15:53<4:16:38, 16.61s/batch, batch_loss=2.56e+3, bat

Epoch 4/10:   7%| | 65/991 [15:53<4:17:13, 16.67s/batch, batch_loss=2.56e+3, bat

Epoch 4/10:   7%| | 65/991 [16:09<4:17:13, 16.67s/batch, batch_loss=4.31, batch_

Epoch 4/10:   7%| | 66/991 [16:09<4:14:37, 16.52s/batch, batch_loss=4.31, batch_

Epoch 4/10:   7%| | 66/991 [16:24<4:14:37, 16.52s/batch, batch_loss=10.5, batch_

Epoch 4/10:   7%| | 67/991 [16:24<4:06:58, 16.04s/batch, batch_loss=10.5, batch_

Epoch 4/10:   7%| | 67/991 [16:38<4:06:58, 16.04s/batch, batch_loss=7.83, batch_

Epoch 4/10:   7%| | 68/991 [16:38<3:58:25, 15.50s/batch, batch_loss=7.83, batch_

Epoch 4/10:   7%| | 68/991 [16:52<3:58:25, 15.50s/batch, batch_loss=18.8, batch_

Epoch 4/10:   7%| | 69/991 [16:52<3:50:48, 15.02s/batch, batch_loss=18.8, batch_

Epoch 4/10:   7%| | 69/991 [17:06<3:50:48, 15.02s/batch, batch_loss=8.57, batch_

Epoch 4/10:   7%| | 70/991 [17:06<3:47:33, 14.83s/batch, batch_loss=8.57, batch_

Epoch 4/10:   7%| | 70/991 [17:21<3:47:33, 14.83s/batch, batch_loss=12, batch_in

Epoch 4/10:   7%| | 71/991 [17:21<3:48:04, 14.87s/batch, batch_loss=12, batch_in

Epoch 4/10:   7%| | 71/991 [17:37<3:48:04, 14.87s/batch, batch_loss=13.1, batch_

Epoch 4/10:   7%| | 72/991 [17:37<3:53:02, 15.21s/batch, batch_loss=13.1, batch_

Epoch 4/10:   7%| | 72/991 [17:54<3:53:02, 15.21s/batch, batch_loss=24.2, batch_

Epoch 4/10:   7%| | 73/991 [17:54<3:59:23, 15.65s/batch, batch_loss=24.2, batch_

Epoch 4/10:   7%| | 73/991 [18:09<3:59:23, 15.65s/batch, batch_loss=1.74e+3, bat

Epoch 4/10:   7%| | 74/991 [18:09<3:57:20, 15.53s/batch, batch_loss=1.74e+3, bat

Epoch 4/10:   7%| | 74/991 [18:25<3:57:20, 15.53s/batch, batch_loss=15.3, batch_

Epoch 4/10:   8%| | 75/991 [18:25<3:59:14, 15.67s/batch, batch_loss=15.3, batch_

Epoch 4/10:   8%| | 75/991 [18:43<3:59:14, 15.67s/batch, batch_loss=11.3, batch_

Epoch 4/10:   8%| | 76/991 [18:43<4:10:26, 16.42s/batch, batch_loss=11.3, batch_

Epoch 4/10:   8%| | 76/991 [18:59<4:10:26, 16.42s/batch, batch_loss=10.8, batch_

Epoch 4/10:   8%| | 77/991 [18:59<4:05:30, 16.12s/batch, batch_loss=10.8, batch_

Epoch 4/10:   8%| | 77/991 [19:15<4:05:30, 16.12s/batch, batch_loss=13.7, batch_

Epoch 4/10:   8%| | 78/991 [19:15<4:05:12, 16.11s/batch, batch_loss=13.7, batch_

Epoch 4/10:   8%| | 78/991 [19:31<4:05:12, 16.11s/batch, batch_loss=8.81, batch_

Epoch 4/10:   8%| | 79/991 [19:31<4:03:03, 15.99s/batch, batch_loss=8.81, batch_

Epoch 4/10:   8%| | 79/991 [19:48<4:03:03, 15.99s/batch, batch_loss=8.25, batch_

Epoch 4/10:   8%| | 80/991 [19:48<4:08:52, 16.39s/batch, batch_loss=8.25, batch_

Epoch 4/10:   8%| | 80/991 [20:03<4:08:52, 16.39s/batch, batch_loss=13.7, batch_

Epoch 4/10:   8%| | 81/991 [20:03<4:03:02, 16.02s/batch, batch_loss=13.7, batch_

Epoch 4/10:   8%| | 81/991 [20:20<4:03:02, 16.02s/batch, batch_loss=12.8, batch_

Epoch 4/10:   8%| | 82/991 [20:20<4:05:59, 16.24s/batch, batch_loss=12.8, batch_

Epoch 4/10:   8%| | 82/991 [20:39<4:05:59, 16.24s/batch, batch_loss=7.63, batch_

Epoch 4/10:   8%| | 83/991 [20:39<4:20:33, 17.22s/batch, batch_loss=7.63, batch_

Epoch 4/10:   8%| | 83/991 [20:54<4:20:33, 17.22s/batch, batch_loss=9.92, batch_

Epoch 4/10:   8%| | 84/991 [20:54<4:06:20, 16.30s/batch, batch_loss=9.92, batch_

Epoch 4/10:   8%| | 84/991 [21:08<4:06:20, 16.30s/batch, batch_loss=9.27, batch_

Epoch 4/10:   9%| | 85/991 [21:08<3:55:40, 15.61s/batch, batch_loss=9.27, batch_

Epoch 4/10:   9%| | 85/991 [21:21<3:55:40, 15.61s/batch, batch_loss=11.9, batch_

Epoch 4/10:   9%| | 86/991 [21:21<3:46:45, 15.03s/batch, batch_loss=11.9, batch_

Epoch 4/10:   9%| | 86/991 [21:35<3:46:45, 15.03s/batch, batch_loss=10.6, batch_

Epoch 4/10:   9%| | 87/991 [21:35<3:42:57, 14.80s/batch, batch_loss=10.6, batch_

Epoch 4/10:   9%| | 87/991 [21:49<3:42:57, 14.80s/batch, batch_loss=8.95, batch_

Epoch 4/10:   9%| | 88/991 [21:49<3:38:49, 14.54s/batch, batch_loss=8.95, batch_

Epoch 4/10:   9%| | 88/991 [22:03<3:38:49, 14.54s/batch, batch_loss=6.01, batch_

Epoch 4/10:   9%| | 89/991 [22:03<3:34:22, 14.26s/batch, batch_loss=6.01, batch_

Epoch 4/10:   9%| | 89/991 [22:17<3:34:22, 14.26s/batch, batch_loss=247, batch_i

Epoch 4/10:   9%| | 90/991 [22:17<3:32:33, 14.15s/batch, batch_loss=247, batch_i

Epoch 4/10:   9%| | 90/991 [22:31<3:32:33, 14.15s/batch, batch_loss=1.54e+3, bat

Epoch 4/10:   9%| | 91/991 [22:31<3:32:17, 14.15s/batch, batch_loss=1.54e+3, bat

Epoch 4/10:   9%| | 91/991 [22:48<3:32:17, 14.15s/batch, batch_loss=15.6, batch_

Epoch 4/10:   9%| | 92/991 [22:48<3:43:44, 14.93s/batch, batch_loss=15.6, batch_

Epoch 4/10:   9%| | 92/991 [23:02<3:43:44, 14.93s/batch, batch_loss=21.5, batch_

Epoch 4/10:   9%| | 93/991 [23:02<3:38:26, 14.60s/batch, batch_loss=21.5, batch_

Epoch 4/10:   9%| | 93/991 [23:15<3:38:26, 14.60s/batch, batch_loss=19.9, batch_

Epoch 4/10:   9%| | 94/991 [23:15<3:32:41, 14.23s/batch, batch_loss=19.9, batch_

Epoch 4/10:   9%| | 94/991 [23:29<3:32:41, 14.23s/batch, batch_loss=18.9, batch_

Epoch 4/10:  10%| | 95/991 [23:29<3:30:07, 14.07s/batch, batch_loss=18.9, batch_

Epoch 4/10:  10%| | 95/991 [23:42<3:30:07, 14.07s/batch, batch_loss=18.6, batch_

Epoch 4/10:  10%| | 96/991 [23:42<3:27:09, 13.89s/batch, batch_loss=18.6, batch_

Epoch 4/10:  10%| | 96/991 [23:55<3:27:09, 13.89s/batch, batch_loss=15.7, batch_

Epoch 4/10:  10%| | 97/991 [23:55<3:23:56, 13.69s/batch, batch_loss=15.7, batch_

Epoch 4/10:  10%| | 97/991 [24:09<3:23:56, 13.69s/batch, batch_loss=18.2, batch_

Epoch 4/10:  10%| | 98/991 [24:09<3:21:49, 13.56s/batch, batch_loss=18.2, batch_

Epoch 4/10:  10%| | 98/991 [24:22<3:21:49, 13.56s/batch, batch_loss=16.1, batch_

Epoch 4/10:  10%| | 99/991 [24:22<3:20:49, 13.51s/batch, batch_loss=16.1, batch_

Epoch 4/10:  10%| | 99/991 [24:36<3:20:49, 13.51s/batch, batch_loss=17.9, batch_

Epoch 4/10:  10%| | 100/991 [24:36<3:22:25, 13.63s/batch, batch_loss=17.9, batch

Epoch 4/10:  10%| | 100/991 [24:50<3:22:25, 13.63s/batch, batch_loss=13.3, batch

Epoch 4/10:  10%| | 101/991 [24:50<3:25:36, 13.86s/batch, batch_loss=13.3, batch

Epoch 4/10:  10%| | 101/991 [25:07<3:25:36, 13.86s/batch, batch_loss=21.1, batch

Epoch 4/10:  10%| | 102/991 [25:07<3:39:38, 14.82s/batch, batch_loss=21.1, batch

Epoch 4/10:  10%| | 102/991 [25:22<3:39:38, 14.82s/batch, batch_loss=915, batch_

Epoch 4/10:  10%| | 103/991 [25:22<3:37:17, 14.68s/batch, batch_loss=915, batch_

Epoch 4/10:  10%| | 103/991 [25:36<3:37:17, 14.68s/batch, batch_loss=13.6, batch

Epoch 4/10:  10%| | 104/991 [25:36<3:36:51, 14.67s/batch, batch_loss=13.6, batch

Epoch 4/10:  10%| | 104/991 [25:50<3:36:51, 14.67s/batch, batch_loss=10, batch_i

Epoch 4/10:  11%| | 105/991 [25:50<3:32:10, 14.37s/batch, batch_loss=10, batch_i

Epoch 4/10:  11%| | 105/991 [26:04<3:32:10, 14.37s/batch, batch_loss=10.5, batch

Epoch 4/10:  11%| | 106/991 [26:04<3:28:27, 14.13s/batch, batch_loss=10.5, batch

Epoch 4/10:  11%| | 106/991 [26:17<3:28:27, 14.13s/batch, batch_loss=16.9, batch

Epoch 4/10:  11%| | 107/991 [26:17<3:26:29, 14.02s/batch, batch_loss=16.9, batch

Epoch 4/10:  11%| | 107/991 [26:31<3:26:29, 14.02s/batch, batch_loss=25, batch_i

Epoch 4/10:  11%| | 108/991 [26:31<3:26:05, 14.00s/batch, batch_loss=25, batch_i

Epoch 4/10:  11%| | 108/991 [26:48<3:26:05, 14.00s/batch, batch_loss=15, batch_i

Epoch 4/10:  11%| | 109/991 [26:48<3:38:38, 14.87s/batch, batch_loss=15, batch_i

Epoch 4/10:  11%| | 109/991 [27:02<3:38:38, 14.87s/batch, batch_loss=14.3, batch

Epoch 4/10:  11%| | 110/991 [27:02<3:34:14, 14.59s/batch, batch_loss=14.3, batch

Epoch 4/10:  11%| | 110/991 [27:16<3:34:14, 14.59s/batch, batch_loss=15.9, batch

Epoch 4/10:  11%| | 111/991 [27:16<3:31:37, 14.43s/batch, batch_loss=15.9, batch

Epoch 4/10:  11%| | 111/991 [27:30<3:31:37, 14.43s/batch, batch_loss=17.3, batch

Epoch 4/10:  11%| | 112/991 [27:30<3:30:25, 14.36s/batch, batch_loss=17.3, batch

Epoch 4/10:  11%| | 112/991 [27:45<3:30:25, 14.36s/batch, batch_loss=10.4, batch

Epoch 4/10:  11%| | 113/991 [27:45<3:30:11, 14.36s/batch, batch_loss=10.4, batch

Epoch 4/10:  11%| | 113/991 [27:59<3:30:11, 14.36s/batch, batch_loss=15.2, batch

Epoch 4/10:  12%| | 114/991 [27:59<3:28:48, 14.29s/batch, batch_loss=15.2, batch

Epoch 4/10:  12%| | 114/991 [28:13<3:28:48, 14.29s/batch, batch_loss=19.1, batch

Epoch 4/10:  12%| | 115/991 [28:13<3:27:08, 14.19s/batch, batch_loss=19.1, batch

Epoch 4/10:  12%| | 115/991 [28:27<3:27:08, 14.19s/batch, batch_loss=11.1, batch

Epoch 4/10:  12%| | 116/991 [28:27<3:25:19, 14.08s/batch, batch_loss=11.1, batch

Epoch 4/10:  12%| | 116/991 [28:40<3:25:19, 14.08s/batch, batch_loss=16.4, batch

Epoch 4/10:  12%| | 117/991 [28:40<3:22:33, 13.91s/batch, batch_loss=16.4, batch

Epoch 4/10:  12%| | 117/991 [28:54<3:22:33, 13.91s/batch, batch_loss=16.9, batch

Epoch 4/10:  12%| | 118/991 [28:54<3:20:13, 13.76s/batch, batch_loss=16.9, batch

Epoch 4/10:  12%| | 118/991 [29:07<3:20:13, 13.76s/batch, batch_loss=27, batch_i

Epoch 4/10:  12%| | 119/991 [29:07<3:18:11, 13.64s/batch, batch_loss=27, batch_i

Epoch 4/10:  12%| | 119/991 [29:23<3:18:11, 13.64s/batch, batch_loss=17.7, batch

Epoch 4/10:  12%| | 120/991 [29:23<3:27:28, 14.29s/batch, batch_loss=17.7, batch

Epoch 4/10:  12%| | 120/991 [29:36<3:27:28, 14.29s/batch, batch_loss=27.4, batch

Epoch 4/10:  12%| | 121/991 [29:36<3:22:38, 13.98s/batch, batch_loss=27.4, batch

Epoch 4/10:  12%| | 121/991 [29:49<3:22:38, 13.98s/batch, batch_loss=8.93, batch

Epoch 4/10:  12%| | 122/991 [29:49<3:18:58, 13.74s/batch, batch_loss=8.93, batch

Epoch 4/10:  12%| | 122/991 [30:02<3:18:58, 13.74s/batch, batch_loss=16.6, batch

Epoch 4/10:  12%| | 123/991 [30:02<3:16:34, 13.59s/batch, batch_loss=16.6, batch

Epoch 4/10:  12%| | 123/991 [30:16<3:16:34, 13.59s/batch, batch_loss=3.48e+3, ba

Epoch 4/10:  13%|▏| 124/991 [30:16<3:14:50, 13.48s/batch, batch_loss=3.48e+3, ba

Epoch 4/10:  13%|▏| 124/991 [30:29<3:14:50, 13.48s/batch, batch_loss=6.94, batch

Epoch 4/10:  13%|▏| 125/991 [30:29<3:13:26, 13.40s/batch, batch_loss=6.94, batch

Epoch 4/10:  13%|▏| 125/991 [30:43<3:13:26, 13.40s/batch, batch_loss=14.6, batch

Epoch 4/10:  13%|▏| 126/991 [30:43<3:14:48, 13.51s/batch, batch_loss=14.6, batch

Epoch 4/10:  13%|▏| 126/991 [30:57<3:14:48, 13.51s/batch, batch_loss=1.89e+3, ba

Epoch 4/10:  13%|▏| 127/991 [30:57<3:17:26, 13.71s/batch, batch_loss=1.89e+3, ba

Epoch 4/10:  13%|▏| 127/991 [31:10<3:17:26, 13.71s/batch, batch_loss=1.57e+3, ba

Epoch 4/10:  13%|▏| 128/991 [31:10<3:16:41, 13.67s/batch, batch_loss=1.57e+3, ba

Epoch 4/10:  13%|▏| 128/991 [31:24<3:16:41, 13.67s/batch, batch_loss=230, batch_

Epoch 4/10:  13%|▏| 129/991 [31:24<3:17:14, 13.73s/batch, batch_loss=230, batch_

Epoch 4/10:  13%|▏| 129/991 [31:40<3:17:14, 13.73s/batch, batch_loss=1.02e+3, ba

Epoch 4/10:  13%|▏| 130/991 [31:40<3:24:18, 14.24s/batch, batch_loss=1.02e+3, ba

Epoch 4/10:  13%|▏| 130/991 [31:55<3:24:18, 14.24s/batch, batch_loss=8.33e+3, ba

Epoch 4/10:  13%|▏| 131/991 [31:55<3:27:01, 14.44s/batch, batch_loss=8.33e+3, ba

Epoch 4/10:  13%|▏| 131/991 [32:09<3:27:01, 14.44s/batch, batch_loss=22.6, batch

Epoch 4/10:  13%|▏| 132/991 [32:09<3:26:26, 14.42s/batch, batch_loss=22.6, batch

Epoch 4/10:  13%|▏| 132/991 [32:24<3:26:26, 14.42s/batch, batch_loss=7.99, batch

Epoch 4/10:  13%|▏| 133/991 [32:24<3:27:25, 14.51s/batch, batch_loss=7.99, batch

Epoch 4/10:  13%|▏| 133/991 [32:41<3:27:25, 14.51s/batch, batch_loss=14.4, batch

Epoch 4/10:  14%|▏| 134/991 [32:41<3:39:13, 15.35s/batch, batch_loss=14.4, batch

Epoch 4/10:  14%|▏| 134/991 [32:55<3:39:13, 15.35s/batch, batch_loss=40.3, batch

Epoch 4/10:  14%|▏| 135/991 [32:55<3:34:08, 15.01s/batch, batch_loss=40.3, batch

Epoch 4/10:  14%|▏| 135/991 [33:11<3:34:08, 15.01s/batch, batch_loss=16.6, batch

Epoch 4/10:  14%|▏| 136/991 [33:11<3:36:39, 15.20s/batch, batch_loss=16.6, batch

Epoch 4/10:  14%|▏| 136/991 [33:27<3:36:39, 15.20s/batch, batch_loss=13.1, batch

Epoch 4/10:  14%|▏| 137/991 [33:27<3:38:09, 15.33s/batch, batch_loss=13.1, batch

Epoch 4/10:  14%|▏| 137/991 [33:42<3:38:09, 15.33s/batch, batch_loss=16.7, batch

Epoch 4/10:  14%|▏| 138/991 [33:42<3:37:03, 15.27s/batch, batch_loss=16.7, batch

Epoch 4/10:  14%|▏| 138/991 [33:56<3:37:03, 15.27s/batch, batch_loss=5.86, batch

Epoch 4/10:  14%|▏| 139/991 [33:56<3:31:06, 14.87s/batch, batch_loss=5.86, batch

Epoch 4/10:  14%|▏| 139/991 [34:09<3:31:06, 14.87s/batch, batch_loss=12.2, batch

Epoch 4/10:  14%|▏| 140/991 [34:09<3:25:33, 14.49s/batch, batch_loss=12.2, batch

Epoch 4/10:  14%|▏| 140/991 [34:23<3:25:33, 14.49s/batch, batch_loss=4.94, batch

Epoch 4/10:  14%|▏| 141/991 [34:23<3:21:16, 14.21s/batch, batch_loss=4.94, batch

Epoch 4/10:  14%|▏| 141/991 [34:36<3:21:16, 14.21s/batch, batch_loss=6.79, batch

Epoch 4/10:  14%|▏| 142/991 [34:36<3:18:13, 14.01s/batch, batch_loss=6.79, batch

Epoch 4/10:  14%|▏| 142/991 [34:50<3:18:13, 14.01s/batch, batch_loss=14.4, batch

Epoch 4/10:  14%|▏| 143/991 [34:50<3:16:13, 13.88s/batch, batch_loss=14.4, batch

Epoch 4/10:  14%|▏| 143/991 [35:03<3:16:13, 13.88s/batch, batch_loss=22.5, batch

Epoch 4/10:  15%|▏| 144/991 [35:03<3:12:58, 13.67s/batch, batch_loss=22.5, batch

Epoch 4/10:  15%|▏| 144/991 [35:16<3:12:58, 13.67s/batch, batch_loss=24.3, batch

Epoch 4/10:  15%|▏| 145/991 [35:16<3:10:14, 13.49s/batch, batch_loss=24.3, batch

Epoch 4/10:  15%|▏| 145/991 [35:30<3:10:14, 13.49s/batch, batch_loss=20.6, batch

Epoch 4/10:  15%|▏| 146/991 [35:30<3:12:01, 13.63s/batch, batch_loss=20.6, batch

Epoch 4/10:  15%|▏| 146/991 [35:45<3:12:01, 13.63s/batch, batch_loss=10.1, batch

Epoch 4/10:  15%|▏| 147/991 [35:45<3:18:57, 14.14s/batch, batch_loss=10.1, batch

Epoch 4/10:  15%|▏| 147/991 [36:00<3:18:57, 14.14s/batch, batch_loss=21.1, batch

Epoch 4/10:  15%|▏| 148/991 [36:00<3:19:16, 14.18s/batch, batch_loss=21.1, batch

Epoch 4/10:  15%|▏| 148/991 [36:13<3:19:16, 14.18s/batch, batch_loss=15.8, batch

Epoch 4/10:  15%|▏| 149/991 [36:13<3:14:55, 13.89s/batch, batch_loss=15.8, batch

Epoch 4/10:  15%|▏| 149/991 [36:26<3:14:55, 13.89s/batch, batch_loss=18.3, batch

Epoch 4/10:  15%|▏| 150/991 [36:26<3:12:35, 13.74s/batch, batch_loss=18.3, batch

Epoch 4/10:  15%|▏| 150/991 [36:40<3:12:35, 13.74s/batch, batch_loss=30.7, batch

Epoch 4/10:  15%|▏| 151/991 [36:40<3:12:24, 13.74s/batch, batch_loss=30.7, batch

Epoch 4/10:  15%|▏| 151/991 [36:55<3:12:24, 13.74s/batch, batch_loss=22, batch_i

Epoch 4/10:  15%|▏| 152/991 [36:55<3:16:19, 14.04s/batch, batch_loss=22, batch_i

Epoch 4/10:  15%|▏| 152/991 [37:10<3:16:19, 14.04s/batch, batch_loss=32.7, batch

Epoch 4/10:  15%|▏| 153/991 [37:10<3:19:43, 14.30s/batch, batch_loss=32.7, batch

Epoch 4/10:  15%|▏| 153/991 [37:26<3:19:43, 14.30s/batch, batch_loss=31.1, batch

Epoch 4/10:  16%|▏| 154/991 [37:26<3:27:20, 14.86s/batch, batch_loss=31.1, batch

Epoch 4/10:  16%|▏| 154/991 [37:42<3:27:20, 14.86s/batch, batch_loss=34.7, batch

Epoch 4/10:  16%|▏| 155/991 [37:42<3:31:42, 15.19s/batch, batch_loss=34.7, batch

Epoch 4/10:  16%|▏| 155/991 [37:57<3:31:42, 15.19s/batch, batch_loss=9.57, batch

Epoch 4/10:  16%|▏| 156/991 [37:57<3:32:22, 15.26s/batch, batch_loss=9.57, batch

Epoch 4/10:  16%|▏| 156/991 [38:13<3:32:22, 15.26s/batch, batch_loss=34.1, batch

Epoch 4/10:  16%|▏| 157/991 [38:13<3:32:06, 15.26s/batch, batch_loss=34.1, batch

Epoch 4/10:  16%|▏| 157/991 [38:28<3:32:06, 15.26s/batch, batch_loss=7.55, batch

Epoch 4/10:  16%|▏| 158/991 [38:28<3:34:11, 15.43s/batch, batch_loss=7.55, batch

Epoch 4/10:  16%|▏| 158/991 [38:45<3:34:11, 15.43s/batch, batch_loss=6.14, batch

Epoch 4/10:  16%|▏| 159/991 [38:45<3:37:49, 15.71s/batch, batch_loss=6.14, batch

Epoch 4/10:  16%|▏| 159/991 [39:01<3:37:49, 15.71s/batch, batch_loss=15.1, batch

Epoch 4/10:  16%|▏| 160/991 [39:01<3:38:03, 15.74s/batch, batch_loss=15.1, batch

Epoch 4/10:  16%|▏| 160/991 [39:16<3:38:03, 15.74s/batch, batch_loss=467, batch_

Epoch 4/10:  16%|▏| 161/991 [39:16<3:36:22, 15.64s/batch, batch_loss=467, batch_

Epoch 4/10:  16%|▏| 161/991 [39:32<3:36:22, 15.64s/batch, batch_loss=23.6, batch

Epoch 4/10:  16%|▏| 162/991 [39:32<3:37:19, 15.73s/batch, batch_loss=23.6, batch

Epoch 4/10:  16%|▏| 162/991 [39:48<3:37:19, 15.73s/batch, batch_loss=9.32, batch

Epoch 4/10:  16%|▏| 163/991 [39:48<3:38:33, 15.84s/batch, batch_loss=9.32, batch

Epoch 4/10:  16%|▏| 163/991 [40:05<3:38:33, 15.84s/batch, batch_loss=16.8, batch

Epoch 4/10:  17%|▏| 164/991 [40:05<3:41:51, 16.10s/batch, batch_loss=16.8, batch

Epoch 4/10:  17%|▏| 164/991 [40:21<3:41:51, 16.10s/batch, batch_loss=13.4, batch

Epoch 4/10:  17%|▏| 165/991 [40:21<3:42:40, 16.18s/batch, batch_loss=13.4, batch

Epoch 4/10:  17%|▏| 165/991 [40:36<3:42:40, 16.18s/batch, batch_loss=13.9, batch

Epoch 4/10:  17%|▏| 166/991 [40:36<3:38:40, 15.90s/batch, batch_loss=13.9, batch

Epoch 4/10:  17%|▏| 166/991 [40:54<3:38:40, 15.90s/batch, batch_loss=25.1, batch

Epoch 4/10:  17%|▏| 167/991 [40:54<3:45:59, 16.46s/batch, batch_loss=25.1, batch

Epoch 4/10:  17%|▏| 167/991 [41:08<3:45:59, 16.46s/batch, batch_loss=24.8, batch

Epoch 4/10:  17%|▏| 168/991 [41:08<3:36:29, 15.78s/batch, batch_loss=24.8, batch

Epoch 4/10:  17%|▏| 168/991 [41:22<3:36:29, 15.78s/batch, batch_loss=17, batch_i

Epoch 4/10:  17%|▏| 169/991 [41:22<3:29:46, 15.31s/batch, batch_loss=17, batch_i

Epoch 4/10:  17%|▏| 169/991 [41:37<3:29:46, 15.31s/batch, batch_loss=10.1, batch

Epoch 4/10:  17%|▏| 170/991 [41:37<3:26:56, 15.12s/batch, batch_loss=10.1, batch

Epoch 4/10:  17%|▏| 170/991 [41:52<3:26:56, 15.12s/batch, batch_loss=7.71, batch

Epoch 4/10:  17%|▏| 171/991 [41:52<3:26:24, 15.10s/batch, batch_loss=7.71, batch

Epoch 4/10:  17%|▏| 171/991 [42:07<3:26:24, 15.10s/batch, batch_loss=13, batch_i

Epoch 4/10:  17%|▏| 172/991 [42:07<3:23:53, 14.94s/batch, batch_loss=13, batch_i

Epoch 4/10:  17%|▏| 172/991 [42:21<3:23:53, 14.94s/batch, batch_loss=12.6, batch

Epoch 4/10:  17%|▏| 173/991 [42:21<3:19:54, 14.66s/batch, batch_loss=12.6, batch

Epoch 4/10:  17%|▏| 173/991 [42:36<3:19:54, 14.66s/batch, batch_loss=3.02e+4, ba

Epoch 4/10:  18%|▏| 174/991 [42:36<3:22:04, 14.84s/batch, batch_loss=3.02e+4, ba

Epoch 4/10:  18%|▏| 174/991 [42:50<3:22:04, 14.84s/batch, batch_loss=32.4, batch

Epoch 4/10:  18%|▏| 175/991 [42:50<3:19:46, 14.69s/batch, batch_loss=32.4, batch

Epoch 4/10:  18%|▏| 175/991 [43:08<3:19:46, 14.69s/batch, batch_loss=32.1, batch

Epoch 4/10:  18%|▏| 176/991 [43:08<3:30:00, 15.46s/batch, batch_loss=32.1, batch

Epoch 4/10:  18%|▏| 176/991 [43:23<3:30:00, 15.46s/batch, batch_loss=28.5, batch

Epoch 4/10:  18%|▏| 177/991 [43:23<3:30:52, 15.54s/batch, batch_loss=28.5, batch

Epoch 4/10:  18%|▏| 177/991 [43:39<3:30:52, 15.54s/batch, batch_loss=34.3, batch

Epoch 4/10:  18%|▏| 178/991 [43:39<3:32:50, 15.71s/batch, batch_loss=34.3, batch

Epoch 4/10:  18%|▏| 178/991 [43:53<3:32:50, 15.71s/batch, batch_loss=16.7, batch

Epoch 4/10:  18%|▏| 179/991 [43:53<3:24:21, 15.10s/batch, batch_loss=16.7, batch

Epoch 4/10:  18%|▏| 179/991 [44:08<3:24:21, 15.10s/batch, batch_loss=8.95, batch

Epoch 4/10:  18%|▏| 180/991 [44:08<3:21:07, 14.88s/batch, batch_loss=8.95, batch

Epoch 4/10:  18%|▏| 180/991 [44:22<3:21:07, 14.88s/batch, batch_loss=2.51e+4, ba

Epoch 4/10:  18%|▏| 181/991 [44:22<3:20:42, 14.87s/batch, batch_loss=2.51e+4, ba

Epoch 4/10:  18%|▏| 181/991 [44:38<3:20:42, 14.87s/batch, batch_loss=14, batch_i

Epoch 4/10:  18%|▏| 182/991 [44:38<3:23:44, 15.11s/batch, batch_loss=14, batch_i

Epoch 4/10:  18%|▏| 182/991 [44:56<3:23:44, 15.11s/batch, batch_loss=23.8, batch

Epoch 4/10:  18%|▏| 183/991 [44:56<3:34:02, 15.89s/batch, batch_loss=23.8, batch

Epoch 4/10:  18%|▏| 183/991 [45:12<3:34:02, 15.89s/batch, batch_loss=22.3, batch

Epoch 4/10:  19%|▏| 184/991 [45:12<3:34:44, 15.97s/batch, batch_loss=22.3, batch

Epoch 4/10:  19%|▏| 184/991 [45:28<3:34:44, 15.97s/batch, batch_loss=13.8, batch

Epoch 4/10:  19%|▏| 185/991 [45:28<3:34:05, 15.94s/batch, batch_loss=13.8, batch

Epoch 4/10:  19%|▏| 185/991 [45:45<3:34:05, 15.94s/batch, batch_loss=23, batch_i

Epoch 4/10:  19%|▏| 186/991 [45:45<3:37:30, 16.21s/batch, batch_loss=23, batch_i

Epoch 4/10:  19%|▏| 186/991 [46:01<3:37:30, 16.21s/batch, batch_loss=17.8, batch

Epoch 4/10:  19%|▏| 187/991 [46:01<3:37:44, 16.25s/batch, batch_loss=17.8, batch

Epoch 4/10:  19%|▏| 187/991 [46:17<3:37:44, 16.25s/batch, batch_loss=15.9, batch

Epoch 4/10:  19%|▏| 188/991 [46:17<3:35:27, 16.10s/batch, batch_loss=15.9, batch

Epoch 4/10:  19%|▏| 188/991 [46:33<3:35:27, 16.10s/batch, batch_loss=21.6, batch

Epoch 4/10:  19%|▏| 189/991 [46:33<3:34:55, 16.08s/batch, batch_loss=21.6, batch

Epoch 4/10:  19%|▏| 189/991 [46:47<3:34:55, 16.08s/batch, batch_loss=20.5, batch

Epoch 4/10:  19%|▏| 190/991 [46:47<3:28:08, 15.59s/batch, batch_loss=20.5, batch

Epoch 4/10:  19%|▏| 190/991 [47:03<3:28:08, 15.59s/batch, batch_loss=21.4, batch

Epoch 4/10:  19%|▏| 191/991 [47:03<3:28:31, 15.64s/batch, batch_loss=21.4, batch

Epoch 4/10:  19%|▏| 191/991 [47:18<3:28:31, 15.64s/batch, batch_loss=17.8, batch

Epoch 4/10:  19%|▏| 192/991 [47:18<3:26:01, 15.47s/batch, batch_loss=17.8, batch

Epoch 4/10:  19%|▏| 192/991 [47:35<3:26:01, 15.47s/batch, batch_loss=22.7, batch

Epoch 4/10:  19%|▏| 193/991 [47:35<3:30:05, 15.80s/batch, batch_loss=22.7, batch

Epoch 4/10:  19%|▏| 193/991 [47:50<3:30:05, 15.80s/batch, batch_loss=9.21, batch

Epoch 4/10:  20%|▏| 194/991 [47:50<3:29:26, 15.77s/batch, batch_loss=9.21, batch

Epoch 4/10:  20%|▏| 194/991 [48:04<3:29:26, 15.77s/batch, batch_loss=4.68, batch

Epoch 4/10:  20%|▏| 195/991 [48:04<3:21:44, 15.21s/batch, batch_loss=4.68, batch

Epoch 4/10:  20%|▏| 195/991 [48:20<3:21:44, 15.21s/batch, batch_loss=8.71, batch

Epoch 4/10:  20%|▏| 196/991 [48:20<3:22:40, 15.30s/batch, batch_loss=8.71, batch

Epoch 4/10:  20%|▏| 196/991 [48:35<3:22:40, 15.30s/batch, batch_loss=15.2, batch

Epoch 4/10:  20%|▏| 197/991 [48:35<3:21:09, 15.20s/batch, batch_loss=15.2, batch

Epoch 4/10:  20%|▏| 197/991 [48:53<3:21:09, 15.20s/batch, batch_loss=9.97, batch

Epoch 4/10:  20%|▏| 198/991 [48:53<3:31:43, 16.02s/batch, batch_loss=9.97, batch

Epoch 4/10:  20%|▏| 198/991 [49:08<3:31:43, 16.02s/batch, batch_loss=15.4, batch

Epoch 4/10:  20%|▏| 199/991 [49:08<3:27:59, 15.76s/batch, batch_loss=15.4, batch

Epoch 4/10:  20%|▏| 199/991 [49:22<3:27:59, 15.76s/batch, batch_loss=8.98, batch

Epoch 4/10:  20%|▏| 200/991 [49:22<3:21:57, 15.32s/batch, batch_loss=8.98, batch

Epoch 4/10:  20%|▏| 200/991 [49:36<3:21:57, 15.32s/batch, batch_loss=15.1, batch

Epoch 4/10:  20%|▏| 201/991 [49:36<3:16:07, 14.90s/batch, batch_loss=15.1, batch

Epoch 4/10:  20%|▏| 201/991 [49:50<3:16:07, 14.90s/batch, batch_loss=13.5, batch

Epoch 4/10:  20%|▏| 202/991 [49:50<3:11:01, 14.53s/batch, batch_loss=13.5, batch

Epoch 4/10:  20%|▏| 202/991 [50:04<3:11:01, 14.53s/batch, batch_loss=17.1, batch

Epoch 4/10:  20%|▏| 203/991 [50:04<3:09:22, 14.42s/batch, batch_loss=17.1, batch

Epoch 4/10:  20%|▏| 203/991 [50:18<3:09:22, 14.42s/batch, batch_loss=38.7, batch

Epoch 4/10:  21%|▏| 204/991 [50:18<3:07:53, 14.33s/batch, batch_loss=38.7, batch

Epoch 4/10:  21%|▏| 204/991 [50:33<3:07:53, 14.33s/batch, batch_loss=33.2, batch

Epoch 4/10:  21%|▏| 205/991 [50:33<3:11:24, 14.61s/batch, batch_loss=33.2, batch

Epoch 4/10:  21%|▏| 205/991 [50:48<3:11:24, 14.61s/batch, batch_loss=9.4, batch_

Epoch 4/10:  21%|▏| 206/991 [50:48<3:10:36, 14.57s/batch, batch_loss=9.4, batch_

Epoch 4/10:  21%|▏| 206/991 [51:02<3:10:36, 14.57s/batch, batch_loss=10.9, batch

Epoch 4/10:  21%|▏| 207/991 [51:02<3:11:34, 14.66s/batch, batch_loss=10.9, batch

Epoch 4/10:  21%|▏| 207/991 [51:18<3:11:34, 14.66s/batch, batch_loss=12.9, batch

Epoch 4/10:  21%|▏| 208/991 [51:18<3:14:26, 14.90s/batch, batch_loss=12.9, batch

Epoch 4/10:  21%|▏| 208/991 [51:33<3:14:26, 14.90s/batch, batch_loss=9.81, batch

Epoch 4/10:  21%|▏| 209/991 [51:33<3:16:26, 15.07s/batch, batch_loss=9.81, batch

Epoch 4/10:  21%|▏| 209/991 [51:49<3:16:26, 15.07s/batch, batch_loss=25.7, batch

Epoch 4/10:  21%|▏| 210/991 [51:49<3:17:49, 15.20s/batch, batch_loss=25.7, batch

Epoch 4/10:  21%|▏| 210/991 [52:04<3:17:49, 15.20s/batch, batch_loss=13.5, batch

Epoch 4/10:  21%|▏| 211/991 [52:04<3:18:36, 15.28s/batch, batch_loss=13.5, batch

Epoch 4/10:  21%|▏| 211/991 [52:20<3:18:36, 15.28s/batch, batch_loss=26.3, batch

Epoch 4/10:  21%|▏| 212/991 [52:20<3:20:11, 15.42s/batch, batch_loss=26.3, batch

Epoch 4/10:  21%|▏| 212/991 [52:36<3:20:11, 15.42s/batch, batch_loss=4.05, batch

Epoch 4/10:  21%|▏| 213/991 [52:36<3:20:37, 15.47s/batch, batch_loss=4.05, batch

Epoch 4/10:  21%|▏| 213/991 [52:52<3:20:37, 15.47s/batch, batch_loss=15.1, batch

Epoch 4/10:  22%|▏| 214/991 [52:52<3:24:44, 15.81s/batch, batch_loss=15.1, batch

Epoch 4/10:  22%|▏| 214/991 [53:11<3:24:44, 15.81s/batch, batch_loss=22, batch_i

Epoch 4/10:  22%|▏| 215/991 [53:11<3:35:05, 16.63s/batch, batch_loss=22, batch_i

Epoch 4/10:  22%|▏| 215/991 [53:26<3:35:05, 16.63s/batch, batch_loss=11.1, batch

Epoch 4/10:  22%|▏| 216/991 [53:26<3:28:16, 16.12s/batch, batch_loss=11.1, batch

Epoch 4/10:  22%|▏| 216/991 [53:42<3:28:16, 16.12s/batch, batch_loss=20, batch_i

Epoch 4/10:  22%|▏| 217/991 [53:42<3:27:28, 16.08s/batch, batch_loss=20, batch_i

Epoch 4/10:  22%|▏| 217/991 [53:57<3:27:28, 16.08s/batch, batch_loss=20.3, batch

Epoch 4/10:  22%|▏| 218/991 [53:57<3:24:14, 15.85s/batch, batch_loss=20.3, batch

Epoch 4/10:  22%|▏| 218/991 [54:13<3:24:14, 15.85s/batch, batch_loss=24.5, batch

Epoch 4/10:  22%|▏| 219/991 [54:13<3:23:34, 15.82s/batch, batch_loss=24.5, batch

Epoch 4/10:  22%|▏| 219/991 [54:29<3:23:34, 15.82s/batch, batch_loss=40.3, batch

Epoch 4/10:  22%|▏| 220/991 [54:29<3:24:10, 15.89s/batch, batch_loss=40.3, batch

Epoch 4/10:  22%|▏| 220/991 [54:44<3:24:10, 15.89s/batch, batch_loss=24.5, batch

Epoch 4/10:  22%|▏| 221/991 [54:44<3:21:54, 15.73s/batch, batch_loss=24.5, batch

Epoch 4/10:  22%|▏| 221/991 [55:00<3:21:54, 15.73s/batch, batch_loss=20.2, batch

Epoch 4/10:  22%|▏| 222/991 [55:00<3:21:13, 15.70s/batch, batch_loss=20.2, batch

Epoch 4/10:  22%|▏| 222/991 [55:15<3:21:13, 15.70s/batch, batch_loss=37.8, batch

Epoch 4/10:  23%|▏| 223/991 [55:15<3:19:14, 15.57s/batch, batch_loss=37.8, batch

Epoch 4/10:  23%|▏| 223/991 [55:33<3:19:14, 15.57s/batch, batch_loss=17.9, batch

Epoch 4/10:  23%|▏| 224/991 [55:33<3:26:05, 16.12s/batch, batch_loss=17.9, batch

Epoch 4/10:  23%|▏| 224/991 [55:47<3:26:05, 16.12s/batch, batch_loss=10.3, batch

Epoch 4/10:  23%|▏| 225/991 [55:47<3:20:49, 15.73s/batch, batch_loss=10.3, batch

Epoch 4/10:  23%|▏| 225/991 [56:04<3:20:49, 15.73s/batch, batch_loss=32.1, batch

Epoch 4/10:  23%|▏| 226/991 [56:04<3:22:56, 15.92s/batch, batch_loss=32.1, batch

Epoch 4/10:  23%|▏| 226/991 [56:20<3:22:56, 15.92s/batch, batch_loss=2.4e+3, bat

Epoch 4/10:  23%|▏| 227/991 [56:20<3:23:23, 15.97s/batch, batch_loss=2.4e+3, bat

Epoch 4/10:  23%|▏| 227/991 [56:36<3:23:23, 15.97s/batch, batch_loss=3.58e+3, ba

Epoch 4/10:  23%|▏| 228/991 [56:36<3:24:11, 16.06s/batch, batch_loss=3.58e+3, ba

Epoch 4/10:  23%|▏| 228/991 [56:51<3:24:11, 16.06s/batch, batch_loss=12.7, batch

Epoch 4/10:  23%|▏| 229/991 [56:51<3:19:43, 15.73s/batch, batch_loss=12.7, batch

Epoch 4/10:  23%|▏| 229/991 [57:09<3:19:43, 15.73s/batch, batch_loss=11.3, batch

Epoch 4/10:  23%|▏| 230/991 [57:09<3:26:38, 16.29s/batch, batch_loss=11.3, batch

Epoch 4/10:  23%|▏| 230/991 [57:24<3:26:38, 16.29s/batch, batch_loss=13.7, batch

Epoch 4/10:  23%|▏| 231/991 [57:24<3:21:05, 15.88s/batch, batch_loss=13.7, batch

Epoch 4/10:  23%|▏| 231/991 [57:38<3:21:05, 15.88s/batch, batch_loss=11.6, batch

Epoch 4/10:  23%|▏| 232/991 [57:38<3:14:21, 15.36s/batch, batch_loss=11.6, batch

Epoch 4/10:  23%|▏| 232/991 [57:53<3:14:21, 15.36s/batch, batch_loss=11.8, batch

Epoch 4/10:  24%|▏| 233/991 [57:53<3:13:03, 15.28s/batch, batch_loss=11.8, batch

Epoch 4/10:  24%|▏| 233/991 [58:07<3:13:03, 15.28s/batch, batch_loss=15.9, batch

Epoch 4/10:  24%|▏| 234/991 [58:07<3:08:16, 14.92s/batch, batch_loss=15.9, batch

Epoch 4/10:  24%|▏| 234/991 [58:22<3:08:16, 14.92s/batch, batch_loss=16.1, batch

Epoch 4/10:  24%|▏| 235/991 [58:22<3:08:22, 14.95s/batch, batch_loss=16.1, batch

Epoch 4/10:  24%|▏| 235/991 [58:37<3:08:22, 14.95s/batch, batch_loss=25.7, batch

Epoch 4/10:  24%|▏| 236/991 [58:37<3:09:31, 15.06s/batch, batch_loss=25.7, batch

Epoch 4/10:  24%|▏| 236/991 [58:52<3:09:31, 15.06s/batch, batch_loss=27.2, batch

Epoch 4/10:  24%|▏| 237/991 [58:52<3:09:58, 15.12s/batch, batch_loss=27.2, batch

Epoch 4/10:  24%|▏| 237/991 [59:07<3:09:58, 15.12s/batch, batch_loss=19.1, batch

Epoch 4/10:  24%|▏| 238/991 [59:07<3:08:51, 15.05s/batch, batch_loss=19.1, batch

Epoch 4/10:  24%|▏| 238/991 [59:22<3:08:51, 15.05s/batch, batch_loss=7.46, batch

Epoch 4/10:  24%|▏| 239/991 [59:22<3:08:11, 15.02s/batch, batch_loss=7.46, batch

Epoch 4/10:  24%|▏| 239/991 [59:37<3:08:11, 15.02s/batch, batch_loss=8.4, batch_

Epoch 4/10:  24%|▏| 240/991 [59:37<3:06:55, 14.93s/batch, batch_loss=8.4, batch_

Epoch 4/10:  24%|▏| 240/991 [59:53<3:06:55, 14.93s/batch, batch_loss=10.8, batch

Epoch 4/10:  24%|▏| 241/991 [59:53<3:11:01, 15.28s/batch, batch_loss=10.8, batch

Epoch 4/10:  24%|▏| 241/991 [1:00:08<3:11:01, 15.28s/batch, batch_loss=21.3, bat

Epoch 4/10:  24%|▏| 242/991 [1:00:08<3:07:45, 15.04s/batch, batch_loss=21.3, bat

Epoch 4/10:  24%|▏| 242/991 [1:00:23<3:07:45, 15.04s/batch, batch_loss=268, batc

Epoch 4/10:  25%|▏| 243/991 [1:00:23<3:09:39, 15.21s/batch, batch_loss=268, batc

Epoch 4/10:  25%|▏| 243/991 [1:00:39<3:09:39, 15.21s/batch, batch_loss=16.5, bat

Epoch 4/10:  25%|▏| 244/991 [1:00:39<3:11:50, 15.41s/batch, batch_loss=16.5, bat

Epoch 4/10:  25%|▏| 244/991 [1:00:54<3:11:50, 15.41s/batch, batch_loss=7.32, bat

Epoch 4/10:  25%|▏| 245/991 [1:00:54<3:10:42, 15.34s/batch, batch_loss=7.32, bat

Epoch 4/10:  25%|▏| 245/991 [1:01:13<3:10:42, 15.34s/batch, batch_loss=6.5, batc

Epoch 4/10:  25%|▏| 246/991 [1:01:13<3:21:51, 16.26s/batch, batch_loss=6.5, batc

Epoch 4/10:  25%|▏| 246/991 [1:01:28<3:21:51, 16.26s/batch, batch_loss=14.8, bat

Epoch 4/10:  25%|▏| 247/991 [1:01:28<3:18:58, 16.05s/batch, batch_loss=14.8, bat

Epoch 4/10:  25%|▏| 247/991 [1:01:43<3:18:58, 16.05s/batch, batch_loss=5.86, bat

Epoch 4/10:  25%|▎| 248/991 [1:01:43<3:14:08, 15.68s/batch, batch_loss=5.86, bat

Epoch 4/10:  25%|▎| 248/991 [1:01:59<3:14:08, 15.68s/batch, batch_loss=15.1, bat

Epoch 4/10:  25%|▎| 249/991 [1:01:59<3:13:41, 15.66s/batch, batch_loss=15.1, bat

Epoch 4/10:  25%|▎| 249/991 [1:02:15<3:13:41, 15.66s/batch, batch_loss=10.3, bat

Epoch 4/10:  25%|▎| 250/991 [1:02:15<3:14:13, 15.73s/batch, batch_loss=10.3, bat

Epoch 4/10:  25%|▎| 250/991 [1:02:30<3:14:13, 15.73s/batch, batch_loss=7.16, bat

Epoch 4/10:  25%|▎| 251/991 [1:02:30<3:13:36, 15.70s/batch, batch_loss=7.16, bat

Epoch 4/10:  25%|▎| 251/991 [1:02:45<3:13:36, 15.70s/batch, batch_loss=13.5, bat

Epoch 4/10:  25%|▎| 252/991 [1:02:45<3:10:04, 15.43s/batch, batch_loss=13.5, bat

Epoch 4/10:  25%|▎| 252/991 [1:02:59<3:10:04, 15.43s/batch, batch_loss=8.38, bat

Epoch 4/10:  26%|▎| 253/991 [1:02:59<3:05:32, 15.09s/batch, batch_loss=8.38, bat

Epoch 4/10:  26%|▎| 253/991 [1:03:15<3:05:32, 15.09s/batch, batch_loss=20.6, bat

Epoch 4/10:  26%|▎| 254/991 [1:03:15<3:06:12, 15.16s/batch, batch_loss=20.6, bat

Epoch 4/10:  26%|▎| 254/991 [1:03:29<3:06:12, 15.16s/batch, batch_loss=16.1, bat

Epoch 4/10:  26%|▎| 255/991 [1:03:29<3:04:11, 15.02s/batch, batch_loss=16.1, bat

Epoch 4/10:  26%|▎| 255/991 [1:03:44<3:04:11, 15.02s/batch, batch_loss=698, batc

Epoch 4/10:  26%|▎| 256/991 [1:03:44<3:01:34, 14.82s/batch, batch_loss=698, batc

Epoch 4/10:  26%|▎| 256/991 [1:03:56<3:01:34, 14.82s/batch, batch_loss=19.5, bat

Epoch 4/10:  26%|▎| 257/991 [1:03:56<2:53:14, 14.16s/batch, batch_loss=19.5, bat

Epoch 4/10:  26%|▎| 257/991 [1:04:11<2:53:14, 14.16s/batch, batch_loss=204, batc

Epoch 4/10:  26%|▎| 258/991 [1:04:11<2:54:00, 14.24s/batch, batch_loss=204, batc

Epoch 4/10:  26%|▎| 258/991 [1:04:26<2:54:00, 14.24s/batch, batch_loss=15.3, bat

Epoch 4/10:  26%|▎| 259/991 [1:04:26<2:58:03, 14.60s/batch, batch_loss=15.3, bat

Epoch 4/10:  26%|▎| 259/991 [1:04:41<2:58:03, 14.60s/batch, batch_loss=16.9, bat

Epoch 4/10:  26%|▎| 260/991 [1:04:41<2:59:15, 14.71s/batch, batch_loss=16.9, bat

Epoch 4/10:  26%|▎| 260/991 [1:04:55<2:59:15, 14.71s/batch, batch_loss=15.2, bat

Epoch 4/10:  26%|▎| 261/991 [1:04:55<2:56:45, 14.53s/batch, batch_loss=15.2, bat

Epoch 4/10:  26%|▎| 261/991 [1:05:12<2:56:45, 14.53s/batch, batch_loss=11.6, bat

Epoch 4/10:  26%|▎| 262/991 [1:05:12<3:05:28, 15.27s/batch, batch_loss=11.6, bat

Epoch 4/10:  26%|▎| 262/991 [1:05:28<3:05:28, 15.27s/batch, batch_loss=12.4, bat

Epoch 4/10:  27%|▎| 263/991 [1:05:28<3:06:56, 15.41s/batch, batch_loss=12.4, bat

Epoch 4/10:  27%|▎| 263/991 [1:05:44<3:06:56, 15.41s/batch, batch_loss=15.3, bat

Epoch 4/10:  27%|▎| 264/991 [1:05:44<3:07:40, 15.49s/batch, batch_loss=15.3, bat

Epoch 4/10:  27%|▎| 264/991 [1:05:57<3:07:40, 15.49s/batch, batch_loss=15.9, bat

Epoch 4/10:  27%|▎| 265/991 [1:05:57<3:00:17, 14.90s/batch, batch_loss=15.9, bat

Epoch 4/10:  27%|▎| 265/991 [1:06:12<3:00:17, 14.90s/batch, batch_loss=14.8, bat

Epoch 4/10:  27%|▎| 266/991 [1:06:12<2:59:30, 14.86s/batch, batch_loss=14.8, bat

Epoch 4/10:  27%|▎| 266/991 [1:06:26<2:59:30, 14.86s/batch, batch_loss=10.5, bat

Epoch 4/10:  27%|▎| 267/991 [1:06:26<2:56:53, 14.66s/batch, batch_loss=10.5, bat

Epoch 4/10:  27%|▎| 267/991 [1:06:44<2:56:53, 14.66s/batch, batch_loss=8.38, bat

Epoch 4/10:  27%|▎| 268/991 [1:06:44<3:07:53, 15.59s/batch, batch_loss=8.38, bat

Epoch 4/10:  27%|▎| 268/991 [1:06:58<3:07:53, 15.59s/batch, batch_loss=14.4, bat

Epoch 4/10:  27%|▎| 269/991 [1:06:58<3:03:00, 15.21s/batch, batch_loss=14.4, bat

Epoch 4/10:  27%|▎| 269/991 [1:07:13<3:03:00, 15.21s/batch, batch_loss=1.62, bat

Epoch 4/10:  27%|▎| 270/991 [1:07:13<3:00:24, 15.01s/batch, batch_loss=1.62, bat

Epoch 4/10:  27%|▎| 270/991 [1:07:27<3:00:24, 15.01s/batch, batch_loss=11.4, bat

Epoch 4/10:  27%|▎| 271/991 [1:07:27<2:58:52, 14.91s/batch, batch_loss=11.4, bat

Epoch 4/10:  27%|▎| 271/991 [1:07:41<2:58:52, 14.91s/batch, batch_loss=13.5, bat

Epoch 4/10:  27%|▎| 272/991 [1:07:41<2:53:28, 14.48s/batch, batch_loss=13.5, bat

Epoch 4/10:  27%|▎| 272/991 [1:07:56<2:53:28, 14.48s/batch, batch_loss=17.7, bat

Epoch 4/10:  28%|▎| 273/991 [1:07:56<2:55:29, 14.66s/batch, batch_loss=17.7, bat

Epoch 4/10:  28%|▎| 273/991 [1:08:11<2:55:29, 14.66s/batch, batch_loss=11.7, bat

Epoch 4/10:  28%|▎| 274/991 [1:08:11<2:56:06, 14.74s/batch, batch_loss=11.7, bat

Epoch 4/10:  28%|▎| 274/991 [1:08:26<2:56:06, 14.74s/batch, batch_loss=3.34e+3, 

Epoch 4/10:  28%|▎| 275/991 [1:08:26<2:57:42, 14.89s/batch, batch_loss=3.34e+3, 

Epoch 4/10:  28%|▎| 275/991 [1:08:40<2:57:42, 14.89s/batch, batch_loss=13.5, bat

Epoch 4/10:  28%|▎| 276/991 [1:08:40<2:54:57, 14.68s/batch, batch_loss=13.5, bat

Epoch 4/10:  28%|▎| 276/991 [1:08:58<2:54:57, 14.68s/batch, batch_loss=4.75e+3, 

Epoch 4/10:  28%|▎| 277/991 [1:08:58<3:03:50, 15.45s/batch, batch_loss=4.75e+3, 

Epoch 4/10:  28%|▎| 277/991 [1:09:13<3:03:50, 15.45s/batch, batch_loss=13.3, bat

Epoch 4/10:  28%|▎| 278/991 [1:09:13<3:02:12, 15.33s/batch, batch_loss=13.3, bat

Epoch 4/10:  28%|▎| 278/991 [1:09:27<3:02:12, 15.33s/batch, batch_loss=18.2, bat

Epoch 4/10:  28%|▎| 279/991 [1:09:27<2:59:18, 15.11s/batch, batch_loss=18.2, bat

Epoch 4/10:  28%|▎| 279/991 [1:09:43<2:59:18, 15.11s/batch, batch_loss=11.2, bat

Epoch 4/10:  28%|▎| 280/991 [1:09:43<3:01:25, 15.31s/batch, batch_loss=11.2, bat

Epoch 4/10:  28%|▎| 280/991 [1:09:59<3:01:25, 15.31s/batch, batch_loss=12.7, bat

Epoch 4/10:  28%|▎| 281/991 [1:09:59<3:01:54, 15.37s/batch, batch_loss=12.7, bat

Epoch 4/10:  28%|▎| 281/991 [1:10:13<3:01:54, 15.37s/batch, batch_loss=6.45, bat

Epoch 4/10:  28%|▎| 282/991 [1:10:13<2:59:05, 15.16s/batch, batch_loss=6.45, bat

Epoch 4/10:  28%|▎| 282/991 [1:10:30<2:59:05, 15.16s/batch, batch_loss=15.7, bat

Epoch 4/10:  29%|▎| 283/991 [1:10:30<3:04:46, 15.66s/batch, batch_loss=15.7, bat

Epoch 4/10:  29%|▎| 283/991 [1:10:46<3:04:46, 15.66s/batch, batch_loss=18.3, bat

Epoch 4/10:  29%|▎| 284/991 [1:10:46<3:04:48, 15.68s/batch, batch_loss=18.3, bat

Epoch 4/10:  29%|▎| 284/991 [1:11:01<3:04:48, 15.68s/batch, batch_loss=11.8, bat

Epoch 4/10:  29%|▎| 285/991 [1:11:01<3:02:28, 15.51s/batch, batch_loss=11.8, bat

Epoch 4/10:  29%|▎| 285/991 [1:11:16<3:02:28, 15.51s/batch, batch_loss=8.1, batc

Epoch 4/10:  29%|▎| 286/991 [1:11:16<3:01:09, 15.42s/batch, batch_loss=8.1, batc

Epoch 4/10:  29%|▎| 286/991 [1:11:32<3:01:09, 15.42s/batch, batch_loss=7.79, bat

Epoch 4/10:  29%|▎| 287/991 [1:11:32<3:03:18, 15.62s/batch, batch_loss=7.79, bat

Epoch 4/10:  29%|▎| 287/991 [1:11:48<3:03:18, 15.62s/batch, batch_loss=2.61e+3, 

Epoch 4/10:  29%|▎| 288/991 [1:11:48<3:02:37, 15.59s/batch, batch_loss=2.61e+3, 

Epoch 4/10:  29%|▎| 288/991 [1:12:03<3:02:37, 15.59s/batch, batch_loss=1.25e+3, 

Epoch 4/10:  29%|▎| 289/991 [1:12:03<3:01:06, 15.48s/batch, batch_loss=1.25e+3, 

Epoch 4/10:  29%|▎| 289/991 [1:12:18<3:01:06, 15.48s/batch, batch_loss=13.2, bat

Epoch 4/10:  29%|▎| 290/991 [1:12:18<3:00:42, 15.47s/batch, batch_loss=13.2, bat

Epoch 4/10:  29%|▎| 290/991 [1:12:33<3:00:42, 15.47s/batch, batch_loss=5.15, bat

Epoch 4/10:  29%|▎| 291/991 [1:12:33<2:58:13, 15.28s/batch, batch_loss=5.15, bat

Epoch 4/10:  29%|▎| 291/991 [1:12:47<2:58:13, 15.28s/batch, batch_loss=12, batch

Epoch 4/10:  29%|▎| 292/991 [1:12:47<2:53:00, 14.85s/batch, batch_loss=12, batch

Epoch 4/10:  29%|▎| 292/991 [1:13:01<2:53:00, 14.85s/batch, batch_loss=14.1, bat

Epoch 4/10:  30%|▎| 293/991 [1:13:01<2:50:07, 14.62s/batch, batch_loss=14.1, bat

Epoch 4/10:  30%|▎| 293/991 [1:13:16<2:50:07, 14.62s/batch, batch_loss=13.7, bat

Epoch 4/10:  30%|▎| 294/991 [1:13:16<2:49:12, 14.57s/batch, batch_loss=13.7, bat

Epoch 4/10:  30%|▎| 294/991 [1:13:31<2:49:12, 14.57s/batch, batch_loss=9.65, bat

Epoch 4/10:  30%|▎| 295/991 [1:13:31<2:53:22, 14.95s/batch, batch_loss=9.65, bat

Epoch 4/10:  30%|▎| 295/991 [1:13:45<2:53:22, 14.95s/batch, batch_loss=16.4, bat

Epoch 4/10:  30%|▎| 296/991 [1:13:45<2:48:03, 14.51s/batch, batch_loss=16.4, bat

Epoch 4/10:  30%|▎| 296/991 [1:13:59<2:48:03, 14.51s/batch, batch_loss=12.1, bat

Epoch 4/10:  30%|▎| 297/991 [1:13:59<2:46:37, 14.41s/batch, batch_loss=12.1, bat

Epoch 4/10:  30%|▎| 297/991 [1:14:13<2:46:37, 14.41s/batch, batch_loss=3.24e+4, 

Epoch 4/10:  30%|▎| 298/991 [1:14:13<2:43:20, 14.14s/batch, batch_loss=3.24e+4, 

Epoch 4/10:  30%|▎| 298/991 [1:14:27<2:43:20, 14.14s/batch, batch_loss=15.4, bat

Epoch 4/10:  30%|▎| 299/991 [1:14:27<2:44:59, 14.31s/batch, batch_loss=15.4, bat

Epoch 4/10:  30%|▎| 299/991 [1:14:42<2:44:59, 14.31s/batch, batch_loss=6.72, bat

Epoch 4/10:  30%|▎| 300/991 [1:14:42<2:46:28, 14.45s/batch, batch_loss=6.72, bat

Epoch 4/10:  30%|▎| 300/991 [1:14:57<2:46:28, 14.45s/batch, batch_loss=8.25, bat

Epoch 4/10:  30%|▎| 301/991 [1:14:57<2:46:38, 14.49s/batch, batch_loss=8.25, bat

Epoch 4/10:  30%|▎| 301/991 [1:15:12<2:46:38, 14.49s/batch, batch_loss=11.4, bat

Epoch 4/10:  30%|▎| 302/991 [1:15:12<2:49:31, 14.76s/batch, batch_loss=11.4, bat

Epoch 4/10:  30%|▎| 302/991 [1:15:28<2:49:31, 14.76s/batch, batch_loss=8.59, bat

Epoch 4/10:  31%|▎| 303/991 [1:15:28<2:53:10, 15.10s/batch, batch_loss=8.59, bat

Epoch 4/10:  31%|▎| 303/991 [1:15:43<2:53:10, 15.10s/batch, batch_loss=3.4, batc

Epoch 4/10:  31%|▎| 304/991 [1:15:43<2:51:53, 15.01s/batch, batch_loss=3.4, batc

Epoch 4/10:  31%|▎| 304/991 [1:15:57<2:51:53, 15.01s/batch, batch_loss=14.6, bat

Epoch 4/10:  31%|▎| 305/991 [1:15:57<2:48:07, 14.71s/batch, batch_loss=14.6, bat

Epoch 4/10:  31%|▎| 305/991 [1:16:10<2:48:07, 14.71s/batch, batch_loss=8.76, bat

Epoch 4/10:  31%|▎| 306/991 [1:16:10<2:41:42, 14.16s/batch, batch_loss=8.76, bat

Epoch 4/10:  31%|▎| 306/991 [1:16:23<2:41:42, 14.16s/batch, batch_loss=6.29e+3, 

Epoch 4/10:  31%|▎| 307/991 [1:16:23<2:38:18, 13.89s/batch, batch_loss=6.29e+3, 

Epoch 4/10:  31%|▎| 307/991 [1:16:37<2:38:18, 13.89s/batch, batch_loss=11.5, bat

Epoch 4/10:  31%|▎| 308/991 [1:16:37<2:38:17, 13.90s/batch, batch_loss=11.5, bat

Epoch 4/10:  31%|▎| 308/991 [1:16:51<2:38:17, 13.90s/batch, batch_loss=18.8, bat

Epoch 4/10:  31%|▎| 309/991 [1:16:51<2:40:04, 14.08s/batch, batch_loss=18.8, bat

Epoch 4/10:  31%|▎| 309/991 [1:17:06<2:40:04, 14.08s/batch, batch_loss=14.5, bat

Epoch 4/10:  31%|▎| 310/991 [1:17:06<2:43:21, 14.39s/batch, batch_loss=14.5, bat

Epoch 4/10:  31%|▎| 310/991 [1:17:22<2:43:21, 14.39s/batch, batch_loss=11.4, bat

Epoch 4/10:  31%|▎| 311/991 [1:17:22<2:48:53, 14.90s/batch, batch_loss=11.4, bat

Epoch 4/10:  31%|▎| 311/991 [1:17:39<2:48:53, 14.90s/batch, batch_loss=12.6, bat

Epoch 4/10:  31%|▎| 312/991 [1:17:39<2:53:13, 15.31s/batch, batch_loss=12.6, bat

Epoch 4/10:  31%|▎| 312/991 [1:17:54<2:53:13, 15.31s/batch, batch_loss=1.06e+4, 

Epoch 4/10:  32%|▎| 313/991 [1:17:54<2:53:12, 15.33s/batch, batch_loss=1.06e+4, 

Epoch 4/10:  32%|▎| 313/991 [1:18:10<2:53:12, 15.33s/batch, batch_loss=9.12, bat

Epoch 4/10:  32%|▎| 314/991 [1:18:10<2:53:09, 15.35s/batch, batch_loss=9.12, bat

Epoch 4/10:  32%|▎| 314/991 [1:18:25<2:53:09, 15.35s/batch, batch_loss=13.9, bat

Epoch 4/10:  32%|▎| 315/991 [1:18:25<2:53:33, 15.40s/batch, batch_loss=13.9, bat

Epoch 4/10:  32%|▎| 315/991 [1:18:44<2:53:33, 15.40s/batch, batch_loss=19.4, bat

Epoch 4/10:  32%|▎| 316/991 [1:18:44<3:06:47, 16.60s/batch, batch_loss=19.4, bat

Epoch 4/10:  32%|▎| 316/991 [1:19:00<3:06:47, 16.60s/batch, batch_loss=20.7, bat

Epoch 4/10:  32%|▎| 317/991 [1:19:00<3:03:00, 16.29s/batch, batch_loss=20.7, bat

Epoch 4/10:  32%|▎| 317/991 [1:19:14<3:03:00, 16.29s/batch, batch_loss=18.1, bat

Epoch 4/10:  32%|▎| 318/991 [1:19:14<2:55:15, 15.63s/batch, batch_loss=18.1, bat

Epoch 4/10:  32%|▎| 318/991 [1:19:30<2:55:15, 15.63s/batch, batch_loss=14.3, bat

Epoch 4/10:  32%|▎| 319/991 [1:19:30<2:54:23, 15.57s/batch, batch_loss=14.3, bat

Epoch 4/10:  32%|▎| 319/991 [1:19:46<2:54:23, 15.57s/batch, batch_loss=13.7, bat

Epoch 4/10:  32%|▎| 320/991 [1:19:46<2:58:25, 15.95s/batch, batch_loss=13.7, bat

Epoch 4/10:  32%|▎| 320/991 [1:20:00<2:58:25, 15.95s/batch, batch_loss=23, batch

Epoch 4/10:  32%|▎| 321/991 [1:20:00<2:51:53, 15.39s/batch, batch_loss=23, batch

Epoch 4/10:  32%|▎| 321/991 [1:20:15<2:51:53, 15.39s/batch, batch_loss=7.71, bat

Epoch 4/10:  32%|▎| 322/991 [1:20:15<2:48:28, 15.11s/batch, batch_loss=7.71, bat

Epoch 4/10:  32%|▎| 322/991 [1:20:31<2:48:28, 15.11s/batch, batch_loss=8.84, bat

Epoch 4/10:  33%|▎| 323/991 [1:20:31<2:52:05, 15.46s/batch, batch_loss=8.84, bat

Epoch 4/10:  33%|▎| 323/991 [1:20:48<2:52:05, 15.46s/batch, batch_loss=20.4, bat

Epoch 4/10:  33%|▎| 324/991 [1:20:48<2:57:41, 15.98s/batch, batch_loss=20.4, bat

Epoch 4/10:  33%|▎| 324/991 [1:21:04<2:57:41, 15.98s/batch, batch_loss=11.3, bat

Epoch 4/10:  33%|▎| 325/991 [1:21:04<2:54:59, 15.77s/batch, batch_loss=11.3, bat

Epoch 4/10:  33%|▎| 325/991 [1:21:18<2:54:59, 15.77s/batch, batch_loss=25.1, bat

Epoch 4/10:  33%|▎| 326/991 [1:21:18<2:49:27, 15.29s/batch, batch_loss=25.1, bat

Epoch 4/10:  33%|▎| 326/991 [1:21:32<2:49:27, 15.29s/batch, batch_loss=3.05e+3, 

Epoch 4/10:  33%|▎| 327/991 [1:21:32<2:44:33, 14.87s/batch, batch_loss=3.05e+3, 

Epoch 4/10:  33%|▎| 327/991 [1:21:46<2:44:33, 14.87s/batch, batch_loss=12.3, bat

Epoch 4/10:  33%|▎| 328/991 [1:21:46<2:42:43, 14.73s/batch, batch_loss=12.3, bat

Epoch 4/10:  33%|▎| 328/991 [1:22:00<2:42:43, 14.73s/batch, batch_loss=18.5, bat

Epoch 4/10:  33%|▎| 329/991 [1:22:00<2:40:45, 14.57s/batch, batch_loss=18.5, bat

Epoch 4/10:  33%|▎| 329/991 [1:22:14<2:40:45, 14.57s/batch, batch_loss=14.2, bat

Epoch 4/10:  33%|▎| 330/991 [1:22:14<2:36:39, 14.22s/batch, batch_loss=14.2, bat

Epoch 4/10:  33%|▎| 330/991 [1:22:27<2:36:39, 14.22s/batch, batch_loss=12.4, bat

Epoch 4/10:  33%|▎| 331/991 [1:22:27<2:33:53, 13.99s/batch, batch_loss=12.4, bat

Epoch 4/10:  33%|▎| 331/991 [1:22:44<2:33:53, 13.99s/batch, batch_loss=12.2, bat

Epoch 4/10:  34%|▎| 332/991 [1:22:44<2:42:27, 14.79s/batch, batch_loss=12.2, bat

Epoch 4/10:  34%|▎| 332/991 [1:22:58<2:42:27, 14.79s/batch, batch_loss=14.6, bat

Epoch 4/10:  34%|▎| 333/991 [1:22:58<2:41:14, 14.70s/batch, batch_loss=14.6, bat

Epoch 4/10:  34%|▎| 333/991 [1:23:11<2:41:14, 14.70s/batch, batch_loss=15.4, bat

Epoch 4/10:  34%|▎| 334/991 [1:23:11<2:35:54, 14.24s/batch, batch_loss=15.4, bat

Epoch 4/10:  34%|▎| 334/991 [1:23:25<2:35:54, 14.24s/batch, batch_loss=4.24, bat

Epoch 4/10:  34%|▎| 335/991 [1:23:25<2:32:35, 13.96s/batch, batch_loss=4.24, bat

Epoch 4/10:  34%|▎| 335/991 [1:23:39<2:32:35, 13.96s/batch, batch_loss=8.49e+3, 

Epoch 4/10:  34%|▎| 336/991 [1:23:39<2:32:40, 13.99s/batch, batch_loss=8.49e+3, 

Epoch 4/10:  34%|▎| 336/991 [1:23:52<2:32:40, 13.99s/batch, batch_loss=2.35e+3, 

Epoch 4/10:  34%|▎| 337/991 [1:23:52<2:31:08, 13.87s/batch, batch_loss=2.35e+3, 

Epoch 4/10:  34%|▎| 337/991 [1:24:06<2:31:08, 13.87s/batch, batch_loss=9.22, bat

Epoch 4/10:  34%|▎| 338/991 [1:24:06<2:28:37, 13.66s/batch, batch_loss=9.22, bat

Epoch 4/10:  34%|▎| 338/991 [1:24:19<2:28:37, 13.66s/batch, batch_loss=22.2, bat

Epoch 4/10:  34%|▎| 339/991 [1:24:19<2:26:59, 13.53s/batch, batch_loss=22.2, bat

Epoch 4/10:  34%|▎| 339/991 [1:24:33<2:26:59, 13.53s/batch, batch_loss=11.2, bat

Epoch 4/10:  34%|▎| 340/991 [1:24:33<2:27:24, 13.59s/batch, batch_loss=11.2, bat

Epoch 4/10:  34%|▎| 340/991 [1:24:50<2:27:24, 13.59s/batch, batch_loss=11.1, bat

Epoch 4/10:  34%|▎| 341/991 [1:24:50<2:39:21, 14.71s/batch, batch_loss=11.1, bat

Epoch 4/10:  34%|▎| 341/991 [1:25:04<2:39:21, 14.71s/batch, batch_loss=1.41, bat

Epoch 4/10:  35%|▎| 342/991 [1:25:04<2:38:20, 14.64s/batch, batch_loss=1.41, bat

Epoch 4/10:  35%|▎| 342/991 [1:25:18<2:38:20, 14.64s/batch, batch_loss=7.81, bat

Epoch 4/10:  35%|▎| 343/991 [1:25:18<2:34:46, 14.33s/batch, batch_loss=7.81, bat

Epoch 4/10:  35%|▎| 343/991 [1:25:32<2:34:46, 14.33s/batch, batch_loss=14.2, bat

Epoch 4/10:  35%|▎| 344/991 [1:25:32<2:32:06, 14.11s/batch, batch_loss=14.2, bat

Epoch 4/10:  35%|▎| 344/991 [1:25:45<2:32:06, 14.11s/batch, batch_loss=113, batc

Epoch 4/10:  35%|▎| 345/991 [1:25:45<2:30:31, 13.98s/batch, batch_loss=113, batc

Epoch 4/10:  35%|▎| 345/991 [1:25:59<2:30:31, 13.98s/batch, batch_loss=15.1, bat

Epoch 4/10:  35%|▎| 346/991 [1:25:59<2:30:35, 14.01s/batch, batch_loss=15.1, bat

Epoch 4/10:  35%|▎| 346/991 [1:26:13<2:30:35, 14.01s/batch, batch_loss=12.3, bat

Epoch 4/10:  35%|▎| 347/991 [1:26:13<2:28:45, 13.86s/batch, batch_loss=12.3, bat

Epoch 4/10:  35%|▎| 347/991 [1:26:27<2:28:45, 13.86s/batch, batch_loss=14.1, bat

Epoch 4/10:  35%|▎| 348/991 [1:26:27<2:28:57, 13.90s/batch, batch_loss=14.1, bat

Epoch 4/10:  35%|▎| 348/991 [1:26:41<2:28:57, 13.90s/batch, batch_loss=9.97, bat

Epoch 4/10:  35%|▎| 349/991 [1:26:41<2:28:40, 13.90s/batch, batch_loss=9.97, bat

Epoch 4/10:  35%|▎| 349/991 [1:26:55<2:28:40, 13.90s/batch, batch_loss=12.2, bat

Epoch 4/10:  35%|▎| 350/991 [1:26:55<2:28:48, 13.93s/batch, batch_loss=12.2, bat

Epoch 4/10:  35%|▎| 350/991 [1:27:09<2:28:48, 13.93s/batch, batch_loss=8.06, bat

Epoch 4/10:  35%|▎| 351/991 [1:27:09<2:28:30, 13.92s/batch, batch_loss=8.06, bat

Epoch 4/10:  35%|▎| 351/991 [1:27:22<2:28:30, 13.92s/batch, batch_loss=14, batch

Epoch 4/10:  36%|▎| 352/991 [1:27:22<2:28:12, 13.92s/batch, batch_loss=14, batch

Epoch 4/10:  36%|▎| 352/991 [1:27:40<2:28:12, 13.92s/batch, batch_loss=17.5, bat

Epoch 4/10:  36%|▎| 353/991 [1:27:40<2:40:06, 15.06s/batch, batch_loss=17.5, bat

Epoch 4/10:  36%|▎| 353/991 [1:27:54<2:40:06, 15.06s/batch, batch_loss=21.8, bat

Epoch 4/10:  36%|▎| 354/991 [1:27:54<2:37:15, 14.81s/batch, batch_loss=21.8, bat

Epoch 4/10:  36%|▎| 354/991 [1:28:10<2:37:15, 14.81s/batch, batch_loss=10.2, bat

Epoch 4/10:  36%|▎| 355/991 [1:28:10<2:40:07, 15.11s/batch, batch_loss=10.2, bat

Epoch 4/10:  36%|▎| 355/991 [1:28:24<2:40:07, 15.11s/batch, batch_loss=18, batch

Epoch 4/10:  36%|▎| 356/991 [1:28:24<2:36:07, 14.75s/batch, batch_loss=18, batch

Epoch 4/10:  36%|▎| 356/991 [1:28:38<2:36:07, 14.75s/batch, batch_loss=15.4, bat

Epoch 4/10:  36%|▎| 357/991 [1:28:38<2:32:19, 14.42s/batch, batch_loss=15.4, bat

Epoch 4/10:  36%|▎| 357/991 [1:28:52<2:32:19, 14.42s/batch, batch_loss=13.9, bat

Epoch 4/10:  36%|▎| 358/991 [1:28:52<2:31:42, 14.38s/batch, batch_loss=13.9, bat

Epoch 4/10:  36%|▎| 358/991 [1:29:06<2:31:42, 14.38s/batch, batch_loss=5.06, bat

Epoch 4/10:  36%|▎| 359/991 [1:29:06<2:29:17, 14.17s/batch, batch_loss=5.06, bat

Epoch 4/10:  36%|▎| 359/991 [1:29:19<2:29:17, 14.17s/batch, batch_loss=8.34, bat

Epoch 4/10:  36%|▎| 360/991 [1:29:19<2:27:15, 14.00s/batch, batch_loss=8.34, bat

Epoch 4/10:  36%|▎| 360/991 [1:29:33<2:27:15, 14.00s/batch, batch_loss=25.1, bat

Epoch 4/10:  36%|▎| 361/991 [1:29:33<2:25:21, 13.84s/batch, batch_loss=25.1, bat

Epoch 4/10:  36%|▎| 361/991 [1:29:47<2:25:21, 13.84s/batch, batch_loss=18.1, bat

Epoch 4/10:  37%|▎| 362/991 [1:29:47<2:24:54, 13.82s/batch, batch_loss=18.1, bat

Epoch 4/10:  37%|▎| 362/991 [1:30:00<2:24:54, 13.82s/batch, batch_loss=12.1, bat

Epoch 4/10:  37%|▎| 363/991 [1:30:00<2:24:00, 13.76s/batch, batch_loss=12.1, bat

Epoch 4/10:  37%|▎| 363/991 [1:30:14<2:24:00, 13.76s/batch, batch_loss=12.9, bat

Epoch 4/10:  37%|▎| 364/991 [1:30:14<2:23:19, 13.71s/batch, batch_loss=12.9, bat

Epoch 4/10:  37%|▎| 364/991 [1:30:31<2:23:19, 13.71s/batch, batch_loss=8.56, bat

Epoch 4/10:  37%|▎| 365/991 [1:30:31<2:34:29, 14.81s/batch, batch_loss=8.56, bat

Epoch 4/10:  37%|▎| 365/991 [1:30:45<2:34:29, 14.81s/batch, batch_loss=12.1, bat

Epoch 4/10:  37%|▎| 366/991 [1:30:45<2:31:25, 14.54s/batch, batch_loss=12.1, bat

Epoch 4/10:  37%|▎| 366/991 [1:30:59<2:31:25, 14.54s/batch, batch_loss=13.6, bat

Epoch 4/10:  37%|▎| 367/991 [1:30:59<2:29:33, 14.38s/batch, batch_loss=13.6, bat

Epoch 4/10:  37%|▎| 367/991 [1:31:13<2:29:33, 14.38s/batch, batch_loss=12, batch

Epoch 4/10:  37%|▎| 368/991 [1:31:13<2:27:47, 14.23s/batch, batch_loss=12, batch

Epoch 4/10:  37%|▎| 368/991 [1:31:27<2:27:47, 14.23s/batch, batch_loss=12.8, bat

Epoch 4/10:  37%|▎| 369/991 [1:31:27<2:27:29, 14.23s/batch, batch_loss=12.8, bat

Epoch 4/10:  37%|▎| 369/991 [1:31:43<2:27:29, 14.23s/batch, batch_loss=1.2e+4, b

Epoch 4/10:  37%|▎| 370/991 [1:31:43<2:31:31, 14.64s/batch, batch_loss=1.2e+4, b

Epoch 4/10:  37%|▎| 370/991 [1:31:57<2:31:31, 14.64s/batch, batch_loss=18.8, bat

Epoch 4/10:  37%|▎| 371/991 [1:31:57<2:29:26, 14.46s/batch, batch_loss=18.8, bat

Epoch 4/10:  37%|▎| 371/991 [1:32:11<2:29:26, 14.46s/batch, batch_loss=14.7, bat

Epoch 4/10:  38%|▍| 372/991 [1:32:11<2:27:31, 14.30s/batch, batch_loss=14.7, bat

Epoch 4/10:  38%|▍| 372/991 [1:32:25<2:27:31, 14.30s/batch, batch_loss=25.1, bat

Epoch 4/10:  38%|▍| 373/991 [1:32:25<2:25:49, 14.16s/batch, batch_loss=25.1, bat

Epoch 4/10:  38%|▍| 373/991 [1:32:42<2:25:49, 14.16s/batch, batch_loss=468, batc

Epoch 4/10:  38%|▍| 374/991 [1:32:42<2:36:50, 15.25s/batch, batch_loss=468, batc

Epoch 4/10:  38%|▍| 374/991 [1:32:58<2:36:50, 15.25s/batch, batch_loss=1.43e+3, 

Epoch 4/10:  38%|▍| 375/991 [1:32:58<2:36:51, 15.28s/batch, batch_loss=1.43e+3, 

Epoch 4/10:  38%|▍| 375/991 [1:33:12<2:36:51, 15.28s/batch, batch_loss=1.22e+3, 

Epoch 4/10:  38%|▍| 376/991 [1:33:12<2:32:11, 14.85s/batch, batch_loss=1.22e+3, 

Epoch 4/10:  38%|▍| 376/991 [1:33:26<2:32:11, 14.85s/batch, batch_loss=21.1, bat

Epoch 4/10:  38%|▍| 377/991 [1:33:26<2:30:37, 14.72s/batch, batch_loss=21.1, bat

Epoch 4/10:  38%|▍| 377/991 [1:33:40<2:30:37, 14.72s/batch, batch_loss=1.18e+3, 

Epoch 4/10:  38%|▍| 378/991 [1:33:40<2:29:16, 14.61s/batch, batch_loss=1.18e+3, 

Epoch 4/10:  38%|▍| 378/991 [1:33:55<2:29:16, 14.61s/batch, batch_loss=10.8, bat

Epoch 4/10:  38%|▍| 379/991 [1:33:55<2:27:52, 14.50s/batch, batch_loss=10.8, bat

Epoch 4/10:  38%|▍| 379/991 [1:34:08<2:27:52, 14.50s/batch, batch_loss=13.4, bat

Epoch 4/10:  38%|▍| 380/991 [1:34:08<2:25:08, 14.25s/batch, batch_loss=13.4, bat

Epoch 4/10:  38%|▍| 380/991 [1:34:22<2:25:08, 14.25s/batch, batch_loss=18, batch

Epoch 4/10:  38%|▍| 381/991 [1:34:22<2:23:45, 14.14s/batch, batch_loss=18, batch

Epoch 4/10:  38%|▍| 381/991 [1:34:36<2:23:45, 14.14s/batch, batch_loss=11.5, bat

Epoch 4/10:  39%|▍| 382/991 [1:34:36<2:22:10, 14.01s/batch, batch_loss=11.5, bat

Epoch 4/10:  39%|▍| 382/991 [1:34:50<2:22:10, 14.01s/batch, batch_loss=10.2, bat

Epoch 4/10:  39%|▍| 383/991 [1:34:50<2:21:08, 13.93s/batch, batch_loss=10.2, bat

Epoch 4/10:  39%|▍| 383/991 [1:35:04<2:21:08, 13.93s/batch, batch_loss=22.9, bat

Epoch 4/10:  39%|▍| 384/991 [1:35:04<2:21:16, 13.96s/batch, batch_loss=22.9, bat

Epoch 4/10:  39%|▍| 384/991 [1:35:18<2:21:16, 13.96s/batch, batch_loss=8.62, bat

Epoch 4/10:  39%|▍| 385/991 [1:35:18<2:21:06, 13.97s/batch, batch_loss=8.62, bat

Epoch 4/10:  39%|▍| 385/991 [1:35:32<2:21:06, 13.97s/batch, batch_loss=15.7, bat

Epoch 4/10:  39%|▍| 386/991 [1:35:32<2:23:04, 14.19s/batch, batch_loss=15.7, bat

Epoch 4/10:  39%|▍| 386/991 [1:35:50<2:23:04, 14.19s/batch, batch_loss=21.1, bat

Epoch 4/10:  39%|▍| 387/991 [1:35:50<2:34:08, 15.31s/batch, batch_loss=21.1, bat

Epoch 4/10:  39%|▍| 387/991 [1:36:04<2:34:08, 15.31s/batch, batch_loss=791, batc

Epoch 4/10:  39%|▍| 388/991 [1:36:04<2:29:22, 14.86s/batch, batch_loss=791, batc

Epoch 4/10:  39%|▍| 388/991 [1:36:18<2:29:22, 14.86s/batch, batch_loss=14, batch

Epoch 4/10:  39%|▍| 389/991 [1:36:18<2:26:35, 14.61s/batch, batch_loss=14, batch

Epoch 4/10:  39%|▍| 389/991 [1:36:32<2:26:35, 14.61s/batch, batch_loss=868, batc

Epoch 4/10:  39%|▍| 390/991 [1:36:32<2:23:12, 14.30s/batch, batch_loss=868, batc

Epoch 4/10:  39%|▍| 390/991 [1:36:46<2:23:12, 14.30s/batch, batch_loss=17, batch

Epoch 4/10:  39%|▍| 391/991 [1:36:46<2:21:43, 14.17s/batch, batch_loss=17, batch

Epoch 4/10:  39%|▍| 391/991 [1:36:59<2:21:43, 14.17s/batch, batch_loss=11.9, bat

Epoch 4/10:  40%|▍| 392/991 [1:36:59<2:19:53, 14.01s/batch, batch_loss=11.9, bat

Epoch 4/10:  40%|▍| 392/991 [1:37:15<2:19:53, 14.01s/batch, batch_loss=17.1, bat

Epoch 4/10:  40%|▍| 393/991 [1:37:15<2:24:02, 14.45s/batch, batch_loss=17.1, bat

Epoch 4/10:  40%|▍| 393/991 [1:37:30<2:24:02, 14.45s/batch, batch_loss=605, batc

Epoch 4/10:  40%|▍| 394/991 [1:37:30<2:26:08, 14.69s/batch, batch_loss=605, batc

Epoch 4/10:  40%|▍| 394/991 [1:37:45<2:26:08, 14.69s/batch, batch_loss=17.9, bat

Epoch 4/10:  40%|▍| 395/991 [1:37:45<2:27:44, 14.87s/batch, batch_loss=17.9, bat

Epoch 4/10:  40%|▍| 395/991 [1:38:00<2:27:44, 14.87s/batch, batch_loss=13.1, bat

Epoch 4/10:  40%|▍| 396/991 [1:38:00<2:27:08, 14.84s/batch, batch_loss=13.1, bat

Epoch 4/10:  40%|▍| 396/991 [1:38:15<2:27:08, 14.84s/batch, batch_loss=15.6, bat

Epoch 4/10:  40%|▍| 397/991 [1:38:15<2:27:46, 14.93s/batch, batch_loss=15.6, bat

Epoch 4/10:  40%|▍| 397/991 [1:38:30<2:27:46, 14.93s/batch, batch_loss=13.3, bat

Epoch 4/10:  40%|▍| 398/991 [1:38:30<2:27:33, 14.93s/batch, batch_loss=13.3, bat

Epoch 4/10:  40%|▍| 398/991 [1:38:45<2:27:33, 14.93s/batch, batch_loss=20.8, bat

Epoch 4/10:  40%|▍| 399/991 [1:38:45<2:27:38, 14.96s/batch, batch_loss=20.8, bat

Epoch 4/10:  40%|▍| 399/991 [1:39:03<2:27:38, 14.96s/batch, batch_loss=10.8, bat

Epoch 4/10:  40%|▍| 400/991 [1:39:03<2:35:16, 15.76s/batch, batch_loss=10.8, bat

Epoch 4/10:  40%|▍| 400/991 [1:39:17<2:35:16, 15.76s/batch, batch_loss=13.1, bat

Epoch 4/10:  40%|▍| 401/991 [1:39:17<2:30:09, 15.27s/batch, batch_loss=13.1, bat

Epoch 4/10:  40%|▍| 401/991 [1:39:32<2:30:09, 15.27s/batch, batch_loss=17.5, bat

Epoch 4/10:  41%|▍| 402/991 [1:39:32<2:30:12, 15.30s/batch, batch_loss=17.5, bat

Epoch 4/10:  41%|▍| 402/991 [1:39:47<2:30:12, 15.30s/batch, batch_loss=19.9, bat

Epoch 4/10:  41%|▍| 403/991 [1:39:47<2:29:36, 15.27s/batch, batch_loss=19.9, bat

Epoch 4/10:  41%|▍| 403/991 [1:40:02<2:29:36, 15.27s/batch, batch_loss=10.1, bat

Epoch 4/10:  41%|▍| 404/991 [1:40:02<2:27:31, 15.08s/batch, batch_loss=10.1, bat

Epoch 4/10:  41%|▍| 404/991 [1:40:17<2:27:31, 15.08s/batch, batch_loss=14.6, bat

Epoch 4/10:  41%|▍| 405/991 [1:40:17<2:26:17, 14.98s/batch, batch_loss=14.6, bat

Epoch 4/10:  41%|▍| 405/991 [1:40:33<2:26:17, 14.98s/batch, batch_loss=5.68, bat

Epoch 4/10:  41%|▍| 406/991 [1:40:33<2:28:42, 15.25s/batch, batch_loss=5.68, bat

Epoch 4/10:  41%|▍| 406/991 [1:40:48<2:28:42, 15.25s/batch, batch_loss=23.9, bat

Epoch 4/10:  41%|▍| 407/991 [1:40:48<2:27:32, 15.16s/batch, batch_loss=23.9, bat

Epoch 4/10:  41%|▍| 407/991 [1:41:06<2:27:32, 15.16s/batch, batch_loss=8.5, batc

Epoch 4/10:  41%|▍| 408/991 [1:41:06<2:35:39, 16.02s/batch, batch_loss=8.5, batc

Epoch 4/10:  41%|▍| 408/991 [1:41:21<2:35:39, 16.02s/batch, batch_loss=22.3, bat

Epoch 4/10:  41%|▍| 409/991 [1:41:21<2:33:19, 15.81s/batch, batch_loss=22.3, bat

Epoch 4/10:  41%|▍| 409/991 [1:41:37<2:33:19, 15.81s/batch, batch_loss=20.3, bat

Epoch 4/10:  41%|▍| 410/991 [1:41:37<2:32:34, 15.76s/batch, batch_loss=20.3, bat

Epoch 4/10:  41%|▍| 410/991 [1:41:52<2:32:34, 15.76s/batch, batch_loss=10.2, bat

Epoch 4/10:  41%|▍| 411/991 [1:41:52<2:32:36, 15.79s/batch, batch_loss=10.2, bat

Epoch 4/10:  41%|▍| 411/991 [1:42:08<2:32:36, 15.79s/batch, batch_loss=12.8, bat

Epoch 4/10:  42%|▍| 412/991 [1:42:08<2:32:20, 15.79s/batch, batch_loss=12.8, bat

Epoch 4/10:  42%|▍| 412/991 [1:42:23<2:32:20, 15.79s/batch, batch_loss=14.9, bat

Epoch 4/10:  42%|▍| 413/991 [1:42:23<2:29:30, 15.52s/batch, batch_loss=14.9, bat

Epoch 4/10:  42%|▍| 413/991 [1:42:37<2:29:30, 15.52s/batch, batch_loss=15.5, bat

Epoch 4/10:  42%|▍| 414/991 [1:42:37<2:23:35, 14.93s/batch, batch_loss=15.5, bat

Epoch 4/10:  42%|▍| 414/991 [1:42:52<2:23:35, 14.93s/batch, batch_loss=10.2, bat

Epoch 4/10:  42%|▍| 415/991 [1:42:52<2:25:03, 15.11s/batch, batch_loss=10.2, bat

Epoch 4/10:  42%|▍| 415/991 [1:43:10<2:25:03, 15.11s/batch, batch_loss=9.86, bat

Epoch 4/10:  42%|▍| 416/991 [1:43:10<2:31:11, 15.78s/batch, batch_loss=9.86, bat

Epoch 4/10:  42%|▍| 416/991 [1:43:24<2:31:11, 15.78s/batch, batch_loss=9.46, bat

Epoch 4/10:  42%|▍| 417/991 [1:43:24<2:26:35, 15.32s/batch, batch_loss=9.46, bat

Epoch 4/10:  42%|▍| 417/991 [1:43:40<2:26:35, 15.32s/batch, batch_loss=15.7, bat

Epoch 4/10:  42%|▍| 418/991 [1:43:40<2:27:21, 15.43s/batch, batch_loss=15.7, bat

Epoch 4/10:  42%|▍| 418/991 [1:43:56<2:27:21, 15.43s/batch, batch_loss=1.3e+3, b

Epoch 4/10:  42%|▍| 419/991 [1:43:56<2:28:43, 15.60s/batch, batch_loss=1.3e+3, b

Epoch 4/10:  42%|▍| 419/991 [1:44:11<2:28:43, 15.60s/batch, batch_loss=16.3, bat

Epoch 4/10:  42%|▍| 420/991 [1:44:11<2:29:12, 15.68s/batch, batch_loss=16.3, bat

Epoch 4/10:  42%|▍| 420/991 [1:44:27<2:29:12, 15.68s/batch, batch_loss=12.5, bat

Epoch 4/10:  42%|▍| 421/991 [1:44:27<2:28:09, 15.60s/batch, batch_loss=12.5, bat

Epoch 4/10:  42%|▍| 421/991 [1:44:42<2:28:09, 15.60s/batch, batch_loss=8.4, batc

Epoch 4/10:  43%|▍| 422/991 [1:44:42<2:27:22, 15.54s/batch, batch_loss=8.4, batc

Epoch 4/10:  43%|▍| 422/991 [1:44:57<2:27:22, 15.54s/batch, batch_loss=13.5, bat

Epoch 4/10:  43%|▍| 423/991 [1:44:57<2:24:38, 15.28s/batch, batch_loss=13.5, bat

Epoch 4/10:  43%|▍| 423/991 [1:45:13<2:24:38, 15.28s/batch, batch_loss=11.7, bat

Epoch 4/10:  43%|▍| 424/991 [1:45:13<2:26:32, 15.51s/batch, batch_loss=11.7, bat

Epoch 4/10:  43%|▍| 424/991 [1:45:28<2:26:32, 15.51s/batch, batch_loss=7.44, bat

Epoch 4/10:  43%|▍| 425/991 [1:45:28<2:24:59, 15.37s/batch, batch_loss=7.44, bat

Epoch 4/10:  43%|▍| 425/991 [1:45:43<2:24:59, 15.37s/batch, batch_loss=2.73, bat

Epoch 4/10:  43%|▍| 426/991 [1:45:43<2:23:05, 15.20s/batch, batch_loss=2.73, bat

Epoch 4/10:  43%|▍| 426/991 [1:45:56<2:23:05, 15.20s/batch, batch_loss=8.66, bat

Epoch 4/10:  43%|▍| 427/991 [1:45:56<2:18:23, 14.72s/batch, batch_loss=8.66, bat

Epoch 4/10:  43%|▍| 427/991 [1:46:11<2:18:23, 14.72s/batch, batch_loss=17, batch

Epoch 4/10:  43%|▍| 428/991 [1:46:11<2:19:08, 14.83s/batch, batch_loss=17, batch

Epoch 4/10:  43%|▍| 428/991 [1:46:27<2:19:08, 14.83s/batch, batch_loss=19.3, bat

Epoch 4/10:  43%|▍| 429/991 [1:46:27<2:20:13, 14.97s/batch, batch_loss=19.3, bat

Epoch 4/10:  43%|▍| 429/991 [1:46:42<2:20:13, 14.97s/batch, batch_loss=9.3e+3, b

Epoch 4/10:  43%|▍| 430/991 [1:46:42<2:20:00, 14.97s/batch, batch_loss=9.3e+3, b

Epoch 4/10:  43%|▍| 430/991 [1:46:56<2:20:00, 14.97s/batch, batch_loss=22, batch

Epoch 4/10:  43%|▍| 431/991 [1:46:56<2:17:11, 14.70s/batch, batch_loss=22, batch

Epoch 4/10:  43%|▍| 431/991 [1:47:11<2:17:11, 14.70s/batch, batch_loss=18.2, bat

Epoch 4/10:  44%|▍| 432/991 [1:47:11<2:17:34, 14.77s/batch, batch_loss=18.2, bat

Epoch 4/10:  44%|▍| 432/991 [1:47:26<2:17:34, 14.77s/batch, batch_loss=11.9, bat

Epoch 4/10:  44%|▍| 433/991 [1:47:26<2:17:33, 14.79s/batch, batch_loss=11.9, bat

Epoch 4/10:  44%|▍| 433/991 [1:47:41<2:17:33, 14.79s/batch, batch_loss=17.6, bat

Epoch 4/10:  44%|▍| 434/991 [1:47:41<2:19:00, 14.97s/batch, batch_loss=17.6, bat

Epoch 4/10:  44%|▍| 434/991 [1:47:56<2:19:00, 14.97s/batch, batch_loss=12.5, bat

Epoch 4/10:  44%|▍| 435/991 [1:47:56<2:17:39, 14.86s/batch, batch_loss=12.5, bat

Epoch 4/10:  44%|▍| 435/991 [1:48:10<2:17:39, 14.86s/batch, batch_loss=14.8, bat

Epoch 4/10:  44%|▍| 436/991 [1:48:10<2:16:13, 14.73s/batch, batch_loss=14.8, bat

Epoch 4/10:  44%|▍| 436/991 [1:48:25<2:16:13, 14.73s/batch, batch_loss=16.3, bat

Epoch 4/10:  44%|▍| 437/991 [1:48:25<2:15:37, 14.69s/batch, batch_loss=16.3, bat

Epoch 4/10:  44%|▍| 437/991 [1:48:42<2:15:37, 14.69s/batch, batch_loss=18.6, bat

Epoch 4/10:  44%|▍| 438/991 [1:48:42<2:23:24, 15.56s/batch, batch_loss=18.6, bat

Epoch 4/10:  44%|▍| 438/991 [1:48:58<2:23:24, 15.56s/batch, batch_loss=12.3, bat

Epoch 4/10:  44%|▍| 439/991 [1:48:58<2:22:44, 15.51s/batch, batch_loss=12.3, bat

Epoch 4/10:  44%|▍| 439/991 [1:49:13<2:22:44, 15.51s/batch, batch_loss=21.9, bat

Epoch 4/10:  44%|▍| 440/991 [1:49:13<2:20:53, 15.34s/batch, batch_loss=21.9, bat

Epoch 4/10:  44%|▍| 440/991 [1:49:28<2:20:53, 15.34s/batch, batch_loss=21.3, bat

Epoch 4/10:  45%|▍| 441/991 [1:49:28<2:22:22, 15.53s/batch, batch_loss=21.3, bat

Epoch 4/10:  45%|▍| 441/991 [1:49:43<2:22:22, 15.53s/batch, batch_loss=14, batch

Epoch 4/10:  45%|▍| 442/991 [1:49:43<2:18:19, 15.12s/batch, batch_loss=14, batch

Epoch 4/10:  45%|▍| 442/991 [1:49:57<2:18:19, 15.12s/batch, batch_loss=18.5, bat

Epoch 4/10:  45%|▍| 443/991 [1:49:57<2:16:53, 14.99s/batch, batch_loss=18.5, bat

Epoch 4/10:  45%|▍| 443/991 [1:50:12<2:16:53, 14.99s/batch, batch_loss=13.3, bat

Epoch 4/10:  45%|▍| 444/991 [1:50:12<2:14:42, 14.78s/batch, batch_loss=13.3, bat

Epoch 4/10:  45%|▍| 444/991 [1:50:28<2:14:42, 14.78s/batch, batch_loss=20.1, bat

Epoch 4/10:  45%|▍| 445/991 [1:50:28<2:18:31, 15.22s/batch, batch_loss=20.1, bat

Epoch 4/10:  45%|▍| 445/991 [1:50:43<2:18:31, 15.22s/batch, batch_loss=24.9, bat

Epoch 4/10:  45%|▍| 446/991 [1:50:43<2:18:42, 15.27s/batch, batch_loss=24.9, bat

Epoch 4/10:  45%|▍| 446/991 [1:50:58<2:18:42, 15.27s/batch, batch_loss=12.7, bat

Epoch 4/10:  45%|▍| 447/991 [1:50:58<2:16:53, 15.10s/batch, batch_loss=12.7, bat

Epoch 4/10:  45%|▍| 447/991 [1:51:12<2:16:53, 15.10s/batch, batch_loss=16.3, bat

Epoch 4/10:  45%|▍| 448/991 [1:51:12<2:13:19, 14.73s/batch, batch_loss=16.3, bat

Epoch 4/10:  45%|▍| 448/991 [1:51:29<2:13:19, 14.73s/batch, batch_loss=16, batch

Epoch 4/10:  45%|▍| 449/991 [1:51:29<2:18:45, 15.36s/batch, batch_loss=16, batch

Epoch 4/10:  45%|▍| 449/991 [1:51:43<2:18:45, 15.36s/batch, batch_loss=21.2, bat

Epoch 4/10:  45%|▍| 450/991 [1:51:43<2:16:51, 15.18s/batch, batch_loss=21.2, bat

Epoch 4/10:  45%|▍| 450/991 [1:51:58<2:16:51, 15.18s/batch, batch_loss=18.1, bat

Epoch 4/10:  46%|▍| 451/991 [1:51:58<2:13:48, 14.87s/batch, batch_loss=18.1, bat

Epoch 4/10:  46%|▍| 451/991 [1:52:13<2:13:48, 14.87s/batch, batch_loss=16.4, bat

Epoch 4/10:  46%|▍| 452/991 [1:52:13<2:15:14, 15.06s/batch, batch_loss=16.4, bat

Epoch 4/10:  46%|▍| 452/991 [1:52:32<2:15:14, 15.06s/batch, batch_loss=19.1, bat

Epoch 4/10:  46%|▍| 453/991 [1:52:32<2:24:28, 16.11s/batch, batch_loss=19.1, bat

Epoch 4/10:  46%|▍| 453/991 [1:52:51<2:24:28, 16.11s/batch, batch_loss=7.23e+3, 

Epoch 4/10:  46%|▍| 454/991 [1:52:51<2:33:56, 17.20s/batch, batch_loss=7.23e+3, 

Epoch 4/10:  46%|▍| 454/991 [1:53:19<2:33:56, 17.20s/batch, batch_loss=26.7, bat

Epoch 4/10:  46%|▍| 455/991 [1:53:19<3:00:33, 20.21s/batch, batch_loss=26.7, bat

Epoch 4/10:  46%|▍| 455/991 [1:53:53<3:00:33, 20.21s/batch, batch_loss=23.1, bat

Epoch 4/10:  46%|▍| 456/991 [1:53:53<3:38:07, 24.46s/batch, batch_loss=23.1, bat

Epoch 4/10:  46%|▍| 456/991 [1:54:28<3:38:07, 24.46s/batch, batch_loss=13.9, bat

Epoch 4/10:  46%|▍| 457/991 [1:54:28<4:04:44, 27.50s/batch, batch_loss=13.9, bat

Epoch 4/10:  46%|▍| 457/991 [1:55:04<4:04:44, 27.50s/batch, batch_loss=15, batch

Epoch 4/10:  46%|▍| 458/991 [1:55:04<4:27:13, 30.08s/batch, batch_loss=15, batch

Epoch 4/10:  46%|▍| 458/991 [1:55:37<4:27:13, 30.08s/batch, batch_loss=21.9, bat

Epoch 4/10:  46%|▍| 459/991 [1:55:37<4:35:53, 31.12s/batch, batch_loss=21.9, bat

Epoch 4/10:  46%|▍| 459/991 [1:56:12<4:35:53, 31.12s/batch, batch_loss=19.6, bat

Epoch 4/10:  46%|▍| 460/991 [1:56:12<4:45:29, 32.26s/batch, batch_loss=19.6, bat

Epoch 4/10:  46%|▍| 460/991 [1:56:46<4:45:29, 32.26s/batch, batch_loss=47.9, bat

Epoch 4/10:  47%|▍| 461/991 [1:56:46<4:50:15, 32.86s/batch, batch_loss=47.9, bat

Epoch 4/10:  47%|▍| 461/991 [1:57:24<4:50:15, 32.86s/batch, batch_loss=12.8, bat

Epoch 4/10:  47%|▍| 462/991 [1:57:24<5:01:20, 34.18s/batch, batch_loss=12.8, bat

Epoch 4/10:  47%|▍| 462/991 [1:57:58<5:01:20, 34.18s/batch, batch_loss=6.21e+4, 

Epoch 4/10:  47%|▍| 463/991 [1:57:58<5:02:28, 34.37s/batch, batch_loss=6.21e+4, 

Epoch 4/10:  47%|▍| 463/991 [1:58:30<5:02:28, 34.37s/batch, batch_loss=13.4, bat

Epoch 4/10:  47%|▍| 464/991 [1:58:30<4:53:39, 33.43s/batch, batch_loss=13.4, bat

Epoch 4/10:  47%|▍| 464/991 [1:58:44<4:53:39, 33.43s/batch, batch_loss=14.5, bat

Epoch 4/10:  47%|▍| 465/991 [1:58:44<4:03:23, 27.76s/batch, batch_loss=14.5, bat

Epoch 4/10:  47%|▍| 465/991 [1:58:59<4:03:23, 27.76s/batch, batch_loss=16.2, bat

Epoch 4/10:  47%|▍| 466/991 [1:58:59<3:28:27, 23.82s/batch, batch_loss=16.2, bat

Epoch 4/10:  47%|▍| 466/991 [1:59:14<3:28:27, 23.82s/batch, batch_loss=13, batch

Epoch 4/10:  47%|▍| 467/991 [1:59:14<3:05:38, 21.26s/batch, batch_loss=13, batch

Epoch 4/10:  47%|▍| 467/991 [1:59:30<3:05:38, 21.26s/batch, batch_loss=18, batch

Epoch 4/10:  47%|▍| 468/991 [1:59:30<2:50:04, 19.51s/batch, batch_loss=18, batch

Epoch 4/10:  47%|▍| 468/991 [1:59:46<2:50:04, 19.51s/batch, batch_loss=16.4, bat

Epoch 4/10:  47%|▍| 469/991 [1:59:46<2:40:41, 18.47s/batch, batch_loss=16.4, bat

Epoch 4/10:  47%|▍| 469/991 [2:00:00<2:40:41, 18.47s/batch, batch_loss=14.8, bat

Epoch 4/10:  47%|▍| 470/991 [2:00:00<2:29:50, 17.26s/batch, batch_loss=14.8, bat

Epoch 4/10:  47%|▍| 470/991 [2:00:15<2:29:50, 17.26s/batch, batch_loss=19, batch

Epoch 4/10:  48%|▍| 471/991 [2:00:15<2:22:21, 16.43s/batch, batch_loss=19, batch

Epoch 4/10:  48%|▍| 471/991 [2:00:30<2:22:21, 16.43s/batch, batch_loss=20.5, bat

Epoch 4/10:  48%|▍| 472/991 [2:00:30<2:19:39, 16.14s/batch, batch_loss=20.5, bat

Epoch 4/10:  48%|▍| 472/991 [2:00:45<2:19:39, 16.14s/batch, batch_loss=19.1, bat

Epoch 4/10:  48%|▍| 473/991 [2:00:45<2:17:04, 15.88s/batch, batch_loss=19.1, bat

Epoch 4/10:  48%|▍| 473/991 [2:01:00<2:17:04, 15.88s/batch, batch_loss=15, batch

Epoch 4/10:  48%|▍| 474/991 [2:01:00<2:14:45, 15.64s/batch, batch_loss=15, batch

Epoch 4/10:  48%|▍| 474/991 [2:01:18<2:14:45, 15.64s/batch, batch_loss=2.4e+3, b

Epoch 4/10:  48%|▍| 475/991 [2:01:18<2:20:22, 16.32s/batch, batch_loss=2.4e+3, b

Epoch 4/10:  48%|▍| 475/991 [2:01:34<2:20:22, 16.32s/batch, batch_loss=17.6, bat

Epoch 4/10:  48%|▍| 476/991 [2:01:34<2:18:13, 16.10s/batch, batch_loss=17.6, bat

Epoch 4/10:  48%|▍| 476/991 [2:01:49<2:18:13, 16.10s/batch, batch_loss=16.3, bat

Epoch 4/10:  48%|▍| 477/991 [2:01:49<2:14:45, 15.73s/batch, batch_loss=16.3, bat

Epoch 4/10:  48%|▍| 477/991 [2:02:04<2:14:45, 15.73s/batch, batch_loss=16.8, bat

Epoch 4/10:  48%|▍| 478/991 [2:02:04<2:12:29, 15.50s/batch, batch_loss=16.8, bat

Epoch 4/10:  48%|▍| 478/991 [2:02:18<2:12:29, 15.50s/batch, batch_loss=18.5, bat

Epoch 4/10:  48%|▍| 479/991 [2:02:18<2:10:14, 15.26s/batch, batch_loss=18.5, bat

Epoch 4/10:  48%|▍| 479/991 [2:02:34<2:10:14, 15.26s/batch, batch_loss=16.6, bat

Epoch 4/10:  48%|▍| 480/991 [2:02:34<2:10:41, 15.35s/batch, batch_loss=16.6, bat

Epoch 4/10:  48%|▍| 480/991 [2:02:49<2:10:41, 15.35s/batch, batch_loss=23.9, bat

Epoch 4/10:  49%|▍| 481/991 [2:02:49<2:08:52, 15.16s/batch, batch_loss=23.9, bat

Epoch 4/10:  49%|▍| 481/991 [2:03:04<2:08:52, 15.16s/batch, batch_loss=19.2, bat

Epoch 4/10:  49%|▍| 482/991 [2:03:04<2:09:48, 15.30s/batch, batch_loss=19.2, bat

Epoch 4/10:  49%|▍| 482/991 [2:03:20<2:09:48, 15.30s/batch, batch_loss=14.1, bat

Epoch 4/10:  49%|▍| 483/991 [2:03:20<2:11:48, 15.57s/batch, batch_loss=14.1, bat

Epoch 4/10:  49%|▍| 483/991 [2:03:36<2:11:48, 15.57s/batch, batch_loss=20.9, bat

Epoch 4/10:  49%|▍| 484/991 [2:03:36<2:11:44, 15.59s/batch, batch_loss=20.9, bat

Epoch 4/10:  49%|▍| 484/991 [2:03:52<2:11:44, 15.59s/batch, batch_loss=8.49, bat

Epoch 4/10:  49%|▍| 485/991 [2:03:52<2:12:12, 15.68s/batch, batch_loss=8.49, bat

Epoch 4/10:  49%|▍| 485/991 [2:04:07<2:12:12, 15.68s/batch, batch_loss=25.4, bat

Epoch 4/10:  49%|▍| 486/991 [2:04:07<2:10:45, 15.54s/batch, batch_loss=25.4, bat

Epoch 4/10:  49%|▍| 486/991 [2:04:23<2:10:45, 15.54s/batch, batch_loss=15.2, bat

Epoch 4/10:  49%|▍| 487/991 [2:04:23<2:11:35, 15.67s/batch, batch_loss=15.2, bat

Epoch 4/10:  49%|▍| 487/991 [2:04:40<2:11:35, 15.67s/batch, batch_loss=8.38, bat

Epoch 4/10:  49%|▍| 488/991 [2:04:40<2:13:15, 15.90s/batch, batch_loss=8.38, bat

Epoch 4/10:  49%|▍| 488/991 [2:04:56<2:13:15, 15.90s/batch, batch_loss=10.3, bat

Epoch 4/10:  49%|▍| 489/991 [2:04:56<2:13:29, 15.96s/batch, batch_loss=10.3, bat

Epoch 4/10:  49%|▍| 489/991 [2:05:13<2:13:29, 15.96s/batch, batch_loss=7.37, bat

Epoch 4/10:  49%|▍| 490/991 [2:05:13<2:17:25, 16.46s/batch, batch_loss=7.37, bat

Epoch 4/10:  49%|▍| 490/991 [2:05:29<2:17:25, 16.46s/batch, batch_loss=22, batch

Epoch 4/10:  50%|▍| 491/991 [2:05:29<2:15:08, 16.22s/batch, batch_loss=22, batch

Epoch 4/10:  50%|▍| 491/991 [2:05:45<2:15:08, 16.22s/batch, batch_loss=17.7, bat

Epoch 4/10:  50%|▍| 492/991 [2:05:45<2:13:56, 16.10s/batch, batch_loss=17.7, bat

Epoch 4/10:  50%|▍| 492/991 [2:06:01<2:13:56, 16.10s/batch, batch_loss=20.8, bat

Epoch 4/10:  50%|▍| 493/991 [2:06:01<2:12:43, 15.99s/batch, batch_loss=20.8, bat

Epoch 4/10:  50%|▍| 493/991 [2:06:16<2:12:43, 15.99s/batch, batch_loss=8.59, bat

Epoch 4/10:  50%|▍| 494/991 [2:06:16<2:10:58, 15.81s/batch, batch_loss=8.59, bat

Epoch 4/10:  50%|▍| 494/991 [2:06:31<2:10:58, 15.81s/batch, batch_loss=8.54e+4, 

Epoch 4/10:  50%|▍| 495/991 [2:06:31<2:08:12, 15.51s/batch, batch_loss=8.54e+4, 

Epoch 4/10:  50%|▍| 495/991 [2:06:50<2:08:12, 15.51s/batch, batch_loss=12.5, bat

Epoch 4/10:  50%|▌| 496/991 [2:06:50<2:16:11, 16.51s/batch, batch_loss=12.5, bat

Epoch 4/10:  50%|▌| 496/991 [2:07:06<2:16:11, 16.51s/batch, batch_loss=174, batc

Epoch 4/10:  50%|▌| 497/991 [2:07:06<2:14:58, 16.39s/batch, batch_loss=174, batc

Epoch 4/10:  50%|▌| 497/991 [2:07:21<2:14:58, 16.39s/batch, batch_loss=15.5, bat

Epoch 4/10:  50%|▌| 498/991 [2:07:21<2:12:49, 16.16s/batch, batch_loss=15.5, bat

Epoch 4/10:  50%|▌| 498/991 [2:07:37<2:12:49, 16.16s/batch, batch_loss=404, batc

Epoch 4/10:  50%|▌| 499/991 [2:07:37<2:12:09, 16.12s/batch, batch_loss=404, batc

Epoch 4/10:  50%|▌| 499/991 [2:07:54<2:12:09, 16.12s/batch, batch_loss=17, batch

Epoch 4/10:  50%|▌| 500/991 [2:07:54<2:13:19, 16.29s/batch, batch_loss=17, batch

Epoch 4/10:  50%|▌| 500/991 [2:08:11<2:13:19, 16.29s/batch, batch_loss=8.24, bat

Epoch 4/10:  51%|▌| 501/991 [2:08:11<2:14:53, 16.52s/batch, batch_loss=8.24, bat

Epoch 4/10:  51%|▌| 501/991 [2:08:26<2:14:53, 16.52s/batch, batch_loss=10.1, bat

Epoch 4/10:  51%|▌| 502/991 [2:08:26<2:11:34, 16.14s/batch, batch_loss=10.1, bat

Epoch 4/10:  51%|▌| 502/991 [2:08:44<2:11:34, 16.14s/batch, batch_loss=24.3, bat

Epoch 4/10:  51%|▌| 503/991 [2:08:44<2:16:07, 16.74s/batch, batch_loss=24.3, bat

Epoch 4/10:  51%|▌| 503/991 [2:08:59<2:16:07, 16.74s/batch, batch_loss=11.2, bat

Epoch 4/10:  51%|▌| 504/991 [2:08:59<2:11:26, 16.19s/batch, batch_loss=11.2, bat

Epoch 4/10:  51%|▌| 504/991 [2:09:15<2:11:26, 16.19s/batch, batch_loss=7.43, bat

Epoch 4/10:  51%|▌| 505/991 [2:09:15<2:08:50, 15.91s/batch, batch_loss=7.43, bat

Epoch 4/10:  51%|▌| 505/991 [2:09:31<2:08:50, 15.91s/batch, batch_loss=15.3, bat

Epoch 4/10:  51%|▌| 506/991 [2:09:31<2:09:17, 15.99s/batch, batch_loss=15.3, bat

Epoch 4/10:  51%|▌| 506/991 [2:09:46<2:09:17, 15.99s/batch, batch_loss=11.6, bat

Epoch 4/10:  51%|▌| 507/991 [2:09:46<2:06:30, 15.68s/batch, batch_loss=11.6, bat

Epoch 4/10:  51%|▌| 507/991 [2:10:01<2:06:30, 15.68s/batch, batch_loss=16.3, bat

Epoch 4/10:  51%|▌| 508/991 [2:10:01<2:05:06, 15.54s/batch, batch_loss=16.3, bat

Epoch 4/10:  51%|▌| 508/991 [2:10:16<2:05:06, 15.54s/batch, batch_loss=17.8, bat

Epoch 4/10:  51%|▌| 509/991 [2:10:16<2:04:04, 15.45s/batch, batch_loss=17.8, bat

Epoch 4/10:  51%|▌| 509/991 [2:10:33<2:04:04, 15.45s/batch, batch_loss=12.6, bat

Epoch 4/10:  51%|▌| 510/991 [2:10:33<2:07:17, 15.88s/batch, batch_loss=12.6, bat

Epoch 4/10:  51%|▌| 510/991 [2:10:49<2:07:17, 15.88s/batch, batch_loss=14.5, bat

Epoch 4/10:  52%|▌| 511/991 [2:10:49<2:06:29, 15.81s/batch, batch_loss=14.5, bat

Epoch 4/10:  52%|▌| 511/991 [2:11:05<2:06:29, 15.81s/batch, batch_loss=10.4, bat

Epoch 4/10:  52%|▌| 512/991 [2:11:05<2:06:08, 15.80s/batch, batch_loss=10.4, bat

Epoch 4/10:  52%|▌| 512/991 [2:11:21<2:06:08, 15.80s/batch, batch_loss=9.76, bat

Epoch 4/10:  52%|▌| 513/991 [2:11:21<2:08:00, 16.07s/batch, batch_loss=9.76, bat

Epoch 4/10:  52%|▌| 513/991 [2:11:37<2:08:00, 16.07s/batch, batch_loss=16.6, bat

Epoch 4/10:  52%|▌| 514/991 [2:11:37<2:06:19, 15.89s/batch, batch_loss=16.6, bat

Epoch 4/10:  52%|▌| 514/991 [2:11:52<2:06:19, 15.89s/batch, batch_loss=20, batch

Epoch 4/10:  52%|▌| 515/991 [2:11:52<2:05:08, 15.77s/batch, batch_loss=20, batch

Epoch 4/10:  52%|▌| 515/991 [2:12:07<2:05:08, 15.77s/batch, batch_loss=21.9, bat

Epoch 4/10:  52%|▌| 516/991 [2:12:07<2:02:49, 15.51s/batch, batch_loss=21.9, bat

Epoch 4/10:  52%|▌| 516/991 [2:12:22<2:02:49, 15.51s/batch, batch_loss=12.3, bat

Epoch 4/10:  52%|▌| 517/991 [2:12:22<2:01:35, 15.39s/batch, batch_loss=12.3, bat

Epoch 4/10:  52%|▌| 517/991 [2:12:38<2:01:35, 15.39s/batch, batch_loss=20.7, bat

Epoch 4/10:  52%|▌| 518/991 [2:12:38<2:01:29, 15.41s/batch, batch_loss=20.7, bat

Epoch 4/10:  52%|▌| 518/991 [2:12:52<2:01:29, 15.41s/batch, batch_loss=14.1, bat

Epoch 4/10:  52%|▌| 519/991 [2:12:52<1:59:26, 15.18s/batch, batch_loss=14.1, bat

Epoch 4/10:  52%|▌| 519/991 [2:13:08<1:59:26, 15.18s/batch, batch_loss=14.7, bat

Epoch 4/10:  52%|▌| 520/991 [2:13:08<2:00:12, 15.31s/batch, batch_loss=14.7, bat

Epoch 4/10:  52%|▌| 520/991 [2:13:24<2:00:12, 15.31s/batch, batch_loss=7.46, bat

Epoch 4/10:  53%|▌| 521/991 [2:13:24<2:02:32, 15.64s/batch, batch_loss=7.46, bat

Epoch 4/10:  53%|▌| 521/991 [2:13:39<2:02:32, 15.64s/batch, batch_loss=9.91, bat

Epoch 4/10:  53%|▌| 522/991 [2:13:39<2:00:06, 15.37s/batch, batch_loss=9.91, bat

Epoch 4/10:  53%|▌| 522/991 [2:13:54<2:00:06, 15.37s/batch, batch_loss=3.15, bat

Epoch 4/10:  53%|▌| 523/991 [2:13:54<1:57:48, 15.10s/batch, batch_loss=3.15, bat

Epoch 4/10:  53%|▌| 523/991 [2:14:08<1:57:48, 15.10s/batch, batch_loss=9.26, bat

Epoch 4/10:  53%|▌| 524/991 [2:14:08<1:55:52, 14.89s/batch, batch_loss=9.26, bat

Epoch 4/10:  53%|▌| 524/991 [2:14:24<1:55:52, 14.89s/batch, batch_loss=6.86, bat

Epoch 4/10:  53%|▌| 525/991 [2:14:24<1:57:31, 15.13s/batch, batch_loss=6.86, bat

Epoch 4/10:  53%|▌| 525/991 [2:14:38<1:57:31, 15.13s/batch, batch_loss=8.96, bat

Epoch 4/10:  53%|▌| 526/991 [2:14:38<1:56:26, 15.02s/batch, batch_loss=8.96, bat

Epoch 4/10:  53%|▌| 526/991 [2:14:54<1:56:26, 15.02s/batch, batch_loss=21.9, bat

Epoch 4/10:  53%|▌| 527/991 [2:14:54<1:56:51, 15.11s/batch, batch_loss=21.9, bat

Epoch 4/10:  53%|▌| 527/991 [2:15:09<1:56:51, 15.11s/batch, batch_loss=20.3, bat

Epoch 4/10:  53%|▌| 528/991 [2:15:09<1:57:15, 15.20s/batch, batch_loss=20.3, bat

Epoch 4/10:  53%|▌| 528/991 [2:15:24<1:57:15, 15.20s/batch, batch_loss=9.63, bat

Epoch 4/10:  53%|▌| 529/991 [2:15:24<1:57:09, 15.21s/batch, batch_loss=9.63, bat

Epoch 4/10:  53%|▌| 529/991 [2:15:39<1:57:09, 15.21s/batch, batch_loss=24, batch

Epoch 4/10:  53%|▌| 530/991 [2:15:39<1:56:24, 15.15s/batch, batch_loss=24, batch

Epoch 4/10:  53%|▌| 530/991 [2:15:53<1:56:24, 15.15s/batch, batch_loss=21.6, bat

Epoch 4/10:  54%|▌| 531/991 [2:15:53<1:53:34, 14.81s/batch, batch_loss=21.6, bat

Epoch 4/10:  54%|▌| 531/991 [2:16:09<1:53:34, 14.81s/batch, batch_loss=21.5, bat

Epoch 4/10:  54%|▌| 532/991 [2:16:09<1:54:14, 14.93s/batch, batch_loss=21.5, bat

Epoch 4/10:  54%|▌| 532/991 [2:16:24<1:54:14, 14.93s/batch, batch_loss=19.1, bat

Epoch 4/10:  54%|▌| 533/991 [2:16:24<1:56:01, 15.20s/batch, batch_loss=19.1, bat

Epoch 4/10:  54%|▌| 533/991 [2:16:41<1:56:01, 15.20s/batch, batch_loss=19.4, bat

Epoch 4/10:  54%|▌| 534/991 [2:16:41<1:58:02, 15.50s/batch, batch_loss=19.4, bat

Epoch 4/10:  54%|▌| 534/991 [2:16:56<1:58:02, 15.50s/batch, batch_loss=33, batch

Epoch 4/10:  54%|▌| 535/991 [2:16:56<1:58:15, 15.56s/batch, batch_loss=33, batch

Epoch 4/10:  54%|▌| 535/991 [2:17:11<1:58:15, 15.56s/batch, batch_loss=29.6, bat

Epoch 4/10:  54%|▌| 536/991 [2:17:11<1:56:41, 15.39s/batch, batch_loss=29.6, bat

Epoch 4/10:  54%|▌| 536/991 [2:17:27<1:56:41, 15.39s/batch, batch_loss=14.2, bat

Epoch 4/10:  54%|▌| 537/991 [2:17:27<1:57:08, 15.48s/batch, batch_loss=14.2, bat

Epoch 4/10:  54%|▌| 537/991 [2:17:43<1:57:08, 15.48s/batch, batch_loss=1.78e+3, 

Epoch 4/10:  54%|▌| 538/991 [2:17:43<1:58:23, 15.68s/batch, batch_loss=1.78e+3, 

Epoch 4/10:  54%|▌| 538/991 [2:17:59<1:58:23, 15.68s/batch, batch_loss=54.4, bat

Epoch 4/10:  54%|▌| 539/991 [2:17:59<1:59:06, 15.81s/batch, batch_loss=54.4, bat

Epoch 4/10:  54%|▌| 539/991 [2:18:16<1:59:06, 15.81s/batch, batch_loss=50.4, bat

Epoch 4/10:  54%|▌| 540/991 [2:18:16<2:01:34, 16.17s/batch, batch_loss=50.4, bat

Epoch 4/10:  54%|▌| 540/991 [2:18:32<2:01:34, 16.17s/batch, batch_loss=1.3e+4, b

Epoch 4/10:  55%|▌| 541/991 [2:18:32<2:00:41, 16.09s/batch, batch_loss=1.3e+4, b

Epoch 4/10:  55%|▌| 541/991 [2:18:50<2:00:41, 16.09s/batch, batch_loss=2.87e+3, 

Epoch 4/10:  55%|▌| 542/991 [2:18:50<2:05:06, 16.72s/batch, batch_loss=2.87e+3, 

Epoch 4/10:  55%|▌| 542/991 [2:19:06<2:05:06, 16.72s/batch, batch_loss=65, batch

Epoch 4/10:  55%|▌| 543/991 [2:19:06<2:01:16, 16.24s/batch, batch_loss=65, batch

Epoch 4/10:  55%|▌| 543/991 [2:19:21<2:01:16, 16.24s/batch, batch_loss=28, batch

Epoch 4/10:  55%|▌| 544/991 [2:19:21<2:00:15, 16.14s/batch, batch_loss=28, batch

Epoch 4/10:  55%|▌| 544/991 [2:19:38<2:00:15, 16.14s/batch, batch_loss=16.2, bat

Epoch 4/10:  55%|▌| 545/991 [2:19:38<2:01:24, 16.33s/batch, batch_loss=16.2, bat

Epoch 4/10:  55%|▌| 545/991 [2:19:55<2:01:24, 16.33s/batch, batch_loss=302, batc

Epoch 4/10:  55%|▌| 546/991 [2:19:55<2:01:08, 16.33s/batch, batch_loss=302, batc

Epoch 4/10:  55%|▌| 546/991 [2:20:09<2:01:08, 16.33s/batch, batch_loss=14.6, bat

Epoch 4/10:  55%|▌| 547/991 [2:20:09<1:56:09, 15.70s/batch, batch_loss=14.6, bat

Epoch 4/10:  55%|▌| 547/991 [2:20:24<1:56:09, 15.70s/batch, batch_loss=14.9, bat

Epoch 4/10:  55%|▌| 548/991 [2:20:24<1:55:00, 15.58s/batch, batch_loss=14.9, bat

Epoch 4/10:  55%|▌| 548/991 [2:20:43<1:55:00, 15.58s/batch, batch_loss=12.6, bat

Epoch 4/10:  55%|▌| 549/991 [2:20:43<2:01:27, 16.49s/batch, batch_loss=12.6, bat

Epoch 4/10:  55%|▌| 549/991 [2:20:59<2:01:27, 16.49s/batch, batch_loss=26.7, bat

Epoch 4/10:  55%|▌| 550/991 [2:20:59<2:00:16, 16.36s/batch, batch_loss=26.7, bat

Epoch 4/10:  55%|▌| 550/991 [2:21:15<2:00:16, 16.36s/batch, batch_loss=20.3, bat

Epoch 4/10:  56%|▌| 551/991 [2:21:15<1:58:40, 16.18s/batch, batch_loss=20.3, bat

Epoch 4/10:  56%|▌| 551/991 [2:21:31<1:58:40, 16.18s/batch, batch_loss=15.5, bat

Epoch 4/10:  56%|▌| 552/991 [2:21:31<1:59:24, 16.32s/batch, batch_loss=15.5, bat

Epoch 4/10:  56%|▌| 552/991 [2:21:48<1:59:24, 16.32s/batch, batch_loss=17.8, bat

Epoch 4/10:  56%|▌| 553/991 [2:21:48<1:59:50, 16.42s/batch, batch_loss=17.8, bat

Epoch 4/10:  56%|▌| 553/991 [2:22:05<1:59:50, 16.42s/batch, batch_loss=5.75e+3, 

Epoch 4/10:  56%|▌| 554/991 [2:22:05<2:00:30, 16.55s/batch, batch_loss=5.75e+3, 

Epoch 4/10:  56%|▌| 554/991 [2:22:21<2:00:30, 16.55s/batch, batch_loss=2.57e+3, 

Epoch 4/10:  56%|▌| 555/991 [2:22:21<2:00:39, 16.60s/batch, batch_loss=2.57e+3, 

Epoch 4/10:  56%|▌| 555/991 [2:22:42<2:00:39, 16.60s/batch, batch_loss=19.1, bat

Epoch 4/10:  56%|▌| 556/991 [2:22:42<2:09:06, 17.81s/batch, batch_loss=19.1, bat

Epoch 4/10:  56%|▌| 556/991 [2:22:59<2:09:06, 17.81s/batch, batch_loss=1.27e+4, 

Epoch 4/10:  56%|▌| 557/991 [2:22:59<2:06:24, 17.47s/batch, batch_loss=1.27e+4, 

Epoch 4/10:  56%|▌| 557/991 [2:23:15<2:06:24, 17.47s/batch, batch_loss=12.5, bat

Epoch 4/10:  56%|▌| 558/991 [2:23:15<2:03:47, 17.15s/batch, batch_loss=12.5, bat

Epoch 4/10:  56%|▌| 558/991 [2:23:31<2:03:47, 17.15s/batch, batch_loss=18.1, bat

Epoch 4/10:  56%|▌| 559/991 [2:23:31<2:01:22, 16.86s/batch, batch_loss=18.1, bat

Epoch 4/10:  56%|▌| 559/991 [2:23:48<2:01:22, 16.86s/batch, batch_loss=6.98, bat

Epoch 4/10:  57%|▌| 560/991 [2:23:48<1:59:52, 16.69s/batch, batch_loss=6.98, bat

Epoch 4/10:  57%|▌| 560/991 [2:24:04<1:59:52, 16.69s/batch, batch_loss=9.4, batc

Epoch 4/10:  57%|▌| 561/991 [2:24:04<1:59:03, 16.61s/batch, batch_loss=9.4, batc

Epoch 4/10:  57%|▌| 561/991 [2:24:20<1:59:03, 16.61s/batch, batch_loss=17.9, bat

Epoch 4/10:  57%|▌| 562/991 [2:24:20<1:58:09, 16.53s/batch, batch_loss=17.9, bat

Epoch 4/10:  57%|▌| 562/991 [2:24:37<1:58:09, 16.53s/batch, batch_loss=6.08, bat

Epoch 4/10:  57%|▌| 563/991 [2:24:37<1:58:30, 16.61s/batch, batch_loss=6.08, bat

Epoch 4/10:  57%|▌| 563/991 [2:24:54<1:58:30, 16.61s/batch, batch_loss=11.4, bat

Epoch 4/10:  57%|▌| 564/991 [2:24:54<1:58:34, 16.66s/batch, batch_loss=11.4, bat

Epoch 4/10:  57%|▌| 564/991 [2:25:16<1:58:34, 16.66s/batch, batch_loss=501, batc

Epoch 4/10:  57%|▌| 565/991 [2:25:16<2:10:18, 18.35s/batch, batch_loss=501, batc

Epoch 4/10:  57%|▌| 565/991 [2:25:34<2:10:18, 18.35s/batch, batch_loss=18.1, bat

Epoch 4/10:  57%|▌| 566/991 [2:25:34<2:08:23, 18.13s/batch, batch_loss=18.1, bat

Epoch 4/10:  57%|▌| 566/991 [2:25:50<2:08:23, 18.13s/batch, batch_loss=20.9, bat

Epoch 4/10:  57%|▌| 567/991 [2:25:50<2:03:01, 17.41s/batch, batch_loss=20.9, bat

Epoch 4/10:  57%|▌| 567/991 [2:26:06<2:03:01, 17.41s/batch, batch_loss=296, batc

Epoch 4/10:  57%|▌| 568/991 [2:26:06<2:01:26, 17.23s/batch, batch_loss=296, batc

Epoch 4/10:  57%|▌| 568/991 [2:26:22<2:01:26, 17.23s/batch, batch_loss=30.8, bat

Epoch 4/10:  57%|▌| 569/991 [2:26:22<1:58:37, 16.87s/batch, batch_loss=30.8, bat

Epoch 4/10:  57%|▌| 569/991 [2:26:41<1:58:37, 16.87s/batch, batch_loss=8.5e+3, b

Epoch 4/10:  58%|▌| 570/991 [2:26:41<2:01:58, 17.38s/batch, batch_loss=8.5e+3, b

Epoch 4/10:  58%|▌| 570/991 [2:26:58<2:01:58, 17.38s/batch, batch_loss=12.7, bat

Epoch 4/10:  58%|▌| 571/991 [2:26:58<2:00:59, 17.28s/batch, batch_loss=12.7, bat

Epoch 4/10:  58%|▌| 571/991 [2:27:14<2:00:59, 17.28s/batch, batch_loss=12.5, bat

Epoch 4/10:  58%|▌| 572/991 [2:27:14<1:57:45, 16.86s/batch, batch_loss=12.5, bat

Epoch 4/10:  58%|▌| 572/991 [2:27:31<1:57:45, 16.86s/batch, batch_loss=8.47, bat

Epoch 4/10:  58%|▌| 573/991 [2:27:31<1:57:30, 16.87s/batch, batch_loss=8.47, bat

Epoch 4/10:  58%|▌| 573/991 [2:27:48<1:57:30, 16.87s/batch, batch_loss=10.8, bat

Epoch 4/10:  58%|▌| 574/991 [2:27:48<1:58:52, 17.11s/batch, batch_loss=10.8, bat

Epoch 4/10:  58%|▌| 574/991 [2:28:05<1:58:52, 17.11s/batch, batch_loss=27.8, bat

Epoch 4/10:  58%|▌| 575/991 [2:28:05<1:56:44, 16.84s/batch, batch_loss=27.8, bat

Epoch 4/10:  58%|▌| 575/991 [2:28:22<1:56:44, 16.84s/batch, batch_loss=25.3, bat

Epoch 4/10:  58%|▌| 576/991 [2:28:22<1:57:13, 16.95s/batch, batch_loss=25.3, bat

Epoch 4/10:  58%|▌| 576/991 [2:28:37<1:57:13, 16.95s/batch, batch_loss=14.3, bat

Epoch 4/10:  58%|▌| 577/991 [2:28:37<1:54:03, 16.53s/batch, batch_loss=14.3, bat

Epoch 4/10:  58%|▌| 577/991 [2:28:53<1:54:03, 16.53s/batch, batch_loss=8.49, bat

Epoch 4/10:  58%|▌| 578/991 [2:28:53<1:52:20, 16.32s/batch, batch_loss=8.49, bat

Epoch 4/10:  58%|▌| 578/991 [2:29:09<1:52:20, 16.32s/batch, batch_loss=14.5, bat

Epoch 4/10:  58%|▌| 579/991 [2:29:09<1:51:25, 16.23s/batch, batch_loss=14.5, bat

Epoch 4/10:  58%|▌| 579/991 [2:29:25<1:51:25, 16.23s/batch, batch_loss=21.4, bat

Epoch 4/10:  59%|▌| 580/991 [2:29:25<1:49:32, 15.99s/batch, batch_loss=21.4, bat

Epoch 4/10:  59%|▌| 580/991 [2:29:40<1:49:32, 15.99s/batch, batch_loss=8.73, bat

Epoch 4/10:  59%|▌| 581/991 [2:29:40<1:47:30, 15.73s/batch, batch_loss=8.73, bat

Epoch 4/10:  59%|▌| 581/991 [2:29:58<1:47:30, 15.73s/batch, batch_loss=1.08, bat

Epoch 4/10:  59%|▌| 582/991 [2:29:58<1:52:44, 16.54s/batch, batch_loss=1.08, bat

Epoch 4/10:  59%|▌| 582/991 [2:30:14<1:52:44, 16.54s/batch, batch_loss=6.59e+3, 

Epoch 4/10:  59%|▌| 583/991 [2:30:14<1:50:12, 16.21s/batch, batch_loss=6.59e+3, 

Epoch 4/10:  59%|▌| 583/991 [2:30:30<1:50:12, 16.21s/batch, batch_loss=14.4, bat

Epoch 4/10:  59%|▌| 584/991 [2:30:30<1:49:44, 16.18s/batch, batch_loss=14.4, bat

Epoch 4/10:  59%|▌| 584/991 [2:30:47<1:49:44, 16.18s/batch, batch_loss=9.32, bat

Epoch 4/10:  59%|▌| 585/991 [2:30:47<1:50:32, 16.34s/batch, batch_loss=9.32, bat

Epoch 4/10:  59%|▌| 585/991 [2:31:02<1:50:32, 16.34s/batch, batch_loss=22.5, bat

Epoch 4/10:  59%|▌| 586/991 [2:31:02<1:49:01, 16.15s/batch, batch_loss=22.5, bat

Epoch 4/10:  59%|▌| 586/991 [2:31:20<1:49:01, 16.15s/batch, batch_loss=24.7, bat

Epoch 4/10:  59%|▌| 587/991 [2:31:20<1:51:42, 16.59s/batch, batch_loss=24.7, bat

Epoch 4/10:  59%|▌| 587/991 [2:31:40<1:51:42, 16.59s/batch, batch_loss=17.8, bat

Epoch 4/10:  59%|▌| 588/991 [2:31:40<1:57:53, 17.55s/batch, batch_loss=17.8, bat

Epoch 4/10:  59%|▌| 588/991 [2:31:55<1:57:53, 17.55s/batch, batch_loss=8.89, bat

Epoch 4/10:  59%|▌| 589/991 [2:31:55<1:52:59, 16.86s/batch, batch_loss=8.89, bat

Epoch 4/10:  59%|▌| 589/991 [2:32:11<1:52:59, 16.86s/batch, batch_loss=16.9, bat

Epoch 4/10:  60%|▌| 590/991 [2:32:11<1:52:00, 16.76s/batch, batch_loss=16.9, bat

Epoch 4/10:  60%|▌| 590/991 [2:32:28<1:52:00, 16.76s/batch, batch_loss=16.6, bat

Epoch 4/10:  60%|▌| 591/991 [2:32:28<1:51:10, 16.68s/batch, batch_loss=16.6, bat

Epoch 4/10:  60%|▌| 591/991 [2:32:46<1:51:10, 16.68s/batch, batch_loss=8.87, bat

Epoch 4/10:  60%|▌| 592/991 [2:32:46<1:54:00, 17.14s/batch, batch_loss=8.87, bat

Epoch 4/10:  60%|▌| 592/991 [2:33:02<1:54:00, 17.14s/batch, batch_loss=13.6, bat

Epoch 4/10:  60%|▌| 593/991 [2:33:02<1:50:57, 16.73s/batch, batch_loss=13.6, bat

Epoch 4/10:  60%|▌| 593/991 [2:33:18<1:50:57, 16.73s/batch, batch_loss=11.6, bat

Epoch 4/10:  60%|▌| 594/991 [2:33:18<1:48:58, 16.47s/batch, batch_loss=11.6, bat

Epoch 4/10:  60%|▌| 594/991 [2:33:34<1:48:58, 16.47s/batch, batch_loss=7.95, bat

Epoch 4/10:  60%|▌| 595/991 [2:33:34<1:48:19, 16.41s/batch, batch_loss=7.95, bat

Epoch 4/10:  60%|▌| 595/991 [2:33:49<1:48:19, 16.41s/batch, batch_loss=7.24, bat

Epoch 4/10:  60%|▌| 596/991 [2:33:49<1:45:20, 16.00s/batch, batch_loss=7.24, bat

Epoch 4/10:  60%|▌| 596/991 [2:34:04<1:45:20, 16.00s/batch, batch_loss=22.9, bat

Epoch 4/10:  60%|▌| 597/991 [2:34:04<1:43:55, 15.83s/batch, batch_loss=22.9, bat

Epoch 4/10:  60%|▌| 597/991 [2:34:20<1:43:55, 15.83s/batch, batch_loss=10.2, bat

Epoch 4/10:  60%|▌| 598/991 [2:34:20<1:43:03, 15.74s/batch, batch_loss=10.2, bat

Epoch 4/10:  60%|▌| 598/991 [2:34:36<1:43:03, 15.74s/batch, batch_loss=18.4, bat

Epoch 4/10:  60%|▌| 599/991 [2:34:36<1:42:48, 15.74s/batch, batch_loss=18.4, bat

Epoch 4/10:  60%|▌| 599/991 [2:34:51<1:42:48, 15.74s/batch, batch_loss=12.2, bat

Epoch 4/10:  61%|▌| 600/991 [2:34:51<1:41:54, 15.64s/batch, batch_loss=12.2, bat

Epoch 4/10:  61%|▌| 600/991 [2:35:07<1:41:54, 15.64s/batch, batch_loss=18, batch

Epoch 4/10:  61%|▌| 601/991 [2:35:07<1:42:21, 15.75s/batch, batch_loss=18, batch

Epoch 4/10:  61%|▌| 601/991 [2:35:23<1:42:21, 15.75s/batch, batch_loss=11.8, bat

Epoch 4/10:  61%|▌| 602/991 [2:35:23<1:42:17, 15.78s/batch, batch_loss=11.8, bat

Epoch 4/10:  61%|▌| 602/991 [2:35:42<1:42:17, 15.78s/batch, batch_loss=7.18, bat

Epoch 4/10:  61%|▌| 603/991 [2:35:42<1:48:17, 16.75s/batch, batch_loss=7.18, bat

Epoch 4/10:  61%|▌| 603/991 [2:35:57<1:48:17, 16.75s/batch, batch_loss=1.01e+4, 

Epoch 4/10:  61%|▌| 604/991 [2:35:57<1:43:51, 16.10s/batch, batch_loss=1.01e+4, 

Epoch 4/10:  61%|▌| 604/991 [2:36:12<1:43:51, 16.10s/batch, batch_loss=12.4, bat

Epoch 4/10:  61%|▌| 605/991 [2:36:12<1:42:06, 15.87s/batch, batch_loss=12.4, bat

Epoch 4/10:  61%|▌| 605/991 [2:36:28<1:42:06, 15.87s/batch, batch_loss=7.78, bat

Epoch 4/10:  61%|▌| 606/991 [2:36:28<1:42:04, 15.91s/batch, batch_loss=7.78, bat

Epoch 4/10:  61%|▌| 606/991 [2:36:44<1:42:04, 15.91s/batch, batch_loss=11.1, bat

Epoch 4/10:  61%|▌| 607/991 [2:36:44<1:42:49, 16.07s/batch, batch_loss=11.1, bat

Epoch 4/10:  61%|▌| 607/991 [2:37:00<1:42:49, 16.07s/batch, batch_loss=13, batch

Epoch 4/10:  61%|▌| 608/991 [2:37:00<1:42:26, 16.05s/batch, batch_loss=13, batch

Epoch 4/10:  61%|▌| 608/991 [2:37:17<1:42:26, 16.05s/batch, batch_loss=16.3, bat

Epoch 4/10:  61%|▌| 609/991 [2:37:17<1:43:57, 16.33s/batch, batch_loss=16.3, bat

Epoch 4/10:  61%|▌| 609/991 [2:37:34<1:43:57, 16.33s/batch, batch_loss=15.4, bat

Epoch 4/10:  62%|▌| 610/991 [2:37:34<1:44:45, 16.50s/batch, batch_loss=15.4, bat

Epoch 4/10:  62%|▌| 610/991 [2:37:50<1:44:45, 16.50s/batch, batch_loss=26.4, bat

Epoch 4/10:  62%|▌| 611/991 [2:37:50<1:43:43, 16.38s/batch, batch_loss=26.4, bat

Epoch 4/10:  62%|▌| 611/991 [2:38:06<1:43:43, 16.38s/batch, batch_loss=6.83, bat

Epoch 4/10:  62%|▌| 612/991 [2:38:06<1:42:01, 16.15s/batch, batch_loss=6.83, bat

Epoch 4/10:  62%|▌| 612/991 [2:38:23<1:42:01, 16.15s/batch, batch_loss=12.1, bat

Epoch 4/10:  62%|▌| 613/991 [2:38:23<1:43:16, 16.39s/batch, batch_loss=12.1, bat

Epoch 4/10:  62%|▌| 613/991 [2:38:39<1:43:16, 16.39s/batch, batch_loss=1.73e+4, 

Epoch 4/10:  62%|▌| 614/991 [2:38:39<1:42:25, 16.30s/batch, batch_loss=1.73e+4, 

Epoch 4/10:  62%|▌| 614/991 [2:38:56<1:42:25, 16.30s/batch, batch_loss=985, batc

Epoch 4/10:  62%|▌| 615/991 [2:38:56<1:42:33, 16.36s/batch, batch_loss=985, batc

Epoch 4/10:  62%|▌| 615/991 [2:39:11<1:42:33, 16.36s/batch, batch_loss=7.74, bat

Epoch 4/10:  62%|▌| 616/991 [2:39:11<1:40:13, 16.04s/batch, batch_loss=7.74, bat

Epoch 4/10:  62%|▌| 616/991 [2:39:26<1:40:13, 16.04s/batch, batch_loss=18, batch

Epoch 4/10:  62%|▌| 617/991 [2:39:26<1:38:55, 15.87s/batch, batch_loss=18, batch

Epoch 4/10:  62%|▌| 617/991 [2:39:42<1:38:55, 15.87s/batch, batch_loss=13.2, bat

Epoch 4/10:  62%|▌| 618/991 [2:39:42<1:37:54, 15.75s/batch, batch_loss=13.2, bat

Epoch 4/10:  62%|▌| 618/991 [2:39:58<1:37:54, 15.75s/batch, batch_loss=20.2, bat

Epoch 4/10:  62%|▌| 619/991 [2:39:58<1:39:13, 16.00s/batch, batch_loss=20.2, bat

Epoch 4/10:  62%|▌| 619/991 [2:40:14<1:39:13, 16.00s/batch, batch_loss=12.3, bat

Epoch 4/10:  63%|▋| 620/991 [2:40:14<1:37:42, 15.80s/batch, batch_loss=12.3, bat

Epoch 4/10:  63%|▋| 620/991 [2:40:29<1:37:42, 15.80s/batch, batch_loss=10.3, bat

Epoch 4/10:  63%|▋| 621/991 [2:40:29<1:36:52, 15.71s/batch, batch_loss=10.3, bat

Epoch 4/10:  63%|▋| 621/991 [2:40:44<1:36:52, 15.71s/batch, batch_loss=5.49e+3, 

Epoch 4/10:  63%|▋| 622/991 [2:40:44<1:34:34, 15.38s/batch, batch_loss=5.49e+3, 

Epoch 4/10:  63%|▋| 622/991 [2:41:01<1:34:34, 15.38s/batch, batch_loss=18, batch

Epoch 4/10:  63%|▋| 623/991 [2:41:01<1:38:08, 16.00s/batch, batch_loss=18, batch

Epoch 4/10:  63%|▋| 623/991 [2:41:17<1:38:08, 16.00s/batch, batch_loss=1.6e+4, b

Epoch 4/10:  63%|▋| 624/991 [2:41:17<1:37:15, 15.90s/batch, batch_loss=1.6e+4, b

Epoch 4/10:  63%|▋| 624/991 [2:41:32<1:37:15, 15.90s/batch, batch_loss=10.3, bat

Epoch 4/10:  63%|▋| 625/991 [2:41:32<1:35:18, 15.62s/batch, batch_loss=10.3, bat

Epoch 4/10:  63%|▋| 625/991 [2:41:49<1:35:18, 15.62s/batch, batch_loss=6.47, bat

Epoch 4/10:  63%|▋| 626/991 [2:41:49<1:37:16, 15.99s/batch, batch_loss=6.47, bat

Epoch 4/10:  63%|▋| 626/991 [2:42:04<1:37:16, 15.99s/batch, batch_loss=4.31e+3, 

Epoch 4/10:  63%|▋| 627/991 [2:42:04<1:35:10, 15.69s/batch, batch_loss=4.31e+3, 

Epoch 4/10:  63%|▋| 627/991 [2:42:18<1:35:10, 15.69s/batch, batch_loss=1.05e+3, 

Epoch 4/10:  63%|▋| 628/991 [2:42:18<1:31:58, 15.20s/batch, batch_loss=1.05e+3, 

Epoch 4/10:  63%|▋| 628/991 [2:42:32<1:31:58, 15.20s/batch, batch_loss=14.7, bat

Epoch 4/10:  63%|▋| 629/991 [2:42:32<1:30:24, 14.99s/batch, batch_loss=14.7, bat

Epoch 4/10:  63%|▋| 629/991 [2:42:47<1:30:24, 14.99s/batch, batch_loss=18.4, bat

Epoch 4/10:  64%|▋| 630/991 [2:42:47<1:29:34, 14.89s/batch, batch_loss=18.4, bat

Epoch 4/10:  64%|▋| 630/991 [2:43:04<1:29:34, 14.89s/batch, batch_loss=16.7, bat

Epoch 4/10:  64%|▋| 631/991 [2:43:04<1:34:01, 15.67s/batch, batch_loss=16.7, bat

Epoch 4/10:  64%|▋| 631/991 [2:43:20<1:34:01, 15.67s/batch, batch_loss=4.38, bat

Epoch 4/10:  64%|▋| 632/991 [2:43:20<1:32:55, 15.53s/batch, batch_loss=4.38, bat

Epoch 4/10:  64%|▋| 632/991 [2:43:36<1:32:55, 15.53s/batch, batch_loss=20.8, bat

Epoch 4/10:  64%|▋| 633/991 [2:43:36<1:33:24, 15.66s/batch, batch_loss=20.8, bat

Epoch 4/10:  64%|▋| 633/991 [2:43:50<1:33:24, 15.66s/batch, batch_loss=28.2, bat

Epoch 4/10:  64%|▋| 634/991 [2:43:50<1:30:30, 15.21s/batch, batch_loss=28.2, bat

Epoch 4/10:  64%|▋| 634/991 [2:44:05<1:30:30, 15.21s/batch, batch_loss=23.7, bat

Epoch 4/10:  64%|▋| 635/991 [2:44:05<1:29:44, 15.12s/batch, batch_loss=23.7, bat

Epoch 4/10:  64%|▋| 635/991 [2:44:20<1:29:44, 15.12s/batch, batch_loss=18.5, bat

Epoch 4/10:  64%|▋| 636/991 [2:44:20<1:29:21, 15.10s/batch, batch_loss=18.5, bat

Epoch 4/10:  64%|▋| 636/991 [2:44:35<1:29:21, 15.10s/batch, batch_loss=17.7, bat

Epoch 4/10:  64%|▋| 637/991 [2:44:35<1:29:29, 15.17s/batch, batch_loss=17.7, bat

Epoch 4/10:  64%|▋| 637/991 [2:44:50<1:29:29, 15.17s/batch, batch_loss=16.4, bat

Epoch 4/10:  64%|▋| 638/991 [2:44:50<1:29:41, 15.25s/batch, batch_loss=16.4, bat

Epoch 4/10:  64%|▋| 638/991 [2:45:09<1:29:41, 15.25s/batch, batch_loss=11.2, bat

Epoch 4/10:  64%|▋| 639/991 [2:45:09<1:35:49, 16.33s/batch, batch_loss=11.2, bat

Epoch 4/10:  64%|▋| 639/991 [2:45:26<1:35:49, 16.33s/batch, batch_loss=675, batc

Epoch 4/10:  65%|▋| 640/991 [2:45:26<1:36:24, 16.48s/batch, batch_loss=675, batc

Epoch 4/10:  65%|▋| 640/991 [2:45:43<1:36:24, 16.48s/batch, batch_loss=14.4, bat

Epoch 4/10:  65%|▋| 641/991 [2:45:43<1:36:21, 16.52s/batch, batch_loss=14.4, bat

Epoch 4/10:  65%|▋| 641/991 [2:46:00<1:36:21, 16.52s/batch, batch_loss=9.52, bat

Epoch 4/10:  65%|▋| 642/991 [2:46:00<1:36:31, 16.59s/batch, batch_loss=9.52, bat

Epoch 4/10:  65%|▋| 642/991 [2:46:15<1:36:31, 16.59s/batch, batch_loss=2.12e+4, 

Epoch 4/10:  65%|▋| 643/991 [2:46:16<1:35:07, 16.40s/batch, batch_loss=2.12e+4, 

Epoch 4/10:  65%|▋| 643/991 [2:46:32<1:35:07, 16.40s/batch, batch_loss=1.76e+4, 

Epoch 4/10:  65%|▋| 644/991 [2:46:32<1:34:24, 16.32s/batch, batch_loss=1.76e+4, 

Epoch 4/10:  65%|▋| 644/991 [2:46:49<1:34:24, 16.32s/batch, batch_loss=2.19e+3, 

Epoch 4/10:  65%|▋| 645/991 [2:46:49<1:35:26, 16.55s/batch, batch_loss=2.19e+3, 

Epoch 4/10:  65%|▋| 645/991 [2:47:05<1:35:26, 16.55s/batch, batch_loss=10, batch

Epoch 4/10:  65%|▋| 646/991 [2:47:05<1:34:22, 16.41s/batch, batch_loss=10, batch

Epoch 4/10:  65%|▋| 646/991 [2:47:21<1:34:22, 16.41s/batch, batch_loss=14.7, bat

Epoch 4/10:  65%|▋| 647/991 [2:47:21<1:33:33, 16.32s/batch, batch_loss=14.7, bat

Epoch 4/10:  65%|▋| 647/991 [2:47:36<1:33:33, 16.32s/batch, batch_loss=16.4, bat

Epoch 4/10:  65%|▋| 648/991 [2:47:36<1:32:00, 16.09s/batch, batch_loss=16.4, bat

Epoch 4/10:  65%|▋| 648/991 [2:47:52<1:32:00, 16.09s/batch, batch_loss=17.4, bat

Epoch 4/10:  65%|▋| 649/991 [2:47:52<1:30:24, 15.86s/batch, batch_loss=17.4, bat

Epoch 4/10:  65%|▋| 649/991 [2:48:09<1:30:24, 15.86s/batch, batch_loss=1.34e+4, 

Epoch 4/10:  66%|▋| 650/991 [2:48:09<1:31:39, 16.13s/batch, batch_loss=1.34e+4, 

Epoch 4/10:  66%|▋| 650/991 [2:48:24<1:31:39, 16.13s/batch, batch_loss=9.62, bat

Epoch 4/10:  66%|▋| 651/991 [2:48:24<1:30:05, 15.90s/batch, batch_loss=9.62, bat

Epoch 4/10:  66%|▋| 651/991 [2:48:41<1:30:05, 15.90s/batch, batch_loss=14.2, bat

Epoch 4/10:  66%|▋| 652/991 [2:48:41<1:32:30, 16.37s/batch, batch_loss=14.2, bat

Epoch 4/10:  66%|▋| 652/991 [2:48:57<1:32:30, 16.37s/batch, batch_loss=19.9, bat

Epoch 4/10:  66%|▋| 653/991 [2:48:57<1:30:47, 16.12s/batch, batch_loss=19.9, bat

Epoch 4/10:  66%|▋| 653/991 [2:49:13<1:30:47, 16.12s/batch, batch_loss=17.8, bat

Epoch 4/10:  66%|▋| 654/991 [2:49:13<1:29:53, 16.01s/batch, batch_loss=17.8, bat

Epoch 4/10:  66%|▋| 654/991 [2:49:29<1:29:53, 16.01s/batch, batch_loss=3.84e+3, 

Epoch 4/10:  66%|▋| 655/991 [2:49:29<1:29:46, 16.03s/batch, batch_loss=3.84e+3, 

Epoch 4/10:  66%|▋| 655/991 [2:49:43<1:29:46, 16.03s/batch, batch_loss=5.19e+3, 

Epoch 4/10:  66%|▋| 656/991 [2:49:43<1:26:02, 15.41s/batch, batch_loss=5.19e+3, 

Epoch 4/10:  66%|▋| 656/991 [2:49:56<1:26:02, 15.41s/batch, batch_loss=4.22e+3, 

Epoch 4/10:  66%|▋| 657/991 [2:49:56<1:22:29, 14.82s/batch, batch_loss=4.22e+3, 

Epoch 4/10:  66%|▋| 657/991 [2:50:10<1:22:29, 14.82s/batch, batch_loss=2.2e+4, b

Epoch 4/10:  66%|▋| 658/991 [2:50:10<1:20:10, 14.44s/batch, batch_loss=2.2e+4, b

Epoch 4/10:  66%|▋| 658/991 [2:50:28<1:20:10, 14.44s/batch, batch_loss=5.23, bat

Epoch 4/10:  66%|▋| 659/991 [2:50:28<1:26:35, 15.65s/batch, batch_loss=5.23, bat

Epoch 4/10:  66%|▋| 659/991 [2:50:44<1:26:35, 15.65s/batch, batch_loss=5.61, bat

Epoch 4/10:  67%|▋| 660/991 [2:50:44<1:26:46, 15.73s/batch, batch_loss=5.61, bat

Epoch 4/10:  67%|▋| 660/991 [2:50:59<1:26:46, 15.73s/batch, batch_loss=12.5, bat

Epoch 4/10:  67%|▋| 661/991 [2:50:59<1:25:56, 15.63s/batch, batch_loss=12.5, bat

Epoch 4/10:  67%|▋| 661/991 [2:51:15<1:25:56, 15.63s/batch, batch_loss=16.2, bat

Epoch 4/10:  67%|▋| 662/991 [2:51:15<1:25:21, 15.57s/batch, batch_loss=16.2, bat

Epoch 4/10:  67%|▋| 662/991 [2:51:30<1:25:21, 15.57s/batch, batch_loss=15.9, bat

Epoch 4/10:  67%|▋| 663/991 [2:51:30<1:24:16, 15.42s/batch, batch_loss=15.9, bat

Epoch 4/10:  67%|▋| 663/991 [2:51:46<1:24:16, 15.42s/batch, batch_loss=3.05e+3, 

Epoch 4/10:  67%|▋| 664/991 [2:51:46<1:24:17, 15.47s/batch, batch_loss=3.05e+3, 

Epoch 4/10:  67%|▋| 664/991 [2:52:01<1:24:17, 15.47s/batch, batch_loss=15.1, bat

Epoch 4/10:  67%|▋| 665/991 [2:52:01<1:24:18, 15.52s/batch, batch_loss=15.1, bat

Epoch 4/10:  67%|▋| 665/991 [2:52:16<1:24:18, 15.52s/batch, batch_loss=3.07e+3, 

Epoch 4/10:  67%|▋| 666/991 [2:52:16<1:23:22, 15.39s/batch, batch_loss=3.07e+3, 

Epoch 4/10:  67%|▋| 666/991 [2:52:32<1:23:22, 15.39s/batch, batch_loss=21, batch

Epoch 4/10:  67%|▋| 667/991 [2:52:32<1:24:05, 15.57s/batch, batch_loss=21, batch

Epoch 4/10:  67%|▋| 667/991 [2:52:50<1:24:05, 15.57s/batch, batch_loss=373, batc

Epoch 4/10:  67%|▋| 668/991 [2:52:50<1:27:32, 16.26s/batch, batch_loss=373, batc

Epoch 4/10:  67%|▋| 668/991 [2:53:07<1:27:32, 16.26s/batch, batch_loss=2.96e+3, 

Epoch 4/10:  68%|▋| 669/991 [2:53:07<1:28:13, 16.44s/batch, batch_loss=2.96e+3, 

Epoch 4/10:  68%|▋| 669/991 [2:53:23<1:28:13, 16.44s/batch, batch_loss=1.02e+3, 

Epoch 4/10:  68%|▋| 670/991 [2:53:23<1:27:29, 16.35s/batch, batch_loss=1.02e+3, 

Epoch 4/10:  68%|▋| 670/991 [2:53:39<1:27:29, 16.35s/batch, batch_loss=9.56, bat

Epoch 4/10:  68%|▋| 671/991 [2:53:39<1:26:55, 16.30s/batch, batch_loss=9.56, bat

Epoch 4/10:  68%|▋| 671/991 [2:53:55<1:26:55, 16.30s/batch, batch_loss=13.4, bat

Epoch 4/10:  68%|▋| 672/991 [2:53:55<1:26:21, 16.24s/batch, batch_loss=13.4, bat

Epoch 4/10:  68%|▋| 672/991 [2:54:12<1:26:21, 16.24s/batch, batch_loss=18.7, bat

Epoch 4/10:  68%|▋| 673/991 [2:54:12<1:26:47, 16.38s/batch, batch_loss=18.7, bat

Epoch 4/10:  68%|▋| 673/991 [2:54:28<1:26:47, 16.38s/batch, batch_loss=16.3, bat

Epoch 4/10:  68%|▋| 674/991 [2:54:28<1:26:28, 16.37s/batch, batch_loss=16.3, bat

Epoch 4/10:  68%|▋| 674/991 [2:54:44<1:26:28, 16.37s/batch, batch_loss=4.69, bat

Epoch 4/10:  68%|▋| 675/991 [2:54:44<1:24:39, 16.07s/batch, batch_loss=4.69, bat

Epoch 4/10:  68%|▋| 675/991 [2:55:00<1:24:39, 16.07s/batch, batch_loss=8.73, bat

Epoch 4/10:  68%|▋| 676/991 [2:55:00<1:24:02, 16.01s/batch, batch_loss=8.73, bat

Epoch 4/10:  68%|▋| 676/991 [2:55:16<1:24:02, 16.01s/batch, batch_loss=16, batch

Epoch 4/10:  68%|▋| 677/991 [2:55:16<1:24:31, 16.15s/batch, batch_loss=16, batch

Epoch 4/10:  68%|▋| 677/991 [2:55:35<1:24:31, 16.15s/batch, batch_loss=8.06, bat

Epoch 4/10:  68%|▋| 678/991 [2:55:35<1:28:44, 17.01s/batch, batch_loss=8.06, bat

Epoch 4/10:  68%|▋| 678/991 [2:55:52<1:28:44, 17.01s/batch, batch_loss=3.81e+3, 

Epoch 4/10:  69%|▋| 679/991 [2:55:52<1:27:24, 16.81s/batch, batch_loss=3.81e+3, 

Epoch 4/10:  69%|▋| 679/991 [2:56:08<1:27:24, 16.81s/batch, batch_loss=6.09e+3, 

Epoch 4/10:  69%|▋| 680/991 [2:56:08<1:26:58, 16.78s/batch, batch_loss=6.09e+3, 

Epoch 4/10:  69%|▋| 680/991 [2:56:24<1:26:58, 16.78s/batch, batch_loss=72376.0, 

Epoch 4/10:  69%|▋| 681/991 [2:56:24<1:25:37, 16.57s/batch, batch_loss=72376.0, 

Epoch 4/10:  69%|▋| 681/991 [2:56:39<1:25:37, 16.57s/batch, batch_loss=14.3, bat

Epoch 4/10:  69%|▋| 682/991 [2:56:39<1:22:55, 16.10s/batch, batch_loss=14.3, bat

Epoch 4/10:  69%|▋| 682/991 [2:56:55<1:22:55, 16.10s/batch, batch_loss=376, batc

Epoch 4/10:  69%|▋| 683/991 [2:56:55<1:21:33, 15.89s/batch, batch_loss=376, batc

Epoch 4/10:  69%|▋| 683/991 [2:57:13<1:21:33, 15.89s/batch, batch_loss=5.08, bat

Epoch 4/10:  69%|▋| 684/991 [2:57:13<1:24:33, 16.52s/batch, batch_loss=5.08, bat

Epoch 4/10:  69%|▋| 684/991 [2:57:28<1:24:33, 16.52s/batch, batch_loss=13.6, bat

Epoch 4/10:  69%|▋| 685/991 [2:57:28<1:22:19, 16.14s/batch, batch_loss=13.6, bat

Epoch 4/10:  69%|▋| 685/991 [2:57:42<1:22:19, 16.14s/batch, batch_loss=11.9, bat

Epoch 4/10:  69%|▋| 686/991 [2:57:42<1:18:51, 15.51s/batch, batch_loss=11.9, bat

Epoch 4/10:  69%|▋| 686/991 [2:57:57<1:18:51, 15.51s/batch, batch_loss=539, batc

Epoch 4/10:  69%|▋| 687/991 [2:57:57<1:17:43, 15.34s/batch, batch_loss=539, batc

Epoch 4/10:  69%|▋| 687/991 [2:58:13<1:17:43, 15.34s/batch, batch_loss=5.39, bat

Epoch 4/10:  69%|▋| 688/991 [2:58:13<1:17:51, 15.42s/batch, batch_loss=5.39, bat

Epoch 4/10:  69%|▋| 688/991 [2:58:28<1:17:51, 15.42s/batch, batch_loss=5.81, bat

Epoch 4/10:  70%|▋| 689/991 [2:58:28<1:17:57, 15.49s/batch, batch_loss=5.81, bat

Epoch 4/10:  70%|▋| 689/991 [2:58:44<1:17:57, 15.49s/batch, batch_loss=12.6, bat

Epoch 4/10:  70%|▋| 690/991 [2:58:44<1:17:20, 15.42s/batch, batch_loss=12.6, bat

Epoch 4/10:  70%|▋| 690/991 [2:58:58<1:17:20, 15.42s/batch, batch_loss=17, batch

Epoch 4/10:  70%|▋| 691/991 [2:58:58<1:16:06, 15.22s/batch, batch_loss=17, batch

Epoch 4/10:  70%|▋| 691/991 [2:59:13<1:16:06, 15.22s/batch, batch_loss=5.73, bat

Epoch 4/10:  70%|▋| 692/991 [2:59:13<1:15:43, 15.19s/batch, batch_loss=5.73, bat

Epoch 4/10:  70%|▋| 692/991 [2:59:28<1:15:43, 15.19s/batch, batch_loss=4.65e+3, 

Epoch 4/10:  70%|▋| 693/991 [2:59:28<1:14:22, 14.97s/batch, batch_loss=4.65e+3, 

Epoch 4/10:  70%|▋| 693/991 [2:59:43<1:14:22, 14.97s/batch, batch_loss=453, batc

Epoch 4/10:  70%|▋| 694/991 [2:59:43<1:15:01, 15.16s/batch, batch_loss=453, batc

Epoch 4/10:  70%|▋| 694/991 [2:59:59<1:15:01, 15.16s/batch, batch_loss=775, batc

Epoch 4/10:  70%|▋| 695/991 [2:59:59<1:15:10, 15.24s/batch, batch_loss=775, batc

Epoch 4/10:  70%|▋| 695/991 [3:00:14<1:15:10, 15.24s/batch, batch_loss=9.95, bat

Epoch 4/10:  70%|▋| 696/991 [3:00:14<1:14:41, 15.19s/batch, batch_loss=9.95, bat

Epoch 4/10:  70%|▋| 696/991 [3:00:30<1:14:41, 15.19s/batch, batch_loss=6.8e+3, b

Epoch 4/10:  70%|▋| 697/991 [3:00:30<1:15:27, 15.40s/batch, batch_loss=6.8e+3, b

Epoch 4/10:  70%|▋| 697/991 [3:00:45<1:15:27, 15.40s/batch, batch_loss=10.8, bat

Epoch 4/10:  70%|▋| 698/991 [3:00:45<1:15:11, 15.40s/batch, batch_loss=10.8, bat

Epoch 4/10:  70%|▋| 698/991 [3:01:02<1:15:11, 15.40s/batch, batch_loss=7.77, bat

Epoch 4/10:  71%|▋| 699/991 [3:01:02<1:17:25, 15.91s/batch, batch_loss=7.77, bat

Epoch 4/10:  71%|▋| 699/991 [3:01:18<1:17:25, 15.91s/batch, batch_loss=9.5, batc

Epoch 4/10:  71%|▋| 700/991 [3:01:18<1:16:34, 15.79s/batch, batch_loss=9.5, batc

Epoch 4/10:  71%|▋| 700/991 [3:01:33<1:16:34, 15.79s/batch, batch_loss=213, batc

Epoch 4/10:  71%|▋| 701/991 [3:01:33<1:15:56, 15.71s/batch, batch_loss=213, batc

Epoch 4/10:  71%|▋| 701/991 [3:01:48<1:15:56, 15.71s/batch, batch_loss=16, batch

Epoch 4/10:  71%|▋| 702/991 [3:01:48<1:14:17, 15.42s/batch, batch_loss=16, batch

Epoch 4/10:  71%|▋| 702/991 [3:02:03<1:14:17, 15.42s/batch, batch_loss=273, batc

Epoch 4/10:  71%|▋| 703/991 [3:02:03<1:13:37, 15.34s/batch, batch_loss=273, batc

Epoch 4/10:  71%|▋| 703/991 [3:02:18<1:13:37, 15.34s/batch, batch_loss=7.5, batc

Epoch 4/10:  71%|▋| 704/991 [3:02:18<1:12:23, 15.13s/batch, batch_loss=7.5, batc

Epoch 4/10:  71%|▋| 704/991 [3:02:32<1:12:23, 15.13s/batch, batch_loss=9.85, bat

Epoch 4/10:  71%|▋| 705/991 [3:02:32<1:11:14, 14.95s/batch, batch_loss=9.85, bat

Epoch 4/10:  71%|▋| 705/991 [3:02:48<1:11:14, 14.95s/batch, batch_loss=15.7, bat

Epoch 4/10:  71%|▋| 706/991 [3:02:48<1:11:30, 15.05s/batch, batch_loss=15.7, bat

Epoch 4/10:  71%|▋| 706/991 [3:03:03<1:11:30, 15.05s/batch, batch_loss=13.1, bat

Epoch 4/10:  71%|▋| 707/991 [3:03:03<1:11:40, 15.14s/batch, batch_loss=13.1, bat

Epoch 4/10:  71%|▋| 707/991 [3:03:18<1:11:40, 15.14s/batch, batch_loss=8.82, bat

Epoch 4/10:  71%|▋| 708/991 [3:03:18<1:11:26, 15.15s/batch, batch_loss=8.82, bat

Epoch 4/10:  71%|▋| 708/991 [3:03:33<1:11:26, 15.15s/batch, batch_loss=7.01, bat

Epoch 4/10:  72%|▋| 709/991 [3:03:33<1:10:45, 15.06s/batch, batch_loss=7.01, bat

Epoch 4/10:  72%|▋| 709/991 [3:03:48<1:10:45, 15.06s/batch, batch_loss=30.3, bat

Epoch 4/10:  72%|▋| 710/991 [3:03:48<1:10:48, 15.12s/batch, batch_loss=30.3, bat

Epoch 4/10:  72%|▋| 710/991 [3:04:04<1:10:48, 15.12s/batch, batch_loss=98.1, bat

Epoch 4/10:  72%|▋| 711/991 [3:04:04<1:10:47, 15.17s/batch, batch_loss=98.1, bat

Epoch 4/10:  72%|▋| 711/991 [3:04:20<1:10:47, 15.17s/batch, batch_loss=12.9, bat

Epoch 4/10:  72%|▋| 712/991 [3:04:20<1:11:39, 15.41s/batch, batch_loss=12.9, bat

Epoch 4/10:  72%|▋| 712/991 [3:04:35<1:11:39, 15.41s/batch, batch_loss=74.3, bat

Epoch 4/10:  72%|▋| 713/991 [3:04:35<1:11:59, 15.54s/batch, batch_loss=74.3, bat

Epoch 4/10:  72%|▋| 713/991 [3:04:55<1:11:59, 15.54s/batch, batch_loss=21.4, bat

Epoch 4/10:  72%|▋| 714/991 [3:04:55<1:17:23, 16.76s/batch, batch_loss=21.4, bat

Epoch 4/10:  72%|▋| 714/991 [3:05:12<1:17:23, 16.76s/batch, batch_loss=15, batch

Epoch 4/10:  72%|▋| 715/991 [3:05:12<1:16:56, 16.73s/batch, batch_loss=15, batch

Epoch 4/10:  72%|▋| 715/991 [3:05:28<1:16:56, 16.73s/batch, batch_loss=15.1, bat

Epoch 4/10:  72%|▋| 716/991 [3:05:28<1:16:12, 16.63s/batch, batch_loss=15.1, bat

Epoch 4/10:  72%|▋| 716/991 [3:05:45<1:16:12, 16.63s/batch, batch_loss=14.8, bat

Epoch 4/10:  72%|▋| 717/991 [3:05:45<1:16:43, 16.80s/batch, batch_loss=14.8, bat

Epoch 4/10:  72%|▋| 717/991 [3:06:02<1:16:43, 16.80s/batch, batch_loss=21.1, bat

Epoch 4/10:  72%|▋| 718/991 [3:06:02<1:16:16, 16.76s/batch, batch_loss=21.1, bat

Epoch 4/10:  72%|▋| 718/991 [3:06:19<1:16:16, 16.76s/batch, batch_loss=11.5, bat

Epoch 4/10:  73%|▋| 719/991 [3:06:19<1:15:57, 16.76s/batch, batch_loss=11.5, bat

Epoch 4/10:  73%|▋| 719/991 [3:06:34<1:15:57, 16.76s/batch, batch_loss=12.3, bat

Epoch 4/10:  73%|▋| 720/991 [3:06:34<1:13:30, 16.27s/batch, batch_loss=12.3, bat

Epoch 4/10:  73%|▋| 720/991 [3:06:52<1:13:30, 16.27s/batch, batch_loss=16, batch

Epoch 4/10:  73%|▋| 721/991 [3:06:52<1:16:08, 16.92s/batch, batch_loss=16, batch

Epoch 4/10:  73%|▋| 721/991 [3:07:08<1:16:08, 16.92s/batch, batch_loss=19.7, bat

Epoch 4/10:  73%|▋| 722/991 [3:07:08<1:14:39, 16.65s/batch, batch_loss=19.7, bat

Epoch 4/10:  73%|▋| 722/991 [3:07:24<1:14:39, 16.65s/batch, batch_loss=7.22e+3, 

Epoch 4/10:  73%|▋| 723/991 [3:07:24<1:13:32, 16.47s/batch, batch_loss=7.22e+3, 

Epoch 4/10:  73%|▋| 723/991 [3:07:40<1:13:32, 16.47s/batch, batch_loss=4.46, bat

Epoch 4/10:  73%|▋| 724/991 [3:07:40<1:12:15, 16.24s/batch, batch_loss=4.46, bat

Epoch 4/10:  73%|▋| 724/991 [3:07:56<1:12:15, 16.24s/batch, batch_loss=14.1, bat

Epoch 4/10:  73%|▋| 725/991 [3:07:56<1:11:15, 16.07s/batch, batch_loss=14.1, bat

Epoch 4/10:  73%|▋| 725/991 [3:08:11<1:11:15, 16.07s/batch, batch_loss=10.5, bat

Epoch 4/10:  73%|▋| 726/991 [3:08:11<1:10:28, 15.96s/batch, batch_loss=10.5, bat

Epoch 4/10:  73%|▋| 726/991 [3:08:30<1:10:28, 15.96s/batch, batch_loss=1.29e+4, 

Epoch 4/10:  73%|▋| 727/991 [3:08:30<1:13:47, 16.77s/batch, batch_loss=1.29e+4, 

Epoch 4/10:  73%|▋| 727/991 [3:08:45<1:13:47, 16.77s/batch, batch_loss=12.1, bat

Epoch 4/10:  73%|▋| 728/991 [3:08:45<1:11:26, 16.30s/batch, batch_loss=12.1, bat

Epoch 4/10:  73%|▋| 728/991 [3:09:01<1:11:26, 16.30s/batch, batch_loss=132, batc

Epoch 4/10:  74%|▋| 729/991 [3:09:01<1:09:57, 16.02s/batch, batch_loss=132, batc

Epoch 4/10:  74%|▋| 729/991 [3:09:16<1:09:57, 16.02s/batch, batch_loss=13.2, bat

Epoch 4/10:  74%|▋| 730/991 [3:09:16<1:09:00, 15.86s/batch, batch_loss=13.2, bat

Epoch 4/10:  74%|▋| 730/991 [3:09:31<1:09:00, 15.86s/batch, batch_loss=110, batc

Epoch 4/10:  74%|▋| 731/991 [3:09:31<1:07:18, 15.53s/batch, batch_loss=110, batc

Epoch 4/10:  74%|▋| 731/991 [3:09:46<1:07:18, 15.53s/batch, batch_loss=1.39e+4, 

Epoch 4/10:  74%|▋| 732/991 [3:09:46<1:06:38, 15.44s/batch, batch_loss=1.39e+4, 

Epoch 4/10:  74%|▋| 732/991 [3:10:02<1:06:38, 15.44s/batch, batch_loss=31.7, bat

Epoch 4/10:  74%|▋| 733/991 [3:10:02<1:06:54, 15.56s/batch, batch_loss=31.7, bat

Epoch 4/10:  74%|▋| 733/991 [3:10:17<1:06:54, 15.56s/batch, batch_loss=6.82e+3, 

Epoch 4/10:  74%|▋| 734/991 [3:10:17<1:06:06, 15.44s/batch, batch_loss=6.82e+3, 

Epoch 4/10:  74%|▋| 734/991 [3:10:33<1:06:06, 15.44s/batch, batch_loss=38.9, bat

Epoch 4/10:  74%|▋| 735/991 [3:10:33<1:06:19, 15.54s/batch, batch_loss=38.9, bat

Epoch 4/10:  74%|▋| 735/991 [3:10:49<1:06:19, 15.54s/batch, batch_loss=31.6, bat

Epoch 4/10:  74%|▋| 736/991 [3:10:49<1:06:46, 15.71s/batch, batch_loss=31.6, bat

Epoch 4/10:  74%|▋| 736/991 [3:11:04<1:06:46, 15.71s/batch, batch_loss=14.5, bat

Epoch 4/10:  74%|▋| 737/991 [3:11:04<1:06:06, 15.61s/batch, batch_loss=14.5, bat

Epoch 4/10:  74%|▋| 737/991 [3:11:20<1:06:06, 15.61s/batch, batch_loss=1.5e+3, b

Epoch 4/10:  74%|▋| 738/991 [3:11:20<1:05:23, 15.51s/batch, batch_loss=1.5e+3, b

Epoch 4/10:  74%|▋| 738/991 [3:11:35<1:05:23, 15.51s/batch, batch_loss=68.4, bat

Epoch 4/10:  75%|▋| 739/991 [3:11:35<1:04:37, 15.39s/batch, batch_loss=68.4, bat

Epoch 4/10:  75%|▋| 739/991 [3:11:51<1:04:37, 15.39s/batch, batch_loss=11.9, bat

Epoch 4/10:  75%|▋| 740/991 [3:11:51<1:05:15, 15.60s/batch, batch_loss=11.9, bat

Epoch 4/10:  75%|▋| 740/991 [3:12:06<1:05:15, 15.60s/batch, batch_loss=1.82e+4, 

Epoch 4/10:  75%|▋| 741/991 [3:12:06<1:04:39, 15.52s/batch, batch_loss=1.82e+4, 

Epoch 4/10:  75%|▋| 741/991 [3:12:22<1:04:39, 15.52s/batch, batch_loss=2.26e+3, 

Epoch 4/10:  75%|▋| 742/991 [3:12:22<1:04:40, 15.58s/batch, batch_loss=2.26e+3, 

Epoch 4/10:  75%|▋| 742/991 [3:12:38<1:04:40, 15.58s/batch, batch_loss=13.7, bat

Epoch 4/10:  75%|▋| 743/991 [3:12:38<1:05:35, 15.87s/batch, batch_loss=13.7, bat

Epoch 4/10:  75%|▋| 743/991 [3:12:55<1:05:35, 15.87s/batch, batch_loss=22.9, bat

Epoch 4/10:  75%|▊| 744/991 [3:12:55<1:06:25, 16.14s/batch, batch_loss=22.9, bat

Epoch 4/10:  75%|▊| 744/991 [3:13:12<1:06:25, 16.14s/batch, batch_loss=23.9, bat

Epoch 4/10:  75%|▊| 745/991 [3:13:12<1:06:57, 16.33s/batch, batch_loss=23.9, bat

Epoch 4/10:  75%|▊| 745/991 [3:13:28<1:06:57, 16.33s/batch, batch_loss=1.16e+3, 

Epoch 4/10:  75%|▊| 746/991 [3:13:28<1:06:27, 16.27s/batch, batch_loss=1.16e+3, 

Epoch 4/10:  75%|▊| 746/991 [3:13:45<1:06:27, 16.27s/batch, batch_loss=3.9e+3, b

Epoch 4/10:  75%|▊| 747/991 [3:13:45<1:06:46, 16.42s/batch, batch_loss=3.9e+3, b

Epoch 4/10:  75%|▊| 747/991 [3:14:02<1:06:46, 16.42s/batch, batch_loss=12.5, bat

Epoch 4/10:  75%|▊| 748/991 [3:14:02<1:06:40, 16.46s/batch, batch_loss=12.5, bat

Epoch 4/10:  75%|▊| 748/991 [3:14:18<1:06:40, 16.46s/batch, batch_loss=15.2, bat

Epoch 4/10:  76%|▊| 749/991 [3:14:18<1:05:50, 16.32s/batch, batch_loss=15.2, bat

Epoch 4/10:  76%|▊| 749/991 [3:14:33<1:05:50, 16.32s/batch, batch_loss=17.7, bat

Epoch 4/10:  76%|▊| 750/991 [3:14:33<1:04:50, 16.14s/batch, batch_loss=17.7, bat

Epoch 4/10:  76%|▊| 750/991 [3:14:49<1:04:50, 16.14s/batch, batch_loss=14.5, bat

Epoch 4/10:  76%|▊| 751/991 [3:14:49<1:04:21, 16.09s/batch, batch_loss=14.5, bat

Epoch 4/10:  76%|▊| 751/991 [3:15:05<1:04:21, 16.09s/batch, batch_loss=8.52, bat

Epoch 4/10:  76%|▊| 752/991 [3:15:05<1:04:08, 16.10s/batch, batch_loss=8.52, bat

Epoch 4/10:  76%|▊| 752/991 [3:15:21<1:04:08, 16.10s/batch, batch_loss=9.43, bat

Epoch 4/10:  76%|▊| 753/991 [3:15:21<1:03:56, 16.12s/batch, batch_loss=9.43, bat

Epoch 4/10:  76%|▊| 753/991 [3:15:38<1:03:56, 16.12s/batch, batch_loss=6.08, bat

Epoch 4/10:  76%|▊| 754/991 [3:15:38<1:03:58, 16.19s/batch, batch_loss=6.08, bat

Epoch 4/10:  76%|▊| 754/991 [3:15:54<1:03:58, 16.19s/batch, batch_loss=28.1, bat

Epoch 4/10:  76%|▊| 755/991 [3:15:54<1:03:23, 16.12s/batch, batch_loss=28.1, bat

Epoch 4/10:  76%|▊| 755/991 [3:16:10<1:03:23, 16.12s/batch, batch_loss=25.6, bat

Epoch 4/10:  76%|▊| 756/991 [3:16:10<1:03:03, 16.10s/batch, batch_loss=25.6, bat

Epoch 4/10:  76%|▊| 756/991 [3:16:26<1:03:03, 16.10s/batch, batch_loss=4.08, bat

Epoch 4/10:  76%|▊| 757/991 [3:16:26<1:02:45, 16.09s/batch, batch_loss=4.08, bat

Epoch 4/10:  76%|▊| 757/991 [3:16:41<1:02:45, 16.09s/batch, batch_loss=14.6, bat

Epoch 4/10:  76%|▊| 758/991 [3:16:41<1:01:21, 15.80s/batch, batch_loss=14.6, bat

Epoch 4/10:  76%|▊| 758/991 [3:16:56<1:01:21, 15.80s/batch, batch_loss=13.9, bat

Epoch 4/10:  77%|▊| 759/991 [3:16:56<59:52, 15.48s/batch, batch_loss=13.9, batch

Epoch 4/10:  77%|▊| 759/991 [3:17:11<59:52, 15.48s/batch, batch_loss=34.4, batch

Epoch 4/10:  77%|▊| 760/991 [3:17:11<59:19, 15.41s/batch, batch_loss=34.4, batch

Epoch 4/10:  77%|▊| 760/991 [3:17:25<59:19, 15.41s/batch, batch_loss=45.3, batch

Epoch 4/10:  77%|▊| 761/991 [3:17:25<57:35, 15.02s/batch, batch_loss=45.3, batch

Epoch 4/10:  77%|▊| 761/991 [3:17:40<57:35, 15.02s/batch, batch_loss=36.9, batch

Epoch 4/10:  77%|▊| 762/991 [3:17:40<57:24, 15.04s/batch, batch_loss=36.9, batch

Epoch 4/10:  77%|▊| 762/991 [3:17:55<57:24, 15.04s/batch, batch_loss=539, batch_

Epoch 4/10:  77%|▊| 763/991 [3:17:55<57:03, 15.01s/batch, batch_loss=539, batch_

Epoch 4/10:  77%|▊| 763/991 [3:18:10<57:03, 15.01s/batch, batch_loss=12.9, batch

Epoch 4/10:  77%|▊| 764/991 [3:18:10<56:56, 15.05s/batch, batch_loss=12.9, batch

Epoch 4/10:  77%|▊| 764/991 [3:18:25<56:56, 15.05s/batch, batch_loss=3.22, batch

Epoch 4/10:  77%|▊| 765/991 [3:18:25<56:39, 15.04s/batch, batch_loss=3.22, batch

Epoch 4/10:  77%|▊| 765/991 [3:18:44<56:39, 15.04s/batch, batch_loss=11.2, batch

Epoch 4/10:  77%|▊| 766/991 [3:18:44<1:00:26, 16.12s/batch, batch_loss=11.2, bat

Epoch 4/10:  77%|▊| 766/991 [3:19:00<1:00:26, 16.12s/batch, batch_loss=14.8, bat

Epoch 4/10:  77%|▊| 767/991 [3:19:00<59:44, 16.00s/batch, batch_loss=14.8, batch

Epoch 4/10:  77%|▊| 767/991 [3:19:15<59:44, 16.00s/batch, batch_loss=3.94, batch

Epoch 4/10:  77%|▊| 768/991 [3:19:15<59:12, 15.93s/batch, batch_loss=3.94, batch

Epoch 4/10:  77%|▊| 768/991 [3:19:32<59:12, 15.93s/batch, batch_loss=2.02, batch

Epoch 4/10:  78%|▊| 769/991 [3:19:32<59:07, 15.98s/batch, batch_loss=2.02, batch

Epoch 4/10:  78%|▊| 769/991 [3:19:48<59:07, 15.98s/batch, batch_loss=19.8, batch

Epoch 4/10:  78%|▊| 770/991 [3:19:48<59:39, 16.20s/batch, batch_loss=19.8, batch

Epoch 4/10:  78%|▊| 770/991 [3:20:02<59:39, 16.20s/batch, batch_loss=2.76e+3, ba

Epoch 4/10:  78%|▊| 771/991 [3:20:02<57:03, 15.56s/batch, batch_loss=2.76e+3, ba

Epoch 4/10:  78%|▊| 771/991 [3:20:17<57:03, 15.56s/batch, batch_loss=6.89, batch

Epoch 4/10:  78%|▊| 772/991 [3:20:17<55:33, 15.22s/batch, batch_loss=6.89, batch

Epoch 4/10:  78%|▊| 772/991 [3:20:32<55:33, 15.22s/batch, batch_loss=1.41, batch

Epoch 4/10:  78%|▊| 773/991 [3:20:32<55:26, 15.26s/batch, batch_loss=1.41, batch

Epoch 4/10:  78%|▊| 773/991 [3:20:52<55:26, 15.26s/batch, batch_loss=14.5, batch

Epoch 4/10:  78%|▊| 774/991 [3:20:52<1:00:36, 16.76s/batch, batch_loss=14.5, bat

Epoch 4/10:  78%|▊| 774/991 [3:21:09<1:00:36, 16.76s/batch, batch_loss=9.3, batc

Epoch 4/10:  78%|▊| 775/991 [3:21:09<59:43, 16.59s/batch, batch_loss=9.3, batch_

Epoch 4/10:  78%|▊| 775/991 [3:21:25<59:43, 16.59s/batch, batch_loss=265, batch_

Epoch 4/10:  78%|▊| 776/991 [3:21:25<59:45, 16.68s/batch, batch_loss=265, batch_

Epoch 4/10:  78%|▊| 776/991 [3:21:42<59:45, 16.68s/batch, batch_loss=0.565, batc

Epoch 4/10:  78%|▊| 777/991 [3:21:42<59:42, 16.74s/batch, batch_loss=0.565, batc

Epoch 4/10:  78%|▊| 777/991 [3:21:58<59:42, 16.74s/batch, batch_loss=0.973, batc

Epoch 4/10:  79%|▊| 778/991 [3:21:58<58:23, 16.45s/batch, batch_loss=0.973, batc

Epoch 4/10:  79%|▊| 778/991 [3:22:15<58:23, 16.45s/batch, batch_loss=9.09, batch

Epoch 4/10:  79%|▊| 779/991 [3:22:15<58:43, 16.62s/batch, batch_loss=9.09, batch

Epoch 4/10:  79%|▊| 779/991 [3:22:32<58:43, 16.62s/batch, batch_loss=2.54, batch

Epoch 4/10:  79%|▊| 780/991 [3:22:32<58:28, 16.63s/batch, batch_loss=2.54, batch

Epoch 4/10:  79%|▊| 780/991 [3:22:51<58:28, 16.63s/batch, batch_loss=3.99, batch

Epoch 4/10:  79%|▊| 781/991 [3:22:51<1:00:51, 17.39s/batch, batch_loss=3.99, bat

Epoch 4/10:  79%|▊| 781/991 [3:23:08<1:00:51, 17.39s/batch, batch_loss=2.5e+4, b

Epoch 4/10:  79%|▊| 782/991 [3:23:08<1:00:03, 17.24s/batch, batch_loss=2.5e+4, b

Epoch 4/10:  79%|▊| 782/991 [3:23:25<1:00:03, 17.24s/batch, batch_loss=31.1, bat

Epoch 4/10:  79%|▊| 783/991 [3:23:25<59:16, 17.10s/batch, batch_loss=31.1, batch

Epoch 4/10:  79%|▊| 783/991 [3:23:41<59:16, 17.10s/batch, batch_loss=13.7, batch

Epoch 4/10:  79%|▊| 784/991 [3:23:41<58:25, 16.94s/batch, batch_loss=13.7, batch

Epoch 4/10:  79%|▊| 784/991 [3:23:59<58:25, 16.94s/batch, batch_loss=14.1, batch

Epoch 4/10:  79%|▊| 785/991 [3:23:59<58:51, 17.14s/batch, batch_loss=14.1, batch

Epoch 4/10:  79%|▊| 785/991 [3:24:16<58:51, 17.14s/batch, batch_loss=12, batch_i

Epoch 4/10:  79%|▊| 786/991 [3:24:16<58:36, 17.15s/batch, batch_loss=12, batch_i

Epoch 4/10:  79%|▊| 786/991 [3:24:32<58:36, 17.15s/batch, batch_loss=2.48e+4, ba

Epoch 4/10:  79%|▊| 787/991 [3:24:32<57:28, 16.90s/batch, batch_loss=2.48e+4, ba

Epoch 4/10:  79%|▊| 787/991 [3:24:52<57:28, 16.90s/batch, batch_loss=691, batch_

Epoch 4/10:  80%|▊| 788/991 [3:24:52<59:54, 17.71s/batch, batch_loss=691, batch_

Epoch 4/10:  80%|▊| 788/991 [3:25:07<59:54, 17.71s/batch, batch_loss=33.6, batch

Epoch 4/10:  80%|▊| 789/991 [3:25:07<57:19, 17.03s/batch, batch_loss=33.6, batch

Epoch 4/10:  80%|▊| 789/991 [3:25:23<57:19, 17.03s/batch, batch_loss=24.4, batch

Epoch 4/10:  80%|▊| 790/991 [3:25:23<55:16, 16.50s/batch, batch_loss=24.4, batch

Epoch 4/10:  80%|▊| 790/991 [3:25:39<55:16, 16.50s/batch, batch_loss=19.1, batch

Epoch 4/10:  80%|▊| 791/991 [3:25:39<55:00, 16.50s/batch, batch_loss=19.1, batch

Epoch 4/10:  80%|▊| 791/991 [3:25:55<55:00, 16.50s/batch, batch_loss=1.04e+4, ba

Epoch 4/10:  80%|▊| 792/991 [3:25:55<54:20, 16.39s/batch, batch_loss=1.04e+4, ba

Epoch 4/10:  80%|▊| 792/991 [3:26:11<54:20, 16.39s/batch, batch_loss=11.3, batch

Epoch 4/10:  80%|▊| 793/991 [3:26:11<53:35, 16.24s/batch, batch_loss=11.3, batch

Epoch 4/10:  80%|▊| 793/991 [3:26:27<53:35, 16.24s/batch, batch_loss=2.65, batch

Epoch 4/10:  80%|▊| 794/991 [3:26:27<52:44, 16.06s/batch, batch_loss=2.65, batch

Epoch 4/10:  80%|▊| 794/991 [3:26:42<52:44, 16.06s/batch, batch_loss=9.38, batch

Epoch 4/10:  80%|▊| 795/991 [3:26:42<51:55, 15.89s/batch, batch_loss=9.38, batch

Epoch 4/10:  80%|▊| 795/991 [3:26:58<51:55, 15.89s/batch, batch_loss=24.1, batch

Epoch 4/10:  80%|▊| 796/991 [3:26:58<51:39, 15.89s/batch, batch_loss=24.1, batch

Epoch 4/10:  80%|▊| 796/991 [3:27:17<51:39, 15.89s/batch, batch_loss=39.8, batch

Epoch 4/10:  80%|▊| 797/991 [3:27:17<54:27, 16.84s/batch, batch_loss=39.8, batch

Epoch 4/10:  80%|▊| 797/991 [3:27:32<54:27, 16.84s/batch, batch_loss=350, batch_

Epoch 4/10:  81%|▊| 798/991 [3:27:32<52:31, 16.33s/batch, batch_loss=350, batch_

Epoch 4/10:  81%|▊| 798/991 [3:27:47<52:31, 16.33s/batch, batch_loss=13.8, batch

Epoch 4/10:  81%|▊| 799/991 [3:27:47<50:43, 15.85s/batch, batch_loss=13.8, batch

Epoch 4/10:  81%|▊| 799/991 [3:28:03<50:43, 15.85s/batch, batch_loss=39.1, batch

Epoch 4/10:  81%|▊| 800/991 [3:28:03<50:12, 15.77s/batch, batch_loss=39.1, batch

Epoch 4/10:  81%|▊| 800/991 [3:28:19<50:12, 15.77s/batch, batch_loss=14.6, batch

Epoch 4/10:  81%|▊| 801/991 [3:28:19<50:06, 15.82s/batch, batch_loss=14.6, batch

Epoch 4/10:  81%|▊| 801/991 [3:28:35<50:06, 15.82s/batch, batch_loss=19.4, batch

Epoch 4/10:  81%|▊| 802/991 [3:28:35<50:14, 15.95s/batch, batch_loss=19.4, batch

Epoch 4/10:  81%|▊| 802/991 [3:28:53<50:14, 15.95s/batch, batch_loss=6.91, batch

Epoch 4/10:  81%|▊| 803/991 [3:28:53<52:18, 16.69s/batch, batch_loss=6.91, batch

Epoch 4/10:  81%|▊| 803/991 [3:29:09<52:18, 16.69s/batch, batch_loss=21.5, batch

Epoch 4/10:  81%|▊| 804/991 [3:29:09<51:24, 16.49s/batch, batch_loss=21.5, batch

Epoch 4/10:  81%|▊| 804/991 [3:29:25<51:24, 16.49s/batch, batch_loss=8.16, batch

Epoch 4/10:  81%|▊| 805/991 [3:29:25<50:40, 16.35s/batch, batch_loss=8.16, batch

Epoch 4/10:  81%|▊| 805/991 [3:29:42<50:40, 16.35s/batch, batch_loss=13.3, batch

Epoch 4/10:  81%|▊| 806/991 [3:29:42<50:26, 16.36s/batch, batch_loss=13.3, batch

Epoch 4/10:  81%|▊| 806/991 [3:29:58<50:26, 16.36s/batch, batch_loss=11.3, batch

Epoch 4/10:  81%|▊| 807/991 [3:29:58<50:05, 16.33s/batch, batch_loss=11.3, batch

Epoch 4/10:  81%|▊| 807/991 [3:30:13<50:05, 16.33s/batch, batch_loss=26.5, batch

Epoch 4/10:  82%|▊| 808/991 [3:30:13<48:57, 16.05s/batch, batch_loss=26.5, batch

Epoch 4/10:  82%|▊| 808/991 [3:30:29<48:57, 16.05s/batch, batch_loss=1.2e+4, bat

Epoch 4/10:  82%|▊| 809/991 [3:30:29<48:05, 15.85s/batch, batch_loss=1.2e+4, bat

Epoch 4/10:  82%|▊| 809/991 [3:30:45<48:05, 15.85s/batch, batch_loss=21.8, batch

Epoch 4/10:  82%|▊| 810/991 [3:30:45<48:08, 15.96s/batch, batch_loss=21.8, batch

Epoch 4/10:  82%|▊| 810/991 [3:31:01<48:08, 15.96s/batch, batch_loss=11.3, batch

Epoch 4/10:  82%|▊| 811/991 [3:31:01<47:46, 15.92s/batch, batch_loss=11.3, batch

Epoch 4/10:  82%|▊| 811/991 [3:31:16<47:46, 15.92s/batch, batch_loss=8.25, batch

Epoch 4/10:  82%|▊| 812/991 [3:31:16<47:04, 15.78s/batch, batch_loss=8.25, batch

Epoch 4/10:  82%|▊| 812/991 [3:31:32<47:04, 15.78s/batch, batch_loss=11, batch_i

Epoch 4/10:  82%|▊| 813/991 [3:31:32<46:56, 15.82s/batch, batch_loss=11, batch_i

Epoch 4/10:  82%|▊| 813/991 [3:31:51<46:56, 15.82s/batch, batch_loss=16.8, batch

Epoch 4/10:  82%|▊| 814/991 [3:31:51<49:23, 16.75s/batch, batch_loss=16.8, batch

Epoch 4/10:  82%|▊| 814/991 [3:32:06<49:23, 16.75s/batch, batch_loss=10.7, batch

Epoch 4/10:  82%|▊| 815/991 [3:32:06<47:46, 16.29s/batch, batch_loss=10.7, batch

Epoch 4/10:  82%|▊| 815/991 [3:32:22<47:46, 16.29s/batch, batch_loss=94, batch_i

Epoch 4/10:  82%|▊| 816/991 [3:32:22<46:49, 16.05s/batch, batch_loss=94, batch_i

Epoch 4/10:  82%|▊| 816/991 [3:32:37<46:49, 16.05s/batch, batch_loss=372, batch_

Epoch 4/10:  82%|▊| 817/991 [3:32:37<45:56, 15.84s/batch, batch_loss=372, batch_

Epoch 4/10:  82%|▊| 817/991 [3:32:52<45:56, 15.84s/batch, batch_loss=384, batch_

Epoch 4/10:  83%|▊| 818/991 [3:32:52<45:16, 15.70s/batch, batch_loss=384, batch_

Epoch 4/10:  83%|▊| 818/991 [3:33:10<45:16, 15.70s/batch, batch_loss=14, batch_i

Epoch 4/10:  83%|▊| 819/991 [3:33:10<46:38, 16.27s/batch, batch_loss=14, batch_i

Epoch 4/10:  83%|▊| 819/991 [3:33:25<46:38, 16.27s/batch, batch_loss=9.07, batch

Epoch 4/10:  83%|▊| 820/991 [3:33:25<45:19, 15.91s/batch, batch_loss=9.07, batch

Epoch 4/10:  83%|▊| 820/991 [3:33:42<45:19, 15.91s/batch, batch_loss=7.14, batch

Epoch 4/10:  83%|▊| 821/991 [3:33:42<45:43, 16.14s/batch, batch_loss=7.14, batch

Epoch 4/10:  83%|▊| 821/991 [3:33:58<45:43, 16.14s/batch, batch_loss=11.1, batch

Epoch 4/10:  83%|▊| 822/991 [3:33:58<45:11, 16.05s/batch, batch_loss=11.1, batch

Epoch 4/10:  83%|▊| 822/991 [3:34:13<45:11, 16.05s/batch, batch_loss=153, batch_

Epoch 4/10:  83%|▊| 823/991 [3:34:13<44:33, 15.91s/batch, batch_loss=153, batch_

Epoch 4/10:  83%|▊| 823/991 [3:34:28<44:33, 15.91s/batch, batch_loss=7.5, batch_

Epoch 4/10:  83%|▊| 824/991 [3:34:28<43:35, 15.66s/batch, batch_loss=7.5, batch_

Epoch 4/10:  83%|▊| 824/991 [3:34:44<43:35, 15.66s/batch, batch_loss=15.2, batch

Epoch 4/10:  83%|▊| 825/991 [3:34:44<43:02, 15.55s/batch, batch_loss=15.2, batch

Epoch 4/10:  83%|▊| 825/991 [3:34:58<43:02, 15.55s/batch, batch_loss=2.59e+3, ba

Epoch 4/10:  83%|▊| 826/991 [3:34:58<41:42, 15.16s/batch, batch_loss=2.59e+3, ba

Epoch 4/10:  83%|▊| 826/991 [3:35:16<41:42, 15.16s/batch, batch_loss=24, batch_i

Epoch 4/10:  83%|▊| 827/991 [3:35:16<43:52, 16.05s/batch, batch_loss=24, batch_i

Epoch 4/10:  83%|▊| 827/991 [3:35:31<43:52, 16.05s/batch, batch_loss=41, batch_i

Epoch 4/10:  84%|▊| 828/991 [3:35:31<43:03, 15.85s/batch, batch_loss=41, batch_i

Epoch 4/10:  84%|▊| 828/991 [3:35:47<43:03, 15.85s/batch, batch_loss=9.23, batch

Epoch 4/10:  84%|▊| 829/991 [3:35:47<42:41, 15.81s/batch, batch_loss=9.23, batch

Epoch 4/10:  84%|▊| 829/991 [3:36:02<42:41, 15.81s/batch, batch_loss=16.7, batch

Epoch 4/10:  84%|▊| 830/991 [3:36:02<41:53, 15.61s/batch, batch_loss=16.7, batch

Epoch 4/10:  84%|▊| 830/991 [3:36:17<41:53, 15.61s/batch, batch_loss=14.5, batch

Epoch 4/10:  84%|▊| 831/991 [3:36:17<40:37, 15.24s/batch, batch_loss=14.5, batch

Epoch 4/10:  84%|▊| 831/991 [3:36:32<40:37, 15.24s/batch, batch_loss=19.5, batch

Epoch 4/10:  84%|▊| 832/991 [3:36:32<40:44, 15.38s/batch, batch_loss=19.5, batch

Epoch 4/10:  84%|▊| 832/991 [3:36:48<40:44, 15.38s/batch, batch_loss=229, batch_

Epoch 4/10:  84%|▊| 833/991 [3:36:48<40:55, 15.54s/batch, batch_loss=229, batch_

Epoch 4/10:  84%|▊| 833/991 [3:37:07<40:55, 15.54s/batch, batch_loss=30.7, batch

Epoch 4/10:  84%|▊| 834/991 [3:37:07<43:07, 16.48s/batch, batch_loss=30.7, batch

Epoch 4/10:  84%|▊| 834/991 [3:37:23<43:07, 16.48s/batch, batch_loss=18.3, batch

Epoch 4/10:  84%|▊| 835/991 [3:37:23<42:53, 16.50s/batch, batch_loss=18.3, batch

Epoch 4/10:  84%|▊| 835/991 [3:37:40<42:53, 16.50s/batch, batch_loss=3.27e+3, ba

Epoch 4/10:  84%|▊| 836/991 [3:37:40<42:39, 16.51s/batch, batch_loss=3.27e+3, ba

Epoch 4/10:  84%|▊| 836/991 [3:37:56<42:39, 16.51s/batch, batch_loss=4.89e+3, ba

Epoch 4/10:  84%|▊| 837/991 [3:37:56<42:03, 16.38s/batch, batch_loss=4.89e+3, ba

Epoch 4/10:  84%|▊| 837/991 [3:38:12<42:03, 16.38s/batch, batch_loss=21.7, batch

Epoch 4/10:  85%|▊| 838/991 [3:38:12<41:15, 16.18s/batch, batch_loss=21.7, batch

Epoch 4/10:  85%|▊| 838/991 [3:38:28<41:15, 16.18s/batch, batch_loss=6.81, batch

Epoch 4/10:  85%|▊| 839/991 [3:38:28<40:53, 16.14s/batch, batch_loss=6.81, batch

Epoch 4/10:  85%|▊| 839/991 [3:38:42<40:53, 16.14s/batch, batch_loss=5.73, batch

Epoch 4/10:  85%|▊| 840/991 [3:38:42<39:18, 15.62s/batch, batch_loss=5.73, batch

Epoch 4/10:  85%|▊| 840/991 [3:38:58<39:18, 15.62s/batch, batch_loss=22.4, batch

Epoch 4/10:  85%|▊| 841/991 [3:38:58<38:49, 15.53s/batch, batch_loss=22.4, batch

Epoch 4/10:  85%|▊| 841/991 [3:39:13<38:49, 15.53s/batch, batch_loss=20.3, batch

Epoch 4/10:  85%|▊| 842/991 [3:39:13<38:14, 15.40s/batch, batch_loss=20.3, batch

Epoch 4/10:  85%|▊| 842/991 [3:39:29<38:14, 15.40s/batch, batch_loss=8.98, batch

Epoch 4/10:  85%|▊| 843/991 [3:39:29<38:26, 15.59s/batch, batch_loss=8.98, batch

Epoch 4/10:  85%|▊| 843/991 [3:39:45<38:26, 15.59s/batch, batch_loss=1.69e+3, ba

Epoch 4/10:  85%|▊| 844/991 [3:39:45<38:25, 15.69s/batch, batch_loss=1.69e+3, ba

Epoch 4/10:  85%|▊| 844/991 [3:40:01<38:25, 15.69s/batch, batch_loss=27.3, batch

Epoch 4/10:  85%|▊| 845/991 [3:40:01<38:56, 16.00s/batch, batch_loss=27.3, batch

Epoch 4/10:  85%|▊| 845/991 [3:40:18<38:56, 16.00s/batch, batch_loss=1.17e+4, ba

Epoch 4/10:  85%|▊| 846/991 [3:40:18<38:49, 16.06s/batch, batch_loss=1.17e+4, ba

Epoch 4/10:  85%|▊| 846/991 [3:40:34<38:49, 16.06s/batch, batch_loss=56.6, batch

Epoch 4/10:  85%|▊| 847/991 [3:40:34<38:31, 16.05s/batch, batch_loss=56.6, batch

Epoch 4/10:  85%|▊| 847/991 [3:40:48<38:31, 16.05s/batch, batch_loss=32.4, batch

Epoch 4/10:  86%|▊| 848/991 [3:40:48<37:07, 15.58s/batch, batch_loss=32.4, batch

Epoch 4/10:  86%|▊| 848/991 [3:41:06<37:07, 15.58s/batch, batch_loss=1e+3, batch

Epoch 4/10:  86%|▊| 849/991 [3:41:06<38:53, 16.44s/batch, batch_loss=1e+3, batch

Epoch 4/10:  86%|▊| 849/991 [3:41:21<38:53, 16.44s/batch, batch_loss=10.3, batch

Epoch 4/10:  86%|▊| 850/991 [3:41:21<37:36, 16.00s/batch, batch_loss=10.3, batch

Epoch 4/10:  86%|▊| 850/991 [3:41:37<37:36, 16.00s/batch, batch_loss=28.1, batch

Epoch 4/10:  86%|▊| 851/991 [3:41:37<37:19, 15.99s/batch, batch_loss=28.1, batch

Epoch 4/10:  86%|▊| 851/991 [3:41:52<37:19, 15.99s/batch, batch_loss=17.2, batch

Epoch 4/10:  86%|▊| 852/991 [3:41:52<36:08, 15.60s/batch, batch_loss=17.2, batch

Epoch 4/10:  86%|▊| 852/991 [3:42:07<36:08, 15.60s/batch, batch_loss=7.61e+3, ba

Epoch 4/10:  86%|▊| 853/991 [3:42:07<35:38, 15.50s/batch, batch_loss=7.61e+3, ba

Epoch 4/10:  86%|▊| 853/991 [3:42:23<35:38, 15.50s/batch, batch_loss=27.2, batch

Epoch 4/10:  86%|▊| 854/991 [3:42:23<35:16, 15.45s/batch, batch_loss=27.2, batch

Epoch 4/10:  86%|▊| 854/991 [3:42:39<35:16, 15.45s/batch, batch_loss=7.35, batch

Epoch 4/10:  86%|▊| 855/991 [3:42:39<35:28, 15.65s/batch, batch_loss=7.35, batch

Epoch 4/10:  86%|▊| 855/991 [3:42:57<35:28, 15.65s/batch, batch_loss=9.2, batch_

Epoch 4/10:  86%|▊| 856/991 [3:42:57<36:37, 16.28s/batch, batch_loss=9.2, batch_

Epoch 4/10:  86%|▊| 856/991 [3:43:12<36:37, 16.28s/batch, batch_loss=8.39, batch

Epoch 4/10:  86%|▊| 857/991 [3:43:12<35:43, 15.99s/batch, batch_loss=8.39, batch

Epoch 4/10:  86%|▊| 857/991 [3:43:27<35:43, 15.99s/batch, batch_loss=45.8, batch

Epoch 4/10:  87%|▊| 858/991 [3:43:27<34:47, 15.69s/batch, batch_loss=45.8, batch

Epoch 4/10:  87%|▊| 858/991 [3:43:42<34:47, 15.69s/batch, batch_loss=18.9, batch

Epoch 4/10:  87%|▊| 859/991 [3:43:42<34:11, 15.54s/batch, batch_loss=18.9, batch

Epoch 4/10:  87%|▊| 859/991 [3:43:57<34:11, 15.54s/batch, batch_loss=29.1, batch

Epoch 4/10:  87%|▊| 860/991 [3:43:57<33:34, 15.38s/batch, batch_loss=29.1, batch

Epoch 4/10:  87%|▊| 860/991 [3:44:12<33:34, 15.38s/batch, batch_loss=9.25, batch

Epoch 4/10:  87%|▊| 861/991 [3:44:12<33:16, 15.36s/batch, batch_loss=9.25, batch

Epoch 4/10:  87%|▊| 861/991 [3:44:28<33:16, 15.36s/batch, batch_loss=20.4, batch

Epoch 4/10:  87%|▊| 862/991 [3:44:28<33:27, 15.56s/batch, batch_loss=20.4, batch

Epoch 4/10:  87%|▊| 862/991 [3:44:44<33:27, 15.56s/batch, batch_loss=32.4, batch

Epoch 4/10:  87%|▊| 863/991 [3:44:44<33:24, 15.66s/batch, batch_loss=32.4, batch

Epoch 4/10:  87%|▊| 863/991 [3:45:03<33:24, 15.66s/batch, batch_loss=10.1, batch

Epoch 4/10:  87%|▊| 864/991 [3:45:03<34:47, 16.44s/batch, batch_loss=10.1, batch

Epoch 4/10:  87%|▊| 864/991 [3:45:18<34:47, 16.44s/batch, batch_loss=21.3, batch

Epoch 4/10:  87%|▊| 865/991 [3:45:18<33:49, 16.11s/batch, batch_loss=21.3, batch

Epoch 4/10:  87%|▊| 865/991 [3:45:34<33:49, 16.11s/batch, batch_loss=34.3, batch

Epoch 4/10:  87%|▊| 866/991 [3:45:34<33:23, 16.03s/batch, batch_loss=34.3, batch

Epoch 4/10:  87%|▊| 866/991 [3:45:50<33:23, 16.03s/batch, batch_loss=31.6, batch

Epoch 4/10:  87%|▊| 867/991 [3:45:50<33:01, 15.98s/batch, batch_loss=31.6, batch

Epoch 4/10:  87%|▊| 867/991 [3:46:05<33:01, 15.98s/batch, batch_loss=22.3, batch

Epoch 4/10:  88%|▉| 868/991 [3:46:05<32:23, 15.80s/batch, batch_loss=22.3, batch

Epoch 4/10:  88%|▉| 868/991 [3:46:21<32:23, 15.80s/batch, batch_loss=12.1, batch

Epoch 4/10:  88%|▉| 869/991 [3:46:21<32:21, 15.91s/batch, batch_loss=12.1, batch

Epoch 4/10:  88%|▉| 869/991 [3:46:37<32:21, 15.91s/batch, batch_loss=14.7, batch

Epoch 4/10:  88%|▉| 870/991 [3:46:37<32:04, 15.91s/batch, batch_loss=14.7, batch

Epoch 4/10:  88%|▉| 870/991 [3:46:53<32:04, 15.91s/batch, batch_loss=8.93, batch

Epoch 4/10:  88%|▉| 871/991 [3:46:53<31:55, 15.97s/batch, batch_loss=8.93, batch

Epoch 4/10:  88%|▉| 871/991 [3:47:09<31:55, 15.97s/batch, batch_loss=22, batch_i

Epoch 4/10:  88%|▉| 872/991 [3:47:09<31:24, 15.83s/batch, batch_loss=22, batch_i

Epoch 4/10:  88%|▉| 872/991 [3:47:25<31:24, 15.83s/batch, batch_loss=19.3, batch

Epoch 4/10:  88%|▉| 873/991 [3:47:25<31:24, 15.97s/batch, batch_loss=19.3, batch

Epoch 4/10:  88%|▉| 873/991 [3:47:41<31:24, 15.97s/batch, batch_loss=8.05, batch

Epoch 4/10:  88%|▉| 874/991 [3:47:41<31:12, 16.01s/batch, batch_loss=8.05, batch

Epoch 4/10:  88%|▉| 874/991 [3:47:57<31:12, 16.01s/batch, batch_loss=12.9, batch

Epoch 4/10:  88%|▉| 875/991 [3:47:57<31:03, 16.07s/batch, batch_loss=12.9, batch

Epoch 4/10:  88%|▉| 875/991 [3:48:12<31:03, 16.07s/batch, batch_loss=41.1, batch

Epoch 4/10:  88%|▉| 876/991 [3:48:12<30:11, 15.75s/batch, batch_loss=41.1, batch

Epoch 4/10:  88%|▉| 876/991 [3:48:28<30:11, 15.75s/batch, batch_loss=26.4, batch

Epoch 4/10:  88%|▉| 877/991 [3:48:28<29:53, 15.73s/batch, batch_loss=26.4, batch

Epoch 4/10:  88%|▉| 877/991 [3:48:43<29:53, 15.73s/batch, batch_loss=40.7, batch

Epoch 4/10:  89%|▉| 878/991 [3:48:43<29:05, 15.44s/batch, batch_loss=40.7, batch

Epoch 4/10:  89%|▉| 878/991 [3:48:58<29:05, 15.44s/batch, batch_loss=19.8, batch

Epoch 4/10:  89%|▉| 879/991 [3:48:58<28:40, 15.36s/batch, batch_loss=19.8, batch

Epoch 4/10:  89%|▉| 879/991 [3:49:13<28:40, 15.36s/batch, batch_loss=10.3, batch

Epoch 4/10:  89%|▉| 880/991 [3:49:13<28:08, 15.21s/batch, batch_loss=10.3, batch

Epoch 4/10:  89%|▉| 880/991 [3:49:28<28:08, 15.21s/batch, batch_loss=5.13e+3, ba

Epoch 4/10:  89%|▉| 881/991 [3:49:28<27:45, 15.14s/batch, batch_loss=5.13e+3, ba

Epoch 4/10:  89%|▉| 881/991 [3:49:44<27:45, 15.14s/batch, batch_loss=17.5, batch

Epoch 4/10:  89%|▉| 882/991 [3:49:44<28:02, 15.44s/batch, batch_loss=17.5, batch

Epoch 4/10:  89%|▉| 882/991 [3:49:57<28:02, 15.44s/batch, batch_loss=21.6, batch

Epoch 4/10:  89%|▉| 883/991 [3:49:57<26:44, 14.86s/batch, batch_loss=21.6, batch

Epoch 4/10:  89%|▉| 883/991 [3:50:11<26:44, 14.86s/batch, batch_loss=17.4, batch

Epoch 4/10:  89%|▉| 884/991 [3:50:11<25:39, 14.39s/batch, batch_loss=17.4, batch

Epoch 4/10:  89%|▉| 884/991 [3:50:26<25:39, 14.39s/batch, batch_loss=16.7, batch

Epoch 4/10:  89%|▉| 885/991 [3:50:26<25:56, 14.68s/batch, batch_loss=16.7, batch

Epoch 4/10:  89%|▉| 885/991 [3:50:40<25:56, 14.68s/batch, batch_loss=21.2, batch

Epoch 4/10:  89%|▉| 886/991 [3:50:40<25:32, 14.60s/batch, batch_loss=21.2, batch

Epoch 4/10:  89%|▉| 886/991 [3:50:58<25:32, 14.60s/batch, batch_loss=1.93e+4, ba

Epoch 4/10:  90%|▉| 887/991 [3:50:58<27:01, 15.59s/batch, batch_loss=1.93e+4, ba

Epoch 4/10:  90%|▉| 887/991 [3:51:14<27:01, 15.59s/batch, batch_loss=17.7, batch

Epoch 4/10:  90%|▉| 888/991 [3:51:14<26:37, 15.51s/batch, batch_loss=17.7, batch

Epoch 4/10:  90%|▉| 888/991 [3:51:29<26:37, 15.51s/batch, batch_loss=20.3, batch

Epoch 4/10:  90%|▉| 889/991 [3:51:29<26:21, 15.51s/batch, batch_loss=20.3, batch

Epoch 4/10:  90%|▉| 889/991 [3:51:45<26:21, 15.51s/batch, batch_loss=18.8, batch

Epoch 4/10:  90%|▉| 890/991 [3:51:45<26:19, 15.63s/batch, batch_loss=18.8, batch

Epoch 4/10:  90%|▉| 890/991 [3:52:00<26:19, 15.63s/batch, batch_loss=13.4, batch

Epoch 4/10:  90%|▉| 891/991 [3:52:00<25:44, 15.45s/batch, batch_loss=13.4, batch

Epoch 4/10:  90%|▉| 891/991 [3:52:15<25:44, 15.45s/batch, batch_loss=32.3, batch

Epoch 4/10:  90%|▉| 892/991 [3:52:15<25:04, 15.20s/batch, batch_loss=32.3, batch

Epoch 4/10:  90%|▉| 892/991 [3:52:30<25:04, 15.20s/batch, batch_loss=3.68e+3, ba

Epoch 4/10:  90%|▉| 893/991 [3:52:30<25:01, 15.32s/batch, batch_loss=3.68e+3, ba

Epoch 4/10:  90%|▉| 893/991 [3:52:47<25:01, 15.32s/batch, batch_loss=15.1, batch

Epoch 4/10:  90%|▉| 894/991 [3:52:47<25:26, 15.73s/batch, batch_loss=15.1, batch

Epoch 4/10:  90%|▉| 894/991 [3:53:06<25:26, 15.73s/batch, batch_loss=17.1, batch

Epoch 4/10:  90%|▉| 895/991 [3:53:06<26:35, 16.62s/batch, batch_loss=17.1, batch

Epoch 4/10:  90%|▉| 895/991 [3:53:22<26:35, 16.62s/batch, batch_loss=12.7, batch

Epoch 4/10:  90%|▉| 896/991 [3:53:22<25:59, 16.42s/batch, batch_loss=12.7, batch

Epoch 4/10:  90%|▉| 896/991 [3:53:37<25:59, 16.42s/batch, batch_loss=24.9, batch

Epoch 4/10:  91%|▉| 897/991 [3:53:37<25:14, 16.11s/batch, batch_loss=24.9, batch

Epoch 4/10:  91%|▉| 897/991 [3:53:53<25:14, 16.11s/batch, batch_loss=27.8, batch

Epoch 4/10:  91%|▉| 898/991 [3:53:53<24:43, 15.95s/batch, batch_loss=27.8, batch

Epoch 4/10:  91%|▉| 898/991 [3:54:07<24:43, 15.95s/batch, batch_loss=18, batch_i

Epoch 4/10:  91%|▉| 899/991 [3:54:07<23:41, 15.45s/batch, batch_loss=18, batch_i

Epoch 4/10:  91%|▉| 899/991 [3:54:23<23:41, 15.45s/batch, batch_loss=23.3, batch

Epoch 4/10:  91%|▉| 900/991 [3:54:23<23:29, 15.49s/batch, batch_loss=23.3, batch

Epoch 4/10:  91%|▉| 900/991 [3:54:39<23:29, 15.49s/batch, batch_loss=13.9, batch

Epoch 4/10:  91%|▉| 901/991 [3:54:39<23:35, 15.73s/batch, batch_loss=13.9, batch

Epoch 4/10:  91%|▉| 901/991 [3:54:55<23:35, 15.73s/batch, batch_loss=14.3, batch

Epoch 4/10:  91%|▉| 902/991 [3:54:55<23:31, 15.86s/batch, batch_loss=14.3, batch

Epoch 4/10:  91%|▉| 902/991 [3:55:14<23:31, 15.86s/batch, batch_loss=7.36, batch

Epoch 4/10:  91%|▉| 903/991 [3:55:14<24:36, 16.78s/batch, batch_loss=7.36, batch

Epoch 4/10:  91%|▉| 903/991 [3:55:30<24:36, 16.78s/batch, batch_loss=7.73, batch

Epoch 4/10:  91%|▉| 904/991 [3:55:30<24:02, 16.58s/batch, batch_loss=7.73, batch

Epoch 4/10:  91%|▉| 904/991 [3:55:46<24:02, 16.58s/batch, batch_loss=46.9, batch

Epoch 4/10:  91%|▉| 905/991 [3:55:46<23:27, 16.37s/batch, batch_loss=46.9, batch

Epoch 4/10:  91%|▉| 905/991 [3:56:02<23:27, 16.37s/batch, batch_loss=25.9, batch

Epoch 4/10:  91%|▉| 906/991 [3:56:02<23:03, 16.28s/batch, batch_loss=25.9, batch

Epoch 4/10:  91%|▉| 906/991 [3:56:18<23:03, 16.28s/batch, batch_loss=23.2, batch

Epoch 4/10:  92%|▉| 907/991 [3:56:18<22:39, 16.18s/batch, batch_loss=23.2, batch

Epoch 4/10:  92%|▉| 907/991 [3:56:33<22:39, 16.18s/batch, batch_loss=14.4, batch

Epoch 4/10:  92%|▉| 908/991 [3:56:33<21:54, 15.84s/batch, batch_loss=14.4, batch

Epoch 4/10:  92%|▉| 908/991 [3:56:48<21:54, 15.84s/batch, batch_loss=5.62, batch

Epoch 4/10:  92%|▉| 909/991 [3:56:48<21:11, 15.50s/batch, batch_loss=5.62, batch

Epoch 4/10:  92%|▉| 909/991 [3:57:04<21:11, 15.50s/batch, batch_loss=683, batch_

Epoch 4/10:  92%|▉| 910/991 [3:57:04<21:27, 15.89s/batch, batch_loss=683, batch_

Epoch 4/10:  92%|▉| 910/991 [3:57:19<21:27, 15.89s/batch, batch_loss=1.01e+3, ba

Epoch 4/10:  92%|▉| 911/991 [3:57:19<20:41, 15.51s/batch, batch_loss=1.01e+3, ba

Epoch 4/10:  92%|▉| 911/991 [3:57:34<20:41, 15.51s/batch, batch_loss=26.8, batch

Epoch 4/10:  92%|▉| 912/991 [3:57:34<20:06, 15.27s/batch, batch_loss=26.8, batch

Epoch 4/10:  92%|▉| 912/991 [3:57:49<20:06, 15.27s/batch, batch_loss=30.8, batch

Epoch 4/10:  92%|▉| 913/991 [3:57:49<19:47, 15.23s/batch, batch_loss=30.8, batch

Epoch 4/10:  92%|▉| 913/991 [3:58:04<19:47, 15.23s/batch, batch_loss=19.9, batch

Epoch 4/10:  92%|▉| 914/991 [3:58:04<19:26, 15.14s/batch, batch_loss=19.9, batch

Epoch 4/10:  92%|▉| 914/991 [3:58:19<19:26, 15.14s/batch, batch_loss=25.5, batch

Epoch 4/10:  92%|▉| 915/991 [3:58:19<19:01, 15.02s/batch, batch_loss=25.5, batch

Epoch 4/10:  92%|▉| 915/991 [3:58:34<19:01, 15.02s/batch, batch_loss=17.4, batch

Epoch 4/10:  92%|▉| 916/991 [3:58:34<18:47, 15.03s/batch, batch_loss=17.4, batch

Epoch 4/10:  92%|▉| 916/991 [3:58:48<18:47, 15.03s/batch, batch_loss=10.7, batch

Epoch 4/10:  93%|▉| 917/991 [3:58:48<18:21, 14.88s/batch, batch_loss=10.7, batch

Epoch 4/10:  93%|▉| 917/991 [3:59:04<18:21, 14.88s/batch, batch_loss=18.1, batch

Epoch 4/10:  93%|▉| 918/991 [3:59:04<18:20, 15.07s/batch, batch_loss=18.1, batch

Epoch 4/10:  93%|▉| 918/991 [3:59:19<18:20, 15.07s/batch, batch_loss=15.3, batch

Epoch 4/10:  93%|▉| 919/991 [3:59:19<18:08, 15.12s/batch, batch_loss=15.3, batch

Epoch 4/10:  93%|▉| 919/991 [3:59:34<18:08, 15.12s/batch, batch_loss=18.2, batch

Epoch 4/10:  93%|▉| 920/991 [3:59:34<17:54, 15.14s/batch, batch_loss=18.2, batch

Epoch 4/10:  93%|▉| 920/991 [3:59:49<17:54, 15.14s/batch, batch_loss=22.6, batch

Epoch 4/10:  93%|▉| 921/991 [3:59:49<17:32, 15.03s/batch, batch_loss=22.6, batch

Epoch 4/10:  93%|▉| 921/991 [4:00:04<17:32, 15.03s/batch, batch_loss=37.7, batch

Epoch 4/10:  93%|▉| 922/991 [4:00:04<17:25, 15.16s/batch, batch_loss=37.7, batch

Epoch 4/10:  93%|▉| 922/991 [4:00:20<17:25, 15.16s/batch, batch_loss=6.7, batch_

Epoch 4/10:  93%|▉| 923/991 [4:00:20<17:20, 15.31s/batch, batch_loss=6.7, batch_

Epoch 4/10:  93%|▉| 923/991 [4:00:36<17:20, 15.31s/batch, batch_loss=11.4, batch

Epoch 4/10:  93%|▉| 924/991 [4:00:36<17:14, 15.43s/batch, batch_loss=11.4, batch

Epoch 4/10:  93%|▉| 924/991 [4:00:52<17:14, 15.43s/batch, batch_loss=13.5, batch

Epoch 4/10:  93%|▉| 925/991 [4:00:52<17:09, 15.60s/batch, batch_loss=13.5, batch

Epoch 4/10:  93%|▉| 925/991 [4:01:09<17:09, 15.60s/batch, batch_loss=3e+4, batch

Epoch 4/10:  93%|▉| 926/991 [4:01:09<17:30, 16.17s/batch, batch_loss=3e+4, batch

Epoch 4/10:  93%|▉| 926/991 [4:01:24<17:30, 16.17s/batch, batch_loss=7.17, batch

Epoch 4/10:  94%|▉| 927/991 [4:01:24<16:57, 15.89s/batch, batch_loss=7.17, batch

Epoch 4/10:  94%|▉| 927/991 [4:01:40<16:57, 15.89s/batch, batch_loss=865, batch_

Epoch 4/10:  94%|▉| 928/991 [4:01:40<16:39, 15.87s/batch, batch_loss=865, batch_

Epoch 4/10:  94%|▉| 928/991 [4:01:56<16:39, 15.87s/batch, batch_loss=11.1, batch

Epoch 4/10:  94%|▉| 929/991 [4:01:56<16:27, 15.93s/batch, batch_loss=11.1, batch

Epoch 4/10:  94%|▉| 929/991 [4:02:12<16:27, 15.93s/batch, batch_loss=8.43, batch

Epoch 4/10:  94%|▉| 930/991 [4:02:12<16:03, 15.79s/batch, batch_loss=8.43, batch

Epoch 4/10:  94%|▉| 930/991 [4:02:27<16:03, 15.79s/batch, batch_loss=15.7, batch

Epoch 4/10:  94%|▉| 931/991 [4:02:27<15:31, 15.52s/batch, batch_loss=15.7, batch

Epoch 4/10:  94%|▉| 931/991 [4:02:42<15:31, 15.52s/batch, batch_loss=13.6, batch

Epoch 4/10:  94%|▉| 932/991 [4:02:42<15:19, 15.58s/batch, batch_loss=13.6, batch

Epoch 4/10:  94%|▉| 932/991 [4:02:58<15:19, 15.58s/batch, batch_loss=14.1, batch

Epoch 4/10:  94%|▉| 933/991 [4:02:58<15:06, 15.64s/batch, batch_loss=14.1, batch

Epoch 4/10:  94%|▉| 933/991 [4:03:14<15:06, 15.64s/batch, batch_loss=2.17, batch

Epoch 4/10:  94%|▉| 934/991 [4:03:14<14:59, 15.77s/batch, batch_loss=2.17, batch

Epoch 4/10:  94%|▉| 934/991 [4:03:30<14:59, 15.77s/batch, batch_loss=2.31, batch

Epoch 4/10:  94%|▉| 935/991 [4:03:30<14:38, 15.68s/batch, batch_loss=2.31, batch

Epoch 4/10:  94%|▉| 935/991 [4:03:46<14:38, 15.68s/batch, batch_loss=169, batch_

Epoch 4/10:  94%|▉| 936/991 [4:03:46<14:33, 15.89s/batch, batch_loss=169, batch_

Epoch 4/10:  94%|▉| 936/991 [4:04:02<14:33, 15.89s/batch, batch_loss=35.3, batch

Epoch 4/10:  95%|▉| 937/991 [4:04:02<14:22, 15.97s/batch, batch_loss=35.3, batch

Epoch 4/10:  95%|▉| 937/991 [4:04:18<14:22, 15.97s/batch, batch_loss=11.7, batch

Epoch 4/10:  95%|▉| 938/991 [4:04:18<14:04, 15.93s/batch, batch_loss=11.7, batch

Epoch 4/10:  95%|▉| 938/991 [4:04:34<14:04, 15.93s/batch, batch_loss=9.73, batch

Epoch 4/10:  95%|▉| 939/991 [4:04:34<13:47, 15.92s/batch, batch_loss=9.73, batch

Epoch 4/10:  95%|▉| 939/991 [4:04:51<13:47, 15.92s/batch, batch_loss=431, batch_

Epoch 4/10:  95%|▉| 940/991 [4:04:51<13:43, 16.14s/batch, batch_loss=431, batch_

Epoch 4/10:  95%|▉| 940/991 [4:05:05<13:43, 16.14s/batch, batch_loss=20.5, batch

Epoch 4/10:  95%|▉| 941/991 [4:05:05<13:04, 15.69s/batch, batch_loss=20.5, batch

Epoch 4/10:  95%|▉| 941/991 [4:05:20<13:04, 15.69s/batch, batch_loss=14, batch_i

Epoch 4/10:  95%|▉| 942/991 [4:05:20<12:28, 15.28s/batch, batch_loss=14, batch_i

Epoch 4/10:  95%|▉| 942/991 [4:05:35<12:28, 15.28s/batch, batch_loss=11.4, batch

Epoch 4/10:  95%|▉| 943/991 [4:05:35<12:15, 15.32s/batch, batch_loss=11.4, batch

Epoch 4/10:  95%|▉| 943/991 [4:05:50<12:15, 15.32s/batch, batch_loss=17.1, batch

Epoch 4/10:  95%|▉| 944/991 [4:05:50<11:49, 15.10s/batch, batch_loss=17.1, batch

Epoch 4/10:  95%|▉| 944/991 [4:06:05<11:49, 15.10s/batch, batch_loss=1.97, batch

Epoch 4/10:  95%|▉| 945/991 [4:06:05<11:34, 15.09s/batch, batch_loss=1.97, batch

Epoch 4/10:  95%|▉| 945/991 [4:06:20<11:34, 15.09s/batch, batch_loss=10.7, batch

Epoch 4/10:  95%|▉| 946/991 [4:06:20<11:27, 15.28s/batch, batch_loss=10.7, batch

Epoch 4/10:  95%|▉| 946/991 [4:06:38<11:27, 15.28s/batch, batch_loss=13.4, batch

Epoch 4/10:  96%|▉| 947/991 [4:06:38<11:43, 15.98s/batch, batch_loss=13.4, batch

Epoch 4/10:  96%|▉| 947/991 [4:06:53<11:43, 15.98s/batch, batch_loss=10.8, batch

Epoch 4/10:  96%|▉| 948/991 [4:06:53<11:17, 15.76s/batch, batch_loss=10.8, batch

Epoch 4/10:  96%|▉| 948/991 [4:07:09<11:17, 15.76s/batch, batch_loss=6.02, batch

Epoch 4/10:  96%|▉| 949/991 [4:07:09<10:57, 15.64s/batch, batch_loss=6.02, batch

Epoch 4/10:  96%|▉| 949/991 [4:07:24<10:57, 15.64s/batch, batch_loss=8.76, batch

Epoch 4/10:  96%|▉| 950/991 [4:07:24<10:42, 15.67s/batch, batch_loss=8.76, batch

Epoch 4/10:  96%|▉| 950/991 [4:07:40<10:42, 15.67s/batch, batch_loss=15.4, batch

Epoch 4/10:  96%|▉| 951/991 [4:07:40<10:20, 15.52s/batch, batch_loss=15.4, batch

Epoch 4/10:  96%|▉| 951/991 [4:07:55<10:20, 15.52s/batch, batch_loss=18, batch_i

Epoch 4/10:  96%|▉| 952/991 [4:07:55<09:59, 15.37s/batch, batch_loss=18, batch_i

Epoch 4/10:  96%|▉| 952/991 [4:08:11<09:59, 15.37s/batch, batch_loss=8.32, batch

Epoch 4/10:  96%|▉| 953/991 [4:08:11<09:51, 15.57s/batch, batch_loss=8.32, batch

Epoch 4/10:  96%|▉| 953/991 [4:08:26<09:51, 15.57s/batch, batch_loss=330, batch_

Epoch 4/10:  96%|▉| 954/991 [4:08:26<09:34, 15.54s/batch, batch_loss=330, batch_

Epoch 4/10:  96%|▉| 954/991 [4:08:41<09:34, 15.54s/batch, batch_loss=18.3, batch

Epoch 4/10:  96%|▉| 955/991 [4:08:41<09:16, 15.45s/batch, batch_loss=18.3, batch

Epoch 4/10:  96%|▉| 955/991 [4:08:58<09:16, 15.45s/batch, batch_loss=24.7, batch

Epoch 4/10:  96%|▉| 956/991 [4:08:58<09:11, 15.75s/batch, batch_loss=24.7, batch

Epoch 4/10:  96%|▉| 956/991 [4:09:14<09:11, 15.75s/batch, batch_loss=21.4, batch

Epoch 4/10:  97%|▉| 957/991 [4:09:14<08:59, 15.88s/batch, batch_loss=21.4, batch

Epoch 4/10:  97%|▉| 957/991 [4:09:30<08:59, 15.88s/batch, batch_loss=23.8, batch

Epoch 4/10:  97%|▉| 958/991 [4:09:30<08:43, 15.85s/batch, batch_loss=23.8, batch

Epoch 4/10:  97%|▉| 958/991 [4:09:46<08:43, 15.85s/batch, batch_loss=9.2, batch_

Epoch 4/10:  97%|▉| 959/991 [4:09:46<08:31, 15.97s/batch, batch_loss=9.2, batch_

Epoch 4/10:  97%|▉| 959/991 [4:10:03<08:31, 15.97s/batch, batch_loss=11.8, batch

Epoch 4/10:  97%|▉| 960/991 [4:10:03<08:20, 16.15s/batch, batch_loss=11.8, batch

Epoch 4/10:  97%|▉| 960/991 [4:10:17<08:20, 16.15s/batch, batch_loss=16.6, batch

Epoch 4/10:  97%|▉| 961/991 [4:10:17<07:49, 15.65s/batch, batch_loss=16.6, batch

Epoch 4/10:  97%|▉| 961/991 [4:10:33<07:49, 15.65s/batch, batch_loss=6.82, batch

Epoch 4/10:  97%|▉| 962/991 [4:10:33<07:37, 15.79s/batch, batch_loss=6.82, batch

Epoch 4/10:  97%|▉| 962/991 [4:10:52<07:37, 15.79s/batch, batch_loss=8.77, batch

Epoch 4/10:  97%|▉| 963/991 [4:10:52<07:44, 16.60s/batch, batch_loss=8.77, batch

Epoch 4/10:  97%|▉| 963/991 [4:11:07<07:44, 16.60s/batch, batch_loss=9.44e+3, ba

Epoch 4/10:  97%|▉| 964/991 [4:11:07<07:19, 16.26s/batch, batch_loss=9.44e+3, ba

Epoch 4/10:  97%|▉| 964/991 [4:11:23<07:19, 16.26s/batch, batch_loss=21.7, batch

Epoch 4/10:  97%|▉| 965/991 [4:11:23<06:57, 16.07s/batch, batch_loss=21.7, batch

Epoch 4/10:  97%|▉| 965/991 [4:11:39<06:57, 16.07s/batch, batch_loss=16.3, batch

Epoch 4/10:  97%|▉| 966/991 [4:11:39<06:40, 16.00s/batch, batch_loss=16.3, batch

Epoch 4/10:  97%|▉| 966/991 [4:11:54<06:40, 16.00s/batch, batch_loss=2.4e+4, bat

Epoch 4/10:  98%|▉| 967/991 [4:11:54<06:20, 15.86s/batch, batch_loss=2.4e+4, bat

Epoch 4/10:  98%|▉| 967/991 [4:12:09<06:20, 15.86s/batch, batch_loss=419, batch_

Epoch 4/10:  98%|▉| 968/991 [4:12:09<06:00, 15.66s/batch, batch_loss=419, batch_

Epoch 4/10:  98%|▉| 968/991 [4:12:24<06:00, 15.66s/batch, batch_loss=24.6, batch

Epoch 4/10:  98%|▉| 969/991 [4:12:24<05:40, 15.48s/batch, batch_loss=24.6, batch

Epoch 4/10:  98%|▉| 969/991 [4:12:42<05:40, 15.48s/batch, batch_loss=1.69, batch

Epoch 4/10:  98%|▉| 970/991 [4:12:42<05:35, 15.97s/batch, batch_loss=1.69, batch

Epoch 4/10:  98%|▉| 970/991 [4:12:58<05:35, 15.97s/batch, batch_loss=9.98, batch

Epoch 4/10:  98%|▉| 971/991 [4:12:58<05:20, 16.02s/batch, batch_loss=9.98, batch

Epoch 4/10:  98%|▉| 971/991 [4:13:13<05:20, 16.02s/batch, batch_loss=25.6, batch

Epoch 4/10:  98%|▉| 972/991 [4:13:13<05:02, 15.94s/batch, batch_loss=25.6, batch

Epoch 4/10:  98%|▉| 972/991 [4:13:30<05:02, 15.94s/batch, batch_loss=19.5, batch

Epoch 4/10:  98%|▉| 973/991 [4:13:30<04:51, 16.19s/batch, batch_loss=19.5, batch

Epoch 4/10:  98%|▉| 973/991 [4:13:46<04:51, 16.19s/batch, batch_loss=18, batch_i

Epoch 4/10:  98%|▉| 974/991 [4:13:46<04:31, 15.95s/batch, batch_loss=18, batch_i

Epoch 4/10:  98%|▉| 974/991 [4:14:01<04:31, 15.95s/batch, batch_loss=11.6, batch

Epoch 4/10:  98%|▉| 975/991 [4:14:01<04:10, 15.68s/batch, batch_loss=11.6, batch

Epoch 4/10:  98%|▉| 975/991 [4:14:16<04:10, 15.68s/batch, batch_loss=31.2, batch

Epoch 4/10:  98%|▉| 976/991 [4:14:16<03:55, 15.67s/batch, batch_loss=31.2, batch

Epoch 4/10:  98%|▉| 976/991 [4:14:31<03:55, 15.67s/batch, batch_loss=2.9, batch_

Epoch 4/10:  99%|▉| 977/991 [4:14:31<03:35, 15.41s/batch, batch_loss=2.9, batch_

Epoch 4/10:  99%|▉| 977/991 [4:14:46<03:35, 15.41s/batch, batch_loss=2.35, batch

Epoch 4/10:  99%|▉| 978/991 [4:14:46<03:16, 15.15s/batch, batch_loss=2.35, batch

Epoch 4/10:  99%|▉| 978/991 [4:15:00<03:16, 15.15s/batch, batch_loss=1.75, batch

Epoch 4/10:  99%|▉| 979/991 [4:15:00<02:59, 14.97s/batch, batch_loss=1.75, batch

Epoch 4/10:  99%|▉| 979/991 [4:15:14<02:59, 14.97s/batch, batch_loss=1.23, batch

Epoch 4/10:  99%|▉| 980/991 [4:15:14<02:40, 14.59s/batch, batch_loss=1.23, batch

Epoch 4/10:  99%|▉| 980/991 [4:15:27<02:40, 14.59s/batch, batch_loss=0.919, batc

Epoch 4/10:  99%|▉| 981/991 [4:15:27<02:21, 14.20s/batch, batch_loss=0.919, batc

Epoch 4/10:  99%|▉| 981/991 [4:15:41<02:21, 14.20s/batch, batch_loss=0.619, batc

Epoch 4/10:  99%|▉| 982/991 [4:15:41<02:07, 14.14s/batch, batch_loss=0.619, batc

Epoch 4/10:  99%|▉| 982/991 [4:15:56<02:07, 14.14s/batch, batch_loss=0.433, batc

Epoch 4/10:  99%|▉| 983/991 [4:15:56<01:54, 14.31s/batch, batch_loss=0.433, batc

Epoch 4/10:  99%|▉| 983/991 [4:16:10<01:54, 14.31s/batch, batch_loss=0.313, batc

Epoch 4/10:  99%|▉| 984/991 [4:16:10<01:39, 14.25s/batch, batch_loss=0.313, batc

Epoch 4/10:  99%|▉| 984/991 [4:16:25<01:39, 14.25s/batch, batch_loss=0.221, batc

Epoch 4/10:  99%|▉| 985/991 [4:16:25<01:26, 14.41s/batch, batch_loss=0.221, batc

Epoch 4/10:  99%|▉| 985/991 [4:16:39<01:26, 14.41s/batch, batch_loss=0.164, batc

Epoch 4/10:  99%|▉| 986/991 [4:16:39<01:11, 14.32s/batch, batch_loss=0.164, batc

Epoch 4/10:  99%|▉| 986/991 [4:16:53<01:11, 14.32s/batch, batch_loss=0.125, batc

Epoch 4/10: 100%|▉| 987/991 [4:16:53<00:56, 14.19s/batch, batch_loss=0.125, batc

Epoch 4/10: 100%|▉| 987/991 [4:17:07<00:56, 14.19s/batch, batch_loss=0.0996, bat

Epoch 4/10: 100%|▉| 988/991 [4:17:07<00:42, 14.18s/batch, batch_loss=0.0996, bat

Epoch 4/10: 100%|▉| 988/991 [4:17:21<00:42, 14.18s/batch, batch_loss=0.0877, bat

Epoch 4/10: 100%|▉| 989/991 [4:17:21<00:28, 14.08s/batch, batch_loss=0.0877, bat

Epoch 4/10: 100%|▉| 989/991 [4:17:36<00:28, 14.08s/batch, batch_loss=0.0854, bat

Epoch 4/10: 100%|▉| 990/991 [4:17:36<00:14, 14.28s/batch, batch_loss=0.0854, bat

Epoch 4/10: 100%|▉| 990/991 [4:17:47<00:14, 14.28s/batch, batch_loss=0.0859, bat

Epoch 4/10: 100%|█| 991/991 [4:17:47<00:00, 13.44s/batch, batch_loss=0.0859, bat

Epoch 4/10: 100%|█| 991/991 [4:17:47<00:00, 15.61s/batch, batch_loss=0.0859, bat




Epoch 4, Loss: 988.2371


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:15<?, ?batch/s, batch_loss=18.5, batch_index=1, ba

Validation:   0%| | 1/743 [00:15<3:14:24, 15.72s/batch, batch_loss=18.5, batch_i

Validation:   0%| | 1/743 [00:30<3:14:24, 15.72s/batch, batch_loss=16.5, batch_i

Validation:   0%| | 2/743 [00:30<3:10:00, 15.39s/batch, batch_loss=16.5, batch_i

Validation:   0%| | 2/743 [00:46<3:10:00, 15.39s/batch, batch_loss=16.8, batch_i

Validation:   0%| | 3/743 [00:46<3:09:34, 15.37s/batch, batch_loss=16.8, batch_i

Validation:   0%| | 3/743 [01:03<3:09:34, 15.37s/batch, batch_loss=10, batch_ind

Validation:   1%| | 4/743 [01:03<3:18:06, 16.08s/batch, batch_loss=10, batch_ind

Validation:   1%| | 4/743 [01:19<3:18:06, 16.08s/batch, batch_loss=23.1, batch_i

Validation:   1%| | 5/743 [01:19<3:17:36, 16.07s/batch, batch_loss=23.1, batch_i

Validation:   1%| | 5/743 [01:35<3:17:36, 16.07s/batch, batch_loss=24.5, batch_i

Validation:   1%| | 6/743 [01:35<3:17:28, 16.08s/batch, batch_loss=24.5, batch_i

Validation:   1%| | 6/743 [01:50<3:17:28, 16.08s/batch, batch_loss=562, batch_in

Validation:   1%| | 7/743 [01:50<3:12:21, 15.68s/batch, batch_loss=562, batch_in

Validation:   1%| | 7/743 [02:05<3:12:21, 15.68s/batch, batch_loss=18.3, batch_i

Validation:   1%| | 8/743 [02:05<3:10:38, 15.56s/batch, batch_loss=18.3, batch_i

Validation:   1%| | 8/743 [02:20<3:10:38, 15.56s/batch, batch_loss=13.6, batch_i

Validation:   1%| | 9/743 [02:20<3:06:45, 15.27s/batch, batch_loss=13.6, batch_i

Validation:   1%| | 9/743 [02:36<3:06:45, 15.27s/batch, batch_loss=13.9, batch_i

Validation:   1%| | 10/743 [02:36<3:10:13, 15.57s/batch, batch_loss=13.9, batch_

Validation:   1%| | 10/743 [02:54<3:10:13, 15.57s/batch, batch_loss=11.8, batch_

Validation:   1%| | 11/743 [02:54<3:18:41, 16.29s/batch, batch_loss=11.8, batch_

Validation:   1%| | 11/743 [03:09<3:18:41, 16.29s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:09<3:14:59, 16.01s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:25<3:14:59, 16.01s/batch, batch_loss=17.2, batch_

Validation:   2%| | 13/743 [03:25<3:12:04, 15.79s/batch, batch_loss=17.2, batch_

Validation:   2%| | 13/743 [03:41<3:12:04, 15.79s/batch, batch_loss=11.1, batch_

Validation:   2%| | 14/743 [03:41<3:15:19, 16.08s/batch, batch_loss=11.1, batch_

Validation:   2%| | 14/743 [03:58<3:15:19, 16.08s/batch, batch_loss=17.8, batch_

Validation:   2%| | 15/743 [03:58<3:18:15, 16.34s/batch, batch_loss=17.8, batch_

Validation:   2%| | 15/743 [04:14<3:18:15, 16.34s/batch, batch_loss=16.6, batch_

Validation:   2%| | 16/743 [04:14<3:16:31, 16.22s/batch, batch_loss=16.6, batch_

Validation:   2%| | 16/743 [04:31<3:16:31, 16.22s/batch, batch_loss=14.2, batch_

Validation:   2%| | 17/743 [04:31<3:18:42, 16.42s/batch, batch_loss=14.2, batch_

Validation:   2%| | 17/743 [04:48<3:18:42, 16.42s/batch, batch_loss=4.53e+3, bat

Validation:   2%| | 18/743 [04:48<3:21:11, 16.65s/batch, batch_loss=4.53e+3, bat

Validation:   2%| | 18/743 [05:08<3:21:11, 16.65s/batch, batch_loss=11.4, batch_

Validation:   3%| | 19/743 [05:08<3:30:20, 17.43s/batch, batch_loss=11.4, batch_

Validation:   3%| | 19/743 [05:24<3:30:20, 17.43s/batch, batch_loss=16.3, batch_

Validation:   3%| | 20/743 [05:24<3:27:24, 17.21s/batch, batch_loss=16.3, batch_

Validation:   3%| | 20/743 [05:39<3:27:24, 17.21s/batch, batch_loss=968, batch_i

Validation:   3%| | 21/743 [05:39<3:18:37, 16.51s/batch, batch_loss=968, batch_i

Validation:   3%| | 21/743 [05:55<3:18:37, 16.51s/batch, batch_loss=13.2, batch_

Validation:   3%| | 22/743 [05:55<3:16:42, 16.37s/batch, batch_loss=13.2, batch_

Validation:   3%| | 22/743 [06:10<3:16:42, 16.37s/batch, batch_loss=5.31, batch_

Validation:   3%| | 23/743 [06:10<3:11:02, 15.92s/batch, batch_loss=5.31, batch_

Validation:   3%| | 23/743 [06:26<3:11:02, 15.92s/batch, batch_loss=14.4, batch_

Validation:   3%| | 24/743 [06:26<3:10:45, 15.92s/batch, batch_loss=14.4, batch_

Validation:   3%| | 24/743 [06:42<3:10:45, 15.92s/batch, batch_loss=14.3, batch_

Validation:   3%| | 25/743 [06:42<3:10:43, 15.94s/batch, batch_loss=14.3, batch_

Validation:   3%| | 25/743 [07:06<3:10:43, 15.94s/batch, batch_loss=20, batch_in

Validation:   3%| | 26/743 [07:06<3:38:28, 18.28s/batch, batch_loss=20, batch_in

Validation:   3%| | 26/743 [07:26<3:38:28, 18.28s/batch, batch_loss=1.65e+3, bat

Validation:   4%| | 27/743 [07:26<3:43:59, 18.77s/batch, batch_loss=1.65e+3, bat

Validation:   4%| | 27/743 [07:43<3:43:59, 18.77s/batch, batch_loss=17.5, batch_

Validation:   4%| | 28/743 [07:43<3:39:21, 18.41s/batch, batch_loss=17.5, batch_

Validation:   4%| | 28/743 [08:01<3:39:21, 18.41s/batch, batch_loss=17.5, batch_

Validation:   4%| | 29/743 [08:01<3:37:01, 18.24s/batch, batch_loss=17.5, batch_

Validation:   4%| | 29/743 [08:17<3:37:01, 18.24s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [08:17<3:29:21, 17.62s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [08:34<3:29:21, 17.62s/batch, batch_loss=19, batch_in

Validation:   4%| | 31/743 [08:34<3:27:09, 17.46s/batch, batch_loss=19, batch_in

Validation:   4%| | 31/743 [08:50<3:27:09, 17.46s/batch, batch_loss=12.9, batch_

Validation:   4%| | 32/743 [08:50<3:22:06, 17.06s/batch, batch_loss=12.9, batch_

Validation:   4%| | 32/743 [09:08<3:22:06, 17.06s/batch, batch_loss=16.7, batch_

Validation:   4%| | 33/743 [09:08<3:25:24, 17.36s/batch, batch_loss=16.7, batch_

Validation:   4%| | 33/743 [09:28<3:25:24, 17.36s/batch, batch_loss=17.8, batch_

Validation:   5%| | 34/743 [09:28<3:32:28, 17.98s/batch, batch_loss=17.8, batch_

Validation:   5%| | 34/743 [09:45<3:32:28, 17.98s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [09:45<3:28:45, 17.69s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [10:01<3:28:45, 17.69s/batch, batch_loss=15.3, batch_

Validation:   5%| | 36/743 [10:01<3:23:47, 17.29s/batch, batch_loss=15.3, batch_

Validation:   5%| | 36/743 [10:18<3:23:47, 17.29s/batch, batch_loss=167, batch_i

Validation:   5%| | 37/743 [10:18<3:21:32, 17.13s/batch, batch_loss=167, batch_i

Validation:   5%| | 37/743 [10:35<3:21:32, 17.13s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [10:35<3:20:49, 17.09s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [10:51<3:20:49, 17.09s/batch, batch_loss=13.9, batch_

Validation:   5%| | 39/743 [10:51<3:16:27, 16.74s/batch, batch_loss=13.9, batch_

Validation:   5%| | 39/743 [11:07<3:16:27, 16.74s/batch, batch_loss=21.5, batch_

Validation:   5%| | 40/743 [11:07<3:13:21, 16.50s/batch, batch_loss=21.5, batch_

Validation:   5%| | 40/743 [11:22<3:13:21, 16.50s/batch, batch_loss=15.2, batch_

Validation:   6%| | 41/743 [11:22<3:07:43, 16.04s/batch, batch_loss=15.2, batch_

Validation:   6%| | 41/743 [11:38<3:07:43, 16.04s/batch, batch_loss=17.4, batch_

Validation:   6%| | 42/743 [11:38<3:07:30, 16.05s/batch, batch_loss=17.4, batch_

Validation:   6%| | 42/743 [11:57<3:07:30, 16.05s/batch, batch_loss=11.2, batch_

Validation:   6%| | 43/743 [11:57<3:18:41, 17.03s/batch, batch_loss=11.2, batch_

Validation:   6%| | 43/743 [12:14<3:18:41, 17.03s/batch, batch_loss=15.5, batch_

Validation:   6%| | 44/743 [12:14<3:15:52, 16.81s/batch, batch_loss=15.5, batch_

Validation:   6%| | 44/743 [12:29<3:15:52, 16.81s/batch, batch_loss=19.5, batch_

Validation:   6%| | 45/743 [12:29<3:10:16, 16.36s/batch, batch_loss=19.5, batch_

Validation:   6%| | 45/743 [12:45<3:10:16, 16.36s/batch, batch_loss=7.87, batch_

Validation:   6%| | 46/743 [12:45<3:08:04, 16.19s/batch, batch_loss=7.87, batch_

Validation:   6%| | 46/743 [13:00<3:08:04, 16.19s/batch, batch_loss=16.7, batch_

Validation:   6%| | 47/743 [13:00<3:03:20, 15.81s/batch, batch_loss=16.7, batch_

Validation:   6%| | 47/743 [13:15<3:03:20, 15.81s/batch, batch_loss=17.9, batch_

Validation:   6%| | 48/743 [13:15<3:03:14, 15.82s/batch, batch_loss=17.9, batch_

Validation:   6%| | 48/743 [13:30<3:03:14, 15.82s/batch, batch_loss=20.3, batch_

Validation:   7%| | 49/743 [13:30<2:58:57, 15.47s/batch, batch_loss=20.3, batch_

Validation:   7%| | 49/743 [13:45<2:58:57, 15.47s/batch, batch_loss=14.4, batch_

Validation:   7%| | 50/743 [13:45<2:57:35, 15.38s/batch, batch_loss=14.4, batch_

Validation:   7%| | 50/743 [14:00<2:57:35, 15.38s/batch, batch_loss=15, batch_in

Validation:   7%| | 51/743 [14:00<2:56:07, 15.27s/batch, batch_loss=15, batch_in

Validation:   7%| | 51/743 [14:17<2:56:07, 15.27s/batch, batch_loss=17, batch_in

Validation:   7%| | 52/743 [14:17<2:59:19, 15.57s/batch, batch_loss=17, batch_in

Validation:   7%| | 52/743 [14:31<2:59:19, 15.57s/batch, batch_loss=19.8, batch_

Validation:   7%| | 53/743 [14:31<2:55:30, 15.26s/batch, batch_loss=19.8, batch_

Validation:   7%| | 53/743 [14:46<2:55:30, 15.26s/batch, batch_loss=14.1, batch_

Validation:   7%| | 54/743 [14:46<2:55:10, 15.25s/batch, batch_loss=14.1, batch_

Validation:   7%| | 54/743 [15:06<2:55:10, 15.25s/batch, batch_loss=21.5, batch_

Validation:   7%| | 55/743 [15:06<3:10:46, 16.64s/batch, batch_loss=21.5, batch_

Validation:   7%| | 55/743 [15:24<3:10:46, 16.64s/batch, batch_loss=18.4, batch_

Validation:   8%| | 56/743 [15:24<3:14:18, 16.97s/batch, batch_loss=18.4, batch_

Validation:   8%| | 56/743 [15:41<3:14:18, 16.97s/batch, batch_loss=13.9, batch_

Validation:   8%| | 57/743 [15:41<3:14:52, 17.05s/batch, batch_loss=13.9, batch_

Validation:   8%| | 57/743 [15:59<3:14:52, 17.05s/batch, batch_loss=17.8, batch_

Validation:   8%| | 58/743 [15:59<3:16:31, 17.21s/batch, batch_loss=17.8, batch_

Validation:   8%| | 58/743 [16:15<3:16:31, 17.21s/batch, batch_loss=106, batch_i

Validation:   8%| | 59/743 [16:15<3:12:18, 16.87s/batch, batch_loss=106, batch_i

Validation:   8%| | 59/743 [16:31<3:12:18, 16.87s/batch, batch_loss=6.16e+3, bat

Validation:   8%| | 60/743 [16:31<3:08:33, 16.56s/batch, batch_loss=6.16e+3, bat

Validation:   8%| | 60/743 [16:47<3:08:33, 16.56s/batch, batch_loss=7.79, batch_

Validation:   8%| | 61/743 [16:47<3:07:40, 16.51s/batch, batch_loss=7.79, batch_

Validation:   8%| | 61/743 [17:04<3:07:40, 16.51s/batch, batch_loss=9.97, batch_

Validation:   8%| | 62/743 [17:04<3:07:46, 16.54s/batch, batch_loss=9.97, batch_

Validation:   8%| | 62/743 [17:20<3:07:46, 16.54s/batch, batch_loss=23, batch_in

Validation:   8%| | 63/743 [17:20<3:05:58, 16.41s/batch, batch_loss=23, batch_in

Validation:   8%| | 63/743 [17:35<3:05:58, 16.41s/batch, batch_loss=12.2, batch_

Validation:   9%| | 64/743 [17:35<3:03:00, 16.17s/batch, batch_loss=12.2, batch_

Validation:   9%| | 64/743 [17:54<3:03:00, 16.17s/batch, batch_loss=20.9, batch_

Validation:   9%| | 65/743 [17:54<3:12:34, 17.04s/batch, batch_loss=20.9, batch_

Validation:   9%| | 65/743 [18:09<3:12:34, 17.04s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [18:09<3:05:18, 16.42s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [18:25<3:05:18, 16.42s/batch, batch_loss=16.1, batch_

Validation:   9%| | 67/743 [18:25<3:03:27, 16.28s/batch, batch_loss=16.1, batch_

Validation:   9%| | 67/743 [18:40<3:03:27, 16.28s/batch, batch_loss=15.3, batch_

Validation:   9%| | 68/743 [18:40<2:58:29, 15.87s/batch, batch_loss=15.3, batch_

Validation:   9%| | 68/743 [18:55<2:58:29, 15.87s/batch, batch_loss=9.14, batch_

Validation:   9%| | 69/743 [18:55<2:54:47, 15.56s/batch, batch_loss=9.14, batch_

Validation:   9%| | 69/743 [19:10<2:54:47, 15.56s/batch, batch_loss=13.9, batch_

Validation:   9%| | 70/743 [19:10<2:50:43, 15.22s/batch, batch_loss=13.9, batch_

Validation:   9%| | 70/743 [19:23<2:50:43, 15.22s/batch, batch_loss=8.82, batch_

Validation:  10%| | 71/743 [19:23<2:45:20, 14.76s/batch, batch_loss=8.82, batch_

Validation:  10%| | 71/743 [19:38<2:45:20, 14.76s/batch, batch_loss=13.4, batch_

Validation:  10%| | 72/743 [19:38<2:44:33, 14.71s/batch, batch_loss=13.4, batch_

Validation:  10%| | 72/743 [19:53<2:44:33, 14.71s/batch, batch_loss=13.7, batch_

Validation:  10%| | 73/743 [19:53<2:45:06, 14.79s/batch, batch_loss=13.7, batch_

Validation:  10%| | 73/743 [20:08<2:45:06, 14.79s/batch, batch_loss=15.2, batch_

Validation:  10%| | 74/743 [20:08<2:45:45, 14.87s/batch, batch_loss=15.2, batch_

Validation:  10%| | 74/743 [20:22<2:45:45, 14.87s/batch, batch_loss=12.3, batch_

Validation:  10%| | 75/743 [20:22<2:43:57, 14.73s/batch, batch_loss=12.3, batch_

Validation:  10%| | 75/743 [20:37<2:43:57, 14.73s/batch, batch_loss=17.2, batch_

Validation:  10%| | 76/743 [20:37<2:42:58, 14.66s/batch, batch_loss=17.2, batch_

Validation:  10%| | 76/743 [20:52<2:42:58, 14.66s/batch, batch_loss=13.7, batch_

Validation:  10%| | 77/743 [20:52<2:44:17, 14.80s/batch, batch_loss=13.7, batch_

Validation:  10%| | 77/743 [21:06<2:44:17, 14.80s/batch, batch_loss=16.4, batch_

Validation:  10%| | 78/743 [21:06<2:42:05, 14.62s/batch, batch_loss=16.4, batch_

Validation:  10%| | 78/743 [21:21<2:42:05, 14.62s/batch, batch_loss=8.12, batch_

Validation:  11%| | 79/743 [21:21<2:43:38, 14.79s/batch, batch_loss=8.12, batch_

Validation:  11%| | 79/743 [21:37<2:43:38, 14.79s/batch, batch_loss=6.98, batch_

Validation:  11%| | 80/743 [21:37<2:45:13, 14.95s/batch, batch_loss=6.98, batch_

Validation:  11%| | 80/743 [21:52<2:45:13, 14.95s/batch, batch_loss=153, batch_i

Validation:  11%| | 81/743 [21:52<2:47:03, 15.14s/batch, batch_loss=153, batch_i

Validation:  11%| | 81/743 [22:06<2:47:03, 15.14s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [22:06<2:43:13, 14.82s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [22:21<2:43:13, 14.82s/batch, batch_loss=36.4, batch_

Validation:  11%| | 83/743 [22:21<2:43:07, 14.83s/batch, batch_loss=36.4, batch_

Validation:  11%| | 83/743 [22:36<2:43:07, 14.83s/batch, batch_loss=16.3, batch_

Validation:  11%| | 84/743 [22:36<2:43:56, 14.93s/batch, batch_loss=16.3, batch_

Validation:  11%| | 84/743 [22:52<2:43:56, 14.93s/batch, batch_loss=21.6, batch_

Validation:  11%| | 85/743 [22:52<2:45:28, 15.09s/batch, batch_loss=21.6, batch_

Validation:  11%| | 85/743 [23:09<2:45:28, 15.09s/batch, batch_loss=23.8, batch_

Validation:  12%| | 86/743 [23:09<2:52:15, 15.73s/batch, batch_loss=23.8, batch_

Validation:  12%| | 86/743 [23:25<2:52:15, 15.73s/batch, batch_loss=31.9, batch_

Validation:  12%| | 87/743 [23:25<2:52:02, 15.74s/batch, batch_loss=31.9, batch_

Validation:  12%| | 87/743 [23:41<2:52:02, 15.74s/batch, batch_loss=24.8, batch_

Validation:  12%| | 88/743 [23:41<2:52:16, 15.78s/batch, batch_loss=24.8, batch_

Validation:  12%| | 88/743 [23:57<2:52:16, 15.78s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [23:57<2:53:29, 15.92s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [24:13<2:53:29, 15.92s/batch, batch_loss=5.25, batch_

Validation:  12%| | 90/743 [24:13<2:52:53, 15.89s/batch, batch_loss=5.25, batch_

Validation:  12%| | 90/743 [24:28<2:52:53, 15.89s/batch, batch_loss=35.6, batch_

Validation:  12%| | 91/743 [24:28<2:51:27, 15.78s/batch, batch_loss=35.6, batch_

Validation:  12%| | 91/743 [24:43<2:51:27, 15.78s/batch, batch_loss=33.7, batch_

Validation:  12%| | 92/743 [24:43<2:49:05, 15.59s/batch, batch_loss=33.7, batch_

Validation:  12%| | 92/743 [24:59<2:49:05, 15.59s/batch, batch_loss=28.2, batch_

Validation:  13%|▏| 93/743 [24:59<2:49:00, 15.60s/batch, batch_loss=28.2, batch_

Validation:  13%|▏| 93/743 [25:18<2:49:00, 15.60s/batch, batch_loss=28.3, batch_

Validation:  13%|▏| 94/743 [25:18<2:59:22, 16.58s/batch, batch_loss=28.3, batch_

Validation:  13%|▏| 94/743 [25:34<2:59:22, 16.58s/batch, batch_loss=11.9, batch_

Validation:  13%|▏| 95/743 [25:34<2:56:38, 16.36s/batch, batch_loss=11.9, batch_

Validation:  13%|▏| 95/743 [25:50<2:56:38, 16.36s/batch, batch_loss=20.9, batch_

Validation:  13%|▏| 96/743 [25:50<2:56:51, 16.40s/batch, batch_loss=20.9, batch_

Validation:  13%|▏| 96/743 [26:06<2:56:51, 16.40s/batch, batch_loss=29.5, batch_

Validation:  13%|▏| 97/743 [26:06<2:55:37, 16.31s/batch, batch_loss=29.5, batch_

Validation:  13%|▏| 97/743 [26:22<2:55:37, 16.31s/batch, batch_loss=16.5, batch_

Validation:  13%|▏| 98/743 [26:22<2:52:29, 16.05s/batch, batch_loss=16.5, batch_

Validation:  13%|▏| 98/743 [26:38<2:52:29, 16.05s/batch, batch_loss=20.1, batch_

Validation:  13%|▏| 99/743 [26:38<2:51:41, 16.00s/batch, batch_loss=20.1, batch_

Validation:  13%|▏| 99/743 [26:53<2:51:41, 16.00s/batch, batch_loss=12.6, batch_

Validation:  13%|▏| 100/743 [26:53<2:48:53, 15.76s/batch, batch_loss=12.6, batch

Validation:  13%|▏| 100/743 [27:07<2:48:53, 15.76s/batch, batch_loss=15, batch_i

Validation:  14%|▏| 101/743 [27:07<2:45:14, 15.44s/batch, batch_loss=15, batch_i

Validation:  14%|▏| 101/743 [27:24<2:45:14, 15.44s/batch, batch_loss=13.6, batch

Validation:  14%|▏| 102/743 [27:24<2:49:12, 15.84s/batch, batch_loss=13.6, batch

Validation:  14%|▏| 102/743 [27:40<2:49:12, 15.84s/batch, batch_loss=3.41e+3, ba

Validation:  14%|▏| 103/743 [27:40<2:48:53, 15.83s/batch, batch_loss=3.41e+3, ba

Validation:  14%|▏| 103/743 [27:56<2:48:53, 15.83s/batch, batch_loss=17.3, batch

Validation:  14%|▏| 104/743 [27:56<2:47:25, 15.72s/batch, batch_loss=17.3, batch

Validation:  14%|▏| 104/743 [28:10<2:47:25, 15.72s/batch, batch_loss=6.73, batch

Validation:  14%|▏| 105/743 [28:10<2:42:59, 15.33s/batch, batch_loss=6.73, batch

Validation:  14%|▏| 105/743 [28:25<2:42:59, 15.33s/batch, batch_loss=13.7, batch

Validation:  14%|▏| 106/743 [28:25<2:41:57, 15.26s/batch, batch_loss=13.7, batch

Validation:  14%|▏| 106/743 [28:40<2:41:57, 15.26s/batch, batch_loss=726, batch_

Validation:  14%|▏| 107/743 [28:40<2:40:57, 15.18s/batch, batch_loss=726, batch_

Validation:  14%|▏| 107/743 [28:54<2:40:57, 15.18s/batch, batch_loss=1.61e+3, ba

Validation:  15%|▏| 108/743 [28:54<2:38:08, 14.94s/batch, batch_loss=1.61e+3, ba

Validation:  15%|▏| 108/743 [29:10<2:38:08, 14.94s/batch, batch_loss=205, batch_

Validation:  15%|▏| 109/743 [29:10<2:38:45, 15.02s/batch, batch_loss=205, batch_

Validation:  15%|▏| 109/743 [29:25<2:38:45, 15.02s/batch, batch_loss=30.8, batch

Validation:  15%|▏| 110/743 [29:25<2:39:23, 15.11s/batch, batch_loss=30.8, batch

Validation:  15%|▏| 110/743 [29:40<2:39:23, 15.11s/batch, batch_loss=12.6, batch

Validation:  15%|▏| 111/743 [29:40<2:39:15, 15.12s/batch, batch_loss=12.6, batch

Validation:  15%|▏| 111/743 [29:54<2:39:15, 15.12s/batch, batch_loss=35.1, batch

Validation:  15%|▏| 112/743 [29:54<2:36:31, 14.88s/batch, batch_loss=35.1, batch

Validation:  15%|▏| 112/743 [30:10<2:36:31, 14.88s/batch, batch_loss=1.07e+4, ba

Validation:  15%|▏| 113/743 [30:10<2:36:59, 14.95s/batch, batch_loss=1.07e+4, ba

Validation:  15%|▏| 113/743 [30:24<2:36:59, 14.95s/batch, batch_loss=18.6, batch

Validation:  15%|▏| 114/743 [30:24<2:36:46, 14.95s/batch, batch_loss=18.6, batch

Validation:  15%|▏| 114/743 [30:40<2:36:46, 14.95s/batch, batch_loss=20.6, batch

Validation:  15%|▏| 115/743 [30:40<2:37:00, 15.00s/batch, batch_loss=20.6, batch

Validation:  15%|▏| 115/743 [30:56<2:37:00, 15.00s/batch, batch_loss=17.8, batch

Validation:  16%|▏| 116/743 [30:56<2:39:50, 15.30s/batch, batch_loss=17.8, batch

Validation:  16%|▏| 116/743 [31:13<2:39:50, 15.30s/batch, batch_loss=23.3, batch

Validation:  16%|▏| 117/743 [31:13<2:47:41, 16.07s/batch, batch_loss=23.3, batch

Validation:  16%|▏| 117/743 [31:29<2:47:41, 16.07s/batch, batch_loss=23.4, batch

Validation:  16%|▏| 118/743 [31:29<2:46:40, 16.00s/batch, batch_loss=23.4, batch

Validation:  16%|▏| 118/743 [31:45<2:46:40, 16.00s/batch, batch_loss=17.7, batch

Validation:  16%|▏| 119/743 [31:45<2:44:42, 15.84s/batch, batch_loss=17.7, batch

Validation:  16%|▏| 119/743 [32:00<2:44:42, 15.84s/batch, batch_loss=23.2, batch

Validation:  16%|▏| 120/743 [32:00<2:42:59, 15.70s/batch, batch_loss=23.2, batch

Validation:  16%|▏| 120/743 [32:15<2:42:59, 15.70s/batch, batch_loss=14.9, batch

Validation:  16%|▏| 121/743 [32:15<2:39:29, 15.39s/batch, batch_loss=14.9, batch

Validation:  16%|▏| 121/743 [32:31<2:39:29, 15.39s/batch, batch_loss=4.24, batch

Validation:  16%|▏| 122/743 [32:31<2:41:49, 15.64s/batch, batch_loss=4.24, batch

Validation:  16%|▏| 122/743 [32:47<2:41:49, 15.64s/batch, batch_loss=8.17, batch

Validation:  17%|▏| 123/743 [32:47<2:44:11, 15.89s/batch, batch_loss=8.17, batch

Validation:  17%|▏| 123/743 [33:04<2:44:11, 15.89s/batch, batch_loss=11.3, batch

Validation:  17%|▏| 124/743 [33:04<2:45:21, 16.03s/batch, batch_loss=11.3, batch

Validation:  17%|▏| 124/743 [33:19<2:45:21, 16.03s/batch, batch_loss=30, batch_i

Validation:  17%|▏| 125/743 [33:19<2:43:42, 15.89s/batch, batch_loss=30, batch_i

Validation:  17%|▏| 125/743 [33:35<2:43:42, 15.89s/batch, batch_loss=13.9, batch

Validation:  17%|▏| 126/743 [33:35<2:42:07, 15.77s/batch, batch_loss=13.9, batch

Validation:  17%|▏| 126/743 [33:52<2:42:07, 15.77s/batch, batch_loss=11.6, batch

Validation:  17%|▏| 127/743 [33:52<2:47:01, 16.27s/batch, batch_loss=11.6, batch

Validation:  17%|▏| 127/743 [34:09<2:47:01, 16.27s/batch, batch_loss=21, batch_i

Validation:  17%|▏| 128/743 [34:09<2:47:16, 16.32s/batch, batch_loss=21, batch_i

Validation:  17%|▏| 128/743 [34:24<2:47:16, 16.32s/batch, batch_loss=15.4, batch

Validation:  17%|▏| 129/743 [34:24<2:45:03, 16.13s/batch, batch_loss=15.4, batch

Validation:  17%|▏| 129/743 [34:40<2:45:03, 16.13s/batch, batch_loss=19, batch_i

Validation:  17%|▏| 130/743 [34:40<2:43:12, 15.98s/batch, batch_loss=19, batch_i

Validation:  17%|▏| 130/743 [34:56<2:43:12, 15.98s/batch, batch_loss=27.2, batch

Validation:  18%|▏| 131/743 [34:56<2:41:55, 15.88s/batch, batch_loss=27.2, batch

Validation:  18%|▏| 131/743 [35:10<2:41:55, 15.88s/batch, batch_loss=27.7, batch

Validation:  18%|▏| 132/743 [35:10<2:37:29, 15.47s/batch, batch_loss=27.7, batch

Validation:  18%|▏| 132/743 [35:28<2:37:29, 15.47s/batch, batch_loss=38.8, batch

Validation:  18%|▏| 133/743 [35:28<2:44:47, 16.21s/batch, batch_loss=38.8, batch

Validation:  18%|▏| 133/743 [35:43<2:44:47, 16.21s/batch, batch_loss=22.4, batch

Validation:  18%|▏| 134/743 [35:43<2:39:25, 15.71s/batch, batch_loss=22.4, batch

Validation:  18%|▏| 134/743 [35:57<2:39:25, 15.71s/batch, batch_loss=39.3, batch

Validation:  18%|▏| 135/743 [35:57<2:33:57, 15.19s/batch, batch_loss=39.3, batch

Validation:  18%|▏| 135/743 [36:11<2:33:57, 15.19s/batch, batch_loss=19.7, batch

Validation:  18%|▏| 136/743 [36:11<2:29:40, 14.79s/batch, batch_loss=19.7, batch

Validation:  18%|▏| 136/743 [36:25<2:29:40, 14.79s/batch, batch_loss=25.4, batch

Validation:  18%|▏| 137/743 [36:25<2:29:29, 14.80s/batch, batch_loss=25.4, batch

Validation:  18%|▏| 137/743 [36:40<2:29:29, 14.80s/batch, batch_loss=6.49, batch

Validation:  19%|▏| 138/743 [36:40<2:29:29, 14.83s/batch, batch_loss=6.49, batch

Validation:  19%|▏| 138/743 [36:54<2:29:29, 14.83s/batch, batch_loss=253, batch_

Validation:  19%|▏| 139/743 [36:54<2:25:32, 14.46s/batch, batch_loss=253, batch_

Validation:  19%|▏| 139/743 [37:09<2:25:32, 14.46s/batch, batch_loss=20.4, batch

Validation:  19%|▏| 140/743 [37:09<2:27:49, 14.71s/batch, batch_loss=20.4, batch

Validation:  19%|▏| 140/743 [37:26<2:27:49, 14.71s/batch, batch_loss=11.5, batch

Validation:  19%|▏| 141/743 [37:26<2:34:39, 15.41s/batch, batch_loss=11.5, batch

Validation:  19%|▏| 141/743 [37:41<2:34:39, 15.41s/batch, batch_loss=14.9, batch

Validation:  19%|▏| 142/743 [37:41<2:32:49, 15.26s/batch, batch_loss=14.9, batch

Validation:  19%|▏| 142/743 [37:56<2:32:49, 15.26s/batch, batch_loss=13, batch_i

Validation:  19%|▏| 143/743 [37:56<2:32:05, 15.21s/batch, batch_loss=13, batch_i

Validation:  19%|▏| 143/743 [38:11<2:32:05, 15.21s/batch, batch_loss=20.1, batch

Validation:  19%|▏| 144/743 [38:11<2:31:46, 15.20s/batch, batch_loss=20.1, batch

Validation:  19%|▏| 144/743 [38:27<2:31:46, 15.20s/batch, batch_loss=14.2, batch

Validation:  20%|▏| 145/743 [38:27<2:31:46, 15.23s/batch, batch_loss=14.2, batch

Validation:  20%|▏| 145/743 [38:42<2:31:46, 15.23s/batch, batch_loss=17.4, batch

Validation:  20%|▏| 146/743 [38:42<2:30:46, 15.15s/batch, batch_loss=17.4, batch

Validation:  20%|▏| 146/743 [38:58<2:30:46, 15.15s/batch, batch_loss=17, batch_i

Validation:  20%|▏| 147/743 [38:58<2:34:30, 15.55s/batch, batch_loss=17, batch_i

Validation:  20%|▏| 147/743 [39:17<2:34:30, 15.55s/batch, batch_loss=3.19e+4, ba

Validation:  20%|▏| 148/743 [39:17<2:43:49, 16.52s/batch, batch_loss=3.19e+4, ba

Validation:  20%|▏| 148/743 [39:33<2:43:49, 16.52s/batch, batch_loss=23.6, batch

Validation:  20%|▏| 149/743 [39:33<2:41:21, 16.30s/batch, batch_loss=23.6, batch

Validation:  20%|▏| 149/743 [39:48<2:41:21, 16.30s/batch, batch_loss=27.3, batch

Validation:  20%|▏| 150/743 [39:48<2:38:43, 16.06s/batch, batch_loss=27.3, batch

Validation:  20%|▏| 150/743 [40:04<2:38:43, 16.06s/batch, batch_loss=14.5, batch

Validation:  20%|▏| 151/743 [40:04<2:39:01, 16.12s/batch, batch_loss=14.5, batch

Validation:  20%|▏| 151/743 [40:20<2:39:01, 16.12s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [40:20<2:37:49, 16.02s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [40:36<2:37:49, 16.02s/batch, batch_loss=15.7, batch

Validation:  21%|▏| 153/743 [40:36<2:35:33, 15.82s/batch, batch_loss=15.7, batch

Validation:  21%|▏| 153/743 [40:51<2:35:33, 15.82s/batch, batch_loss=15.3, batch

Validation:  21%|▏| 154/743 [40:51<2:34:50, 15.77s/batch, batch_loss=15.3, batch

Validation:  21%|▏| 154/743 [41:06<2:34:50, 15.77s/batch, batch_loss=19.2, batch

Validation:  21%|▏| 155/743 [41:06<2:32:40, 15.58s/batch, batch_loss=19.2, batch

Validation:  21%|▏| 155/743 [41:22<2:32:40, 15.58s/batch, batch_loss=18.2, batch

Validation:  21%|▏| 156/743 [41:22<2:31:02, 15.44s/batch, batch_loss=18.2, batch

Validation:  21%|▏| 156/743 [41:37<2:31:02, 15.44s/batch, batch_loss=18.1, batch

Validation:  21%|▏| 157/743 [41:37<2:31:51, 15.55s/batch, batch_loss=18.1, batch

Validation:  21%|▏| 157/743 [41:54<2:31:51, 15.55s/batch, batch_loss=23.4, batch

Validation:  21%|▏| 158/743 [41:54<2:34:01, 15.80s/batch, batch_loss=23.4, batch

Validation:  21%|▏| 158/743 [42:10<2:34:01, 15.80s/batch, batch_loss=22.1, batch

Validation:  21%|▏| 159/743 [42:10<2:34:03, 15.83s/batch, batch_loss=22.1, batch

Validation:  21%|▏| 159/743 [42:25<2:34:03, 15.83s/batch, batch_loss=15.5, batch

Validation:  22%|▏| 160/743 [42:25<2:32:02, 15.65s/batch, batch_loss=15.5, batch

Validation:  22%|▏| 160/743 [42:40<2:32:02, 15.65s/batch, batch_loss=18, batch_i

Validation:  22%|▏| 161/743 [42:40<2:29:14, 15.38s/batch, batch_loss=18, batch_i

Validation:  22%|▏| 161/743 [42:55<2:29:14, 15.38s/batch, batch_loss=24.4, batch

Validation:  22%|▏| 162/743 [42:55<2:29:06, 15.40s/batch, batch_loss=24.4, batch

Validation:  22%|▏| 162/743 [43:10<2:29:06, 15.40s/batch, batch_loss=14.6, batch

Validation:  22%|▏| 163/743 [43:10<2:28:19, 15.34s/batch, batch_loss=14.6, batch

Validation:  22%|▏| 163/743 [43:27<2:28:19, 15.34s/batch, batch_loss=9.54, batch

Validation:  22%|▏| 164/743 [43:27<2:32:03, 15.76s/batch, batch_loss=9.54, batch

Validation:  22%|▏| 164/743 [43:41<2:32:03, 15.76s/batch, batch_loss=13.4, batch

Validation:  22%|▏| 165/743 [43:41<2:28:09, 15.38s/batch, batch_loss=13.4, batch

Validation:  22%|▏| 165/743 [43:57<2:28:09, 15.38s/batch, batch_loss=9.51, batch

Validation:  22%|▏| 166/743 [43:57<2:28:01, 15.39s/batch, batch_loss=9.51, batch

Validation:  22%|▏| 166/743 [44:12<2:28:01, 15.39s/batch, batch_loss=14.8, batch

Validation:  22%|▏| 167/743 [44:12<2:27:20, 15.35s/batch, batch_loss=14.8, batch

Validation:  22%|▏| 167/743 [44:27<2:27:20, 15.35s/batch, batch_loss=21.1, batch

Validation:  23%|▏| 168/743 [44:27<2:24:35, 15.09s/batch, batch_loss=21.1, batch

Validation:  23%|▏| 168/743 [44:41<2:24:35, 15.09s/batch, batch_loss=24.5, batch

Validation:  23%|▏| 169/743 [44:41<2:21:40, 14.81s/batch, batch_loss=24.5, batch

Validation:  23%|▏| 169/743 [44:55<2:21:40, 14.81s/batch, batch_loss=20.8, batch

Validation:  23%|▏| 170/743 [44:55<2:20:57, 14.76s/batch, batch_loss=20.8, batch

Validation:  23%|▏| 170/743 [45:10<2:20:57, 14.76s/batch, batch_loss=20.2, batch

Validation:  23%|▏| 171/743 [45:10<2:21:10, 14.81s/batch, batch_loss=20.2, batch

Validation:  23%|▏| 171/743 [45:25<2:21:10, 14.81s/batch, batch_loss=18.7, batch

Validation:  23%|▏| 172/743 [45:25<2:20:52, 14.80s/batch, batch_loss=18.7, batch

Validation:  23%|▏| 172/743 [45:40<2:20:52, 14.80s/batch, batch_loss=20.6, batch

Validation:  23%|▏| 173/743 [45:40<2:21:39, 14.91s/batch, batch_loss=20.6, batch

Validation:  23%|▏| 173/743 [45:55<2:21:39, 14.91s/batch, batch_loss=18.2, batch

Validation:  23%|▏| 174/743 [45:55<2:22:05, 14.98s/batch, batch_loss=18.2, batch

Validation:  23%|▏| 174/743 [46:11<2:22:05, 14.98s/batch, batch_loss=18.2, batch

Validation:  24%|▏| 175/743 [46:11<2:23:55, 15.20s/batch, batch_loss=18.2, batch

Validation:  24%|▏| 175/743 [46:26<2:23:55, 15.20s/batch, batch_loss=12.1, batch

Validation:  24%|▏| 176/743 [46:26<2:23:10, 15.15s/batch, batch_loss=12.1, batch

Validation:  24%|▏| 176/743 [46:41<2:23:10, 15.15s/batch, batch_loss=17.1, batch

Validation:  24%|▏| 177/743 [46:41<2:22:36, 15.12s/batch, batch_loss=17.1, batch

Validation:  24%|▏| 177/743 [46:56<2:22:36, 15.12s/batch, batch_loss=30.2, batch

Validation:  24%|▏| 178/743 [46:56<2:22:43, 15.16s/batch, batch_loss=30.2, batch

Validation:  24%|▏| 178/743 [47:15<2:22:43, 15.16s/batch, batch_loss=20.7, batch

Validation:  24%|▏| 179/743 [47:15<2:32:24, 16.21s/batch, batch_loss=20.7, batch

Validation:  24%|▏| 179/743 [47:30<2:32:24, 16.21s/batch, batch_loss=7.22e+3, ba

Validation:  24%|▏| 180/743 [47:30<2:28:29, 15.82s/batch, batch_loss=7.22e+3, ba

Validation:  24%|▏| 180/743 [47:46<2:28:29, 15.82s/batch, batch_loss=18.3, batch

Validation:  24%|▏| 181/743 [47:46<2:27:57, 15.80s/batch, batch_loss=18.3, batch

Validation:  24%|▏| 181/743 [48:02<2:27:57, 15.80s/batch, batch_loss=19.1, batch

Validation:  24%|▏| 182/743 [48:02<2:28:23, 15.87s/batch, batch_loss=19.1, batch

Validation:  24%|▏| 182/743 [48:18<2:28:23, 15.87s/batch, batch_loss=20, batch_i

Validation:  25%|▏| 183/743 [48:18<2:29:30, 16.02s/batch, batch_loss=20, batch_i

Validation:  25%|▏| 183/743 [48:34<2:29:30, 16.02s/batch, batch_loss=10.8, batch

Validation:  25%|▏| 184/743 [48:34<2:29:00, 15.99s/batch, batch_loss=10.8, batch

Validation:  25%|▏| 184/743 [48:53<2:29:00, 15.99s/batch, batch_loss=22.1, batch

Validation:  25%|▏| 185/743 [48:53<2:36:35, 16.84s/batch, batch_loss=22.1, batch

Validation:  25%|▏| 185/743 [49:09<2:36:35, 16.84s/batch, batch_loss=24.6, batch

Validation:  25%|▎| 186/743 [49:09<2:34:15, 16.62s/batch, batch_loss=24.6, batch

Validation:  25%|▎| 186/743 [49:25<2:34:15, 16.62s/batch, batch_loss=35.1, batch

Validation:  25%|▎| 187/743 [49:25<2:32:13, 16.43s/batch, batch_loss=35.1, batch

Validation:  25%|▎| 187/743 [49:40<2:32:13, 16.43s/batch, batch_loss=15.2, batch

Validation:  25%|▎| 188/743 [49:40<2:27:53, 15.99s/batch, batch_loss=15.2, batch

Validation:  25%|▎| 188/743 [49:56<2:27:53, 15.99s/batch, batch_loss=16.8, batch

Validation:  25%|▎| 189/743 [49:56<2:28:51, 16.12s/batch, batch_loss=16.8, batch

Validation:  25%|▎| 189/743 [50:13<2:28:51, 16.12s/batch, batch_loss=977, batch_

Validation:  26%|▎| 190/743 [50:13<2:28:34, 16.12s/batch, batch_loss=977, batch_

Validation:  26%|▎| 190/743 [50:28<2:28:34, 16.12s/batch, batch_loss=24.8, batch

Validation:  26%|▎| 191/743 [50:28<2:27:51, 16.07s/batch, batch_loss=24.8, batch

Validation:  26%|▎| 191/743 [50:44<2:27:51, 16.07s/batch, batch_loss=12.6, batch

Validation:  26%|▎| 192/743 [50:44<2:24:46, 15.77s/batch, batch_loss=12.6, batch

Validation:  26%|▎| 192/743 [50:59<2:24:46, 15.77s/batch, batch_loss=16.4, batch

Validation:  26%|▎| 193/743 [50:59<2:22:22, 15.53s/batch, batch_loss=16.4, batch

Validation:  26%|▎| 193/743 [51:13<2:22:22, 15.53s/batch, batch_loss=19.2, batch

Validation:  26%|▎| 194/743 [51:13<2:18:58, 15.19s/batch, batch_loss=19.2, batch

Validation:  26%|▎| 194/743 [51:28<2:18:58, 15.19s/batch, batch_loss=10.5, batch

Validation:  26%|▎| 195/743 [51:28<2:18:02, 15.11s/batch, batch_loss=10.5, batch

Validation:  26%|▎| 195/743 [51:43<2:18:02, 15.11s/batch, batch_loss=17.6, batch

Validation:  26%|▎| 196/743 [51:43<2:16:39, 14.99s/batch, batch_loss=17.6, batch

Validation:  26%|▎| 196/743 [51:57<2:16:39, 14.99s/batch, batch_loss=9.16, batch

Validation:  27%|▎| 197/743 [51:57<2:15:18, 14.87s/batch, batch_loss=9.16, batch

Validation:  27%|▎| 197/743 [52:12<2:15:18, 14.87s/batch, batch_loss=20, batch_i

Validation:  27%|▎| 198/743 [52:12<2:13:51, 14.74s/batch, batch_loss=20, batch_i

Validation:  27%|▎| 198/743 [52:25<2:13:51, 14.74s/batch, batch_loss=19.2, batch

Validation:  27%|▎| 199/743 [52:25<2:10:24, 14.38s/batch, batch_loss=19.2, batch

Validation:  27%|▎| 199/743 [52:40<2:10:24, 14.38s/batch, batch_loss=278, batch_

Validation:  27%|▎| 200/743 [52:40<2:10:36, 14.43s/batch, batch_loss=278, batch_

Validation:  27%|▎| 200/743 [52:54<2:10:36, 14.43s/batch, batch_loss=37.8, batch

Validation:  27%|▎| 201/743 [52:54<2:09:51, 14.38s/batch, batch_loss=37.8, batch

Validation:  27%|▎| 201/743 [53:11<2:09:51, 14.38s/batch, batch_loss=30.2, batch

Validation:  27%|▎| 202/743 [53:11<2:16:49, 15.18s/batch, batch_loss=30.2, batch

Validation:  27%|▎| 202/743 [53:27<2:16:49, 15.18s/batch, batch_loss=12.8, batch

Validation:  27%|▎| 203/743 [53:27<2:17:37, 15.29s/batch, batch_loss=12.8, batch

Validation:  27%|▎| 203/743 [53:41<2:17:37, 15.29s/batch, batch_loss=21.4, batch

Validation:  27%|▎| 204/743 [53:41<2:13:54, 14.91s/batch, batch_loss=21.4, batch

Validation:  27%|▎| 204/743 [53:55<2:13:54, 14.91s/batch, batch_loss=28.3, batch

Validation:  28%|▎| 205/743 [53:55<2:12:36, 14.79s/batch, batch_loss=28.3, batch

Validation:  28%|▎| 205/743 [54:10<2:12:36, 14.79s/batch, batch_loss=17.7, batch

Validation:  28%|▎| 206/743 [54:10<2:11:38, 14.71s/batch, batch_loss=17.7, batch

Validation:  28%|▎| 206/743 [54:24<2:11:38, 14.71s/batch, batch_loss=18.3, batch

Validation:  28%|▎| 207/743 [54:24<2:11:42, 14.74s/batch, batch_loss=18.3, batch

Validation:  28%|▎| 207/743 [54:39<2:11:42, 14.74s/batch, batch_loss=18.2, batch

Validation:  28%|▎| 208/743 [54:39<2:12:04, 14.81s/batch, batch_loss=18.2, batch

Validation:  28%|▎| 208/743 [54:55<2:12:04, 14.81s/batch, batch_loss=9.49, batch

Validation:  28%|▎| 209/743 [54:55<2:13:30, 15.00s/batch, batch_loss=9.49, batch

Validation:  28%|▎| 209/743 [55:11<2:13:30, 15.00s/batch, batch_loss=10.9, batch

Validation:  28%|▎| 210/743 [55:11<2:17:12, 15.44s/batch, batch_loss=10.9, batch

Validation:  28%|▎| 210/743 [55:26<2:17:12, 15.44s/batch, batch_loss=12.2, batch

Validation:  28%|▎| 211/743 [55:27<2:16:19, 15.37s/batch, batch_loss=12.2, batch

Validation:  28%|▎| 211/743 [55:43<2:16:19, 15.37s/batch, batch_loss=12.5, batch

Validation:  29%|▎| 212/743 [55:43<2:18:44, 15.68s/batch, batch_loss=12.5, batch

Validation:  29%|▎| 212/743 [55:58<2:18:44, 15.68s/batch, batch_loss=537, batch_

Validation:  29%|▎| 213/743 [55:58<2:17:43, 15.59s/batch, batch_loss=537, batch_

Validation:  29%|▎| 213/743 [56:15<2:17:43, 15.59s/batch, batch_loss=11.9, batch

Validation:  29%|▎| 214/743 [56:15<2:19:25, 15.81s/batch, batch_loss=11.9, batch

Validation:  29%|▎| 214/743 [56:31<2:19:25, 15.81s/batch, batch_loss=11.8, batch

Validation:  29%|▎| 215/743 [56:31<2:20:07, 15.92s/batch, batch_loss=11.8, batch

Validation:  29%|▎| 215/743 [56:46<2:20:07, 15.92s/batch, batch_loss=2.58e+3, ba

Validation:  29%|▎| 216/743 [56:46<2:18:21, 15.75s/batch, batch_loss=2.58e+3, ba

Validation:  29%|▎| 216/743 [57:02<2:18:21, 15.75s/batch, batch_loss=27.9, batch

Validation:  29%|▎| 217/743 [57:02<2:17:39, 15.70s/batch, batch_loss=27.9, batch

Validation:  29%|▎| 217/743 [57:16<2:17:39, 15.70s/batch, batch_loss=15.2, batch

Validation:  29%|▎| 218/743 [57:16<2:13:50, 15.30s/batch, batch_loss=15.2, batch

Validation:  29%|▎| 218/743 [57:32<2:13:50, 15.30s/batch, batch_loss=33.4, batch

Validation:  29%|▎| 219/743 [57:32<2:14:02, 15.35s/batch, batch_loss=33.4, batch

Validation:  29%|▎| 219/743 [57:48<2:14:02, 15.35s/batch, batch_loss=32.9, batch

Validation:  30%|▎| 220/743 [57:48<2:16:43, 15.69s/batch, batch_loss=32.9, batch

Validation:  30%|▎| 220/743 [58:03<2:16:43, 15.69s/batch, batch_loss=17.9, batch

Validation:  30%|▎| 221/743 [58:03<2:15:05, 15.53s/batch, batch_loss=17.9, batch

Validation:  30%|▎| 221/743 [58:18<2:15:05, 15.53s/batch, batch_loss=11.4, batch

Validation:  30%|▎| 222/743 [58:18<2:13:10, 15.34s/batch, batch_loss=11.4, batch

Validation:  30%|▎| 222/743 [58:33<2:13:10, 15.34s/batch, batch_loss=10.7, batch

Validation:  30%|▎| 223/743 [58:33<2:11:10, 15.14s/batch, batch_loss=10.7, batch

Validation:  30%|▎| 223/743 [58:47<2:11:10, 15.14s/batch, batch_loss=10.7, batch

Validation:  30%|▎| 224/743 [58:47<2:09:33, 14.98s/batch, batch_loss=10.7, batch

Validation:  30%|▎| 224/743 [59:02<2:09:33, 14.98s/batch, batch_loss=4.94e+3, ba

Validation:  30%|▎| 225/743 [59:02<2:09:21, 14.98s/batch, batch_loss=4.94e+3, ba

Validation:  30%|▎| 225/743 [59:16<2:09:21, 14.98s/batch, batch_loss=17.5, batch

Validation:  30%|▎| 226/743 [59:16<2:06:19, 14.66s/batch, batch_loss=17.5, batch

Validation:  30%|▎| 226/743 [59:30<2:06:19, 14.66s/batch, batch_loss=16.4, batch

Validation:  31%|▎| 227/743 [59:30<2:03:33, 14.37s/batch, batch_loss=16.4, batch

Validation:  31%|▎| 227/743 [59:47<2:03:33, 14.37s/batch, batch_loss=18.8, batch

Validation:  31%|▎| 228/743 [59:47<2:10:00, 15.15s/batch, batch_loss=18.8, batch

Validation:  31%|▎| 228/743 [1:00:01<2:10:00, 15.15s/batch, batch_loss=18.6, bat

Validation:  31%|▎| 229/743 [1:00:01<2:08:12, 14.97s/batch, batch_loss=18.6, bat

Validation:  31%|▎| 229/743 [1:00:16<2:08:12, 14.97s/batch, batch_loss=19.8, bat

Validation:  31%|▎| 230/743 [1:00:16<2:06:30, 14.80s/batch, batch_loss=19.8, bat

Validation:  31%|▎| 230/743 [1:00:30<2:06:30, 14.80s/batch, batch_loss=3.23e+4, 

Validation:  31%|▎| 231/743 [1:00:30<2:04:17, 14.57s/batch, batch_loss=3.23e+4, 

Validation:  31%|▎| 231/743 [1:00:45<2:04:17, 14.57s/batch, batch_loss=20.1, bat

Validation:  31%|▎| 232/743 [1:00:45<2:04:57, 14.67s/batch, batch_loss=20.1, bat

Validation:  31%|▎| 232/743 [1:01:02<2:04:57, 14.67s/batch, batch_loss=11, batch

Validation:  31%|▎| 233/743 [1:01:02<2:12:24, 15.58s/batch, batch_loss=11, batch

Validation:  31%|▎| 233/743 [1:01:18<2:12:24, 15.58s/batch, batch_loss=12.6, bat

Validation:  31%|▎| 234/743 [1:01:18<2:11:51, 15.54s/batch, batch_loss=12.6, bat

Validation:  31%|▎| 234/743 [1:01:34<2:11:51, 15.54s/batch, batch_loss=15.4, bat

Validation:  32%|▎| 235/743 [1:01:34<2:12:33, 15.66s/batch, batch_loss=15.4, bat

Validation:  32%|▎| 235/743 [1:01:49<2:12:33, 15.66s/batch, batch_loss=2.77, bat

Validation:  32%|▎| 236/743 [1:01:49<2:11:05, 15.51s/batch, batch_loss=2.77, bat

Validation:  32%|▎| 236/743 [1:02:05<2:11:05, 15.51s/batch, batch_loss=19.7, bat

Validation:  32%|▎| 237/743 [1:02:05<2:11:11, 15.56s/batch, batch_loss=19.7, bat

Validation:  32%|▎| 237/743 [1:02:17<2:11:11, 15.56s/batch, batch_loss=14.7, bat

Validation:  32%|▎| 238/743 [1:02:17<2:03:16, 14.65s/batch, batch_loss=14.7, bat

Validation:  32%|▎| 238/743 [1:02:31<2:03:16, 14.65s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:02:31<1:59:53, 14.27s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:02:46<1:59:53, 14.27s/batch, batch_loss=19.5, bat

Validation:  32%|▎| 240/743 [1:02:46<2:01:14, 14.46s/batch, batch_loss=19.5, bat

Validation:  32%|▎| 240/743 [1:03:04<2:01:14, 14.46s/batch, batch_loss=19, batch

Validation:  32%|▎| 241/743 [1:03:04<2:11:26, 15.71s/batch, batch_loss=19, batch

Validation:  32%|▎| 241/743 [1:03:20<2:11:26, 15.71s/batch, batch_loss=232, batc

Validation:  33%|▎| 242/743 [1:03:20<2:10:39, 15.65s/batch, batch_loss=232, batc

Validation:  33%|▎| 242/743 [1:03:35<2:10:39, 15.65s/batch, batch_loss=9.11, bat

Validation:  33%|▎| 243/743 [1:03:35<2:09:24, 15.53s/batch, batch_loss=9.11, bat

Validation:  33%|▎| 243/743 [1:03:50<2:09:24, 15.53s/batch, batch_loss=12.9, bat

Validation:  33%|▎| 244/743 [1:03:50<2:09:12, 15.54s/batch, batch_loss=12.9, bat

Validation:  33%|▎| 244/743 [1:04:06<2:09:12, 15.54s/batch, batch_loss=23.2, bat

Validation:  33%|▎| 245/743 [1:04:06<2:08:06, 15.43s/batch, batch_loss=23.2, bat

Validation:  33%|▎| 245/743 [1:04:21<2:08:06, 15.43s/batch, batch_loss=7.12, bat

Validation:  33%|▎| 246/743 [1:04:21<2:07:31, 15.39s/batch, batch_loss=7.12, bat

Validation:  33%|▎| 246/743 [1:04:37<2:07:31, 15.39s/batch, batch_loss=15.2, bat

Validation:  33%|▎| 247/743 [1:04:37<2:08:56, 15.60s/batch, batch_loss=15.2, bat

Validation:  33%|▎| 247/743 [1:04:56<2:08:56, 15.60s/batch, batch_loss=44.5, bat

Validation:  33%|▎| 248/743 [1:04:56<2:15:53, 16.47s/batch, batch_loss=44.5, bat

Validation:  33%|▎| 248/743 [1:05:12<2:15:53, 16.47s/batch, batch_loss=11.2, bat

Validation:  34%|▎| 249/743 [1:05:12<2:15:29, 16.46s/batch, batch_loss=11.2, bat

Validation:  34%|▎| 249/743 [1:05:28<2:15:29, 16.46s/batch, batch_loss=20.8, bat

Validation:  34%|▎| 250/743 [1:05:28<2:13:20, 16.23s/batch, batch_loss=20.8, bat

Validation:  34%|▎| 250/743 [1:05:43<2:13:20, 16.23s/batch, batch_loss=20.7, bat

Validation:  34%|▎| 251/743 [1:05:43<2:11:37, 16.05s/batch, batch_loss=20.7, bat

Validation:  34%|▎| 251/743 [1:05:58<2:11:37, 16.05s/batch, batch_loss=23.6, bat

Validation:  34%|▎| 252/743 [1:05:58<2:09:02, 15.77s/batch, batch_loss=23.6, bat

Validation:  34%|▎| 252/743 [1:06:14<2:09:02, 15.77s/batch, batch_loss=19.4, bat

Validation:  34%|▎| 253/743 [1:06:14<2:08:42, 15.76s/batch, batch_loss=19.4, bat

Validation:  34%|▎| 253/743 [1:06:29<2:08:42, 15.76s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:06:29<2:06:55, 15.57s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:06:44<2:06:55, 15.57s/batch, batch_loss=2.45e+3, 

Validation:  34%|▎| 255/743 [1:06:44<2:03:45, 15.22s/batch, batch_loss=2.45e+3, 

Validation:  34%|▎| 255/743 [1:07:01<2:03:45, 15.22s/batch, batch_loss=23.3, bat

Validation:  34%|▎| 256/743 [1:07:01<2:07:38, 15.73s/batch, batch_loss=23.3, bat

Validation:  34%|▎| 256/743 [1:07:15<2:07:38, 15.73s/batch, batch_loss=23.2, bat

Validation:  35%|▎| 257/743 [1:07:15<2:04:51, 15.41s/batch, batch_loss=23.2, bat

Validation:  35%|▎| 257/743 [1:07:29<2:04:51, 15.41s/batch, batch_loss=13.4, bat

Validation:  35%|▎| 258/743 [1:07:29<2:00:36, 14.92s/batch, batch_loss=13.4, bat

Validation:  35%|▎| 258/743 [1:07:43<2:00:36, 14.92s/batch, batch_loss=2.88, bat

Validation:  35%|▎| 259/743 [1:07:43<1:58:41, 14.71s/batch, batch_loss=2.88, bat

Validation:  35%|▎| 259/743 [1:07:58<1:58:41, 14.71s/batch, batch_loss=2.05, bat

Validation:  35%|▎| 260/743 [1:07:58<1:58:04, 14.67s/batch, batch_loss=2.05, bat

Validation:  35%|▎| 260/743 [1:08:12<1:58:04, 14.67s/batch, batch_loss=8.8, batc

Validation:  35%|▎| 261/743 [1:08:12<1:56:24, 14.49s/batch, batch_loss=8.8, batc

Validation:  35%|▎| 261/743 [1:08:27<1:56:24, 14.49s/batch, batch_loss=31.7, bat

Validation:  35%|▎| 262/743 [1:08:27<1:56:55, 14.58s/batch, batch_loss=31.7, bat

Validation:  35%|▎| 262/743 [1:08:41<1:56:55, 14.58s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:08:41<1:56:55, 14.62s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:08:55<1:56:55, 14.62s/batch, batch_loss=15.9, bat

Validation:  36%|▎| 264/743 [1:08:55<1:54:51, 14.39s/batch, batch_loss=15.9, bat

Validation:  36%|▎| 264/743 [1:09:12<1:54:51, 14.39s/batch, batch_loss=24.1, bat

Validation:  36%|▎| 265/743 [1:09:12<2:01:08, 15.21s/batch, batch_loss=24.1, bat

Validation:  36%|▎| 265/743 [1:09:27<2:01:08, 15.21s/batch, batch_loss=29.7, bat

Validation:  36%|▎| 266/743 [1:09:27<1:59:43, 15.06s/batch, batch_loss=29.7, bat

Validation:  36%|▎| 266/743 [1:09:41<1:59:43, 15.06s/batch, batch_loss=21.4, bat

Validation:  36%|▎| 267/743 [1:09:41<1:56:45, 14.72s/batch, batch_loss=21.4, bat

Validation:  36%|▎| 267/743 [1:09:56<1:56:45, 14.72s/batch, batch_loss=3.02e+3, 

Validation:  36%|▎| 268/743 [1:09:56<1:56:32, 14.72s/batch, batch_loss=3.02e+3, 

Validation:  36%|▎| 268/743 [1:10:11<1:56:32, 14.72s/batch, batch_loss=40, batch

Validation:  36%|▎| 269/743 [1:10:11<1:56:55, 14.80s/batch, batch_loss=40, batch

Validation:  36%|▎| 269/743 [1:10:25<1:56:55, 14.80s/batch, batch_loss=39.4, bat

Validation:  36%|▎| 270/743 [1:10:25<1:55:29, 14.65s/batch, batch_loss=39.4, bat

Validation:  36%|▎| 270/743 [1:10:40<1:55:29, 14.65s/batch, batch_loss=28.2, bat

Validation:  36%|▎| 271/743 [1:10:40<1:56:14, 14.78s/batch, batch_loss=28.2, bat

Validation:  36%|▎| 271/743 [1:10:58<1:56:14, 14.78s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:10:58<2:03:37, 15.75s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:11:13<2:03:37, 15.75s/batch, batch_loss=17.7, bat

Validation:  37%|▎| 273/743 [1:11:13<2:01:24, 15.50s/batch, batch_loss=17.7, bat

Validation:  37%|▎| 273/743 [1:11:27<2:01:24, 15.50s/batch, batch_loss=24.5, bat

Validation:  37%|▎| 274/743 [1:11:27<1:58:08, 15.11s/batch, batch_loss=24.5, bat

Validation:  37%|▎| 274/743 [1:11:43<1:58:08, 15.11s/batch, batch_loss=20.3, bat

Validation:  37%|▎| 275/743 [1:11:43<1:59:36, 15.33s/batch, batch_loss=20.3, bat

Validation:  37%|▎| 275/743 [1:11:59<1:59:36, 15.33s/batch, batch_loss=13.5, bat

Validation:  37%|▎| 276/743 [1:11:59<2:00:17, 15.46s/batch, batch_loss=13.5, bat

Validation:  37%|▎| 276/743 [1:12:15<2:00:17, 15.46s/batch, batch_loss=24.8, bat

Validation:  37%|▎| 277/743 [1:12:15<2:01:11, 15.60s/batch, batch_loss=24.8, bat

Validation:  37%|▎| 277/743 [1:12:31<2:01:11, 15.60s/batch, batch_loss=21.1, bat

Validation:  37%|▎| 278/743 [1:12:31<2:02:01, 15.74s/batch, batch_loss=21.1, bat

Validation:  37%|▎| 278/743 [1:12:47<2:02:01, 15.74s/batch, batch_loss=10.9, bat

Validation:  38%|▍| 279/743 [1:12:47<2:02:09, 15.80s/batch, batch_loss=10.9, bat

Validation:  38%|▍| 279/743 [1:13:02<2:02:09, 15.80s/batch, batch_loss=14.3, bat

Validation:  38%|▍| 280/743 [1:13:02<2:00:34, 15.63s/batch, batch_loss=14.3, bat

Validation:  38%|▍| 280/743 [1:13:17<2:00:34, 15.63s/batch, batch_loss=17.5, bat

Validation:  38%|▍| 281/743 [1:13:17<1:58:43, 15.42s/batch, batch_loss=17.5, bat

Validation:  38%|▍| 281/743 [1:13:32<1:58:43, 15.42s/batch, batch_loss=19.5, bat

Validation:  38%|▍| 282/743 [1:13:32<1:57:40, 15.32s/batch, batch_loss=19.5, bat

Validation:  38%|▍| 282/743 [1:13:48<1:57:40, 15.32s/batch, batch_loss=18.1, bat

Validation:  38%|▍| 283/743 [1:13:48<2:00:05, 15.66s/batch, batch_loss=18.1, bat

Validation:  38%|▍| 283/743 [1:14:06<2:00:05, 15.66s/batch, batch_loss=17.3, bat

Validation:  38%|▍| 284/743 [1:14:06<2:03:25, 16.13s/batch, batch_loss=17.3, bat

Validation:  38%|▍| 284/743 [1:14:21<2:03:25, 16.13s/batch, batch_loss=15.9, bat

Validation:  38%|▍| 285/743 [1:14:21<2:00:15, 15.75s/batch, batch_loss=15.9, bat

Validation:  38%|▍| 285/743 [1:14:35<2:00:15, 15.75s/batch, batch_loss=17.6, bat

Validation:  38%|▍| 286/743 [1:14:35<1:56:50, 15.34s/batch, batch_loss=17.6, bat

Validation:  38%|▍| 286/743 [1:14:50<1:56:50, 15.34s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:14:50<1:55:24, 15.19s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:15:04<1:55:24, 15.19s/batch, batch_loss=24.2, bat

Validation:  39%|▍| 288/743 [1:15:04<1:53:57, 15.03s/batch, batch_loss=24.2, bat

Validation:  39%|▍| 288/743 [1:15:22<1:53:57, 15.03s/batch, batch_loss=24.7, bat

Validation:  39%|▍| 289/743 [1:15:22<1:59:29, 15.79s/batch, batch_loss=24.7, bat

Validation:  39%|▍| 289/743 [1:15:37<1:59:29, 15.79s/batch, batch_loss=497, batc

Validation:  39%|▍| 290/743 [1:15:37<1:58:09, 15.65s/batch, batch_loss=497, batc

Validation:  39%|▍| 290/743 [1:15:52<1:58:09, 15.65s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:15:52<1:55:15, 15.30s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:16:07<1:55:15, 15.30s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:16:07<1:54:17, 15.21s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:16:22<1:54:17, 15.21s/batch, batch_loss=27.6, bat

Validation:  39%|▍| 293/743 [1:16:22<1:53:38, 15.15s/batch, batch_loss=27.6, bat

Validation:  39%|▍| 293/743 [1:16:37<1:53:38, 15.15s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:16:37<1:53:11, 15.13s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:16:52<1:53:11, 15.13s/batch, batch_loss=14.7, bat

Validation:  40%|▍| 295/743 [1:16:52<1:52:05, 15.01s/batch, batch_loss=14.7, bat

Validation:  40%|▍| 295/743 [1:17:07<1:52:05, 15.01s/batch, batch_loss=17.6, bat

Validation:  40%|▍| 296/743 [1:17:07<1:51:33, 14.97s/batch, batch_loss=17.6, bat

Validation:  40%|▍| 296/743 [1:17:21<1:51:33, 14.97s/batch, batch_loss=11.8, bat

Validation:  40%|▍| 297/743 [1:17:21<1:49:13, 14.69s/batch, batch_loss=11.8, bat

Validation:  40%|▍| 297/743 [1:17:37<1:49:13, 14.69s/batch, batch_loss=28, batch

Validation:  40%|▍| 298/743 [1:17:37<1:53:29, 15.30s/batch, batch_loss=28, batch

Validation:  40%|▍| 298/743 [1:17:51<1:53:29, 15.30s/batch, batch_loss=34.5, bat

Validation:  40%|▍| 299/743 [1:17:51<1:50:19, 14.91s/batch, batch_loss=34.5, bat

Validation:  40%|▍| 299/743 [1:18:05<1:50:19, 14.91s/batch, batch_loss=41.3, bat

Validation:  40%|▍| 300/743 [1:18:05<1:46:58, 14.49s/batch, batch_loss=41.3, bat

Validation:  40%|▍| 300/743 [1:18:19<1:46:58, 14.49s/batch, batch_loss=832, batc

Validation:  41%|▍| 301/743 [1:18:19<1:46:28, 14.45s/batch, batch_loss=832, batc

Validation:  41%|▍| 301/743 [1:18:34<1:46:28, 14.45s/batch, batch_loss=10.7, bat

Validation:  41%|▍| 302/743 [1:18:34<1:46:27, 14.48s/batch, batch_loss=10.7, bat

Validation:  41%|▍| 302/743 [1:18:48<1:46:27, 14.48s/batch, batch_loss=12.6, bat

Validation:  41%|▍| 303/743 [1:18:48<1:46:10, 14.48s/batch, batch_loss=12.6, bat

Validation:  41%|▍| 303/743 [1:19:03<1:46:10, 14.48s/batch, batch_loss=16, batch

Validation:  41%|▍| 304/743 [1:19:03<1:47:05, 14.64s/batch, batch_loss=16, batch

Validation:  41%|▍| 304/743 [1:19:19<1:47:05, 14.64s/batch, batch_loss=11.1, bat

Validation:  41%|▍| 305/743 [1:19:19<1:50:25, 15.13s/batch, batch_loss=11.1, bat

Validation:  41%|▍| 305/743 [1:19:35<1:50:25, 15.13s/batch, batch_loss=21.6, bat

Validation:  41%|▍| 306/743 [1:19:35<1:51:33, 15.32s/batch, batch_loss=21.6, bat

Validation:  41%|▍| 306/743 [1:19:51<1:51:33, 15.32s/batch, batch_loss=23.2, bat

Validation:  41%|▍| 307/743 [1:19:51<1:52:13, 15.44s/batch, batch_loss=23.2, bat

Validation:  41%|▍| 307/743 [1:20:06<1:52:13, 15.44s/batch, batch_loss=877, batc

Validation:  41%|▍| 308/743 [1:20:06<1:51:53, 15.43s/batch, batch_loss=877, batc

Validation:  41%|▍| 308/743 [1:20:21<1:51:53, 15.43s/batch, batch_loss=26.3, bat

Validation:  42%|▍| 309/743 [1:20:21<1:50:49, 15.32s/batch, batch_loss=26.3, bat

Validation:  42%|▍| 309/743 [1:20:37<1:50:49, 15.32s/batch, batch_loss=20.2, bat

Validation:  42%|▍| 310/743 [1:20:37<1:51:12, 15.41s/batch, batch_loss=20.2, bat

Validation:  42%|▍| 310/743 [1:20:52<1:51:12, 15.41s/batch, batch_loss=18.3, bat

Validation:  42%|▍| 311/743 [1:20:52<1:50:00, 15.28s/batch, batch_loss=18.3, bat

Validation:  42%|▍| 311/743 [1:21:07<1:50:00, 15.28s/batch, batch_loss=16.2, bat

Validation:  42%|▍| 312/743 [1:21:07<1:48:44, 15.14s/batch, batch_loss=16.2, bat

Validation:  42%|▍| 312/743 [1:21:22<1:48:44, 15.14s/batch, batch_loss=7.98, bat

Validation:  42%|▍| 313/743 [1:21:22<1:47:41, 15.03s/batch, batch_loss=7.98, bat

Validation:  42%|▍| 313/743 [1:21:36<1:47:41, 15.03s/batch, batch_loss=13, batch

Validation:  42%|▍| 314/743 [1:21:36<1:46:35, 14.91s/batch, batch_loss=13, batch

Validation:  42%|▍| 314/743 [1:21:51<1:46:35, 14.91s/batch, batch_loss=20.5, bat

Validation:  42%|▍| 315/743 [1:21:51<1:46:43, 14.96s/batch, batch_loss=20.5, bat

Validation:  42%|▍| 315/743 [1:22:06<1:46:43, 14.96s/batch, batch_loss=19.8, bat

Validation:  43%|▍| 316/743 [1:22:06<1:46:36, 14.98s/batch, batch_loss=19.8, bat

Validation:  43%|▍| 316/743 [1:22:21<1:46:36, 14.98s/batch, batch_loss=25.4, bat

Validation:  43%|▍| 317/743 [1:22:21<1:44:39, 14.74s/batch, batch_loss=25.4, bat

Validation:  43%|▍| 317/743 [1:22:36<1:44:39, 14.74s/batch, batch_loss=15.3, bat

Validation:  43%|▍| 318/743 [1:22:36<1:45:02, 14.83s/batch, batch_loss=15.3, bat

Validation:  43%|▍| 318/743 [1:22:50<1:45:02, 14.83s/batch, batch_loss=19.2, bat

Validation:  43%|▍| 319/743 [1:22:50<1:43:14, 14.61s/batch, batch_loss=19.2, bat

Validation:  43%|▍| 319/743 [1:23:07<1:43:14, 14.61s/batch, batch_loss=17.9, bat

Validation:  43%|▍| 320/743 [1:23:07<1:48:05, 15.33s/batch, batch_loss=17.9, bat

Validation:  43%|▍| 320/743 [1:23:21<1:48:05, 15.33s/batch, batch_loss=15.7, bat

Validation:  43%|▍| 321/743 [1:23:21<1:46:21, 15.12s/batch, batch_loss=15.7, bat

Validation:  43%|▍| 321/743 [1:23:36<1:46:21, 15.12s/batch, batch_loss=14.1, bat

Validation:  43%|▍| 322/743 [1:23:36<1:45:45, 15.07s/batch, batch_loss=14.1, bat

Validation:  43%|▍| 322/743 [1:23:51<1:45:45, 15.07s/batch, batch_loss=19, batch

Validation:  43%|▍| 323/743 [1:23:51<1:44:35, 14.94s/batch, batch_loss=19, batch

Validation:  43%|▍| 323/743 [1:24:05<1:44:35, 14.94s/batch, batch_loss=293, batc

Validation:  44%|▍| 324/743 [1:24:05<1:42:44, 14.71s/batch, batch_loss=293, batc

Validation:  44%|▍| 324/743 [1:24:19<1:42:44, 14.71s/batch, batch_loss=20.2, bat

Validation:  44%|▍| 325/743 [1:24:19<1:41:49, 14.62s/batch, batch_loss=20.2, bat

Validation:  44%|▍| 325/743 [1:24:34<1:41:49, 14.62s/batch, batch_loss=20.7, bat

Validation:  44%|▍| 326/743 [1:24:34<1:40:46, 14.50s/batch, batch_loss=20.7, bat

Validation:  44%|▍| 326/743 [1:24:48<1:40:46, 14.50s/batch, batch_loss=22.1, bat

Validation:  44%|▍| 327/743 [1:24:48<1:39:31, 14.35s/batch, batch_loss=22.1, bat

Validation:  44%|▍| 327/743 [1:25:02<1:39:31, 14.35s/batch, batch_loss=18.9, bat

Validation:  44%|▍| 328/743 [1:25:02<1:38:36, 14.26s/batch, batch_loss=18.9, bat

Validation:  44%|▍| 328/743 [1:25:18<1:38:36, 14.26s/batch, batch_loss=6.75, bat

Validation:  44%|▍| 329/743 [1:25:18<1:43:26, 14.99s/batch, batch_loss=6.75, bat

Validation:  44%|▍| 329/743 [1:25:33<1:43:26, 14.99s/batch, batch_loss=15.6, bat

Validation:  44%|▍| 330/743 [1:25:33<1:41:44, 14.78s/batch, batch_loss=15.6, bat

Validation:  44%|▍| 330/743 [1:25:48<1:41:44, 14.78s/batch, batch_loss=23.7, bat

Validation:  45%|▍| 331/743 [1:25:48<1:41:54, 14.84s/batch, batch_loss=23.7, bat

Validation:  45%|▍| 331/743 [1:26:03<1:41:54, 14.84s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:26:03<1:42:40, 14.99s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:26:18<1:42:40, 14.99s/batch, batch_loss=34.3, bat

Validation:  45%|▍| 333/743 [1:26:18<1:41:33, 14.86s/batch, batch_loss=34.3, bat

Validation:  45%|▍| 333/743 [1:26:32<1:41:33, 14.86s/batch, batch_loss=27.1, bat

Validation:  45%|▍| 334/743 [1:26:32<1:41:18, 14.86s/batch, batch_loss=27.1, bat

Validation:  45%|▍| 334/743 [1:26:48<1:41:18, 14.86s/batch, batch_loss=41.4, bat

Validation:  45%|▍| 335/743 [1:26:48<1:42:18, 15.04s/batch, batch_loss=41.4, bat

Validation:  45%|▍| 335/743 [1:27:04<1:42:18, 15.04s/batch, batch_loss=12.2, bat

Validation:  45%|▍| 336/743 [1:27:04<1:43:37, 15.28s/batch, batch_loss=12.2, bat

Validation:  45%|▍| 336/743 [1:27:22<1:43:37, 15.28s/batch, batch_loss=34.6, bat

Validation:  45%|▍| 337/743 [1:27:22<1:48:25, 16.02s/batch, batch_loss=34.6, bat

Validation:  45%|▍| 337/743 [1:27:36<1:48:25, 16.02s/batch, batch_loss=43.1, bat

Validation:  45%|▍| 338/743 [1:27:36<1:45:58, 15.70s/batch, batch_loss=43.1, bat

Validation:  45%|▍| 338/743 [1:27:52<1:45:58, 15.70s/batch, batch_loss=38.8, bat

Validation:  46%|▍| 339/743 [1:27:52<1:45:48, 15.71s/batch, batch_loss=38.8, bat

Validation:  46%|▍| 339/743 [1:28:07<1:45:48, 15.71s/batch, batch_loss=36.8, bat

Validation:  46%|▍| 340/743 [1:28:07<1:44:38, 15.58s/batch, batch_loss=36.8, bat

Validation:  46%|▍| 340/743 [1:28:22<1:44:38, 15.58s/batch, batch_loss=17.8, bat

Validation:  46%|▍| 341/743 [1:28:22<1:42:43, 15.33s/batch, batch_loss=17.8, bat

Validation:  46%|▍| 341/743 [1:28:38<1:42:43, 15.33s/batch, batch_loss=23.4, bat

Validation:  46%|▍| 342/743 [1:28:38<1:44:20, 15.61s/batch, batch_loss=23.4, bat

Validation:  46%|▍| 342/743 [1:28:54<1:44:20, 15.61s/batch, batch_loss=22.5, bat

Validation:  46%|▍| 343/743 [1:28:54<1:43:10, 15.48s/batch, batch_loss=22.5, bat

Validation:  46%|▍| 343/743 [1:29:08<1:43:10, 15.48s/batch, batch_loss=25.6, bat

Validation:  46%|▍| 344/743 [1:29:08<1:41:30, 15.26s/batch, batch_loss=25.6, bat

Validation:  46%|▍| 344/743 [1:29:24<1:41:30, 15.26s/batch, batch_loss=21.6, bat

Validation:  46%|▍| 345/743 [1:29:24<1:42:16, 15.42s/batch, batch_loss=21.6, bat

Validation:  46%|▍| 345/743 [1:29:39<1:42:16, 15.42s/batch, batch_loss=34.6, bat

Validation:  47%|▍| 346/743 [1:29:39<1:41:06, 15.28s/batch, batch_loss=34.6, bat

Validation:  47%|▍| 346/743 [1:29:54<1:41:06, 15.28s/batch, batch_loss=25.2, bat

Validation:  47%|▍| 347/743 [1:29:54<1:40:27, 15.22s/batch, batch_loss=25.2, bat

Validation:  47%|▍| 347/743 [1:30:10<1:40:27, 15.22s/batch, batch_loss=32.9, bat

Validation:  47%|▍| 348/743 [1:30:10<1:40:47, 15.31s/batch, batch_loss=32.9, bat

Validation:  47%|▍| 348/743 [1:30:25<1:40:47, 15.31s/batch, batch_loss=27.1, bat

Validation:  47%|▍| 349/743 [1:30:25<1:39:38, 15.17s/batch, batch_loss=27.1, bat

Validation:  47%|▍| 349/743 [1:30:40<1:39:38, 15.17s/batch, batch_loss=20.3, bat

Validation:  47%|▍| 350/743 [1:30:40<1:39:19, 15.16s/batch, batch_loss=20.3, bat

Validation:  47%|▍| 350/743 [1:30:57<1:39:19, 15.16s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:30:57<1:43:05, 15.78s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:31:11<1:43:05, 15.78s/batch, batch_loss=35.9, bat

Validation:  47%|▍| 352/743 [1:31:11<1:40:12, 15.38s/batch, batch_loss=35.9, bat

Validation:  47%|▍| 352/743 [1:31:26<1:40:12, 15.38s/batch, batch_loss=22.5, bat

Validation:  48%|▍| 353/743 [1:31:26<1:38:54, 15.22s/batch, batch_loss=22.5, bat

Validation:  48%|▍| 353/743 [1:31:41<1:38:54, 15.22s/batch, batch_loss=26.8, bat

Validation:  48%|▍| 354/743 [1:31:41<1:37:02, 14.97s/batch, batch_loss=26.8, bat

Validation:  48%|▍| 354/743 [1:31:54<1:37:02, 14.97s/batch, batch_loss=33.9, bat

Validation:  48%|▍| 355/743 [1:31:54<1:33:38, 14.48s/batch, batch_loss=33.9, bat

Validation:  48%|▍| 355/743 [1:32:07<1:33:38, 14.48s/batch, batch_loss=41.9, bat

Validation:  48%|▍| 356/743 [1:32:07<1:30:14, 13.99s/batch, batch_loss=41.9, bat

Validation:  48%|▍| 356/743 [1:32:20<1:30:14, 13.99s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:32:20<1:28:43, 13.79s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:32:35<1:28:43, 13.79s/batch, batch_loss=15.4, bat

Validation:  48%|▍| 358/743 [1:32:35<1:29:47, 13.99s/batch, batch_loss=15.4, bat

Validation:  48%|▍| 358/743 [1:32:52<1:29:47, 13.99s/batch, batch_loss=10.9, bat

Validation:  48%|▍| 359/743 [1:32:52<1:36:56, 15.15s/batch, batch_loss=10.9, bat

Validation:  48%|▍| 359/743 [1:33:07<1:36:56, 15.15s/batch, batch_loss=24.8, bat

Validation:  48%|▍| 360/743 [1:33:07<1:36:25, 15.11s/batch, batch_loss=24.8, bat

Validation:  48%|▍| 360/743 [1:33:22<1:36:25, 15.11s/batch, batch_loss=15.2, bat

Validation:  49%|▍| 361/743 [1:33:22<1:35:21, 14.98s/batch, batch_loss=15.2, bat

Validation:  49%|▍| 361/743 [1:33:36<1:35:21, 14.98s/batch, batch_loss=34.4, bat

Validation:  49%|▍| 362/743 [1:33:36<1:33:31, 14.73s/batch, batch_loss=34.4, bat

Validation:  49%|▍| 362/743 [1:33:52<1:33:31, 14.73s/batch, batch_loss=26, batch

Validation:  49%|▍| 363/743 [1:33:52<1:34:12, 14.88s/batch, batch_loss=26, batch

Validation:  49%|▍| 363/743 [1:34:06<1:34:12, 14.88s/batch, batch_loss=24.7, bat

Validation:  49%|▍| 364/743 [1:34:06<1:33:08, 14.74s/batch, batch_loss=24.7, bat

Validation:  49%|▍| 364/743 [1:34:20<1:33:08, 14.74s/batch, batch_loss=18.6, bat

Validation:  49%|▍| 365/743 [1:34:20<1:31:11, 14.47s/batch, batch_loss=18.6, bat

Validation:  49%|▍| 365/743 [1:34:35<1:31:11, 14.47s/batch, batch_loss=16.4, bat

Validation:  49%|▍| 366/743 [1:34:35<1:31:30, 14.56s/batch, batch_loss=16.4, bat

Validation:  49%|▍| 366/743 [1:34:50<1:31:30, 14.56s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 367/743 [1:34:50<1:33:20, 14.90s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 367/743 [1:35:08<1:33:20, 14.90s/batch, batch_loss=4.84e+3, 

Validation:  50%|▍| 368/743 [1:35:08<1:38:42, 15.79s/batch, batch_loss=4.84e+3, 

Validation:  50%|▍| 368/743 [1:35:24<1:38:42, 15.79s/batch, batch_loss=19.8, bat

Validation:  50%|▍| 369/743 [1:35:24<1:39:04, 15.90s/batch, batch_loss=19.8, bat

Validation:  50%|▍| 369/743 [1:35:40<1:39:04, 15.90s/batch, batch_loss=29.9, bat

Validation:  50%|▍| 370/743 [1:35:40<1:38:36, 15.86s/batch, batch_loss=29.9, bat

Validation:  50%|▍| 370/743 [1:35:55<1:38:36, 15.86s/batch, batch_loss=19.5, bat

Validation:  50%|▍| 371/743 [1:35:55<1:37:28, 15.72s/batch, batch_loss=19.5, bat

Validation:  50%|▍| 371/743 [1:36:11<1:37:28, 15.72s/batch, batch_loss=20.6, bat

Validation:  50%|▌| 372/743 [1:36:11<1:36:16, 15.57s/batch, batch_loss=20.6, bat

Validation:  50%|▌| 372/743 [1:36:25<1:36:16, 15.57s/batch, batch_loss=23.1, bat

Validation:  50%|▌| 373/743 [1:36:25<1:34:36, 15.34s/batch, batch_loss=23.1, bat

Validation:  50%|▌| 373/743 [1:36:41<1:34:36, 15.34s/batch, batch_loss=18.7, bat

Validation:  50%|▌| 374/743 [1:36:41<1:34:43, 15.40s/batch, batch_loss=18.7, bat

Validation:  50%|▌| 374/743 [1:36:57<1:34:43, 15.40s/batch, batch_loss=9.89, bat

Validation:  50%|▌| 375/743 [1:36:57<1:35:05, 15.51s/batch, batch_loss=9.89, bat

Validation:  50%|▌| 375/743 [1:37:12<1:35:05, 15.51s/batch, batch_loss=37.4, bat

Validation:  51%|▌| 376/743 [1:37:12<1:33:45, 15.33s/batch, batch_loss=37.4, bat

Validation:  51%|▌| 376/743 [1:37:30<1:33:45, 15.33s/batch, batch_loss=10.4, bat

Validation:  51%|▌| 377/743 [1:37:30<1:39:31, 16.31s/batch, batch_loss=10.4, bat

Validation:  51%|▌| 377/743 [1:37:45<1:39:31, 16.31s/batch, batch_loss=19.1, bat

Validation:  51%|▌| 378/743 [1:37:45<1:36:12, 15.81s/batch, batch_loss=19.1, bat

Validation:  51%|▌| 378/743 [1:38:00<1:36:12, 15.81s/batch, batch_loss=12.4, bat

Validation:  51%|▌| 379/743 [1:38:00<1:34:24, 15.56s/batch, batch_loss=12.4, bat

Validation:  51%|▌| 379/743 [1:38:15<1:34:24, 15.56s/batch, batch_loss=7.1, batc

Validation:  51%|▌| 380/743 [1:38:15<1:33:01, 15.38s/batch, batch_loss=7.1, batc

Validation:  51%|▌| 380/743 [1:38:29<1:33:01, 15.38s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:38:29<1:30:14, 14.96s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:38:43<1:30:14, 14.96s/batch, batch_loss=915, batc

Validation:  51%|▌| 382/743 [1:38:43<1:28:36, 14.73s/batch, batch_loss=915, batc

Validation:  51%|▌| 382/743 [1:38:58<1:28:36, 14.73s/batch, batch_loss=214, batc

Validation:  52%|▌| 383/743 [1:38:58<1:28:34, 14.76s/batch, batch_loss=214, batc

Validation:  52%|▌| 383/743 [1:39:13<1:28:34, 14.76s/batch, batch_loss=284, batc

Validation:  52%|▌| 384/743 [1:39:13<1:29:10, 14.90s/batch, batch_loss=284, batc

Validation:  52%|▌| 384/743 [1:39:30<1:29:10, 14.90s/batch, batch_loss=23.4, bat

Validation:  52%|▌| 385/743 [1:39:30<1:32:30, 15.50s/batch, batch_loss=23.4, bat

Validation:  52%|▌| 385/743 [1:39:44<1:32:30, 15.50s/batch, batch_loss=10.6, bat

Validation:  52%|▌| 386/743 [1:39:44<1:30:19, 15.18s/batch, batch_loss=10.6, bat

Validation:  52%|▌| 386/743 [1:39:59<1:30:19, 15.18s/batch, batch_loss=8.32, bat

Validation:  52%|▌| 387/743 [1:39:59<1:29:01, 15.00s/batch, batch_loss=8.32, bat

Validation:  52%|▌| 387/743 [1:40:14<1:29:01, 15.00s/batch, batch_loss=14.1, bat

Validation:  52%|▌| 388/743 [1:40:14<1:28:45, 15.00s/batch, batch_loss=14.1, bat

Validation:  52%|▌| 388/743 [1:40:29<1:28:45, 15.00s/batch, batch_loss=12, batch

Validation:  52%|▌| 389/743 [1:40:29<1:28:16, 14.96s/batch, batch_loss=12, batch

Validation:  52%|▌| 389/743 [1:40:44<1:28:16, 14.96s/batch, batch_loss=17.9, bat

Validation:  52%|▌| 390/743 [1:40:44<1:27:59, 14.95s/batch, batch_loss=17.9, bat

Validation:  52%|▌| 390/743 [1:40:58<1:27:59, 14.95s/batch, batch_loss=17.5, bat

Validation:  53%|▌| 391/743 [1:40:58<1:26:41, 14.78s/batch, batch_loss=17.5, bat

Validation:  53%|▌| 391/743 [1:41:13<1:26:41, 14.78s/batch, batch_loss=17.4, bat

Validation:  53%|▌| 392/743 [1:41:13<1:26:50, 14.84s/batch, batch_loss=17.4, bat

Validation:  53%|▌| 392/743 [1:41:29<1:26:50, 14.84s/batch, batch_loss=19.7, bat

Validation:  53%|▌| 393/743 [1:41:29<1:27:28, 14.99s/batch, batch_loss=19.7, bat

Validation:  53%|▌| 393/743 [1:41:43<1:27:28, 14.99s/batch, batch_loss=15.3, bat

Validation:  53%|▌| 394/743 [1:41:43<1:27:01, 14.96s/batch, batch_loss=15.3, bat

Validation:  53%|▌| 394/743 [1:41:58<1:27:01, 14.96s/batch, batch_loss=10.9, bat

Validation:  53%|▌| 395/743 [1:41:58<1:25:51, 14.80s/batch, batch_loss=10.9, bat

Validation:  53%|▌| 395/743 [1:42:13<1:25:51, 14.80s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 396/743 [1:42:13<1:26:32, 14.96s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 396/743 [1:42:28<1:26:32, 14.96s/batch, batch_loss=11, batch

Validation:  53%|▌| 397/743 [1:42:28<1:26:40, 15.03s/batch, batch_loss=11, batch

Validation:  53%|▌| 397/743 [1:42:44<1:26:40, 15.03s/batch, batch_loss=19.5, bat

Validation:  54%|▌| 398/743 [1:42:44<1:26:36, 15.06s/batch, batch_loss=19.5, bat

Validation:  54%|▌| 398/743 [1:43:00<1:26:36, 15.06s/batch, batch_loss=17.8, bat

Validation:  54%|▌| 399/743 [1:43:00<1:28:52, 15.50s/batch, batch_loss=17.8, bat

Validation:  54%|▌| 399/743 [1:43:18<1:28:52, 15.50s/batch, batch_loss=23.3, bat

Validation:  54%|▌| 400/743 [1:43:18<1:32:50, 16.24s/batch, batch_loss=23.3, bat

Validation:  54%|▌| 400/743 [1:43:34<1:32:50, 16.24s/batch, batch_loss=18.7, bat

Validation:  54%|▌| 401/743 [1:43:34<1:31:48, 16.11s/batch, batch_loss=18.7, bat

Validation:  54%|▌| 401/743 [1:43:50<1:31:48, 16.11s/batch, batch_loss=6.21, bat

Validation:  54%|▌| 402/743 [1:43:50<1:31:19, 16.07s/batch, batch_loss=6.21, bat

Validation:  54%|▌| 402/743 [1:44:06<1:31:19, 16.07s/batch, batch_loss=15.8, bat

Validation:  54%|▌| 403/743 [1:44:06<1:30:36, 15.99s/batch, batch_loss=15.8, bat

Validation:  54%|▌| 403/743 [1:44:22<1:30:36, 15.99s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 404/743 [1:44:22<1:30:19, 15.99s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 404/743 [1:44:36<1:30:19, 15.99s/batch, batch_loss=9.97, bat

Validation:  55%|▌| 405/743 [1:44:36<1:28:16, 15.67s/batch, batch_loss=9.97, bat

Validation:  55%|▌| 405/743 [1:44:52<1:28:16, 15.67s/batch, batch_loss=12.8, bat

Validation:  55%|▌| 406/743 [1:44:52<1:28:24, 15.74s/batch, batch_loss=12.8, bat

Validation:  55%|▌| 406/743 [1:45:08<1:28:24, 15.74s/batch, batch_loss=16.5, bat

Validation:  55%|▌| 407/743 [1:45:08<1:28:14, 15.76s/batch, batch_loss=16.5, bat

Validation:  55%|▌| 407/743 [1:45:23<1:28:14, 15.76s/batch, batch_loss=28.7, bat

Validation:  55%|▌| 408/743 [1:45:23<1:26:14, 15.45s/batch, batch_loss=28.7, bat

Validation:  55%|▌| 408/743 [1:45:38<1:26:14, 15.45s/batch, batch_loss=11.2, bat

Validation:  55%|▌| 409/743 [1:45:38<1:25:41, 15.39s/batch, batch_loss=11.2, bat

Validation:  55%|▌| 409/743 [1:45:53<1:25:41, 15.39s/batch, batch_loss=18.3, bat

Validation:  55%|▌| 410/743 [1:45:53<1:24:20, 15.20s/batch, batch_loss=18.3, bat

Validation:  55%|▌| 410/743 [1:46:09<1:24:20, 15.20s/batch, batch_loss=19.9, bat

Validation:  55%|▌| 411/743 [1:46:09<1:25:09, 15.39s/batch, batch_loss=19.9, bat

Validation:  55%|▌| 411/743 [1:46:25<1:25:09, 15.39s/batch, batch_loss=15.4, bat

Validation:  55%|▌| 412/743 [1:46:25<1:25:31, 15.50s/batch, batch_loss=15.4, bat

Validation:  55%|▌| 412/743 [1:46:39<1:25:31, 15.50s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [1:46:39<1:24:18, 15.33s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [1:46:54<1:24:18, 15.33s/batch, batch_loss=33.4, bat

Validation:  56%|▌| 414/743 [1:46:54<1:23:20, 15.20s/batch, batch_loss=33.4, bat

Validation:  56%|▌| 414/743 [1:47:08<1:23:20, 15.20s/batch, batch_loss=32.4, bat

Validation:  56%|▌| 415/743 [1:47:08<1:21:11, 14.85s/batch, batch_loss=32.4, bat

Validation:  56%|▌| 415/743 [1:47:25<1:21:11, 14.85s/batch, batch_loss=6.49e+3, 

Validation:  56%|▌| 416/743 [1:47:25<1:23:29, 15.32s/batch, batch_loss=6.49e+3, 

Validation:  56%|▌| 416/743 [1:47:39<1:23:29, 15.32s/batch, batch_loss=17.4, bat

Validation:  56%|▌| 417/743 [1:47:39<1:21:29, 15.00s/batch, batch_loss=17.4, bat

Validation:  56%|▌| 417/743 [1:47:53<1:21:29, 15.00s/batch, batch_loss=15.5, bat

Validation:  56%|▌| 418/743 [1:47:53<1:20:00, 14.77s/batch, batch_loss=15.5, bat

Validation:  56%|▌| 418/743 [1:48:08<1:20:00, 14.77s/batch, batch_loss=16.1, bat

Validation:  56%|▌| 419/743 [1:48:08<1:20:19, 14.88s/batch, batch_loss=16.1, bat

Validation:  56%|▌| 419/743 [1:48:23<1:20:19, 14.88s/batch, batch_loss=13.4, bat

Validation:  57%|▌| 420/743 [1:48:23<1:19:04, 14.69s/batch, batch_loss=13.4, bat

Validation:  57%|▌| 420/743 [1:48:37<1:19:04, 14.69s/batch, batch_loss=34.6, bat

Validation:  57%|▌| 421/743 [1:48:37<1:18:27, 14.62s/batch, batch_loss=34.6, bat

Validation:  57%|▌| 421/743 [1:48:55<1:18:27, 14.62s/batch, batch_loss=9.12, bat

Validation:  57%|▌| 422/743 [1:48:55<1:24:00, 15.70s/batch, batch_loss=9.12, bat

Validation:  57%|▌| 422/743 [1:49:10<1:24:00, 15.70s/batch, batch_loss=25.2, bat

Validation:  57%|▌| 423/743 [1:49:10<1:22:37, 15.49s/batch, batch_loss=25.2, bat

Validation:  57%|▌| 423/743 [1:49:25<1:22:37, 15.49s/batch, batch_loss=324, batc

Validation:  57%|▌| 424/743 [1:49:25<1:21:12, 15.28s/batch, batch_loss=324, batc

Validation:  57%|▌| 424/743 [1:49:40<1:21:12, 15.28s/batch, batch_loss=28.5, bat

Validation:  57%|▌| 425/743 [1:49:40<1:20:24, 15.17s/batch, batch_loss=28.5, bat

Validation:  57%|▌| 425/743 [1:49:55<1:20:24, 15.17s/batch, batch_loss=27.5, bat

Validation:  57%|▌| 426/743 [1:49:55<1:20:06, 15.16s/batch, batch_loss=27.5, bat

Validation:  57%|▌| 426/743 [1:50:10<1:20:06, 15.16s/batch, batch_loss=26.9, bat

Validation:  57%|▌| 427/743 [1:50:10<1:19:51, 15.16s/batch, batch_loss=26.9, bat

Validation:  57%|▌| 427/743 [1:50:24<1:19:51, 15.16s/batch, batch_loss=5.27e+3, 

Validation:  58%|▌| 428/743 [1:50:24<1:17:45, 14.81s/batch, batch_loss=5.27e+3, 

Validation:  58%|▌| 428/743 [1:50:39<1:17:45, 14.81s/batch, batch_loss=19.5, bat

Validation:  58%|▌| 429/743 [1:50:39<1:18:01, 14.91s/batch, batch_loss=19.5, bat

Validation:  58%|▌| 429/743 [1:50:54<1:18:01, 14.91s/batch, batch_loss=5.41e+3, 

Validation:  58%|▌| 430/743 [1:50:54<1:17:34, 14.87s/batch, batch_loss=5.41e+3, 

Validation:  58%|▌| 430/743 [1:51:09<1:17:34, 14.87s/batch, batch_loss=1.39e+4, 

Validation:  58%|▌| 431/743 [1:51:09<1:17:43, 14.95s/batch, batch_loss=1.39e+4, 

Validation:  58%|▌| 431/743 [1:51:25<1:17:43, 14.95s/batch, batch_loss=953, batc

Validation:  58%|▌| 432/743 [1:51:25<1:18:08, 15.08s/batch, batch_loss=953, batc

Validation:  58%|▌| 432/743 [1:51:40<1:18:08, 15.08s/batch, batch_loss=14.8, bat

Validation:  58%|▌| 433/743 [1:51:40<1:18:47, 15.25s/batch, batch_loss=14.8, bat

Validation:  58%|▌| 433/743 [1:51:56<1:18:47, 15.25s/batch, batch_loss=10.8, bat

Validation:  58%|▌| 434/743 [1:51:56<1:19:30, 15.44s/batch, batch_loss=10.8, bat

Validation:  58%|▌| 434/743 [1:52:13<1:19:30, 15.44s/batch, batch_loss=14, batch

Validation:  59%|▌| 435/743 [1:52:13<1:20:34, 15.70s/batch, batch_loss=14, batch

Validation:  59%|▌| 435/743 [1:52:28<1:20:34, 15.70s/batch, batch_loss=14.9, bat

Validation:  59%|▌| 436/743 [1:52:28<1:20:34, 15.75s/batch, batch_loss=14.9, bat

Validation:  59%|▌| 436/743 [1:52:44<1:20:34, 15.75s/batch, batch_loss=25, batch

Validation:  59%|▌| 437/743 [1:52:44<1:20:10, 15.72s/batch, batch_loss=25, batch

Validation:  59%|▌| 437/743 [1:53:00<1:20:10, 15.72s/batch, batch_loss=967, batc

Validation:  59%|▌| 438/743 [1:53:00<1:20:08, 15.77s/batch, batch_loss=967, batc

Validation:  59%|▌| 438/743 [1:53:15<1:20:08, 15.77s/batch, batch_loss=905, batc

Validation:  59%|▌| 439/743 [1:53:15<1:19:24, 15.67s/batch, batch_loss=905, batc

Validation:  59%|▌| 439/743 [1:53:31<1:19:24, 15.67s/batch, batch_loss=19.6, bat

Validation:  59%|▌| 440/743 [1:53:31<1:18:41, 15.58s/batch, batch_loss=19.6, bat

Validation:  59%|▌| 440/743 [1:53:46<1:18:41, 15.58s/batch, batch_loss=15.9, bat

Validation:  59%|▌| 441/743 [1:53:46<1:17:45, 15.45s/batch, batch_loss=15.9, bat

Validation:  59%|▌| 441/743 [1:54:01<1:17:45, 15.45s/batch, batch_loss=16.7, bat

Validation:  59%|▌| 442/743 [1:54:01<1:17:04, 15.36s/batch, batch_loss=16.7, bat

Validation:  59%|▌| 442/743 [1:54:17<1:17:04, 15.36s/batch, batch_loss=12.1, bat

Validation:  60%|▌| 443/743 [1:54:17<1:17:40, 15.53s/batch, batch_loss=12.1, bat

Validation:  60%|▌| 443/743 [1:54:32<1:17:40, 15.53s/batch, batch_loss=16.9, bat

Validation:  60%|▌| 444/743 [1:54:32<1:17:08, 15.48s/batch, batch_loss=16.9, bat

Validation:  60%|▌| 444/743 [1:54:47<1:17:08, 15.48s/batch, batch_loss=8.85, bat

Validation:  60%|▌| 445/743 [1:54:47<1:16:06, 15.32s/batch, batch_loss=8.85, bat

Validation:  60%|▌| 445/743 [1:55:02<1:16:06, 15.32s/batch, batch_loss=14.8, bat

Validation:  60%|▌| 446/743 [1:55:02<1:14:47, 15.11s/batch, batch_loss=14.8, bat

Validation:  60%|▌| 446/743 [1:55:16<1:14:47, 15.11s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:55:16<1:13:34, 14.92s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:55:32<1:13:34, 14.92s/batch, batch_loss=5.38, bat

Validation:  60%|▌| 448/743 [1:55:32<1:13:37, 14.97s/batch, batch_loss=5.38, bat

Validation:  60%|▌| 448/743 [1:55:46<1:13:37, 14.97s/batch, batch_loss=16.2, bat

Validation:  60%|▌| 449/743 [1:55:46<1:12:51, 14.87s/batch, batch_loss=16.2, bat

Validation:  60%|▌| 449/743 [1:56:01<1:12:51, 14.87s/batch, batch_loss=19.3, bat

Validation:  61%|▌| 450/743 [1:56:01<1:12:43, 14.89s/batch, batch_loss=19.3, bat

Validation:  61%|▌| 450/743 [1:56:16<1:12:43, 14.89s/batch, batch_loss=13.3, bat

Validation:  61%|▌| 451/743 [1:56:16<1:12:55, 14.98s/batch, batch_loss=13.3, bat

Validation:  61%|▌| 451/743 [1:56:30<1:12:55, 14.98s/batch, batch_loss=24.8, bat

Validation:  61%|▌| 452/743 [1:56:30<1:10:57, 14.63s/batch, batch_loss=24.8, bat

Validation:  61%|▌| 452/743 [1:56:45<1:10:57, 14.63s/batch, batch_loss=14.1, bat

Validation:  61%|▌| 453/743 [1:56:45<1:10:23, 14.56s/batch, batch_loss=14.1, bat

Validation:  61%|▌| 453/743 [1:56:59<1:10:23, 14.56s/batch, batch_loss=6.21, bat

Validation:  61%|▌| 454/743 [1:56:59<1:09:46, 14.48s/batch, batch_loss=6.21, bat

Validation:  61%|▌| 454/743 [1:57:14<1:09:46, 14.48s/batch, batch_loss=10.6, bat

Validation:  61%|▌| 455/743 [1:57:14<1:10:40, 14.72s/batch, batch_loss=10.6, bat

Validation:  61%|▌| 455/743 [1:57:28<1:10:40, 14.72s/batch, batch_loss=8.67, bat

Validation:  61%|▌| 456/743 [1:57:28<1:09:03, 14.44s/batch, batch_loss=8.67, bat

Validation:  61%|▌| 456/743 [1:57:45<1:09:03, 14.44s/batch, batch_loss=16.4, bat

Validation:  62%|▌| 457/743 [1:57:45<1:12:41, 15.25s/batch, batch_loss=16.4, bat

Validation:  62%|▌| 457/743 [1:58:00<1:12:41, 15.25s/batch, batch_loss=33.7, bat

Validation:  62%|▌| 458/743 [1:58:00<1:12:02, 15.17s/batch, batch_loss=33.7, bat

Validation:  62%|▌| 458/743 [1:58:16<1:12:02, 15.17s/batch, batch_loss=13.8, bat

Validation:  62%|▌| 459/743 [1:58:16<1:12:28, 15.31s/batch, batch_loss=13.8, bat

Validation:  62%|▌| 459/743 [1:58:30<1:12:28, 15.31s/batch, batch_loss=20.4, bat

Validation:  62%|▌| 460/743 [1:58:30<1:10:37, 14.97s/batch, batch_loss=20.4, bat

Validation:  62%|▌| 460/743 [1:58:46<1:10:37, 14.97s/batch, batch_loss=15.9, bat

Validation:  62%|▌| 461/743 [1:58:46<1:11:32, 15.22s/batch, batch_loss=15.9, bat

Validation:  62%|▌| 461/743 [1:59:00<1:11:32, 15.22s/batch, batch_loss=13.7, bat

Validation:  62%|▌| 462/743 [1:59:00<1:10:25, 15.04s/batch, batch_loss=13.7, bat

Validation:  62%|▌| 462/743 [1:59:15<1:10:25, 15.04s/batch, batch_loss=15.1, bat

Validation:  62%|▌| 463/743 [1:59:15<1:10:03, 15.01s/batch, batch_loss=15.1, bat

Validation:  62%|▌| 463/743 [1:59:31<1:10:03, 15.01s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:59:31<1:11:31, 15.38s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:59:47<1:11:31, 15.38s/batch, batch_loss=18.8, bat

Validation:  63%|▋| 465/743 [1:59:47<1:11:36, 15.46s/batch, batch_loss=18.8, bat

Validation:  63%|▋| 465/743 [2:00:03<1:11:36, 15.46s/batch, batch_loss=14, batch

Validation:  63%|▋| 466/743 [2:00:03<1:11:32, 15.49s/batch, batch_loss=14, batch

Validation:  63%|▋| 466/743 [2:00:18<1:11:32, 15.49s/batch, batch_loss=26.2, bat

Validation:  63%|▋| 467/743 [2:00:18<1:11:25, 15.53s/batch, batch_loss=26.2, bat

Validation:  63%|▋| 467/743 [2:00:33<1:11:25, 15.53s/batch, batch_loss=15.1, bat

Validation:  63%|▋| 468/743 [2:00:33<1:10:05, 15.29s/batch, batch_loss=15.1, bat

Validation:  63%|▋| 468/743 [2:00:51<1:10:05, 15.29s/batch, batch_loss=26.5, bat

Validation:  63%|▋| 469/743 [2:00:51<1:12:57, 15.98s/batch, batch_loss=26.5, bat

Validation:  63%|▋| 469/743 [2:01:06<1:12:57, 15.98s/batch, batch_loss=6.36e+4, 

Validation:  63%|▋| 470/743 [2:01:06<1:12:08, 15.86s/batch, batch_loss=6.36e+4, 

Validation:  63%|▋| 470/743 [2:01:22<1:12:08, 15.86s/batch, batch_loss=13.8, bat

Validation:  63%|▋| 471/743 [2:01:22<1:11:33, 15.79s/batch, batch_loss=13.8, bat

Validation:  63%|▋| 471/743 [2:01:37<1:11:33, 15.79s/batch, batch_loss=23.6, bat

Validation:  64%|▋| 472/743 [2:01:37<1:10:56, 15.71s/batch, batch_loss=23.6, bat

Validation:  64%|▋| 472/743 [2:01:52<1:10:56, 15.71s/batch, batch_loss=590, batc

Validation:  64%|▋| 473/743 [2:01:52<1:09:51, 15.52s/batch, batch_loss=590, batc

Validation:  64%|▋| 473/743 [2:02:07<1:09:51, 15.52s/batch, batch_loss=17.7, bat

Validation:  64%|▋| 474/743 [2:02:07<1:07:40, 15.09s/batch, batch_loss=17.7, bat

Validation:  64%|▋| 474/743 [2:02:20<1:07:40, 15.09s/batch, batch_loss=20.7, bat

Validation:  64%|▋| 475/743 [2:02:20<1:04:39, 14.47s/batch, batch_loss=20.7, bat

Validation:  64%|▋| 475/743 [2:02:35<1:04:39, 14.47s/batch, batch_loss=8.65, bat

Validation:  64%|▋| 476/743 [2:02:35<1:06:07, 14.86s/batch, batch_loss=8.65, bat

Validation:  64%|▋| 476/743 [2:02:52<1:06:07, 14.86s/batch, batch_loss=13.5, bat

Validation:  64%|▋| 477/743 [2:02:52<1:08:46, 15.51s/batch, batch_loss=13.5, bat

Validation:  64%|▋| 477/743 [2:03:07<1:08:46, 15.51s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [2:03:07<1:07:41, 15.33s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [2:03:23<1:07:41, 15.33s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [2:03:23<1:07:25, 15.33s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [2:03:37<1:07:25, 15.33s/batch, batch_loss=11.6, bat

Validation:  65%|▋| 480/743 [2:03:37<1:05:34, 14.96s/batch, batch_loss=11.6, bat

Validation:  65%|▋| 480/743 [2:03:51<1:05:34, 14.96s/batch, batch_loss=10.4, bat

Validation:  65%|▋| 481/743 [2:03:51<1:04:26, 14.76s/batch, batch_loss=10.4, bat

Validation:  65%|▋| 481/743 [2:04:06<1:04:26, 14.76s/batch, batch_loss=6.95e+3, 

Validation:  65%|▋| 482/743 [2:04:06<1:04:11, 14.76s/batch, batch_loss=6.95e+3, 

Validation:  65%|▋| 482/743 [2:04:21<1:04:11, 14.76s/batch, batch_loss=20.3, bat

Validation:  65%|▋| 483/743 [2:04:21<1:04:52, 14.97s/batch, batch_loss=20.3, bat

Validation:  65%|▋| 483/743 [2:04:36<1:04:52, 14.97s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:04:36<1:04:00, 14.83s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:04:53<1:04:00, 14.83s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:04:53<1:07:21, 15.66s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:05:07<1:07:21, 15.66s/batch, batch_loss=16.8, bat

Validation:  65%|▋| 486/743 [2:05:07<1:04:49, 15.13s/batch, batch_loss=16.8, bat

Validation:  65%|▋| 486/743 [2:05:22<1:04:49, 15.13s/batch, batch_loss=36.4, bat

Validation:  66%|▋| 487/743 [2:05:22<1:04:43, 15.17s/batch, batch_loss=36.4, bat

Validation:  66%|▋| 487/743 [2:05:38<1:04:43, 15.17s/batch, batch_loss=28, batch

Validation:  66%|▋| 488/743 [2:05:38<1:05:00, 15.30s/batch, batch_loss=28, batch

Validation:  66%|▋| 488/743 [2:05:53<1:05:00, 15.30s/batch, batch_loss=12.4, bat

Validation:  66%|▋| 489/743 [2:05:53<1:04:04, 15.14s/batch, batch_loss=12.4, bat

Validation:  66%|▋| 489/743 [2:06:07<1:04:04, 15.14s/batch, batch_loss=21.9, bat

Validation:  66%|▋| 490/743 [2:06:07<1:02:42, 14.87s/batch, batch_loss=21.9, bat

Validation:  66%|▋| 490/743 [2:06:22<1:02:42, 14.87s/batch, batch_loss=20, batch

Validation:  66%|▋| 491/743 [2:06:22<1:02:01, 14.77s/batch, batch_loss=20, batch

Validation:  66%|▋| 491/743 [2:06:37<1:02:01, 14.77s/batch, batch_loss=1.06e+3, 

Validation:  66%|▋| 492/743 [2:06:37<1:02:04, 14.84s/batch, batch_loss=1.06e+3, 

Validation:  66%|▋| 492/743 [2:06:52<1:02:04, 14.84s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:06:52<1:02:26, 14.99s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:07:10<1:02:26, 14.99s/batch, batch_loss=7.29, bat

Validation:  66%|▋| 494/743 [2:07:10<1:05:39, 15.82s/batch, batch_loss=7.29, bat

Validation:  66%|▋| 494/743 [2:07:24<1:05:39, 15.82s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:07:24<1:03:33, 15.38s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:07:39<1:03:33, 15.38s/batch, batch_loss=18.3, bat

Validation:  67%|▋| 496/743 [2:07:39<1:03:06, 15.33s/batch, batch_loss=18.3, bat

Validation:  67%|▋| 496/743 [2:07:55<1:03:06, 15.33s/batch, batch_loss=12.3, bat

Validation:  67%|▋| 497/743 [2:07:55<1:03:26, 15.47s/batch, batch_loss=12.3, bat

Validation:  67%|▋| 497/743 [2:08:10<1:03:26, 15.47s/batch, batch_loss=16.1, bat

Validation:  67%|▋| 498/743 [2:08:10<1:02:39, 15.34s/batch, batch_loss=16.1, bat

Validation:  67%|▋| 498/743 [2:08:25<1:02:39, 15.34s/batch, batch_loss=4.75, bat

Validation:  67%|▋| 499/743 [2:08:25<1:02:01, 15.25s/batch, batch_loss=4.75, bat

Validation:  67%|▋| 499/743 [2:08:40<1:02:01, 15.25s/batch, batch_loss=2.51e+4, 

Validation:  67%|▋| 500/743 [2:08:40<1:01:12, 15.11s/batch, batch_loss=2.51e+4, 

Validation:  67%|▋| 500/743 [2:08:54<1:01:12, 15.11s/batch, batch_loss=21.9, bat

Validation:  67%|▋| 501/743 [2:08:54<1:00:13, 14.93s/batch, batch_loss=21.9, bat

Validation:  67%|▋| 501/743 [2:09:11<1:00:13, 14.93s/batch, batch_loss=3.16e+3, 

Validation:  68%|▋| 502/743 [2:09:11<1:01:36, 15.34s/batch, batch_loss=3.16e+3, 

Validation:  68%|▋| 502/743 [2:09:26<1:01:36, 15.34s/batch, batch_loss=16.6, bat

Validation:  68%|▋| 503/743 [2:09:26<1:01:52, 15.47s/batch, batch_loss=16.6, bat

Validation:  68%|▋| 503/743 [2:09:42<1:01:52, 15.47s/batch, batch_loss=11.3, bat

Validation:  68%|▋| 504/743 [2:09:42<1:01:32, 15.45s/batch, batch_loss=11.3, bat

Validation:  68%|▋| 504/743 [2:09:58<1:01:32, 15.45s/batch, batch_loss=23.9, bat

Validation:  68%|▋| 505/743 [2:09:58<1:02:39, 15.80s/batch, batch_loss=23.9, bat

Validation:  68%|▋| 505/743 [2:10:14<1:02:39, 15.80s/batch, batch_loss=2.85e+3, 

Validation:  68%|▋| 506/743 [2:10:14<1:02:35, 15.84s/batch, batch_loss=2.85e+3, 

Validation:  68%|▋| 506/743 [2:10:30<1:02:35, 15.84s/batch, batch_loss=1.99e+3, 

Validation:  68%|▋| 507/743 [2:10:30<1:01:55, 15.74s/batch, batch_loss=1.99e+3, 

Validation:  68%|▋| 507/743 [2:10:45<1:01:55, 15.74s/batch, batch_loss=8.38e+3, 

Validation:  68%|▋| 508/743 [2:10:45<1:01:05, 15.60s/batch, batch_loss=8.38e+3, 

Validation:  68%|▋| 508/743 [2:11:02<1:01:05, 15.60s/batch, batch_loss=8.49e+3, 

Validation:  69%|▋| 509/743 [2:11:02<1:01:52, 15.87s/batch, batch_loss=8.49e+3, 

Validation:  69%|▋| 509/743 [2:11:16<1:01:52, 15.87s/batch, batch_loss=13, batch

Validation:  69%|▋| 510/743 [2:11:16<59:42, 15.38s/batch, batch_loss=13, batch_i

Validation:  69%|▋| 510/743 [2:11:30<59:42, 15.38s/batch, batch_loss=19.6, batch

Validation:  69%|▋| 511/743 [2:11:30<57:52, 14.97s/batch, batch_loss=19.6, batch

Validation:  69%|▋| 511/743 [2:11:45<57:52, 14.97s/batch, batch_loss=16.3, batch

Validation:  69%|▋| 512/743 [2:11:45<57:13, 14.86s/batch, batch_loss=16.3, batch

Validation:  69%|▋| 512/743 [2:11:59<57:13, 14.86s/batch, batch_loss=18.6, batch

Validation:  69%|▋| 513/743 [2:11:59<56:35, 14.76s/batch, batch_loss=18.6, batch

Validation:  69%|▋| 513/743 [2:12:14<56:35, 14.76s/batch, batch_loss=17.6, batch

Validation:  69%|▋| 514/743 [2:12:14<56:08, 14.71s/batch, batch_loss=17.6, batch

Validation:  69%|▋| 514/743 [2:12:28<56:08, 14.71s/batch, batch_loss=14.6, batch

Validation:  69%|▋| 515/743 [2:12:28<55:07, 14.51s/batch, batch_loss=14.6, batch

Validation:  69%|▋| 515/743 [2:12:42<55:07, 14.51s/batch, batch_loss=14.5, batch

Validation:  69%|▋| 516/743 [2:12:42<54:32, 14.42s/batch, batch_loss=14.5, batch

Validation:  69%|▋| 516/743 [2:12:57<54:32, 14.42s/batch, batch_loss=6.16e+4, ba

Validation:  70%|▋| 517/743 [2:12:57<54:39, 14.51s/batch, batch_loss=6.16e+4, ba

Validation:  70%|▋| 517/743 [2:13:11<54:39, 14.51s/batch, batch_loss=498, batch_

Validation:  70%|▋| 518/743 [2:13:11<54:21, 14.50s/batch, batch_loss=498, batch_

Validation:  70%|▋| 518/743 [2:13:25<54:21, 14.50s/batch, batch_loss=11.6, batch

Validation:  70%|▋| 519/743 [2:13:25<53:58, 14.46s/batch, batch_loss=11.6, batch

Validation:  70%|▋| 519/743 [2:13:40<53:58, 14.46s/batch, batch_loss=19.9, batch

Validation:  70%|▋| 520/743 [2:13:40<54:04, 14.55s/batch, batch_loss=19.9, batch

Validation:  70%|▋| 520/743 [2:13:55<54:04, 14.55s/batch, batch_loss=13.2, batch

Validation:  70%|▋| 521/743 [2:13:55<54:24, 14.70s/batch, batch_loss=13.2, batch

Validation:  70%|▋| 521/743 [2:14:10<54:24, 14.70s/batch, batch_loss=12.3, batch

Validation:  70%|▋| 522/743 [2:14:10<54:28, 14.79s/batch, batch_loss=12.3, batch

Validation:  70%|▋| 522/743 [2:14:26<54:28, 14.79s/batch, batch_loss=427, batch_

Validation:  70%|▋| 523/743 [2:14:26<54:48, 14.95s/batch, batch_loss=427, batch_

Validation:  70%|▋| 523/743 [2:14:41<54:48, 14.95s/batch, batch_loss=14.1, batch

Validation:  71%|▋| 524/743 [2:14:41<55:19, 15.16s/batch, batch_loss=14.1, batch

Validation:  71%|▋| 524/743 [2:14:58<55:19, 15.16s/batch, batch_loss=27.2, batch

Validation:  71%|▋| 525/743 [2:14:58<56:22, 15.52s/batch, batch_loss=27.2, batch

Validation:  71%|▋| 525/743 [2:15:13<56:22, 15.52s/batch, batch_loss=10.4, batch

Validation:  71%|▋| 526/743 [2:15:13<56:03, 15.50s/batch, batch_loss=10.4, batch

Validation:  71%|▋| 526/743 [2:15:28<56:03, 15.50s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:15:28<55:08, 15.32s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:15:46<55:08, 15.32s/batch, batch_loss=505, batch_

Validation:  71%|▋| 528/743 [2:15:46<58:17, 16.27s/batch, batch_loss=505, batch_

Validation:  71%|▋| 528/743 [2:16:03<58:17, 16.27s/batch, batch_loss=6.5e+3, bat

Validation:  71%|▋| 529/743 [2:16:03<57:51, 16.22s/batch, batch_loss=6.5e+3, bat

Validation:  71%|▋| 529/743 [2:16:17<57:51, 16.22s/batch, batch_loss=204, batch_

Validation:  71%|▋| 530/743 [2:16:17<55:47, 15.72s/batch, batch_loss=204, batch_

Validation:  71%|▋| 530/743 [2:16:32<55:47, 15.72s/batch, batch_loss=39.1, batch

Validation:  71%|▋| 531/743 [2:16:32<54:44, 15.49s/batch, batch_loss=39.1, batch

Validation:  71%|▋| 531/743 [2:16:48<54:44, 15.49s/batch, batch_loss=262, batch_

Validation:  72%|▋| 532/743 [2:16:48<54:32, 15.51s/batch, batch_loss=262, batch_

Validation:  72%|▋| 532/743 [2:17:03<54:32, 15.51s/batch, batch_loss=8.5, batch_

Validation:  72%|▋| 533/743 [2:17:03<54:09, 15.47s/batch, batch_loss=8.5, batch_

Validation:  72%|▋| 533/743 [2:17:18<54:09, 15.47s/batch, batch_loss=12.5, batch

Validation:  72%|▋| 534/743 [2:17:18<53:19, 15.31s/batch, batch_loss=12.5, batch

Validation:  72%|▋| 534/743 [2:17:36<53:19, 15.31s/batch, batch_loss=25.6, batch

Validation:  72%|▋| 535/743 [2:17:36<56:04, 16.18s/batch, batch_loss=25.6, batch

Validation:  72%|▋| 535/743 [2:17:52<56:04, 16.18s/batch, batch_loss=22.5, batch

Validation:  72%|▋| 536/743 [2:17:52<55:01, 15.95s/batch, batch_loss=22.5, batch

Validation:  72%|▋| 536/743 [2:18:07<55:01, 15.95s/batch, batch_loss=13.3, batch

Validation:  72%|▋| 537/743 [2:18:07<54:00, 15.73s/batch, batch_loss=13.3, batch

Validation:  72%|▋| 537/743 [2:18:22<54:00, 15.73s/batch, batch_loss=16.7, batch

Validation:  72%|▋| 538/743 [2:18:22<52:54, 15.48s/batch, batch_loss=16.7, batch

Validation:  72%|▋| 538/743 [2:18:37<52:54, 15.48s/batch, batch_loss=260, batch_

Validation:  73%|▋| 539/743 [2:18:37<52:28, 15.44s/batch, batch_loss=260, batch_

Validation:  73%|▋| 539/743 [2:18:52<52:28, 15.44s/batch, batch_loss=20, batch_i

Validation:  73%|▋| 540/743 [2:18:52<51:41, 15.28s/batch, batch_loss=20, batch_i

Validation:  73%|▋| 540/743 [2:19:08<51:41, 15.28s/batch, batch_loss=32.8, batch

Validation:  73%|▋| 541/743 [2:19:08<51:50, 15.40s/batch, batch_loss=32.8, batch

Validation:  73%|▋| 541/743 [2:19:22<51:50, 15.40s/batch, batch_loss=1.94e+3, ba

Validation:  73%|▋| 542/743 [2:19:22<50:30, 15.08s/batch, batch_loss=1.94e+3, ba

Validation:  73%|▋| 542/743 [2:19:37<50:30, 15.08s/batch, batch_loss=22.1, batch

Validation:  73%|▋| 543/743 [2:19:37<49:51, 14.96s/batch, batch_loss=22.1, batch

Validation:  73%|▋| 543/743 [2:19:51<49:51, 14.96s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:19:51<49:13, 14.84s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:20:06<49:13, 14.84s/batch, batch_loss=2.74e+3, ba

Validation:  73%|▋| 545/743 [2:20:06<49:24, 14.97s/batch, batch_loss=2.74e+3, ba

Validation:  73%|▋| 545/743 [2:20:20<49:24, 14.97s/batch, batch_loss=8.6, batch_

Validation:  73%|▋| 546/743 [2:20:20<48:03, 14.64s/batch, batch_loss=8.6, batch_

Validation:  73%|▋| 546/743 [2:20:35<48:03, 14.64s/batch, batch_loss=265, batch_

Validation:  74%|▋| 547/743 [2:20:35<47:30, 14.55s/batch, batch_loss=265, batch_

Validation:  74%|▋| 547/743 [2:20:49<47:30, 14.55s/batch, batch_loss=28.4, batch

Validation:  74%|▋| 548/743 [2:20:49<46:37, 14.35s/batch, batch_loss=28.4, batch

Validation:  74%|▋| 548/743 [2:21:03<46:37, 14.35s/batch, batch_loss=4.12e+3, ba

Validation:  74%|▋| 549/743 [2:21:03<46:38, 14.43s/batch, batch_loss=4.12e+3, ba

Validation:  74%|▋| 549/743 [2:21:18<46:38, 14.43s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:21:18<47:14, 14.69s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:21:35<47:14, 14.69s/batch, batch_loss=16.6, batch

Validation:  74%|▋| 551/743 [2:21:35<49:09, 15.36s/batch, batch_loss=16.6, batch

Validation:  74%|▋| 551/743 [2:21:51<49:09, 15.36s/batch, batch_loss=6.78e+3, ba

Validation:  74%|▋| 552/743 [2:21:51<48:43, 15.30s/batch, batch_loss=6.78e+3, ba

Validation:  74%|▋| 552/743 [2:22:05<48:43, 15.30s/batch, batch_loss=32, batch_i

Validation:  74%|▋| 553/743 [2:22:05<47:29, 15.00s/batch, batch_loss=32, batch_i

Validation:  74%|▋| 553/743 [2:22:19<47:29, 15.00s/batch, batch_loss=20.7, batch

Validation:  75%|▋| 554/743 [2:22:19<46:51, 14.87s/batch, batch_loss=20.7, batch

Validation:  75%|▋| 554/743 [2:22:34<46:51, 14.87s/batch, batch_loss=2.46e+3, ba

Validation:  75%|▋| 555/743 [2:22:34<46:21, 14.79s/batch, batch_loss=2.46e+3, ba

Validation:  75%|▋| 555/743 [2:22:49<46:21, 14.79s/batch, batch_loss=46.5, batch

Validation:  75%|▋| 556/743 [2:22:49<46:27, 14.91s/batch, batch_loss=46.5, batch

Validation:  75%|▋| 556/743 [2:23:05<46:27, 14.91s/batch, batch_loss=7.5, batch_

Validation:  75%|▋| 557/743 [2:23:05<46:37, 15.04s/batch, batch_loss=7.5, batch_

Validation:  75%|▋| 557/743 [2:23:24<46:37, 15.04s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:23:24<50:31, 16.39s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:23:40<50:31, 16.39s/batch, batch_loss=3.59e+3, ba

Validation:  75%|▊| 559/743 [2:23:40<49:28, 16.14s/batch, batch_loss=3.59e+3, ba

Validation:  75%|▊| 559/743 [2:23:55<49:28, 16.14s/batch, batch_loss=2.92e+3, ba

Validation:  75%|▊| 560/743 [2:23:55<48:41, 15.96s/batch, batch_loss=2.92e+3, ba

Validation:  75%|▊| 560/743 [2:24:10<48:41, 15.96s/batch, batch_loss=9.44, batch

Validation:  76%|▊| 561/743 [2:24:10<47:39, 15.71s/batch, batch_loss=9.44, batch

Validation:  76%|▊| 561/743 [2:24:25<47:39, 15.71s/batch, batch_loss=15.8, batch

Validation:  76%|▊| 562/743 [2:24:25<46:34, 15.44s/batch, batch_loss=15.8, batch

Validation:  76%|▊| 562/743 [2:24:40<46:34, 15.44s/batch, batch_loss=21.6, batch

Validation:  76%|▊| 563/743 [2:24:40<46:00, 15.33s/batch, batch_loss=21.6, batch

Validation:  76%|▊| 563/743 [2:24:56<46:00, 15.33s/batch, batch_loss=1.08e+3, ba

Validation:  76%|▊| 564/743 [2:24:56<45:55, 15.40s/batch, batch_loss=1.08e+3, ba

Validation:  76%|▊| 564/743 [2:25:14<45:55, 15.40s/batch, batch_loss=3.69e+3, ba

Validation:  76%|▊| 565/743 [2:25:14<48:34, 16.38s/batch, batch_loss=3.69e+3, ba

Validation:  76%|▊| 565/743 [2:25:29<48:34, 16.38s/batch, batch_loss=12, batch_i

Validation:  76%|▊| 566/743 [2:25:29<47:02, 15.94s/batch, batch_loss=12, batch_i

Validation:  76%|▊| 566/743 [2:25:45<47:02, 15.94s/batch, batch_loss=16, batch_i

Validation:  76%|▊| 567/743 [2:25:45<46:06, 15.72s/batch, batch_loss=16, batch_i

Validation:  76%|▊| 567/743 [2:26:00<46:06, 15.72s/batch, batch_loss=16.2, batch

Validation:  76%|▊| 568/743 [2:26:00<45:35, 15.63s/batch, batch_loss=16.2, batch

Validation:  76%|▊| 568/743 [2:26:15<45:35, 15.63s/batch, batch_loss=17.5, batch

Validation:  77%|▊| 569/743 [2:26:15<45:01, 15.52s/batch, batch_loss=17.5, batch

Validation:  77%|▊| 569/743 [2:26:31<45:01, 15.52s/batch, batch_loss=21.4, batch

Validation:  77%|▊| 570/743 [2:26:31<45:12, 15.68s/batch, batch_loss=21.4, batch

Validation:  77%|▊| 570/743 [2:26:46<45:12, 15.68s/batch, batch_loss=11, batch_i

Validation:  77%|▊| 571/743 [2:26:46<44:27, 15.51s/batch, batch_loss=11, batch_i

Validation:  77%|▊| 571/743 [2:27:01<44:27, 15.51s/batch, batch_loss=27.6, batch

Validation:  77%|▊| 572/743 [2:27:01<43:23, 15.23s/batch, batch_loss=27.6, batch

Validation:  77%|▊| 572/743 [2:27:18<43:23, 15.23s/batch, batch_loss=14.1, batch

Validation:  77%|▊| 573/743 [2:27:18<44:41, 15.78s/batch, batch_loss=14.1, batch

Validation:  77%|▊| 573/743 [2:27:33<44:41, 15.78s/batch, batch_loss=14.4, batch

Validation:  77%|▊| 574/743 [2:27:33<43:29, 15.44s/batch, batch_loss=14.4, batch

Validation:  77%|▊| 574/743 [2:27:47<43:29, 15.44s/batch, batch_loss=12.9, batch

Validation:  77%|▊| 575/743 [2:27:47<42:17, 15.10s/batch, batch_loss=12.9, batch

Validation:  77%|▊| 575/743 [2:28:02<42:17, 15.10s/batch, batch_loss=25.1, batch

Validation:  78%|▊| 576/743 [2:28:02<42:17, 15.19s/batch, batch_loss=25.1, batch

Validation:  78%|▊| 576/743 [2:28:18<42:17, 15.19s/batch, batch_loss=27.8, batch

Validation:  78%|▊| 577/743 [2:28:18<42:33, 15.38s/batch, batch_loss=27.8, batch

Validation:  78%|▊| 577/743 [2:28:33<42:33, 15.38s/batch, batch_loss=24.5, batch

Validation:  78%|▊| 578/743 [2:28:33<41:50, 15.21s/batch, batch_loss=24.5, batch

Validation:  78%|▊| 578/743 [2:28:48<41:50, 15.21s/batch, batch_loss=322, batch_

Validation:  78%|▊| 579/743 [2:28:48<41:34, 15.21s/batch, batch_loss=322, batch_

Validation:  78%|▊| 579/743 [2:29:02<41:34, 15.21s/batch, batch_loss=6.65, batch

Validation:  78%|▊| 580/743 [2:29:02<40:28, 14.90s/batch, batch_loss=6.65, batch

Validation:  78%|▊| 580/743 [2:29:16<40:28, 14.90s/batch, batch_loss=10, batch_i

Validation:  78%|▊| 581/743 [2:29:16<39:11, 14.52s/batch, batch_loss=10, batch_i

Validation:  78%|▊| 581/743 [2:29:30<39:11, 14.52s/batch, batch_loss=17.6, batch

Validation:  78%|▊| 582/743 [2:29:30<38:49, 14.47s/batch, batch_loss=17.6, batch

Validation:  78%|▊| 582/743 [2:29:45<38:49, 14.47s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:29:45<38:54, 14.59s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:30:00<38:54, 14.59s/batch, batch_loss=2.56, batch

Validation:  79%|▊| 584/743 [2:30:00<38:34, 14.56s/batch, batch_loss=2.56, batch

Validation:  79%|▊| 584/743 [2:30:14<38:34, 14.56s/batch, batch_loss=20.9, batch

Validation:  79%|▊| 585/743 [2:30:14<37:45, 14.34s/batch, batch_loss=20.9, batch

Validation:  79%|▊| 585/743 [2:30:28<37:45, 14.34s/batch, batch_loss=552, batch_

Validation:  79%|▊| 586/743 [2:30:28<37:21, 14.28s/batch, batch_loss=552, batch_

Validation:  79%|▊| 586/743 [2:30:42<37:21, 14.28s/batch, batch_loss=9.31, batch

Validation:  79%|▊| 587/743 [2:30:42<37:32, 14.44s/batch, batch_loss=9.31, batch

Validation:  79%|▊| 587/743 [2:30:58<37:32, 14.44s/batch, batch_loss=407, batch_

Validation:  79%|▊| 588/743 [2:30:58<38:16, 14.82s/batch, batch_loss=407, batch_

Validation:  79%|▊| 588/743 [2:31:13<38:16, 14.82s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:31:13<37:46, 14.72s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:31:28<37:46, 14.72s/batch, batch_loss=15.3, batch

Validation:  79%|▊| 590/743 [2:31:28<37:54, 14.87s/batch, batch_loss=15.3, batch

Validation:  79%|▊| 590/743 [2:31:46<37:54, 14.87s/batch, batch_loss=14.6, batch

Validation:  80%|▊| 591/743 [2:31:46<40:10, 15.86s/batch, batch_loss=14.6, batch

Validation:  80%|▊| 591/743 [2:32:00<40:10, 15.86s/batch, batch_loss=13, batch_i

Validation:  80%|▊| 592/743 [2:32:00<38:19, 15.23s/batch, batch_loss=13, batch_i

Validation:  80%|▊| 592/743 [2:32:15<38:19, 15.23s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:32:15<38:21, 15.34s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:32:30<38:21, 15.34s/batch, batch_loss=3.97, batch

Validation:  80%|▊| 594/743 [2:32:30<37:30, 15.11s/batch, batch_loss=3.97, batch

Validation:  80%|▊| 594/743 [2:32:45<37:30, 15.11s/batch, batch_loss=4.57, batch

Validation:  80%|▊| 595/743 [2:32:45<37:05, 15.04s/batch, batch_loss=4.57, batch

Validation:  80%|▊| 595/743 [2:33:01<37:05, 15.04s/batch, batch_loss=4.79, batch

Validation:  80%|▊| 596/743 [2:33:01<37:57, 15.49s/batch, batch_loss=4.79, batch

Validation:  80%|▊| 596/743 [2:33:17<37:57, 15.49s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:33:17<37:50, 15.55s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:33:32<37:50, 15.55s/batch, batch_loss=15, batch_i

Validation:  80%|▊| 598/743 [2:33:32<36:57, 15.29s/batch, batch_loss=15, batch_i

Validation:  80%|▊| 598/743 [2:33:47<36:57, 15.29s/batch, batch_loss=16, batch_i

Validation:  81%|▊| 599/743 [2:33:47<36:52, 15.36s/batch, batch_loss=16, batch_i

Validation:  81%|▊| 599/743 [2:34:03<36:52, 15.36s/batch, batch_loss=22.6, batch

Validation:  81%|▊| 600/743 [2:34:03<36:56, 15.50s/batch, batch_loss=22.6, batch

Validation:  81%|▊| 600/743 [2:34:19<36:56, 15.50s/batch, batch_loss=14.9, batch

Validation:  81%|▊| 601/743 [2:34:19<36:59, 15.63s/batch, batch_loss=14.9, batch

Validation:  81%|▊| 601/743 [2:34:35<36:59, 15.63s/batch, batch_loss=14.9, batch

Validation:  81%|▊| 602/743 [2:34:35<37:02, 15.76s/batch, batch_loss=14.9, batch

Validation:  81%|▊| 602/743 [2:34:50<37:02, 15.76s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:34:50<35:51, 15.37s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:35:04<35:51, 15.37s/batch, batch_loss=24, batch_i

Validation:  81%|▊| 604/743 [2:35:04<34:48, 15.03s/batch, batch_loss=24, batch_i

Validation:  81%|▊| 604/743 [2:35:21<34:48, 15.03s/batch, batch_loss=28, batch_i

Validation:  81%|▊| 605/743 [2:35:21<35:46, 15.55s/batch, batch_loss=28, batch_i

Validation:  81%|▊| 605/743 [2:35:35<35:46, 15.55s/batch, batch_loss=256, batch_

Validation:  82%|▊| 606/743 [2:35:35<34:52, 15.27s/batch, batch_loss=256, batch_

Validation:  82%|▊| 606/743 [2:35:50<34:52, 15.27s/batch, batch_loss=28, batch_i

Validation:  82%|▊| 607/743 [2:35:50<34:06, 15.05s/batch, batch_loss=28, batch_i

Validation:  82%|▊| 607/743 [2:36:04<34:06, 15.05s/batch, batch_loss=20.9, batch

Validation:  82%|▊| 608/743 [2:36:04<33:18, 14.80s/batch, batch_loss=20.9, batch

Validation:  82%|▊| 608/743 [2:36:18<33:18, 14.80s/batch, batch_loss=15.3, batch

Validation:  82%|▊| 609/743 [2:36:18<32:49, 14.70s/batch, batch_loss=15.3, batch

Validation:  82%|▊| 609/743 [2:36:33<32:49, 14.70s/batch, batch_loss=19, batch_i

Validation:  82%|▊| 610/743 [2:36:33<32:31, 14.67s/batch, batch_loss=19, batch_i

Validation:  82%|▊| 610/743 [2:36:48<32:31, 14.67s/batch, batch_loss=19.8, batch

Validation:  82%|▊| 611/743 [2:36:48<32:16, 14.67s/batch, batch_loss=19.8, batch

Validation:  82%|▊| 611/743 [2:37:02<32:16, 14.67s/batch, batch_loss=10.2, batch

Validation:  82%|▊| 612/743 [2:37:02<32:05, 14.69s/batch, batch_loss=10.2, batch

Validation:  82%|▊| 612/743 [2:37:19<32:05, 14.69s/batch, batch_loss=14.8, batch

Validation:  83%|▊| 613/743 [2:37:19<33:21, 15.40s/batch, batch_loss=14.8, batch

Validation:  83%|▊| 613/743 [2:37:34<33:21, 15.40s/batch, batch_loss=5.65e+3, ba

Validation:  83%|▊| 614/743 [2:37:34<32:38, 15.18s/batch, batch_loss=5.65e+3, ba

Validation:  83%|▊| 614/743 [2:37:50<32:38, 15.18s/batch, batch_loss=12.3, batch

Validation:  83%|▊| 615/743 [2:37:50<32:49, 15.39s/batch, batch_loss=12.3, batch

Validation:  83%|▊| 615/743 [2:38:05<32:49, 15.39s/batch, batch_loss=19.3, batch

Validation:  83%|▊| 616/743 [2:38:05<32:21, 15.28s/batch, batch_loss=19.3, batch

Validation:  83%|▊| 616/743 [2:38:20<32:21, 15.28s/batch, batch_loss=6.38, batch

Validation:  83%|▊| 617/743 [2:38:20<31:58, 15.23s/batch, batch_loss=6.38, batch

Validation:  83%|▊| 617/743 [2:38:35<31:58, 15.23s/batch, batch_loss=10.4, batch

Validation:  83%|▊| 618/743 [2:38:35<31:40, 15.20s/batch, batch_loss=10.4, batch

Validation:  83%|▊| 618/743 [2:38:51<31:40, 15.20s/batch, batch_loss=351, batch_

Validation:  83%|▊| 619/743 [2:38:51<31:42, 15.34s/batch, batch_loss=351, batch_

Validation:  83%|▊| 619/743 [2:39:06<31:42, 15.34s/batch, batch_loss=24, batch_i

Validation:  83%|▊| 620/743 [2:39:06<31:18, 15.27s/batch, batch_loss=24, batch_i

Validation:  83%|▊| 620/743 [2:39:23<31:18, 15.27s/batch, batch_loss=8.91, batch

Validation:  84%|▊| 621/743 [2:39:23<32:01, 15.75s/batch, batch_loss=8.91, batch

Validation:  84%|▊| 621/743 [2:39:38<32:01, 15.75s/batch, batch_loss=12.6, batch

Validation:  84%|▊| 622/743 [2:39:38<31:35, 15.66s/batch, batch_loss=12.6, batch

Validation:  84%|▊| 622/743 [2:39:54<31:35, 15.66s/batch, batch_loss=189, batch_

Validation:  84%|▊| 623/743 [2:39:54<31:18, 15.65s/batch, batch_loss=189, batch_

Validation:  84%|▊| 623/743 [2:40:09<31:18, 15.65s/batch, batch_loss=13, batch_i

Validation:  84%|▊| 624/743 [2:40:09<30:50, 15.55s/batch, batch_loss=13, batch_i

Validation:  84%|▊| 624/743 [2:40:24<30:50, 15.55s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:40:24<29:53, 15.20s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:40:39<29:53, 15.20s/batch, batch_loss=19.6, batch

Validation:  84%|▊| 626/743 [2:40:39<29:28, 15.11s/batch, batch_loss=19.6, batch

Validation:  84%|▊| 626/743 [2:40:54<29:28, 15.11s/batch, batch_loss=18.2, batch

Validation:  84%|▊| 627/743 [2:40:54<29:19, 15.17s/batch, batch_loss=18.2, batch

Validation:  84%|▊| 627/743 [2:41:10<29:19, 15.17s/batch, batch_loss=15.3, batch

Validation:  85%|▊| 628/743 [2:41:10<29:24, 15.34s/batch, batch_loss=15.3, batch

Validation:  85%|▊| 628/743 [2:41:24<29:24, 15.34s/batch, batch_loss=13.9, batch

Validation:  85%|▊| 629/743 [2:41:24<28:37, 15.07s/batch, batch_loss=13.9, batch

Validation:  85%|▊| 629/743 [2:41:40<28:37, 15.07s/batch, batch_loss=14.2, batch

Validation:  85%|▊| 630/743 [2:41:40<28:48, 15.30s/batch, batch_loss=14.2, batch

Validation:  85%|▊| 630/743 [2:41:56<28:48, 15.30s/batch, batch_loss=246, batch_

Validation:  85%|▊| 631/743 [2:41:56<28:57, 15.51s/batch, batch_loss=246, batch_

Validation:  85%|▊| 631/743 [2:42:11<28:57, 15.51s/batch, batch_loss=17.7, batch

Validation:  85%|▊| 632/743 [2:42:11<28:26, 15.38s/batch, batch_loss=17.7, batch

Validation:  85%|▊| 632/743 [2:42:25<28:26, 15.38s/batch, batch_loss=13.7, batch

Validation:  85%|▊| 633/743 [2:42:25<27:28, 14.99s/batch, batch_loss=13.7, batch

Validation:  85%|▊| 633/743 [2:42:40<27:28, 14.99s/batch, batch_loss=10.6, batch

Validation:  85%|▊| 634/743 [2:42:40<27:08, 14.94s/batch, batch_loss=10.6, batch

Validation:  85%|▊| 634/743 [2:42:55<27:08, 14.94s/batch, batch_loss=7.79, batch

Validation:  85%|▊| 635/743 [2:42:55<27:02, 15.03s/batch, batch_loss=7.79, batch

Validation:  85%|▊| 635/743 [2:43:09<27:02, 15.03s/batch, batch_loss=805, batch_

Validation:  86%|▊| 636/743 [2:43:09<26:17, 14.75s/batch, batch_loss=805, batch_

Validation:  86%|▊| 636/743 [2:43:27<26:17, 14.75s/batch, batch_loss=713, batch_

Validation:  86%|▊| 637/743 [2:43:27<27:32, 15.59s/batch, batch_loss=713, batch_

Validation:  86%|▊| 637/743 [2:43:42<27:32, 15.59s/batch, batch_loss=26.5, batch

Validation:  86%|▊| 638/743 [2:43:42<26:53, 15.37s/batch, batch_loss=26.5, batch

Validation:  86%|▊| 638/743 [2:43:56<26:53, 15.37s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:43:56<26:20, 15.20s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:44:12<26:20, 15.20s/batch, batch_loss=23.6, batch

Validation:  86%|▊| 640/743 [2:44:12<26:02, 15.17s/batch, batch_loss=23.6, batch

Validation:  86%|▊| 640/743 [2:44:27<26:02, 15.17s/batch, batch_loss=33.8, batch

Validation:  86%|▊| 641/743 [2:44:27<25:40, 15.11s/batch, batch_loss=33.8, batch

Validation:  86%|▊| 641/743 [2:44:41<25:40, 15.11s/batch, batch_loss=35.4, batch

Validation:  86%|▊| 642/743 [2:44:41<25:17, 15.02s/batch, batch_loss=35.4, batch

Validation:  86%|▊| 642/743 [2:44:56<25:17, 15.02s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [2:44:56<24:44, 14.85s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [2:45:10<24:44, 14.85s/batch, batch_loss=21.3, batch

Validation:  87%|▊| 644/743 [2:45:10<24:08, 14.63s/batch, batch_loss=21.3, batch

Validation:  87%|▊| 644/743 [2:45:24<24:08, 14.63s/batch, batch_loss=22.2, batch

Validation:  87%|▊| 645/743 [2:45:24<23:36, 14.46s/batch, batch_loss=22.2, batch

Validation:  87%|▊| 645/743 [2:45:38<23:36, 14.46s/batch, batch_loss=6.23e+3, ba

Validation:  87%|▊| 646/743 [2:45:38<23:21, 14.44s/batch, batch_loss=6.23e+3, ba

Validation:  87%|▊| 646/743 [2:45:53<23:21, 14.44s/batch, batch_loss=16.2, batch

Validation:  87%|▊| 647/743 [2:45:53<23:15, 14.53s/batch, batch_loss=16.2, batch

Validation:  87%|▊| 647/743 [2:46:08<23:15, 14.53s/batch, batch_loss=5.62, batch

Validation:  87%|▊| 648/743 [2:46:08<23:02, 14.56s/batch, batch_loss=5.62, batch

Validation:  87%|▊| 648/743 [2:46:21<23:02, 14.56s/batch, batch_loss=10.8, batch

Validation:  87%|▊| 649/743 [2:46:21<22:19, 14.25s/batch, batch_loss=10.8, batch

Validation:  87%|▊| 649/743 [2:46:36<22:19, 14.25s/batch, batch_loss=12.6, batch

Validation:  87%|▊| 650/743 [2:46:36<22:22, 14.43s/batch, batch_loss=12.6, batch

Validation:  87%|▊| 650/743 [2:46:53<22:22, 14.43s/batch, batch_loss=24.1, batch

Validation:  88%|▉| 651/743 [2:46:53<23:05, 15.06s/batch, batch_loss=24.1, batch

Validation:  88%|▉| 651/743 [2:47:11<23:05, 15.06s/batch, batch_loss=22.8, batch

Validation:  88%|▉| 652/743 [2:47:11<24:09, 15.93s/batch, batch_loss=22.8, batch

Validation:  88%|▉| 652/743 [2:47:26<24:09, 15.93s/batch, batch_loss=15.1, batch

Validation:  88%|▉| 653/743 [2:47:26<23:47, 15.87s/batch, batch_loss=15.1, batch

Validation:  88%|▉| 653/743 [2:47:42<23:47, 15.87s/batch, batch_loss=31.9, batch

Validation:  88%|▉| 654/743 [2:47:42<23:31, 15.86s/batch, batch_loss=31.9, batch

Validation:  88%|▉| 654/743 [2:47:59<23:31, 15.86s/batch, batch_loss=26.8, batch

Validation:  88%|▉| 655/743 [2:47:59<23:30, 16.03s/batch, batch_loss=26.8, batch

Validation:  88%|▉| 655/743 [2:48:15<23:30, 16.03s/batch, batch_loss=18.7, batch

Validation:  88%|▉| 656/743 [2:48:15<23:26, 16.16s/batch, batch_loss=18.7, batch

Validation:  88%|▉| 656/743 [2:48:32<23:26, 16.16s/batch, batch_loss=13.3, batch

Validation:  88%|▉| 657/743 [2:48:32<23:27, 16.37s/batch, batch_loss=13.3, batch

Validation:  88%|▉| 657/743 [2:48:52<23:27, 16.37s/batch, batch_loss=17.6, batch

Validation:  89%|▉| 658/743 [2:48:52<24:35, 17.36s/batch, batch_loss=17.6, batch

Validation:  89%|▉| 658/743 [2:49:08<24:35, 17.36s/batch, batch_loss=34.1, batch

Validation:  89%|▉| 659/743 [2:49:08<23:46, 16.98s/batch, batch_loss=34.1, batch

Validation:  89%|▉| 659/743 [2:49:24<23:46, 16.98s/batch, batch_loss=27.5, batch

Validation:  89%|▉| 660/743 [2:49:24<23:15, 16.82s/batch, batch_loss=27.5, batch

Validation:  89%|▉| 660/743 [2:49:40<23:15, 16.82s/batch, batch_loss=16.6, batch

Validation:  89%|▉| 661/743 [2:49:40<22:46, 16.66s/batch, batch_loss=16.6, batch

Validation:  89%|▉| 661/743 [2:49:57<22:46, 16.66s/batch, batch_loss=6.39, batch

Validation:  89%|▉| 662/743 [2:49:57<22:18, 16.52s/batch, batch_loss=6.39, batch

Validation:  89%|▉| 662/743 [2:50:14<22:18, 16.52s/batch, batch_loss=3.59e+3, ba

Validation:  89%|▉| 663/743 [2:50:14<22:12, 16.66s/batch, batch_loss=3.59e+3, ba

Validation:  89%|▉| 663/743 [2:50:30<22:12, 16.66s/batch, batch_loss=13.1, batch

Validation:  89%|▉| 664/743 [2:50:30<21:39, 16.45s/batch, batch_loss=13.1, batch

Validation:  89%|▉| 664/743 [2:50:45<21:39, 16.45s/batch, batch_loss=25.1, batch

Validation:  90%|▉| 665/743 [2:50:45<21:04, 16.22s/batch, batch_loss=25.1, batch

Validation:  90%|▉| 665/743 [2:51:03<21:04, 16.22s/batch, batch_loss=13.6, batch

Validation:  90%|▉| 666/743 [2:51:03<21:16, 16.58s/batch, batch_loss=13.6, batch

Validation:  90%|▉| 666/743 [2:51:17<21:16, 16.58s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:51:17<20:08, 15.91s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:51:32<20:08, 15.91s/batch, batch_loss=21.6, batch

Validation:  90%|▉| 668/743 [2:51:32<19:35, 15.68s/batch, batch_loss=21.6, batch

Validation:  90%|▉| 668/743 [2:51:46<19:35, 15.68s/batch, batch_loss=25, batch_i

Validation:  90%|▉| 669/743 [2:51:46<18:48, 15.25s/batch, batch_loss=25, batch_i

Validation:  90%|▉| 669/743 [2:52:01<18:48, 15.25s/batch, batch_loss=21.3, batch

Validation:  90%|▉| 670/743 [2:52:01<18:30, 15.21s/batch, batch_loss=21.3, batch

Validation:  90%|▉| 670/743 [2:52:16<18:30, 15.21s/batch, batch_loss=3.09e+3, ba

Validation:  90%|▉| 671/743 [2:52:16<18:06, 15.09s/batch, batch_loss=3.09e+3, ba

Validation:  90%|▉| 671/743 [2:52:30<18:06, 15.09s/batch, batch_loss=22.4, batch

Validation:  90%|▉| 672/743 [2:52:30<17:27, 14.75s/batch, batch_loss=22.4, batch

Validation:  90%|▉| 672/743 [2:52:45<17:27, 14.75s/batch, batch_loss=14.9, batch

Validation:  91%|▉| 673/743 [2:52:45<17:21, 14.87s/batch, batch_loss=14.9, batch

Validation:  91%|▉| 673/743 [2:53:01<17:21, 14.87s/batch, batch_loss=11.9, batch

Validation:  91%|▉| 674/743 [2:53:01<17:11, 14.95s/batch, batch_loss=11.9, batch

Validation:  91%|▉| 674/743 [2:53:15<17:11, 14.95s/batch, batch_loss=21.9, batch

Validation:  91%|▉| 675/743 [2:53:15<16:45, 14.79s/batch, batch_loss=21.9, batch

Validation:  91%|▉| 675/743 [2:53:29<16:45, 14.79s/batch, batch_loss=20.4, batch

Validation:  91%|▉| 676/743 [2:53:29<16:22, 14.66s/batch, batch_loss=20.4, batch

Validation:  91%|▉| 676/743 [2:53:44<16:22, 14.66s/batch, batch_loss=20.5, batch

Validation:  91%|▉| 677/743 [2:53:44<16:13, 14.76s/batch, batch_loss=20.5, batch

Validation:  91%|▉| 677/743 [2:53:59<16:13, 14.76s/batch, batch_loss=15.1, batch

Validation:  91%|▉| 678/743 [2:53:59<15:51, 14.63s/batch, batch_loss=15.1, batch

Validation:  91%|▉| 678/743 [2:54:12<15:51, 14.63s/batch, batch_loss=12.8, batch

Validation:  91%|▉| 679/743 [2:54:12<15:20, 14.39s/batch, batch_loss=12.8, batch

Validation:  91%|▉| 679/743 [2:54:28<15:20, 14.39s/batch, batch_loss=24.4, batch

Validation:  92%|▉| 680/743 [2:54:28<15:25, 14.68s/batch, batch_loss=24.4, batch

Validation:  92%|▉| 680/743 [2:54:43<15:25, 14.68s/batch, batch_loss=27.5, batch

Validation:  92%|▉| 681/743 [2:54:43<15:28, 14.97s/batch, batch_loss=27.5, batch

Validation:  92%|▉| 681/743 [2:54:58<15:28, 14.97s/batch, batch_loss=35.5, batch

Validation:  92%|▉| 682/743 [2:54:58<15:05, 14.84s/batch, batch_loss=35.5, batch

Validation:  92%|▉| 682/743 [2:55:13<15:05, 14.84s/batch, batch_loss=22.3, batch

Validation:  92%|▉| 683/743 [2:55:13<14:59, 15.00s/batch, batch_loss=22.3, batch

Validation:  92%|▉| 683/743 [2:55:29<14:59, 15.00s/batch, batch_loss=14.5, batch

Validation:  92%|▉| 684/743 [2:55:29<14:57, 15.21s/batch, batch_loss=14.5, batch

Validation:  92%|▉| 684/743 [2:55:44<14:57, 15.21s/batch, batch_loss=16.4, batch

Validation:  92%|▉| 685/743 [2:55:44<14:43, 15.23s/batch, batch_loss=16.4, batch

Validation:  92%|▉| 685/743 [2:56:00<14:43, 15.23s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:56:00<14:26, 15.21s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:56:14<14:26, 15.21s/batch, batch_loss=22, batch_i

Validation:  92%|▉| 687/743 [2:56:14<14:07, 15.13s/batch, batch_loss=22, batch_i

Validation:  92%|▉| 687/743 [2:56:29<14:07, 15.13s/batch, batch_loss=16, batch_i

Validation:  93%|▉| 688/743 [2:56:29<13:50, 15.09s/batch, batch_loss=16, batch_i

Validation:  93%|▉| 688/743 [2:56:44<13:50, 15.09s/batch, batch_loss=15, batch_i

Validation:  93%|▉| 689/743 [2:56:44<13:32, 15.04s/batch, batch_loss=15, batch_i

Validation:  93%|▉| 689/743 [2:57:00<13:32, 15.04s/batch, batch_loss=23.4, batch

Validation:  93%|▉| 690/743 [2:57:00<13:32, 15.33s/batch, batch_loss=23.4, batch

Validation:  93%|▉| 690/743 [2:57:16<13:32, 15.33s/batch, batch_loss=16.1, batch

Validation:  93%|▉| 691/743 [2:57:16<13:17, 15.33s/batch, batch_loss=16.1, batch

Validation:  93%|▉| 691/743 [2:57:31<13:17, 15.33s/batch, batch_loss=22.8, batch

Validation:  93%|▉| 692/743 [2:57:31<13:02, 15.34s/batch, batch_loss=22.8, batch

Validation:  93%|▉| 692/743 [2:57:46<13:02, 15.34s/batch, batch_loss=21.4, batch

Validation:  93%|▉| 693/743 [2:57:46<12:42, 15.24s/batch, batch_loss=21.4, batch

Validation:  93%|▉| 693/743 [2:58:02<12:42, 15.24s/batch, batch_loss=26.7, batch

Validation:  93%|▉| 694/743 [2:58:02<12:38, 15.48s/batch, batch_loss=26.7, batch

Validation:  93%|▉| 694/743 [2:58:17<12:38, 15.48s/batch, batch_loss=3.11e+3, ba

Validation:  94%|▉| 695/743 [2:58:17<12:13, 15.28s/batch, batch_loss=3.11e+3, ba

Validation:  94%|▉| 695/743 [2:58:32<12:13, 15.28s/batch, batch_loss=9.04, batch

Validation:  94%|▉| 696/743 [2:58:32<12:01, 15.35s/batch, batch_loss=9.04, batch

Validation:  94%|▉| 696/743 [2:58:48<12:01, 15.35s/batch, batch_loss=38.2, batch

Validation:  94%|▉| 697/743 [2:58:48<11:43, 15.30s/batch, batch_loss=38.2, batch

Validation:  94%|▉| 697/743 [2:59:02<11:43, 15.30s/batch, batch_loss=756, batch_

Validation:  94%|▉| 698/743 [2:59:02<11:18, 15.07s/batch, batch_loss=756, batch_

Validation:  94%|▉| 698/743 [2:59:16<11:18, 15.07s/batch, batch_loss=6.85, batch

Validation:  94%|▉| 699/743 [2:59:16<10:51, 14.82s/batch, batch_loss=6.85, batch

Validation:  94%|▉| 699/743 [2:59:31<10:51, 14.82s/batch, batch_loss=953, batch_

Validation:  94%|▉| 700/743 [2:59:31<10:33, 14.74s/batch, batch_loss=953, batch_

Validation:  94%|▉| 700/743 [2:59:46<10:33, 14.74s/batch, batch_loss=7.73, batch

Validation:  94%|▉| 701/743 [2:59:46<10:21, 14.80s/batch, batch_loss=7.73, batch

Validation:  94%|▉| 701/743 [3:00:00<10:21, 14.80s/batch, batch_loss=7.77, batch

Validation:  94%|▉| 702/743 [3:00:00<09:57, 14.57s/batch, batch_loss=7.77, batch

Validation:  94%|▉| 702/743 [3:00:15<09:57, 14.57s/batch, batch_loss=180, batch_

Validation:  95%|▉| 703/743 [3:00:15<09:44, 14.61s/batch, batch_loss=180, batch_

Validation:  95%|▉| 703/743 [3:00:29<09:44, 14.61s/batch, batch_loss=469, batch_

Validation:  95%|▉| 704/743 [3:00:29<09:24, 14.47s/batch, batch_loss=469, batch_

Validation:  95%|▉| 704/743 [3:00:43<09:24, 14.47s/batch, batch_loss=8.91, batch

Validation:  95%|▉| 705/743 [3:00:43<09:09, 14.46s/batch, batch_loss=8.91, batch

Validation:  95%|▉| 705/743 [3:01:01<09:09, 14.46s/batch, batch_loss=17.9, batch

Validation:  95%|▉| 706/743 [3:01:01<09:28, 15.37s/batch, batch_loss=17.9, batch

Validation:  95%|▉| 706/743 [3:01:16<09:28, 15.37s/batch, batch_loss=421, batch_

Validation:  95%|▉| 707/743 [3:01:16<09:16, 15.45s/batch, batch_loss=421, batch_

Validation:  95%|▉| 707/743 [3:01:31<09:16, 15.45s/batch, batch_loss=19.9, batch

Validation:  95%|▉| 708/743 [3:01:31<08:56, 15.32s/batch, batch_loss=19.9, batch

Validation:  95%|▉| 708/743 [3:01:47<08:56, 15.32s/batch, batch_loss=26.8, batch

Validation:  95%|▉| 709/743 [3:01:47<08:43, 15.39s/batch, batch_loss=26.8, batch

Validation:  95%|▉| 709/743 [3:02:02<08:43, 15.39s/batch, batch_loss=18.3, batch

Validation:  96%|▉| 710/743 [3:02:02<08:24, 15.28s/batch, batch_loss=18.3, batch

Validation:  96%|▉| 710/743 [3:02:15<08:24, 15.28s/batch, batch_loss=15.6, batch

Validation:  96%|▉| 711/743 [3:02:15<07:50, 14.70s/batch, batch_loss=15.6, batch

Validation:  96%|▉| 711/743 [3:02:30<07:50, 14.70s/batch, batch_loss=19.1, batch

Validation:  96%|▉| 712/743 [3:02:30<07:36, 14.73s/batch, batch_loss=19.1, batch

Validation:  96%|▉| 712/743 [3:02:47<07:36, 14.73s/batch, batch_loss=18.2, batch

Validation:  96%|▉| 713/743 [3:02:47<07:37, 15.25s/batch, batch_loss=18.2, batch

Validation:  96%|▉| 713/743 [3:03:03<07:37, 15.25s/batch, batch_loss=6.1, batch_

Validation:  96%|▉| 714/743 [3:03:03<07:28, 15.47s/batch, batch_loss=6.1, batch_

Validation:  96%|▉| 714/743 [3:03:18<07:28, 15.47s/batch, batch_loss=9.84, batch

Validation:  96%|▉| 715/743 [3:03:18<07:14, 15.51s/batch, batch_loss=9.84, batch

Validation:  96%|▉| 715/743 [3:03:36<07:14, 15.51s/batch, batch_loss=18.9, batch

Validation:  96%|▉| 716/743 [3:03:36<07:18, 16.24s/batch, batch_loss=18.9, batch

Validation:  96%|▉| 716/743 [3:03:52<07:18, 16.24s/batch, batch_loss=387, batch_

Validation:  97%|▉| 717/743 [3:03:52<06:59, 16.13s/batch, batch_loss=387, batch_

Validation:  97%|▉| 717/743 [3:04:07<06:59, 16.13s/batch, batch_loss=28.4, batch

Validation:  97%|▉| 718/743 [3:04:07<06:33, 15.75s/batch, batch_loss=28.4, batch

Validation:  97%|▉| 718/743 [3:04:21<06:33, 15.75s/batch, batch_loss=23.9, batch

Validation:  97%|▉| 719/743 [3:04:21<06:06, 15.26s/batch, batch_loss=23.9, batch

Validation:  97%|▉| 719/743 [3:04:36<06:06, 15.26s/batch, batch_loss=23.5, batch

Validation:  97%|▉| 720/743 [3:04:36<05:52, 15.33s/batch, batch_loss=23.5, batch

Validation:  97%|▉| 720/743 [3:04:53<05:52, 15.33s/batch, batch_loss=13.4, batch

Validation:  97%|▉| 721/743 [3:04:53<05:45, 15.71s/batch, batch_loss=13.4, batch

Validation:  97%|▉| 721/743 [3:05:08<05:45, 15.71s/batch, batch_loss=26.7, batch

Validation:  97%|▉| 722/743 [3:05:08<05:25, 15.52s/batch, batch_loss=26.7, batch

Validation:  97%|▉| 722/743 [3:05:24<05:25, 15.52s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [3:05:24<05:12, 15.61s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [3:05:40<05:12, 15.61s/batch, batch_loss=18.6, batch

Validation:  97%|▉| 724/743 [3:05:40<04:59, 15.76s/batch, batch_loss=18.6, batch

Validation:  97%|▉| 724/743 [3:05:56<04:59, 15.76s/batch, batch_loss=16.9, batch

Validation:  98%|▉| 725/743 [3:05:56<04:46, 15.90s/batch, batch_loss=16.9, batch

Validation:  98%|▉| 725/743 [3:06:12<04:46, 15.90s/batch, batch_loss=20, batch_i

Validation:  98%|▉| 726/743 [3:06:12<04:31, 15.99s/batch, batch_loss=20, batch_i

Validation:  98%|▉| 726/743 [3:06:28<04:31, 15.99s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:06:28<04:13, 15.82s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:06:42<04:13, 15.82s/batch, batch_loss=31.2, batch

Validation:  98%|▉| 728/743 [3:06:42<03:51, 15.45s/batch, batch_loss=31.2, batch

Validation:  98%|▉| 728/743 [3:06:58<03:51, 15.45s/batch, batch_loss=32, batch_i

Validation:  98%|▉| 729/743 [3:06:58<03:38, 15.58s/batch, batch_loss=32, batch_i

Validation:  98%|▉| 729/743 [3:07:13<03:38, 15.58s/batch, batch_loss=20.3, batch

Validation:  98%|▉| 730/743 [3:07:13<03:20, 15.42s/batch, batch_loss=20.3, batch

Validation:  98%|▉| 730/743 [3:07:29<03:20, 15.42s/batch, batch_loss=15.6, batch

Validation:  98%|▉| 731/743 [3:07:29<03:04, 15.34s/batch, batch_loss=15.6, batch

Validation:  98%|▉| 731/743 [3:07:44<03:04, 15.34s/batch, batch_loss=11.5, batch

Validation:  99%|▉| 732/743 [3:07:44<02:47, 15.24s/batch, batch_loss=11.5, batch

Validation:  99%|▉| 732/743 [3:07:57<02:47, 15.24s/batch, batch_loss=31.3, batch

Validation:  99%|▉| 733/743 [3:07:57<02:28, 14.85s/batch, batch_loss=31.3, batch

Validation:  99%|▉| 733/743 [3:08:12<02:28, 14.85s/batch, batch_loss=3.98, batch

Validation:  99%|▉| 734/743 [3:08:12<02:13, 14.84s/batch, batch_loss=3.98, batch

Validation:  99%|▉| 734/743 [3:08:26<02:13, 14.84s/batch, batch_loss=7.61, batch

Validation:  99%|▉| 735/743 [3:08:26<01:56, 14.57s/batch, batch_loss=7.61, batch

Validation:  99%|▉| 735/743 [3:08:40<01:56, 14.57s/batch, batch_loss=1.69, batch

Validation:  99%|▉| 736/743 [3:08:40<01:40, 14.42s/batch, batch_loss=1.69, batch

Validation:  99%|▉| 736/743 [3:08:53<01:40, 14.42s/batch, batch_loss=0.525, batc

Validation:  99%|▉| 737/743 [3:08:53<01:24, 14.00s/batch, batch_loss=0.525, batc

Validation:  99%|▉| 737/743 [3:09:08<01:24, 14.00s/batch, batch_loss=0.525, batc

Validation:  99%|▉| 738/743 [3:09:08<01:11, 14.29s/batch, batch_loss=0.525, batc

Validation:  99%|▉| 738/743 [3:09:21<01:11, 14.29s/batch, batch_loss=0.525, batc

Validation:  99%|▉| 739/743 [3:09:21<00:55, 13.78s/batch, batch_loss=0.525, batc

Validation:  99%|▉| 739/743 [3:09:33<00:55, 13.78s/batch, batch_loss=0.525, batc

Validation: 100%|▉| 740/743 [3:09:33<00:40, 13.41s/batch, batch_loss=0.525, batc

Validation: 100%|▉| 740/743 [3:09:46<00:40, 13.41s/batch, batch_loss=0.525, batc

Validation: 100%|▉| 741/743 [3:09:46<00:26, 13.21s/batch, batch_loss=0.525, batc

Validation: 100%|▉| 741/743 [3:09:59<00:26, 13.21s/batch, batch_loss=0.525, batc

Validation: 100%|▉| 742/743 [3:09:59<00:13, 13.20s/batch, batch_loss=0.525, batc

Validation: 100%|▉| 742/743 [3:10:11<00:13, 13.20s/batch, batch_loss=0.524, batc

Validation: 100%|█| 743/743 [3:10:11<00:00, 12.67s/batch, batch_loss=0.524, batc

Validation: 100%|█| 743/743 [3:10:11<00:00, 15.36s/batch, batch_loss=0.524, batc




Val Loss: 1297.6048


Epoch 5/10:   0%|                                    | 0/991 [00:00<?, ?batch/s]

Epoch 5/10:   0%| | 0/991 [00:14<?, ?batch/s, batch_loss=18.3, batch_index=1, ba

Epoch 5/10:   0%| | 1/991 [00:14<4:06:15, 14.93s/batch, batch_loss=18.3, batch_i

Epoch 5/10:   0%| | 1/991 [00:30<4:06:15, 14.93s/batch, batch_loss=17.3, batch_i

Epoch 5/10:   0%| | 2/991 [00:30<4:11:16, 15.24s/batch, batch_loss=17.3, batch_i

Epoch 5/10:   0%| | 2/991 [00:48<4:11:16, 15.24s/batch, batch_loss=13, batch_ind

Epoch 5/10:   0%| | 3/991 [00:48<4:33:52, 16.63s/batch, batch_loss=13, batch_ind

Epoch 5/10:   0%| | 3/991 [01:04<4:33:52, 16.63s/batch, batch_loss=6.54, batch_i

Epoch 5/10:   0%| | 4/991 [01:04<4:25:10, 16.12s/batch, batch_loss=6.54, batch_i

Epoch 5/10:   0%| | 4/991 [01:19<4:25:10, 16.12s/batch, batch_loss=19.8, batch_i

Epoch 5/10:   1%| | 5/991 [01:19<4:23:36, 16.04s/batch, batch_loss=19.8, batch_i

Epoch 5/10:   1%| | 5/991 [01:34<4:23:36, 16.04s/batch, batch_loss=25.6, batch_i

Epoch 5/10:   1%| | 6/991 [01:34<4:16:19, 15.61s/batch, batch_loss=25.6, batch_i

Epoch 5/10:   1%| | 6/991 [01:50<4:16:19, 15.61s/batch, batch_loss=20.2, batch_i

Epoch 5/10:   1%| | 7/991 [01:50<4:15:38, 15.59s/batch, batch_loss=20.2, batch_i

Epoch 5/10:   1%| | 7/991 [02:05<4:15:38, 15.59s/batch, batch_loss=604, batch_in

Epoch 5/10:   1%| | 8/991 [02:05<4:12:43, 15.43s/batch, batch_loss=604, batch_in

Epoch 5/10:   1%| | 8/991 [02:20<4:12:43, 15.43s/batch, batch_loss=16.4, batch_i

Epoch 5/10:   1%| | 9/991 [02:20<4:10:56, 15.33s/batch, batch_loss=16.4, batch_i

Epoch 5/10:   1%| | 9/991 [02:36<4:10:56, 15.33s/batch, batch_loss=14.7, batch_i

Epoch 5/10:   1%| | 10/991 [02:36<4:12:32, 15.45s/batch, batch_loss=14.7, batch_

Epoch 5/10:   1%| | 10/991 [02:52<4:12:32, 15.45s/batch, batch_loss=14.2, batch_

Epoch 5/10:   1%| | 11/991 [02:52<4:18:31, 15.83s/batch, batch_loss=14.2, batch_

Epoch 5/10:   1%| | 11/991 [03:08<4:18:31, 15.83s/batch, batch_loss=1.99e+3, bat

Epoch 5/10:   1%| | 12/991 [03:08<4:15:38, 15.67s/batch, batch_loss=1.99e+3, bat

Epoch 5/10:   1%| | 12/991 [03:24<4:15:38, 15.67s/batch, batch_loss=20, batch_in

Epoch 5/10:   1%| | 13/991 [03:24<4:17:12, 15.78s/batch, batch_loss=20, batch_in

Epoch 5/10:   1%| | 13/991 [03:40<4:17:12, 15.78s/batch, batch_loss=12.4, batch_

Epoch 5/10:   1%| | 14/991 [03:40<4:21:14, 16.04s/batch, batch_loss=12.4, batch_

Epoch 5/10:   1%| | 14/991 [03:56<4:21:14, 16.04s/batch, batch_loss=10.6, batch_

Epoch 5/10:   2%| | 15/991 [03:56<4:19:35, 15.96s/batch, batch_loss=10.6, batch_

Epoch 5/10:   2%| | 15/991 [04:11<4:19:35, 15.96s/batch, batch_loss=13.5, batch_

Epoch 5/10:   2%| | 16/991 [04:11<4:16:04, 15.76s/batch, batch_loss=13.5, batch_

Epoch 5/10:   2%| | 16/991 [04:28<4:16:04, 15.76s/batch, batch_loss=15.5, batch_

Epoch 5/10:   2%| | 17/991 [04:28<4:21:41, 16.12s/batch, batch_loss=15.5, batch_

Epoch 5/10:   2%| | 17/991 [04:44<4:21:41, 16.12s/batch, batch_loss=10.1, batch_

Epoch 5/10:   2%| | 18/991 [04:44<4:17:36, 15.89s/batch, batch_loss=10.1, batch_

Epoch 5/10:   2%| | 18/991 [04:59<4:17:36, 15.89s/batch, batch_loss=8.96e+3, bat

Epoch 5/10:   2%| | 19/991 [04:59<4:15:07, 15.75s/batch, batch_loss=8.96e+3, bat

Epoch 5/10:   2%| | 19/991 [05:14<4:15:07, 15.75s/batch, batch_loss=10.9, batch_

Epoch 5/10:   2%| | 20/991 [05:14<4:09:55, 15.44s/batch, batch_loss=10.9, batch_

Epoch 5/10:   2%| | 20/991 [05:29<4:09:55, 15.44s/batch, batch_loss=20.3, batch_

Epoch 5/10:   2%| | 21/991 [05:29<4:06:20, 15.24s/batch, batch_loss=20.3, batch_

Epoch 5/10:   2%| | 21/991 [05:47<4:06:20, 15.24s/batch, batch_loss=1.08e+3, bat

Epoch 5/10:   2%| | 22/991 [05:47<4:23:49, 16.34s/batch, batch_loss=1.08e+3, bat

Epoch 5/10:   2%| | 22/991 [06:03<4:23:49, 16.34s/batch, batch_loss=7.87, batch_

Epoch 5/10:   2%| | 23/991 [06:03<4:18:44, 16.04s/batch, batch_loss=7.87, batch_

Epoch 5/10:   2%| | 23/991 [06:18<4:18:44, 16.04s/batch, batch_loss=13.5, batch_

Epoch 5/10:   2%| | 24/991 [06:18<4:16:13, 15.90s/batch, batch_loss=13.5, batch_

Epoch 5/10:   2%| | 24/991 [06:34<4:16:13, 15.90s/batch, batch_loss=12.5, batch_

Epoch 5/10:   3%| | 25/991 [06:34<4:14:51, 15.83s/batch, batch_loss=12.5, batch_

Epoch 5/10:   3%| | 25/991 [06:49<4:14:51, 15.83s/batch, batch_loss=14.8, batch_

Epoch 5/10:   3%| | 26/991 [06:49<4:11:12, 15.62s/batch, batch_loss=14.8, batch_

Epoch 5/10:   3%| | 26/991 [07:04<4:11:12, 15.62s/batch, batch_loss=15.3, batch_

Epoch 5/10:   3%| | 27/991 [07:04<4:08:36, 15.47s/batch, batch_loss=15.3, batch_

Epoch 5/10:   3%| | 27/991 [07:19<4:08:36, 15.47s/batch, batch_loss=1.14e+3, bat

Epoch 5/10:   3%| | 28/991 [07:19<4:06:03, 15.33s/batch, batch_loss=1.14e+3, bat

Epoch 5/10:   3%| | 28/991 [07:37<4:06:03, 15.33s/batch, batch_loss=10.8, batch_

Epoch 5/10:   3%| | 29/991 [07:37<4:16:40, 16.01s/batch, batch_loss=10.8, batch_

Epoch 5/10:   3%| | 29/991 [07:53<4:16:40, 16.01s/batch, batch_loss=10.5, batch_

Epoch 5/10:   3%| | 30/991 [07:53<4:15:24, 15.95s/batch, batch_loss=10.5, batch_

Epoch 5/10:   3%| | 30/991 [08:08<4:15:24, 15.95s/batch, batch_loss=10.2, batch_

Epoch 5/10:   3%| | 31/991 [08:08<4:12:17, 15.77s/batch, batch_loss=10.2, batch_

Epoch 5/10:   3%| | 31/991 [08:24<4:12:17, 15.77s/batch, batch_loss=1.3e+4, batc

Epoch 5/10:   3%| | 32/991 [08:24<4:11:11, 15.72s/batch, batch_loss=1.3e+4, batc

Epoch 5/10:   3%| | 32/991 [08:40<4:11:11, 15.72s/batch, batch_loss=16, batch_in

Epoch 5/10:   3%| | 33/991 [08:40<4:13:13, 15.86s/batch, batch_loss=16, batch_in

Epoch 5/10:   3%| | 33/991 [08:55<4:13:13, 15.86s/batch, batch_loss=8.34, batch_

Epoch 5/10:   3%| | 34/991 [08:55<4:09:41, 15.65s/batch, batch_loss=8.34, batch_

Epoch 5/10:   3%| | 34/991 [09:11<4:09:41, 15.65s/batch, batch_loss=13.2, batch_

Epoch 5/10:   4%| | 35/991 [09:11<4:11:12, 15.77s/batch, batch_loss=13.2, batch_

Epoch 5/10:   4%| | 35/991 [09:27<4:11:12, 15.77s/batch, batch_loss=10.6, batch_

Epoch 5/10:   4%| | 36/991 [09:27<4:11:34, 15.81s/batch, batch_loss=10.6, batch_

Epoch 5/10:   4%| | 36/991 [09:44<4:11:34, 15.81s/batch, batch_loss=10.5, batch_

Epoch 5/10:   4%| | 37/991 [09:44<4:16:22, 16.12s/batch, batch_loss=10.5, batch_

Epoch 5/10:   4%| | 37/991 [10:00<4:16:22, 16.12s/batch, batch_loss=7.13, batch_

Epoch 5/10:   4%| | 38/991 [10:00<4:14:41, 16.04s/batch, batch_loss=7.13, batch_

Epoch 5/10:   4%| | 38/991 [10:15<4:14:41, 16.04s/batch, batch_loss=1.73e+3, bat

Epoch 5/10:   4%| | 39/991 [10:15<4:13:07, 15.95s/batch, batch_loss=1.73e+3, bat

Epoch 5/10:   4%| | 39/991 [10:31<4:13:07, 15.95s/batch, batch_loss=14.3, batch_

Epoch 5/10:   4%| | 40/991 [10:31<4:09:15, 15.73s/batch, batch_loss=14.3, batch_

Epoch 5/10:   4%| | 40/991 [10:47<4:09:15, 15.73s/batch, batch_loss=6.2e+3, batc

Epoch 5/10:   4%| | 41/991 [10:47<4:10:01, 15.79s/batch, batch_loss=6.2e+3, batc

Epoch 5/10:   4%| | 41/991 [11:02<4:10:01, 15.79s/batch, batch_loss=16, batch_in

Epoch 5/10:   4%| | 42/991 [11:02<4:08:08, 15.69s/batch, batch_loss=16, batch_in

Epoch 5/10:   4%| | 42/991 [11:18<4:08:08, 15.69s/batch, batch_loss=11.4, batch_

Epoch 5/10:   4%| | 43/991 [11:18<4:10:10, 15.83s/batch, batch_loss=11.4, batch_

Epoch 5/10:   4%| | 43/991 [11:34<4:10:10, 15.83s/batch, batch_loss=15.7, batch_

Epoch 5/10:   4%| | 44/991 [11:34<4:08:35, 15.75s/batch, batch_loss=15.7, batch_

Epoch 5/10:   4%| | 44/991 [11:49<4:08:35, 15.75s/batch, batch_loss=18.8, batch_

Epoch 5/10:   5%| | 45/991 [11:49<4:06:23, 15.63s/batch, batch_loss=18.8, batch_

Epoch 5/10:   5%| | 45/991 [12:05<4:06:23, 15.63s/batch, batch_loss=14, batch_in

Epoch 5/10:   5%| | 46/991 [12:05<4:06:04, 15.62s/batch, batch_loss=14, batch_in

Epoch 5/10:   5%| | 46/991 [12:20<4:06:04, 15.62s/batch, batch_loss=7.38, batch_

Epoch 5/10:   5%| | 47/991 [12:20<4:04:28, 15.54s/batch, batch_loss=7.38, batch_

Epoch 5/10:   5%| | 47/991 [12:36<4:04:28, 15.54s/batch, batch_loss=12.8, batch_

Epoch 5/10:   5%| | 48/991 [12:36<4:05:49, 15.64s/batch, batch_loss=12.8, batch_

Epoch 5/10:   5%| | 48/991 [12:52<4:05:49, 15.64s/batch, batch_loss=12.4, batch_

Epoch 5/10:   5%| | 49/991 [12:52<4:09:25, 15.89s/batch, batch_loss=12.4, batch_

Epoch 5/10:   5%| | 49/991 [13:07<4:09:25, 15.89s/batch, batch_loss=16.5, batch_

Epoch 5/10:   5%| | 50/991 [13:07<4:01:38, 15.41s/batch, batch_loss=16.5, batch_

Epoch 5/10:   5%| | 50/991 [13:22<4:01:38, 15.41s/batch, batch_loss=8.33, batch_

Epoch 5/10:   5%| | 51/991 [13:22<4:00:18, 15.34s/batch, batch_loss=8.33, batch_

Epoch 5/10:   5%| | 51/991 [13:37<4:00:18, 15.34s/batch, batch_loss=14.4, batch_

Epoch 5/10:   5%| | 52/991 [13:37<3:59:54, 15.33s/batch, batch_loss=14.4, batch_

Epoch 5/10:   5%| | 52/991 [13:53<3:59:54, 15.33s/batch, batch_loss=13.5, batch_

Epoch 5/10:   5%| | 53/991 [13:53<4:01:28, 15.45s/batch, batch_loss=13.5, batch_

Epoch 5/10:   5%| | 53/991 [14:08<4:01:28, 15.45s/batch, batch_loss=10.1, batch_

Epoch 5/10:   5%| | 54/991 [14:08<3:58:44, 15.29s/batch, batch_loss=10.1, batch_

Epoch 5/10:   5%| | 54/991 [14:23<3:58:44, 15.29s/batch, batch_loss=10.3, batch_

Epoch 5/10:   6%| | 55/991 [14:23<3:57:41, 15.24s/batch, batch_loss=10.3, batch_

Epoch 5/10:   6%| | 55/991 [14:38<3:57:41, 15.24s/batch, batch_loss=11.2, batch_

Epoch 5/10:   6%| | 56/991 [14:38<3:58:10, 15.28s/batch, batch_loss=11.2, batch_

Epoch 5/10:   6%| | 56/991 [14:54<3:58:10, 15.28s/batch, batch_loss=7.26, batch_

Epoch 5/10:   6%| | 57/991 [14:54<3:58:29, 15.32s/batch, batch_loss=7.26, batch_

Epoch 5/10:   6%| | 57/991 [15:11<3:58:29, 15.32s/batch, batch_loss=19.8, batch_

Epoch 5/10:   6%| | 58/991 [15:11<4:07:52, 15.94s/batch, batch_loss=19.8, batch_

Epoch 5/10:   6%| | 58/991 [15:27<4:07:52, 15.94s/batch, batch_loss=9.63, batch_

Epoch 5/10:   6%| | 59/991 [15:27<4:06:11, 15.85s/batch, batch_loss=9.63, batch_

Epoch 5/10:   6%| | 59/991 [15:42<4:06:11, 15.85s/batch, batch_loss=19.3, batch_

Epoch 5/10:   6%| | 60/991 [15:42<4:05:04, 15.79s/batch, batch_loss=19.3, batch_

Epoch 5/10:   6%| | 60/991 [15:59<4:05:04, 15.79s/batch, batch_loss=11.8, batch_

Epoch 5/10:   6%| | 61/991 [15:59<4:07:38, 15.98s/batch, batch_loss=11.8, batch_

Epoch 5/10:   6%| | 61/991 [16:14<4:07:38, 15.98s/batch, batch_loss=13.6, batch_

Epoch 5/10:   6%| | 62/991 [16:14<4:05:08, 15.83s/batch, batch_loss=13.6, batch_

Epoch 5/10:   6%| | 62/991 [16:30<4:05:08, 15.83s/batch, batch_loss=422, batch_i

Epoch 5/10:   6%| | 63/991 [16:30<4:03:25, 15.74s/batch, batch_loss=422, batch_i

Epoch 5/10:   6%| | 63/991 [16:46<4:03:25, 15.74s/batch, batch_loss=796, batch_i

Epoch 5/10:   6%| | 64/991 [16:46<4:03:05, 15.73s/batch, batch_loss=796, batch_i

Epoch 5/10:   6%| | 64/991 [17:04<4:03:05, 15.73s/batch, batch_loss=2.55e+3, bat

Epoch 5/10:   7%| | 65/991 [17:04<4:14:28, 16.49s/batch, batch_loss=2.55e+3, bat

Epoch 5/10:   7%| | 65/991 [17:19<4:14:28, 16.49s/batch, batch_loss=4.45, batch_

Epoch 5/10:   7%| | 66/991 [17:19<4:07:24, 16.05s/batch, batch_loss=4.45, batch_

Epoch 5/10:   7%| | 66/991 [17:36<4:07:24, 16.05s/batch, batch_loss=11.3, batch_

Epoch 5/10:   7%| | 67/991 [17:36<4:12:05, 16.37s/batch, batch_loss=11.3, batch_

Epoch 5/10:   7%| | 67/991 [17:52<4:12:05, 16.37s/batch, batch_loss=9.33, batch_

Epoch 5/10:   7%| | 68/991 [17:52<4:11:24, 16.34s/batch, batch_loss=9.33, batch_

Epoch 5/10:   7%| | 68/991 [18:07<4:11:24, 16.34s/batch, batch_loss=24.6, batch_

Epoch 5/10:   7%| | 69/991 [18:07<4:06:03, 16.01s/batch, batch_loss=24.6, batch_

Epoch 5/10:   7%| | 69/991 [18:23<4:06:03, 16.01s/batch, batch_loss=8.12, batch_

Epoch 5/10:   7%| | 70/991 [18:23<4:02:30, 15.80s/batch, batch_loss=8.12, batch_

Epoch 5/10:   7%| | 70/991 [18:38<4:02:30, 15.80s/batch, batch_loss=16.5, batch_

Epoch 5/10:   7%| | 71/991 [18:38<4:00:16, 15.67s/batch, batch_loss=16.5, batch_

Epoch 5/10:   7%| | 71/991 [18:53<4:00:16, 15.67s/batch, batch_loss=13.7, batch_

Epoch 5/10:   7%| | 72/991 [18:53<3:57:53, 15.53s/batch, batch_loss=13.7, batch_

Epoch 5/10:   7%| | 72/991 [19:10<3:57:53, 15.53s/batch, batch_loss=25.4, batch_

Epoch 5/10:   7%| | 73/991 [19:10<4:00:45, 15.74s/batch, batch_loss=25.4, batch_

Epoch 5/10:   7%| | 73/991 [19:26<4:00:45, 15.74s/batch, batch_loss=1.74e+3, bat

Epoch 5/10:   7%| | 74/991 [19:26<4:02:48, 15.89s/batch, batch_loss=1.74e+3, bat

Epoch 5/10:   7%| | 74/991 [19:41<4:02:48, 15.89s/batch, batch_loss=17.7, batch_

Epoch 5/10:   8%| | 75/991 [19:41<4:00:19, 15.74s/batch, batch_loss=17.7, batch_

Epoch 5/10:   8%| | 75/991 [19:57<4:00:19, 15.74s/batch, batch_loss=11.3, batch_

Epoch 5/10:   8%| | 76/991 [19:57<4:00:55, 15.80s/batch, batch_loss=11.3, batch_

Epoch 5/10:   8%| | 76/991 [20:13<4:00:55, 15.80s/batch, batch_loss=11.5, batch_

Epoch 5/10:   8%| | 77/991 [20:13<3:59:28, 15.72s/batch, batch_loss=11.5, batch_

Epoch 5/10:   8%| | 77/991 [20:28<3:59:28, 15.72s/batch, batch_loss=11.9, batch_

Epoch 5/10:   8%| | 78/991 [20:28<3:59:32, 15.74s/batch, batch_loss=11.9, batch_

Epoch 5/10:   8%| | 78/991 [20:43<3:59:32, 15.74s/batch, batch_loss=8.63, batch_

Epoch 5/10:   8%| | 79/991 [20:43<3:53:31, 15.36s/batch, batch_loss=8.63, batch_

Epoch 5/10:   8%| | 79/991 [20:58<3:53:31, 15.36s/batch, batch_loss=9.89, batch_

Epoch 5/10:   8%| | 80/991 [20:58<3:50:35, 15.19s/batch, batch_loss=9.89, batch_

Epoch 5/10:   8%| | 80/991 [21:12<3:50:35, 15.19s/batch, batch_loss=14.7, batch_

Epoch 5/10:   8%| | 81/991 [21:12<3:48:19, 15.05s/batch, batch_loss=14.7, batch_

Epoch 5/10:   8%| | 81/991 [21:30<3:48:19, 15.05s/batch, batch_loss=15.6, batch_

Epoch 5/10:   8%| | 82/991 [21:30<3:59:06, 15.78s/batch, batch_loss=15.6, batch_

Epoch 5/10:   8%| | 82/991 [21:45<3:59:06, 15.78s/batch, batch_loss=11.2, batch_

Epoch 5/10:   8%| | 83/991 [21:45<3:53:50, 15.45s/batch, batch_loss=11.2, batch_

Epoch 5/10:   8%| | 83/991 [21:57<3:53:50, 15.45s/batch, batch_loss=11.2, batch_

Epoch 5/10:   8%| | 84/991 [21:57<3:41:29, 14.65s/batch, batch_loss=11.2, batch_

Epoch 5/10:   8%| | 84/991 [22:11<3:41:29, 14.65s/batch, batch_loss=9.96, batch_

Epoch 5/10:   9%| | 85/991 [22:11<3:37:56, 14.43s/batch, batch_loss=9.96, batch_

Epoch 5/10:   9%| | 85/991 [22:27<3:37:56, 14.43s/batch, batch_loss=11.7, batch_

Epoch 5/10:   9%| | 86/991 [22:27<3:42:33, 14.75s/batch, batch_loss=11.7, batch_

Epoch 5/10:   9%| | 86/991 [22:43<3:42:33, 14.75s/batch, batch_loss=10.3, batch_

Epoch 5/10:   9%| | 87/991 [22:43<3:48:22, 15.16s/batch, batch_loss=10.3, batch_

Epoch 5/10:   9%| | 87/991 [22:58<3:48:22, 15.16s/batch, batch_loss=9.62, batch_

Epoch 5/10:   9%| | 88/991 [22:58<3:49:01, 15.22s/batch, batch_loss=9.62, batch_

Epoch 5/10:   9%| | 88/991 [23:13<3:49:01, 15.22s/batch, batch_loss=5.61, batch_

Epoch 5/10:   9%| | 89/991 [23:13<3:47:46, 15.15s/batch, batch_loss=5.61, batch_

Epoch 5/10:   9%| | 89/991 [23:28<3:47:46, 15.15s/batch, batch_loss=247, batch_i

Epoch 5/10:   9%| | 90/991 [23:28<3:45:37, 15.02s/batch, batch_loss=247, batch_i

Epoch 5/10:   9%| | 90/991 [23:43<3:45:37, 15.02s/batch, batch_loss=1.54e+3, bat

Epoch 5/10:   9%| | 91/991 [23:43<3:44:22, 14.96s/batch, batch_loss=1.54e+3, bat

Epoch 5/10:   9%| | 91/991 [23:57<3:44:22, 14.96s/batch, batch_loss=18.5, batch_

Epoch 5/10:   9%| | 92/991 [23:57<3:42:16, 14.83s/batch, batch_loss=18.5, batch_

Epoch 5/10:   9%| | 92/991 [24:12<3:42:16, 14.83s/batch, batch_loss=20.9, batch_

Epoch 5/10:   9%| | 93/991 [24:12<3:43:20, 14.92s/batch, batch_loss=20.9, batch_

Epoch 5/10:   9%| | 93/991 [24:27<3:43:20, 14.92s/batch, batch_loss=18.3, batch_

Epoch 5/10:   9%| | 94/991 [24:27<3:42:46, 14.90s/batch, batch_loss=18.3, batch_

Epoch 5/10:   9%| | 94/991 [24:43<3:42:46, 14.90s/batch, batch_loss=18.7, batch_

Epoch 5/10:  10%| | 95/991 [24:43<3:44:08, 15.01s/batch, batch_loss=18.7, batch_

Epoch 5/10:  10%| | 95/991 [24:57<3:44:08, 15.01s/batch, batch_loss=18.7, batch_

Epoch 5/10:  10%| | 96/991 [24:57<3:41:01, 14.82s/batch, batch_loss=18.7, batch_

Epoch 5/10:  10%| | 96/991 [25:14<3:41:01, 14.82s/batch, batch_loss=18.3, batch_

Epoch 5/10:  10%| | 97/991 [25:14<3:50:54, 15.50s/batch, batch_loss=18.3, batch_

Epoch 5/10:  10%| | 97/991 [25:29<3:50:54, 15.50s/batch, batch_loss=19.4, batch_

Epoch 5/10:  10%| | 98/991 [25:29<3:50:10, 15.46s/batch, batch_loss=19.4, batch_

Epoch 5/10:  10%| | 98/991 [25:45<3:50:10, 15.46s/batch, batch_loss=17, batch_in

Epoch 5/10:  10%| | 99/991 [25:45<3:49:42, 15.45s/batch, batch_loss=17, batch_in

Epoch 5/10:  10%| | 99/991 [26:01<3:49:42, 15.45s/batch, batch_loss=17.9, batch_

Epoch 5/10:  10%| | 100/991 [26:01<3:50:43, 15.54s/batch, batch_loss=17.9, batch

Epoch 5/10:  10%| | 100/991 [26:16<3:50:43, 15.54s/batch, batch_loss=12.9, batch

Epoch 5/10:  10%| | 101/991 [26:16<3:51:20, 15.60s/batch, batch_loss=12.9, batch

Epoch 5/10:  10%| | 101/991 [26:31<3:51:20, 15.60s/batch, batch_loss=24.5, batch

Epoch 5/10:  10%| | 102/991 [26:31<3:48:05, 15.39s/batch, batch_loss=24.5, batch

Epoch 5/10:  10%| | 102/991 [26:46<3:48:05, 15.39s/batch, batch_loss=914, batch_

Epoch 5/10:  10%| | 103/991 [26:46<3:44:55, 15.20s/batch, batch_loss=914, batch_

Epoch 5/10:  10%| | 103/991 [27:00<3:44:55, 15.20s/batch, batch_loss=15, batch_i

Epoch 5/10:  10%| | 104/991 [27:00<3:41:03, 14.95s/batch, batch_loss=15, batch_i

Epoch 5/10:  10%| | 104/991 [27:18<3:41:03, 14.95s/batch, batch_loss=10.9, batch

Epoch 5/10:  11%| | 105/991 [27:18<3:51:48, 15.70s/batch, batch_loss=10.9, batch

Epoch 5/10:  11%| | 105/991 [27:32<3:51:48, 15.70s/batch, batch_loss=10.3, batch

Epoch 5/10:  11%| | 106/991 [27:32<3:46:56, 15.39s/batch, batch_loss=10.3, batch

Epoch 5/10:  11%| | 106/991 [27:48<3:46:56, 15.39s/batch, batch_loss=17.5, batch

Epoch 5/10:  11%| | 107/991 [27:48<3:45:33, 15.31s/batch, batch_loss=17.5, batch

Epoch 5/10:  11%| | 107/991 [28:01<3:45:33, 15.31s/batch, batch_loss=25.1, batch

Epoch 5/10:  11%| | 108/991 [28:01<3:38:40, 14.86s/batch, batch_loss=25.1, batch

Epoch 5/10:  11%| | 108/991 [28:15<3:38:40, 14.86s/batch, batch_loss=16.3, batch

Epoch 5/10:  11%| | 109/991 [28:15<3:34:33, 14.60s/batch, batch_loss=16.3, batch

Epoch 5/10:  11%| | 109/991 [28:30<3:34:33, 14.60s/batch, batch_loss=15, batch_i

Epoch 5/10:  11%| | 110/991 [28:30<3:34:52, 14.63s/batch, batch_loss=15, batch_i

Epoch 5/10:  11%| | 110/991 [28:45<3:34:52, 14.63s/batch, batch_loss=16.2, batch

Epoch 5/10:  11%| | 111/991 [28:45<3:35:41, 14.71s/batch, batch_loss=16.2, batch

Epoch 5/10:  11%| | 111/991 [29:01<3:35:41, 14.71s/batch, batch_loss=16.7, batch

Epoch 5/10:  11%| | 112/991 [29:01<3:40:10, 15.03s/batch, batch_loss=16.7, batch

Epoch 5/10:  11%| | 112/991 [29:19<3:40:10, 15.03s/batch, batch_loss=11.2, batch

Epoch 5/10:  11%| | 113/991 [29:19<3:54:37, 16.03s/batch, batch_loss=11.2, batch

Epoch 5/10:  11%| | 113/991 [29:35<3:54:37, 16.03s/batch, batch_loss=15.2, batch

Epoch 5/10:  12%| | 114/991 [29:35<3:54:42, 16.06s/batch, batch_loss=15.2, batch

Epoch 5/10:  12%| | 114/991 [29:51<3:54:42, 16.06s/batch, batch_loss=18.2, batch

Epoch 5/10:  12%| | 115/991 [29:51<3:52:33, 15.93s/batch, batch_loss=18.2, batch

Epoch 5/10:  12%| | 115/991 [30:07<3:52:33, 15.93s/batch, batch_loss=9.97, batch

Epoch 5/10:  12%| | 116/991 [30:07<3:51:21, 15.87s/batch, batch_loss=9.97, batch

Epoch 5/10:  12%| | 116/991 [30:22<3:51:21, 15.87s/batch, batch_loss=17.5, batch

Epoch 5/10:  12%| | 117/991 [30:22<3:48:17, 15.67s/batch, batch_loss=17.5, batch

Epoch 5/10:  12%| | 117/991 [30:38<3:48:17, 15.67s/batch, batch_loss=15.6, batch

Epoch 5/10:  12%| | 118/991 [30:38<3:51:20, 15.90s/batch, batch_loss=15.6, batch

Epoch 5/10:  12%| | 118/991 [30:54<3:51:20, 15.90s/batch, batch_loss=23.2, batch

Epoch 5/10:  12%| | 119/991 [30:54<3:49:09, 15.77s/batch, batch_loss=23.2, batch

Epoch 5/10:  12%| | 119/991 [31:09<3:49:09, 15.77s/batch, batch_loss=17, batch_i

Epoch 5/10:  12%| | 120/991 [31:09<3:45:33, 15.54s/batch, batch_loss=17, batch_i

Epoch 5/10:  12%| | 120/991 [31:24<3:45:33, 15.54s/batch, batch_loss=27.7, batch

Epoch 5/10:  12%| | 121/991 [31:24<3:45:33, 15.56s/batch, batch_loss=27.7, batch

Epoch 5/10:  12%| | 121/991 [31:40<3:45:33, 15.56s/batch, batch_loss=10.2, batch

Epoch 5/10:  12%| | 122/991 [31:40<3:47:07, 15.68s/batch, batch_loss=10.2, batch

Epoch 5/10:  12%| | 122/991 [31:56<3:47:07, 15.68s/batch, batch_loss=18.3, batch

Epoch 5/10:  12%| | 123/991 [31:56<3:45:24, 15.58s/batch, batch_loss=18.3, batch

Epoch 5/10:  12%| | 123/991 [32:11<3:45:24, 15.58s/batch, batch_loss=3.49e+3, ba

Epoch 5/10:  13%|▏| 124/991 [32:11<3:42:17, 15.38s/batch, batch_loss=3.49e+3, ba

Epoch 5/10:  13%|▏| 124/991 [32:26<3:42:17, 15.38s/batch, batch_loss=9, batch_in

Epoch 5/10:  13%|▏| 125/991 [32:26<3:41:19, 15.33s/batch, batch_loss=9, batch_in

Epoch 5/10:  13%|▏| 125/991 [32:41<3:41:19, 15.33s/batch, batch_loss=10.7, batch

Epoch 5/10:  13%|▏| 126/991 [32:41<3:38:39, 15.17s/batch, batch_loss=10.7, batch

Epoch 5/10:  13%|▏| 126/991 [32:55<3:38:39, 15.17s/batch, batch_loss=1.89e+3, ba

Epoch 5/10:  13%|▏| 127/991 [32:55<3:35:04, 14.94s/batch, batch_loss=1.89e+3, ba

Epoch 5/10:  13%|▏| 127/991 [33:12<3:35:04, 14.94s/batch, batch_loss=1.57e+3, ba

Epoch 5/10:  13%|▏| 128/991 [33:12<3:44:24, 15.60s/batch, batch_loss=1.57e+3, ba

Epoch 5/10:  13%|▏| 128/991 [33:27<3:44:24, 15.60s/batch, batch_loss=224, batch_

Epoch 5/10:  13%|▏| 129/991 [33:27<3:43:10, 15.53s/batch, batch_loss=224, batch_

Epoch 5/10:  13%|▏| 129/991 [33:43<3:43:10, 15.53s/batch, batch_loss=1e+3, batch

Epoch 5/10:  13%|▏| 130/991 [33:43<3:42:18, 15.49s/batch, batch_loss=1e+3, batch

Epoch 5/10:  13%|▏| 130/991 [33:58<3:42:18, 15.49s/batch, batch_loss=8.34e+3, ba

Epoch 5/10:  13%|▏| 131/991 [33:58<3:38:57, 15.28s/batch, batch_loss=8.34e+3, ba

Epoch 5/10:  13%|▏| 131/991 [34:12<3:38:57, 15.28s/batch, batch_loss=17.1, batch

Epoch 5/10:  13%|▏| 132/991 [34:12<3:33:38, 14.92s/batch, batch_loss=17.1, batch

Epoch 5/10:  13%|▏| 132/991 [34:27<3:33:38, 14.92s/batch, batch_loss=7.99, batch

Epoch 5/10:  13%|▏| 133/991 [34:27<3:34:00, 14.97s/batch, batch_loss=7.99, batch

Epoch 5/10:  13%|▏| 133/991 [34:41<3:34:00, 14.97s/batch, batch_loss=10.8, batch

Epoch 5/10:  14%|▏| 134/991 [34:41<3:31:00, 14.77s/batch, batch_loss=10.8, batch

Epoch 5/10:  14%|▏| 134/991 [34:56<3:31:00, 14.77s/batch, batch_loss=20.6, batch

Epoch 5/10:  14%|▏| 135/991 [34:56<3:31:01, 14.79s/batch, batch_loss=20.6, batch

Epoch 5/10:  14%|▏| 135/991 [35:11<3:31:01, 14.79s/batch, batch_loss=9.29, batch

Epoch 5/10:  14%|▏| 136/991 [35:11<3:32:50, 14.94s/batch, batch_loss=9.29, batch

Epoch 5/10:  14%|▏| 136/991 [35:26<3:32:50, 14.94s/batch, batch_loss=12.6, batch

Epoch 5/10:  14%|▏| 137/991 [35:26<3:32:53, 14.96s/batch, batch_loss=12.6, batch

Epoch 5/10:  14%|▏| 137/991 [35:41<3:32:53, 14.96s/batch, batch_loss=16.3, batch

Epoch 5/10:  14%|▏| 138/991 [35:41<3:31:37, 14.89s/batch, batch_loss=16.3, batch

Epoch 5/10:  14%|▏| 138/991 [35:57<3:31:37, 14.89s/batch, batch_loss=7.66, batch

Epoch 5/10:  14%|▏| 139/991 [35:57<3:34:22, 15.10s/batch, batch_loss=7.66, batch

Epoch 5/10:  14%|▏| 139/991 [36:12<3:34:22, 15.10s/batch, batch_loss=10.5, batch

Epoch 5/10:  14%|▏| 140/991 [36:12<3:34:42, 15.14s/batch, batch_loss=10.5, batch

Epoch 5/10:  14%|▏| 140/991 [36:27<3:34:42, 15.14s/batch, batch_loss=7.12, batch

Epoch 5/10:  14%|▏| 141/991 [36:27<3:33:16, 15.05s/batch, batch_loss=7.12, batch

Epoch 5/10:  14%|▏| 141/991 [36:42<3:33:16, 15.05s/batch, batch_loss=7.27, batch

Epoch 5/10:  14%|▏| 142/991 [36:42<3:33:49, 15.11s/batch, batch_loss=7.27, batch

Epoch 5/10:  14%|▏| 142/991 [36:56<3:33:49, 15.11s/batch, batch_loss=12.2, batch

Epoch 5/10:  14%|▏| 143/991 [36:56<3:30:56, 14.93s/batch, batch_loss=12.2, batch

Epoch 5/10:  14%|▏| 143/991 [37:14<3:30:56, 14.93s/batch, batch_loss=15.5, batch

Epoch 5/10:  15%|▏| 144/991 [37:14<3:43:15, 15.82s/batch, batch_loss=15.5, batch

Epoch 5/10:  15%|▏| 144/991 [37:30<3:43:15, 15.82s/batch, batch_loss=17.8, batch

Epoch 5/10:  15%|▏| 145/991 [37:30<3:42:49, 15.80s/batch, batch_loss=17.8, batch

Epoch 5/10:  15%|▏| 145/991 [37:46<3:42:49, 15.80s/batch, batch_loss=14, batch_i

Epoch 5/10:  15%|▏| 146/991 [37:46<3:43:09, 15.85s/batch, batch_loss=14, batch_i

Epoch 5/10:  15%|▏| 146/991 [38:02<3:43:09, 15.85s/batch, batch_loss=7.93, batch

Epoch 5/10:  15%|▏| 147/991 [38:02<3:41:33, 15.75s/batch, batch_loss=7.93, batch

Epoch 5/10:  15%|▏| 147/991 [38:17<3:41:33, 15.75s/batch, batch_loss=20.5, batch

Epoch 5/10:  15%|▏| 148/991 [38:17<3:38:41, 15.57s/batch, batch_loss=20.5, batch

Epoch 5/10:  15%|▏| 148/991 [38:32<3:38:41, 15.57s/batch, batch_loss=11.8, batch

Epoch 5/10:  15%|▏| 149/991 [38:32<3:36:32, 15.43s/batch, batch_loss=11.8, batch

Epoch 5/10:  15%|▏| 149/991 [38:50<3:36:32, 15.43s/batch, batch_loss=10.9, batch

Epoch 5/10:  15%|▏| 150/991 [38:50<3:49:28, 16.37s/batch, batch_loss=10.9, batch

Epoch 5/10:  15%|▏| 150/991 [39:06<3:49:28, 16.37s/batch, batch_loss=18.9, batch

Epoch 5/10:  15%|▏| 151/991 [39:06<3:46:15, 16.16s/batch, batch_loss=18.9, batch

Epoch 5/10:  15%|▏| 151/991 [39:21<3:46:15, 16.16s/batch, batch_loss=15.9, batch

Epoch 5/10:  15%|▏| 152/991 [39:21<3:42:00, 15.88s/batch, batch_loss=15.9, batch

Epoch 5/10:  15%|▏| 152/991 [39:37<3:42:00, 15.88s/batch, batch_loss=19, batch_i

Epoch 5/10:  15%|▏| 153/991 [39:37<3:40:05, 15.76s/batch, batch_loss=19, batch_i

Epoch 5/10:  15%|▏| 153/991 [39:53<3:40:05, 15.76s/batch, batch_loss=21, batch_i

Epoch 5/10:  16%|▏| 154/991 [39:53<3:41:19, 15.87s/batch, batch_loss=21, batch_i

Epoch 5/10:  16%|▏| 154/991 [40:09<3:41:19, 15.87s/batch, batch_loss=23.6, batch

Epoch 5/10:  16%|▏| 155/991 [40:09<3:40:31, 15.83s/batch, batch_loss=23.6, batch

Epoch 5/10:  16%|▏| 155/991 [40:25<3:40:31, 15.83s/batch, batch_loss=8.05, batch

Epoch 5/10:  16%|▏| 156/991 [40:25<3:40:52, 15.87s/batch, batch_loss=8.05, batch

Epoch 5/10:  16%|▏| 156/991 [40:40<3:40:52, 15.87s/batch, batch_loss=23.9, batch

Epoch 5/10:  16%|▏| 157/991 [40:40<3:39:34, 15.80s/batch, batch_loss=23.9, batch

Epoch 5/10:  16%|▏| 157/991 [40:55<3:39:34, 15.80s/batch, batch_loss=8.07, batch

Epoch 5/10:  16%|▏| 158/991 [40:55<3:35:46, 15.54s/batch, batch_loss=8.07, batch

Epoch 5/10:  16%|▏| 158/991 [41:10<3:35:46, 15.54s/batch, batch_loss=6.04, batch

Epoch 5/10:  16%|▏| 159/991 [41:10<3:34:23, 15.46s/batch, batch_loss=6.04, batch

Epoch 5/10:  16%|▏| 159/991 [41:26<3:34:23, 15.46s/batch, batch_loss=11.7, batch

Epoch 5/10:  16%|▏| 160/991 [41:26<3:35:59, 15.59s/batch, batch_loss=11.7, batch

Epoch 5/10:  16%|▏| 160/991 [41:42<3:35:59, 15.59s/batch, batch_loss=459, batch_

Epoch 5/10:  16%|▏| 161/991 [41:42<3:36:38, 15.66s/batch, batch_loss=459, batch_

Epoch 5/10:  16%|▏| 161/991 [41:58<3:36:38, 15.66s/batch, batch_loss=14.9, batch

Epoch 5/10:  16%|▏| 162/991 [41:58<3:37:15, 15.72s/batch, batch_loss=14.9, batch

Epoch 5/10:  16%|▏| 162/991 [42:17<3:37:15, 15.72s/batch, batch_loss=7.92, batch

Epoch 5/10:  16%|▏| 163/991 [42:17<3:48:49, 16.58s/batch, batch_loss=7.92, batch

Epoch 5/10:  16%|▏| 163/991 [42:33<3:48:49, 16.58s/batch, batch_loss=12.5, batch

Epoch 5/10:  17%|▏| 164/991 [42:33<3:46:59, 16.47s/batch, batch_loss=12.5, batch

Epoch 5/10:  17%|▏| 164/991 [42:49<3:46:59, 16.47s/batch, batch_loss=11, batch_i

Epoch 5/10:  17%|▏| 165/991 [42:49<3:44:37, 16.32s/batch, batch_loss=11, batch_i

Epoch 5/10:  17%|▏| 165/991 [43:05<3:44:37, 16.32s/batch, batch_loss=12.1, batch

Epoch 5/10:  17%|▏| 166/991 [43:05<3:43:06, 16.23s/batch, batch_loss=12.1, batch

Epoch 5/10:  17%|▏| 166/991 [43:22<3:43:06, 16.23s/batch, batch_loss=16.9, batch

Epoch 5/10:  17%|▏| 167/991 [43:22<3:45:37, 16.43s/batch, batch_loss=16.9, batch

Epoch 5/10:  17%|▏| 167/991 [43:38<3:45:37, 16.43s/batch, batch_loss=13.5, batch

Epoch 5/10:  17%|▏| 168/991 [43:38<3:45:05, 16.41s/batch, batch_loss=13.5, batch

Epoch 5/10:  17%|▏| 168/991 [43:55<3:45:05, 16.41s/batch, batch_loss=13.4, batch

Epoch 5/10:  17%|▏| 169/991 [43:55<3:47:53, 16.63s/batch, batch_loss=13.4, batch

Epoch 5/10:  17%|▏| 169/991 [44:11<3:47:53, 16.63s/batch, batch_loss=9.29, batch

Epoch 5/10:  17%|▏| 170/991 [44:11<3:44:03, 16.37s/batch, batch_loss=9.29, batch

Epoch 5/10:  17%|▏| 170/991 [44:27<3:44:03, 16.37s/batch, batch_loss=6.15, batch

Epoch 5/10:  17%|▏| 171/991 [44:27<3:44:21, 16.42s/batch, batch_loss=6.15, batch

Epoch 5/10:  17%|▏| 171/991 [44:43<3:44:21, 16.42s/batch, batch_loss=8.75, batch

Epoch 5/10:  17%|▏| 172/991 [44:43<3:39:30, 16.08s/batch, batch_loss=8.75, batch

Epoch 5/10:  17%|▏| 172/991 [44:59<3:39:30, 16.08s/batch, batch_loss=7.47, batch

Epoch 5/10:  17%|▏| 173/991 [44:59<3:38:06, 16.00s/batch, batch_loss=7.47, batch

Epoch 5/10:  17%|▏| 173/991 [45:15<3:38:06, 16.00s/batch, batch_loss=3.02e+4, ba

Epoch 5/10:  18%|▏| 174/991 [45:15<3:40:33, 16.20s/batch, batch_loss=3.02e+4, ba

Epoch 5/10:  18%|▏| 174/991 [45:32<3:40:33, 16.20s/batch, batch_loss=22.2, batch

Epoch 5/10:  18%|▏| 175/991 [45:32<3:42:32, 16.36s/batch, batch_loss=22.2, batch

Epoch 5/10:  18%|▏| 175/991 [45:48<3:42:32, 16.36s/batch, batch_loss=24.4, batch

Epoch 5/10:  18%|▏| 176/991 [45:48<3:42:36, 16.39s/batch, batch_loss=24.4, batch

Epoch 5/10:  18%|▏| 176/991 [46:04<3:42:36, 16.39s/batch, batch_loss=24.8, batch

Epoch 5/10:  18%|▏| 177/991 [46:04<3:39:03, 16.15s/batch, batch_loss=24.8, batch

Epoch 5/10:  18%|▏| 177/991 [46:19<3:39:03, 16.15s/batch, batch_loss=23.9, batch

Epoch 5/10:  18%|▏| 178/991 [46:19<3:35:36, 15.91s/batch, batch_loss=23.9, batch

Epoch 5/10:  18%|▏| 178/991 [46:35<3:35:36, 15.91s/batch, batch_loss=12.2, batch

Epoch 5/10:  18%|▏| 179/991 [46:35<3:35:33, 15.93s/batch, batch_loss=12.2, batch

Epoch 5/10:  18%|▏| 179/991 [46:51<3:35:33, 15.93s/batch, batch_loss=7.06, batch

Epoch 5/10:  18%|▏| 180/991 [46:51<3:32:48, 15.74s/batch, batch_loss=7.06, batch

Epoch 5/10:  18%|▏| 180/991 [47:06<3:32:48, 15.74s/batch, batch_loss=2.51e+4, ba

Epoch 5/10:  18%|▏| 181/991 [47:06<3:31:37, 15.68s/batch, batch_loss=2.51e+4, ba

Epoch 5/10:  18%|▏| 181/991 [47:23<3:31:37, 15.68s/batch, batch_loss=12.9, batch

Epoch 5/10:  18%|▏| 182/991 [47:23<3:34:39, 15.92s/batch, batch_loss=12.9, batch

Epoch 5/10:  18%|▏| 182/991 [47:40<3:34:39, 15.92s/batch, batch_loss=21, batch_i

Epoch 5/10:  18%|▏| 183/991 [47:40<3:38:35, 16.23s/batch, batch_loss=21, batch_i

Epoch 5/10:  18%|▏| 183/991 [47:56<3:38:35, 16.23s/batch, batch_loss=19, batch_i

Epoch 5/10:  19%|▏| 184/991 [47:56<3:36:58, 16.13s/batch, batch_loss=19, batch_i

Epoch 5/10:  19%|▏| 184/991 [48:12<3:36:58, 16.13s/batch, batch_loss=13.4, batch

Epoch 5/10:  19%|▏| 185/991 [48:12<3:37:03, 16.16s/batch, batch_loss=13.4, batch

Epoch 5/10:  19%|▏| 185/991 [48:27<3:37:03, 16.16s/batch, batch_loss=18, batch_i

Epoch 5/10:  19%|▏| 186/991 [48:27<3:33:47, 15.94s/batch, batch_loss=18, batch_i

Epoch 5/10:  19%|▏| 186/991 [48:44<3:33:47, 15.94s/batch, batch_loss=16.3, batch

Epoch 5/10:  19%|▏| 187/991 [48:44<3:38:36, 16.31s/batch, batch_loss=16.3, batch

Epoch 5/10:  19%|▏| 187/991 [49:00<3:38:36, 16.31s/batch, batch_loss=15.4, batch

Epoch 5/10:  19%|▏| 188/991 [49:00<3:35:50, 16.13s/batch, batch_loss=15.4, batch

Epoch 5/10:  19%|▏| 188/991 [49:16<3:35:50, 16.13s/batch, batch_loss=19.8, batch

Epoch 5/10:  19%|▏| 189/991 [49:16<3:34:06, 16.02s/batch, batch_loss=19.8, batch

Epoch 5/10:  19%|▏| 189/991 [49:31<3:34:06, 16.02s/batch, batch_loss=21.7, batch

Epoch 5/10:  19%|▏| 190/991 [49:31<3:31:43, 15.86s/batch, batch_loss=21.7, batch

Epoch 5/10:  19%|▏| 190/991 [49:46<3:31:43, 15.86s/batch, batch_loss=19.5, batch

Epoch 5/10:  19%|▏| 191/991 [49:46<3:28:22, 15.63s/batch, batch_loss=19.5, batch

Epoch 5/10:  19%|▏| 191/991 [50:02<3:28:22, 15.63s/batch, batch_loss=11.7, batch

Epoch 5/10:  19%|▏| 192/991 [50:02<3:29:45, 15.75s/batch, batch_loss=11.7, batch

Epoch 5/10:  19%|▏| 192/991 [50:18<3:29:45, 15.75s/batch, batch_loss=18.1, batch

Epoch 5/10:  19%|▏| 193/991 [50:18<3:27:54, 15.63s/batch, batch_loss=18.1, batch

Epoch 5/10:  19%|▏| 193/991 [50:34<3:27:54, 15.63s/batch, batch_loss=8.96, batch

Epoch 5/10:  20%|▏| 194/991 [50:34<3:28:37, 15.71s/batch, batch_loss=8.96, batch

Epoch 5/10:  20%|▏| 194/991 [50:53<3:28:37, 15.71s/batch, batch_loss=4.05, batch

Epoch 5/10:  20%|▏| 195/991 [50:53<3:41:26, 16.69s/batch, batch_loss=4.05, batch

Epoch 5/10:  20%|▏| 195/991 [51:09<3:41:26, 16.69s/batch, batch_loss=6.64, batch

Epoch 5/10:  20%|▏| 196/991 [51:09<3:39:38, 16.58s/batch, batch_loss=6.64, batch

Epoch 5/10:  20%|▏| 196/991 [51:25<3:39:38, 16.58s/batch, batch_loss=14.1, batch

Epoch 5/10:  20%|▏| 197/991 [51:25<3:35:40, 16.30s/batch, batch_loss=14.1, batch

Epoch 5/10:  20%|▏| 197/991 [51:43<3:35:40, 16.30s/batch, batch_loss=9.54, batch

Epoch 5/10:  20%|▏| 198/991 [51:43<3:42:03, 16.80s/batch, batch_loss=9.54, batch

Epoch 5/10:  20%|▏| 198/991 [51:58<3:42:03, 16.80s/batch, batch_loss=15.5, batch

Epoch 5/10:  20%|▏| 199/991 [51:58<3:37:36, 16.49s/batch, batch_loss=15.5, batch

Epoch 5/10:  20%|▏| 199/991 [52:13<3:37:36, 16.49s/batch, batch_loss=8.33, batch

Epoch 5/10:  20%|▏| 200/991 [52:13<3:31:58, 16.08s/batch, batch_loss=8.33, batch

Epoch 5/10:  20%|▏| 200/991 [52:30<3:31:58, 16.08s/batch, batch_loss=13.1, batch

Epoch 5/10:  20%|▏| 201/991 [52:30<3:35:26, 16.36s/batch, batch_loss=13.1, batch

Epoch 5/10:  20%|▏| 201/991 [52:48<3:35:26, 16.36s/batch, batch_loss=11.1, batch

Epoch 5/10:  20%|▏| 202/991 [52:48<3:40:44, 16.79s/batch, batch_loss=11.1, batch

Epoch 5/10:  20%|▏| 202/991 [53:08<3:40:44, 16.79s/batch, batch_loss=15.5, batch

Epoch 5/10:  20%|▏| 203/991 [53:08<3:50:45, 17.57s/batch, batch_loss=15.5, batch

Epoch 5/10:  20%|▏| 203/991 [53:24<3:50:45, 17.57s/batch, batch_loss=23.3, batch

Epoch 5/10:  21%|▏| 204/991 [53:24<3:44:28, 17.11s/batch, batch_loss=23.3, batch

Epoch 5/10:  21%|▏| 204/991 [53:39<3:44:28, 17.11s/batch, batch_loss=21, batch_i

Epoch 5/10:  21%|▏| 205/991 [53:39<3:37:53, 16.63s/batch, batch_loss=21, batch_i

Epoch 5/10:  21%|▏| 205/991 [53:55<3:37:53, 16.63s/batch, batch_loss=8.48, batch

Epoch 5/10:  21%|▏| 206/991 [53:55<3:32:38, 16.25s/batch, batch_loss=8.48, batch

Epoch 5/10:  21%|▏| 206/991 [54:11<3:32:38, 16.25s/batch, batch_loss=11, batch_i

Epoch 5/10:  21%|▏| 207/991 [54:11<3:32:59, 16.30s/batch, batch_loss=11, batch_i

Epoch 5/10:  21%|▏| 207/991 [54:27<3:32:59, 16.30s/batch, batch_loss=11, batch_i

Epoch 5/10:  21%|▏| 208/991 [54:27<3:32:46, 16.30s/batch, batch_loss=11, batch_i

Epoch 5/10:  21%|▏| 208/991 [54:47<3:32:46, 16.30s/batch, batch_loss=10.1, batch

Epoch 5/10:  21%|▏| 209/991 [54:47<3:46:22, 17.37s/batch, batch_loss=10.1, batch

Epoch 5/10:  21%|▏| 209/991 [55:03<3:46:22, 17.37s/batch, batch_loss=18.7, batch

Epoch 5/10:  21%|▏| 210/991 [55:03<3:41:32, 17.02s/batch, batch_loss=18.7, batch

Epoch 5/10:  21%|▏| 210/991 [55:20<3:41:32, 17.02s/batch, batch_loss=12, batch_i

Epoch 5/10:  21%|▏| 211/991 [55:20<3:38:26, 16.80s/batch, batch_loss=12, batch_i

Epoch 5/10:  21%|▏| 211/991 [55:36<3:38:26, 16.80s/batch, batch_loss=17.1, batch

Epoch 5/10:  21%|▏| 212/991 [55:36<3:37:38, 16.76s/batch, batch_loss=17.1, batch

Epoch 5/10:  21%|▏| 212/991 [55:53<3:37:38, 16.76s/batch, batch_loss=3.39, batch

Epoch 5/10:  21%|▏| 213/991 [55:53<3:37:02, 16.74s/batch, batch_loss=3.39, batch

Epoch 5/10:  21%|▏| 213/991 [56:08<3:37:02, 16.74s/batch, batch_loss=12.8, batch

Epoch 5/10:  22%|▏| 214/991 [56:08<3:29:15, 16.16s/batch, batch_loss=12.8, batch

Epoch 5/10:  22%|▏| 214/991 [56:24<3:29:15, 16.16s/batch, batch_loss=16.4, batch

Epoch 5/10:  22%|▏| 215/991 [56:24<3:28:31, 16.12s/batch, batch_loss=16.4, batch

Epoch 5/10:  22%|▏| 215/991 [56:40<3:28:31, 16.12s/batch, batch_loss=10, batch_i

Epoch 5/10:  22%|▏| 216/991 [56:40<3:26:32, 15.99s/batch, batch_loss=10, batch_i

Epoch 5/10:  22%|▏| 216/991 [56:55<3:26:32, 15.99s/batch, batch_loss=9.87, batch

Epoch 5/10:  22%|▏| 217/991 [56:55<3:22:41, 15.71s/batch, batch_loss=9.87, batch

Epoch 5/10:  22%|▏| 217/991 [57:12<3:22:41, 15.71s/batch, batch_loss=19.9, batch

Epoch 5/10:  22%|▏| 218/991 [57:12<3:29:41, 16.28s/batch, batch_loss=19.9, batch

Epoch 5/10:  22%|▏| 218/991 [57:28<3:29:41, 16.28s/batch, batch_loss=18.4, batch

Epoch 5/10:  22%|▏| 219/991 [57:28<3:28:02, 16.17s/batch, batch_loss=18.4, batch

Epoch 5/10:  22%|▏| 219/991 [57:43<3:28:02, 16.17s/batch, batch_loss=22.3, batch

Epoch 5/10:  22%|▏| 220/991 [57:43<3:24:20, 15.90s/batch, batch_loss=22.3, batch

Epoch 5/10:  22%|▏| 220/991 [58:00<3:24:20, 15.90s/batch, batch_loss=20.8, batch

Epoch 5/10:  22%|▏| 221/991 [58:00<3:26:15, 16.07s/batch, batch_loss=20.8, batch

Epoch 5/10:  22%|▏| 221/991 [58:16<3:26:15, 16.07s/batch, batch_loss=14.3, batch

Epoch 5/10:  22%|▏| 222/991 [58:16<3:25:52, 16.06s/batch, batch_loss=14.3, batch

Epoch 5/10:  22%|▏| 222/991 [58:31<3:25:52, 16.06s/batch, batch_loss=22.8, batch

Epoch 5/10:  23%|▏| 223/991 [58:31<3:22:19, 15.81s/batch, batch_loss=22.8, batch

Epoch 5/10:  23%|▏| 223/991 [58:47<3:22:19, 15.81s/batch, batch_loss=12.9, batch

Epoch 5/10:  23%|▏| 224/991 [58:47<3:21:08, 15.73s/batch, batch_loss=12.9, batch

Epoch 5/10:  23%|▏| 224/991 [59:05<3:21:08, 15.73s/batch, batch_loss=11, batch_i

Epoch 5/10:  23%|▏| 225/991 [59:05<3:29:50, 16.44s/batch, batch_loss=11, batch_i

Epoch 5/10:  23%|▏| 225/991 [59:21<3:29:50, 16.44s/batch, batch_loss=21.8, batch

Epoch 5/10:  23%|▏| 226/991 [59:21<3:27:48, 16.30s/batch, batch_loss=21.8, batch

Epoch 5/10:  23%|▏| 226/991 [59:38<3:27:48, 16.30s/batch, batch_loss=2.42e+3, ba

Epoch 5/10:  23%|▏| 227/991 [59:38<3:30:20, 16.52s/batch, batch_loss=2.42e+3, ba

Epoch 5/10:  23%|▏| 227/991 [59:53<3:30:20, 16.52s/batch, batch_loss=3.6e+3, bat

Epoch 5/10:  23%|▏| 228/991 [59:53<3:25:48, 16.18s/batch, batch_loss=3.6e+3, bat

Epoch 5/10:  23%|▏| 228/991 [1:00:09<3:25:48, 16.18s/batch, batch_loss=13.1, bat

Epoch 5/10:  23%|▏| 229/991 [1:00:09<3:22:46, 15.97s/batch, batch_loss=13.1, bat

Epoch 5/10:  23%|▏| 229/991 [1:00:24<3:22:46, 15.97s/batch, batch_loss=9.01, bat

Epoch 5/10:  23%|▏| 230/991 [1:00:24<3:21:49, 15.91s/batch, batch_loss=9.01, bat

Epoch 5/10:  23%|▏| 230/991 [1:00:40<3:21:49, 15.91s/batch, batch_loss=12, batch

Epoch 5/10:  23%|▏| 231/991 [1:00:40<3:20:34, 15.84s/batch, batch_loss=12, batch

Epoch 5/10:  23%|▏| 231/991 [1:00:55<3:20:34, 15.84s/batch, batch_loss=8.49, bat

Epoch 5/10:  23%|▏| 232/991 [1:00:55<3:18:47, 15.72s/batch, batch_loss=8.49, bat

Epoch 5/10:  23%|▏| 232/991 [1:01:13<3:18:47, 15.72s/batch, batch_loss=8.02, bat

Epoch 5/10:  24%|▏| 233/991 [1:01:13<3:26:13, 16.32s/batch, batch_loss=8.02, bat

Epoch 5/10:  24%|▏| 233/991 [1:01:29<3:26:13, 16.32s/batch, batch_loss=17, batch

Epoch 5/10:  24%|▏| 234/991 [1:01:29<3:22:19, 16.04s/batch, batch_loss=17, batch

Epoch 5/10:  24%|▏| 234/991 [1:01:45<3:22:19, 16.04s/batch, batch_loss=16.2, bat

Epoch 5/10:  24%|▏| 235/991 [1:01:45<3:22:36, 16.08s/batch, batch_loss=16.2, bat

Epoch 5/10:  24%|▏| 235/991 [1:02:00<3:22:36, 16.08s/batch, batch_loss=28.5, bat

Epoch 5/10:  24%|▏| 236/991 [1:02:00<3:20:48, 15.96s/batch, batch_loss=28.5, bat

Epoch 5/10:  24%|▏| 236/991 [1:02:18<3:20:48, 15.96s/batch, batch_loss=23.7, bat

Epoch 5/10:  24%|▏| 237/991 [1:02:18<3:27:48, 16.54s/batch, batch_loss=23.7, bat

Epoch 5/10:  24%|▏| 237/991 [1:02:34<3:27:48, 16.54s/batch, batch_loss=21.1, bat

Epoch 5/10:  24%|▏| 238/991 [1:02:34<3:24:42, 16.31s/batch, batch_loss=21.1, bat

Epoch 5/10:  24%|▏| 238/991 [1:02:50<3:24:42, 16.31s/batch, batch_loss=7.07, bat

Epoch 5/10:  24%|▏| 239/991 [1:02:50<3:21:30, 16.08s/batch, batch_loss=7.07, bat

Epoch 5/10:  24%|▏| 239/991 [1:03:05<3:21:30, 16.08s/batch, batch_loss=6.64, bat

Epoch 5/10:  24%|▏| 240/991 [1:03:05<3:17:40, 15.79s/batch, batch_loss=6.64, bat

Epoch 5/10:  24%|▏| 240/991 [1:03:19<3:17:40, 15.79s/batch, batch_loss=9.52, bat

Epoch 5/10:  24%|▏| 241/991 [1:03:19<3:12:41, 15.42s/batch, batch_loss=9.52, bat

Epoch 5/10:  24%|▏| 241/991 [1:03:35<3:12:41, 15.42s/batch, batch_loss=38.4, bat

Epoch 5/10:  24%|▏| 242/991 [1:03:35<3:14:27, 15.58s/batch, batch_loss=38.4, bat

Epoch 5/10:  24%|▏| 242/991 [1:03:51<3:14:27, 15.58s/batch, batch_loss=268, batc

Epoch 5/10:  25%|▏| 243/991 [1:03:51<3:14:29, 15.60s/batch, batch_loss=268, batc

Epoch 5/10:  25%|▏| 243/991 [1:04:07<3:14:29, 15.60s/batch, batch_loss=19.2, bat

Epoch 5/10:  25%|▏| 244/991 [1:04:07<3:16:11, 15.76s/batch, batch_loss=19.2, bat

Epoch 5/10:  25%|▏| 244/991 [1:04:22<3:16:11, 15.76s/batch, batch_loss=7.3, batc

Epoch 5/10:  25%|▏| 245/991 [1:04:22<3:14:02, 15.61s/batch, batch_loss=7.3, batc

Epoch 5/10:  25%|▏| 245/991 [1:04:37<3:14:02, 15.61s/batch, batch_loss=6.9, batc

Epoch 5/10:  25%|▏| 246/991 [1:04:37<3:09:48, 15.29s/batch, batch_loss=6.9, batc

Epoch 5/10:  25%|▏| 246/991 [1:04:52<3:09:48, 15.29s/batch, batch_loss=16.1, bat

Epoch 5/10:  25%|▏| 247/991 [1:04:52<3:09:39, 15.29s/batch, batch_loss=16.1, bat

Epoch 5/10:  25%|▏| 247/991 [1:05:08<3:09:39, 15.29s/batch, batch_loss=4.29, bat

Epoch 5/10:  25%|▎| 248/991 [1:05:08<3:10:26, 15.38s/batch, batch_loss=4.29, bat

Epoch 5/10:  25%|▎| 248/991 [1:05:23<3:10:26, 15.38s/batch, batch_loss=14, batch

Epoch 5/10:  25%|▎| 249/991 [1:05:23<3:08:03, 15.21s/batch, batch_loss=14, batch

Epoch 5/10:  25%|▎| 249/991 [1:05:39<3:08:03, 15.21s/batch, batch_loss=9.29, bat

Epoch 5/10:  25%|▎| 250/991 [1:05:39<3:10:47, 15.45s/batch, batch_loss=9.29, bat

Epoch 5/10:  25%|▎| 250/991 [1:05:53<3:10:47, 15.45s/batch, batch_loss=6.61, bat

Epoch 5/10:  25%|▎| 251/991 [1:05:53<3:07:49, 15.23s/batch, batch_loss=6.61, bat

Epoch 5/10:  25%|▎| 251/991 [1:06:08<3:07:49, 15.23s/batch, batch_loss=12.7, bat

Epoch 5/10:  25%|▎| 252/991 [1:06:08<3:06:38, 15.15s/batch, batch_loss=12.7, bat

Epoch 5/10:  25%|▎| 252/991 [1:06:23<3:06:38, 15.15s/batch, batch_loss=7.67, bat

Epoch 5/10:  26%|▎| 253/991 [1:06:23<3:05:19, 15.07s/batch, batch_loss=7.67, bat

Epoch 5/10:  26%|▎| 253/991 [1:06:40<3:05:19, 15.07s/batch, batch_loss=19.3, bat

Epoch 5/10:  26%|▎| 254/991 [1:06:40<3:12:15, 15.65s/batch, batch_loss=19.3, bat

Epoch 5/10:  26%|▎| 254/991 [1:06:55<3:12:15, 15.65s/batch, batch_loss=15.3, bat

Epoch 5/10:  26%|▎| 255/991 [1:06:55<3:07:43, 15.30s/batch, batch_loss=15.3, bat

Epoch 5/10:  26%|▎| 255/991 [1:07:08<3:07:43, 15.30s/batch, batch_loss=697, batc

Epoch 5/10:  26%|▎| 256/991 [1:07:08<3:01:54, 14.85s/batch, batch_loss=697, batc

Epoch 5/10:  26%|▎| 256/991 [1:07:22<3:01:54, 14.85s/batch, batch_loss=19.6, bat

Epoch 5/10:  26%|▎| 257/991 [1:07:22<2:57:22, 14.50s/batch, batch_loss=19.6, bat

Epoch 5/10:  26%|▎| 257/991 [1:07:37<2:57:22, 14.50s/batch, batch_loss=204, batc

Epoch 5/10:  26%|▎| 258/991 [1:07:37<2:57:48, 14.55s/batch, batch_loss=204, batc

Epoch 5/10:  26%|▎| 258/991 [1:07:51<2:57:48, 14.55s/batch, batch_loss=13.8, bat

Epoch 5/10:  26%|▎| 259/991 [1:07:51<2:57:49, 14.58s/batch, batch_loss=13.8, bat

Epoch 5/10:  26%|▎| 259/991 [1:08:09<2:57:49, 14.58s/batch, batch_loss=16.7, bat

Epoch 5/10:  26%|▎| 260/991 [1:08:09<3:07:17, 15.37s/batch, batch_loss=16.7, bat

Epoch 5/10:  26%|▎| 260/991 [1:08:23<3:07:17, 15.37s/batch, batch_loss=12.2, bat

Epoch 5/10:  26%|▎| 261/991 [1:08:23<3:04:15, 15.15s/batch, batch_loss=12.2, bat

Epoch 5/10:  26%|▎| 261/991 [1:08:38<3:04:15, 15.15s/batch, batch_loss=11.1, bat

Epoch 5/10:  26%|▎| 262/991 [1:08:38<3:01:29, 14.94s/batch, batch_loss=11.1, bat

Epoch 5/10:  26%|▎| 262/991 [1:08:53<3:01:29, 14.94s/batch, batch_loss=12, batch

Epoch 5/10:  27%|▎| 263/991 [1:08:53<3:02:57, 15.08s/batch, batch_loss=12, batch

Epoch 5/10:  27%|▎| 263/991 [1:09:07<3:02:57, 15.08s/batch, batch_loss=13.5, bat

Epoch 5/10:  27%|▎| 264/991 [1:09:07<2:59:30, 14.81s/batch, batch_loss=13.5, bat

Epoch 5/10:  27%|▎| 264/991 [1:09:21<2:59:30, 14.81s/batch, batch_loss=15.2, bat

Epoch 5/10:  27%|▎| 265/991 [1:09:21<2:56:29, 14.59s/batch, batch_loss=15.2, bat

Epoch 5/10:  27%|▎| 265/991 [1:09:36<2:56:29, 14.59s/batch, batch_loss=14.4, bat

Epoch 5/10:  27%|▎| 266/991 [1:09:36<2:55:03, 14.49s/batch, batch_loss=14.4, bat

Epoch 5/10:  27%|▎| 266/991 [1:09:50<2:55:03, 14.49s/batch, batch_loss=11.1, bat

Epoch 5/10:  27%|▎| 267/991 [1:09:50<2:55:10, 14.52s/batch, batch_loss=11.1, bat

Epoch 5/10:  27%|▎| 267/991 [1:10:05<2:55:10, 14.52s/batch, batch_loss=7.43, bat

Epoch 5/10:  27%|▎| 268/991 [1:10:05<2:55:09, 14.54s/batch, batch_loss=7.43, bat

Epoch 5/10:  27%|▎| 268/991 [1:10:20<2:55:09, 14.54s/batch, batch_loss=11.4, bat

Epoch 5/10:  27%|▎| 269/991 [1:10:20<2:56:51, 14.70s/batch, batch_loss=11.4, bat

Epoch 5/10:  27%|▎| 269/991 [1:10:35<2:56:51, 14.70s/batch, batch_loss=1.59, bat

Epoch 5/10:  27%|▎| 270/991 [1:10:35<2:59:45, 14.96s/batch, batch_loss=1.59, bat

Epoch 5/10:  27%|▎| 270/991 [1:10:50<2:59:45, 14.96s/batch, batch_loss=10.9, bat

Epoch 5/10:  27%|▎| 271/991 [1:10:50<2:59:45, 14.98s/batch, batch_loss=10.9, bat

Epoch 5/10:  27%|▎| 271/991 [1:11:05<2:59:45, 14.98s/batch, batch_loss=8.61, bat

Epoch 5/10:  27%|▎| 272/991 [1:11:05<2:59:15, 14.96s/batch, batch_loss=8.61, bat

Epoch 5/10:  27%|▎| 272/991 [1:11:20<2:59:15, 14.96s/batch, batch_loss=16.8, bat

Epoch 5/10:  28%|▎| 273/991 [1:11:20<2:58:43, 14.93s/batch, batch_loss=16.8, bat

Epoch 5/10:  28%|▎| 273/991 [1:11:35<2:58:43, 14.93s/batch, batch_loss=12.1, bat

Epoch 5/10:  28%|▎| 274/991 [1:11:35<2:59:36, 15.03s/batch, batch_loss=12.1, bat

Epoch 5/10:  28%|▎| 274/991 [1:11:50<2:59:36, 15.03s/batch, batch_loss=3.32e+3, 

Epoch 5/10:  28%|▎| 275/991 [1:11:50<2:57:46, 14.90s/batch, batch_loss=3.32e+3, 

Epoch 5/10:  28%|▎| 275/991 [1:12:04<2:57:46, 14.90s/batch, batch_loss=12.9, bat

Epoch 5/10:  28%|▎| 276/991 [1:12:04<2:54:39, 14.66s/batch, batch_loss=12.9, bat

Epoch 5/10:  28%|▎| 276/991 [1:12:19<2:54:39, 14.66s/batch, batch_loss=4.79e+3, 

Epoch 5/10:  28%|▎| 277/991 [1:12:19<2:53:42, 14.60s/batch, batch_loss=4.79e+3, 

Epoch 5/10:  28%|▎| 277/991 [1:12:33<2:53:42, 14.60s/batch, batch_loss=10.7, bat

Epoch 5/10:  28%|▎| 278/991 [1:12:33<2:54:12, 14.66s/batch, batch_loss=10.7, bat

Epoch 5/10:  28%|▎| 278/991 [1:12:50<2:54:12, 14.66s/batch, batch_loss=17, batch

Epoch 5/10:  28%|▎| 279/991 [1:12:50<2:59:12, 15.10s/batch, batch_loss=17, batch

Epoch 5/10:  28%|▎| 279/991 [1:13:08<2:59:12, 15.10s/batch, batch_loss=13.7, bat

Epoch 5/10:  28%|▎| 280/991 [1:13:08<3:09:39, 16.01s/batch, batch_loss=13.7, bat

Epoch 5/10:  28%|▎| 280/991 [1:13:23<3:09:39, 16.01s/batch, batch_loss=11.6, bat

Epoch 5/10:  28%|▎| 281/991 [1:13:23<3:06:51, 15.79s/batch, batch_loss=11.6, bat

Epoch 5/10:  28%|▎| 281/991 [1:13:38<3:06:51, 15.79s/batch, batch_loss=7.11, bat

Epoch 5/10:  28%|▎| 282/991 [1:13:38<3:02:53, 15.48s/batch, batch_loss=7.11, bat

Epoch 5/10:  28%|▎| 282/991 [1:13:53<3:02:53, 15.48s/batch, batch_loss=14.7, bat

Epoch 5/10:  29%|▎| 283/991 [1:13:53<3:03:11, 15.52s/batch, batch_loss=14.7, bat

Epoch 5/10:  29%|▎| 283/991 [1:14:10<3:03:11, 15.52s/batch, batch_loss=14.5, bat

Epoch 5/10:  29%|▎| 284/991 [1:14:10<3:05:51, 15.77s/batch, batch_loss=14.5, bat

Epoch 5/10:  29%|▎| 284/991 [1:14:25<3:05:51, 15.77s/batch, batch_loss=12.1, bat

Epoch 5/10:  29%|▎| 285/991 [1:14:25<3:03:24, 15.59s/batch, batch_loss=12.1, bat

Epoch 5/10:  29%|▎| 285/991 [1:14:40<3:03:24, 15.59s/batch, batch_loss=8.96, bat

Epoch 5/10:  29%|▎| 286/991 [1:14:40<3:00:42, 15.38s/batch, batch_loss=8.96, bat

Epoch 5/10:  29%|▎| 286/991 [1:14:55<3:00:42, 15.38s/batch, batch_loss=7.54, bat

Epoch 5/10:  29%|▎| 287/991 [1:14:55<2:59:14, 15.28s/batch, batch_loss=7.54, bat

Epoch 5/10:  29%|▎| 287/991 [1:15:13<2:59:14, 15.28s/batch, batch_loss=2.61e+3, 

Epoch 5/10:  29%|▎| 288/991 [1:15:13<3:08:36, 16.10s/batch, batch_loss=2.61e+3, 

Epoch 5/10:  29%|▎| 288/991 [1:15:28<3:08:36, 16.10s/batch, batch_loss=1.25e+3, 

Epoch 5/10:  29%|▎| 289/991 [1:15:28<3:03:36, 15.69s/batch, batch_loss=1.25e+3, 

Epoch 5/10:  29%|▎| 289/991 [1:15:43<3:03:36, 15.69s/batch, batch_loss=12.3, bat

Epoch 5/10:  29%|▎| 290/991 [1:15:43<3:02:31, 15.62s/batch, batch_loss=12.3, bat

Epoch 5/10:  29%|▎| 290/991 [1:15:58<3:02:31, 15.62s/batch, batch_loss=5.71, bat

Epoch 5/10:  29%|▎| 291/991 [1:15:58<3:00:41, 15.49s/batch, batch_loss=5.71, bat

Epoch 5/10:  29%|▎| 291/991 [1:16:14<3:00:41, 15.49s/batch, batch_loss=9.84, bat

Epoch 5/10:  29%|▎| 292/991 [1:16:14<3:00:12, 15.47s/batch, batch_loss=9.84, bat

Epoch 5/10:  29%|▎| 292/991 [1:16:28<3:00:12, 15.47s/batch, batch_loss=13.8, bat

Epoch 5/10:  30%|▎| 293/991 [1:16:28<2:57:15, 15.24s/batch, batch_loss=13.8, bat

Epoch 5/10:  30%|▎| 293/991 [1:16:42<2:57:15, 15.24s/batch, batch_loss=12.9, bat

Epoch 5/10:  30%|▎| 294/991 [1:16:42<2:53:07, 14.90s/batch, batch_loss=12.9, bat

Epoch 5/10:  30%|▎| 294/991 [1:16:57<2:53:07, 14.90s/batch, batch_loss=9.38, bat

Epoch 5/10:  30%|▎| 295/991 [1:16:57<2:51:04, 14.75s/batch, batch_loss=9.38, bat

Epoch 5/10:  30%|▎| 295/991 [1:17:14<2:51:04, 14.75s/batch, batch_loss=16.8, bat

Epoch 5/10:  30%|▎| 296/991 [1:17:14<2:57:48, 15.35s/batch, batch_loss=16.8, bat

Epoch 5/10:  30%|▎| 296/991 [1:17:28<2:57:48, 15.35s/batch, batch_loss=11.8, bat

Epoch 5/10:  30%|▎| 297/991 [1:17:28<2:52:36, 14.92s/batch, batch_loss=11.8, bat

Epoch 5/10:  30%|▎| 297/991 [1:17:43<2:52:36, 14.92s/batch, batch_loss=3.23e+4, 

Epoch 5/10:  30%|▎| 298/991 [1:17:43<2:53:01, 14.98s/batch, batch_loss=3.23e+4, 

Epoch 5/10:  30%|▎| 298/991 [1:17:57<2:53:01, 14.98s/batch, batch_loss=14.9, bat

Epoch 5/10:  30%|▎| 299/991 [1:17:57<2:52:00, 14.91s/batch, batch_loss=14.9, bat

Epoch 5/10:  30%|▎| 299/991 [1:18:12<2:52:00, 14.91s/batch, batch_loss=6.38, bat

Epoch 5/10:  30%|▎| 300/991 [1:18:12<2:50:58, 14.85s/batch, batch_loss=6.38, bat

Epoch 5/10:  30%|▎| 300/991 [1:18:27<2:50:58, 14.85s/batch, batch_loss=7.45, bat

Epoch 5/10:  30%|▎| 301/991 [1:18:27<2:49:48, 14.77s/batch, batch_loss=7.45, bat

Epoch 5/10:  30%|▎| 301/991 [1:18:42<2:49:48, 14.77s/batch, batch_loss=11.3, bat

Epoch 5/10:  30%|▎| 302/991 [1:18:42<2:50:24, 14.84s/batch, batch_loss=11.3, bat

Epoch 5/10:  30%|▎| 302/991 [1:18:57<2:50:24, 14.84s/batch, batch_loss=9.37, bat

Epoch 5/10:  31%|▎| 303/991 [1:18:57<2:50:28, 14.87s/batch, batch_loss=9.37, bat

Epoch 5/10:  31%|▎| 303/991 [1:19:11<2:50:28, 14.87s/batch, batch_loss=3.29, bat

Epoch 5/10:  31%|▎| 304/991 [1:19:11<2:48:07, 14.68s/batch, batch_loss=3.29, bat

Epoch 5/10:  31%|▎| 304/991 [1:19:25<2:48:07, 14.68s/batch, batch_loss=15, batch

Epoch 5/10:  31%|▎| 305/991 [1:19:25<2:46:29, 14.56s/batch, batch_loss=15, batch

Epoch 5/10:  31%|▎| 305/991 [1:19:39<2:46:29, 14.56s/batch, batch_loss=7.68, bat

Epoch 5/10:  31%|▎| 306/991 [1:19:39<2:43:55, 14.36s/batch, batch_loss=7.68, bat

Epoch 5/10:  31%|▎| 306/991 [1:19:52<2:43:55, 14.36s/batch, batch_loss=6.28e+3, 

Epoch 5/10:  31%|▎| 307/991 [1:19:52<2:40:08, 14.05s/batch, batch_loss=6.28e+3, 

Epoch 5/10:  31%|▎| 307/991 [1:20:06<2:40:08, 14.05s/batch, batch_loss=11.7, bat

Epoch 5/10:  31%|▎| 308/991 [1:20:06<2:39:31, 14.01s/batch, batch_loss=11.7, bat

Epoch 5/10:  31%|▎| 308/991 [1:20:21<2:39:31, 14.01s/batch, batch_loss=18.4, bat

Epoch 5/10:  31%|▎| 309/991 [1:20:21<2:40:54, 14.16s/batch, batch_loss=18.4, bat

Epoch 5/10:  31%|▎| 309/991 [1:20:36<2:40:54, 14.16s/batch, batch_loss=11.7, bat

Epoch 5/10:  31%|▎| 310/991 [1:20:36<2:44:51, 14.53s/batch, batch_loss=11.7, bat

Epoch 5/10:  31%|▎| 310/991 [1:20:52<2:44:51, 14.53s/batch, batch_loss=20.3, bat

Epoch 5/10:  31%|▎| 311/991 [1:20:52<2:49:48, 14.98s/batch, batch_loss=20.3, bat

Epoch 5/10:  31%|▎| 311/991 [1:21:08<2:49:48, 14.98s/batch, batch_loss=11.7, bat

Epoch 5/10:  31%|▎| 312/991 [1:21:08<2:51:57, 15.20s/batch, batch_loss=11.7, bat

Epoch 5/10:  31%|▎| 312/991 [1:21:25<2:51:57, 15.20s/batch, batch_loss=1.06e+4, 

Epoch 5/10:  32%|▎| 313/991 [1:21:25<2:57:41, 15.72s/batch, batch_loss=1.06e+4, 

Epoch 5/10:  32%|▎| 313/991 [1:21:39<2:57:41, 15.72s/batch, batch_loss=8.36, bat

Epoch 5/10:  32%|▎| 314/991 [1:21:39<2:52:07, 15.26s/batch, batch_loss=8.36, bat

Epoch 5/10:  32%|▎| 314/991 [1:22:03<2:52:07, 15.26s/batch, batch_loss=14.4, bat

Epoch 5/10:  32%|▎| 315/991 [1:22:03<3:22:47, 18.00s/batch, batch_loss=14.4, bat

Epoch 5/10:  32%|▎| 315/991 [1:22:24<3:22:47, 18.00s/batch, batch_loss=24.9, bat

Epoch 5/10:  32%|▎| 316/991 [1:22:24<3:31:41, 18.82s/batch, batch_loss=24.9, bat

Epoch 5/10:  32%|▎| 316/991 [1:22:41<3:31:41, 18.82s/batch, batch_loss=29.6, bat

Epoch 5/10:  32%|▎| 317/991 [1:22:41<3:24:14, 18.18s/batch, batch_loss=29.6, bat

Epoch 5/10:  32%|▎| 317/991 [1:22:56<3:24:14, 18.18s/batch, batch_loss=27, batch

Epoch 5/10:  32%|▎| 318/991 [1:22:56<3:12:11, 17.13s/batch, batch_loss=27, batch

Epoch 5/10:  32%|▎| 318/991 [1:23:10<3:12:11, 17.13s/batch, batch_loss=22.6, bat

Epoch 5/10:  32%|▎| 319/991 [1:23:10<3:01:48, 16.23s/batch, batch_loss=22.6, bat

Epoch 5/10:  32%|▎| 319/991 [1:23:24<3:01:48, 16.23s/batch, batch_loss=22.3, bat

Epoch 5/10:  32%|▎| 320/991 [1:23:24<2:55:25, 15.69s/batch, batch_loss=22.3, bat

Epoch 5/10:  32%|▎| 320/991 [1:23:39<2:55:25, 15.69s/batch, batch_loss=21.9, bat

Epoch 5/10:  32%|▎| 321/991 [1:23:39<2:52:08, 15.42s/batch, batch_loss=21.9, bat

Epoch 5/10:  32%|▎| 321/991 [1:23:54<2:52:08, 15.42s/batch, batch_loss=6.19, bat

Epoch 5/10:  32%|▎| 322/991 [1:23:54<2:50:14, 15.27s/batch, batch_loss=6.19, bat

Epoch 5/10:  32%|▎| 322/991 [1:24:09<2:50:14, 15.27s/batch, batch_loss=14, batch

Epoch 5/10:  33%|▎| 323/991 [1:24:09<2:48:59, 15.18s/batch, batch_loss=14, batch

Epoch 5/10:  33%|▎| 323/991 [1:24:23<2:48:59, 15.18s/batch, batch_loss=25.2, bat

Epoch 5/10:  33%|▎| 324/991 [1:24:23<2:47:10, 15.04s/batch, batch_loss=25.2, bat

Epoch 5/10:  33%|▎| 324/991 [1:24:44<2:47:10, 15.04s/batch, batch_loss=10.5, bat

Epoch 5/10:  33%|▎| 325/991 [1:24:44<3:06:43, 16.82s/batch, batch_loss=10.5, bat

Epoch 5/10:  33%|▎| 325/991 [1:24:59<3:06:43, 16.82s/batch, batch_loss=43.7, bat

Epoch 5/10:  33%|▎| 326/991 [1:24:59<2:59:00, 16.15s/batch, batch_loss=43.7, bat

Epoch 5/10:  33%|▎| 326/991 [1:25:13<2:59:00, 16.15s/batch, batch_loss=3.05e+3, 

Epoch 5/10:  33%|▎| 327/991 [1:25:13<2:52:48, 15.61s/batch, batch_loss=3.05e+3, 

Epoch 5/10:  33%|▎| 327/991 [1:25:28<2:52:48, 15.61s/batch, batch_loss=13.4, bat

Epoch 5/10:  33%|▎| 328/991 [1:25:28<2:49:36, 15.35s/batch, batch_loss=13.4, bat

Epoch 5/10:  33%|▎| 328/991 [1:25:42<2:49:36, 15.35s/batch, batch_loss=28.2, bat

Epoch 5/10:  33%|▎| 329/991 [1:25:42<2:45:42, 15.02s/batch, batch_loss=28.2, bat

Epoch 5/10:  33%|▎| 329/991 [1:25:56<2:45:42, 15.02s/batch, batch_loss=37.7, bat

Epoch 5/10:  33%|▎| 330/991 [1:25:56<2:42:29, 14.75s/batch, batch_loss=37.7, bat

Epoch 5/10:  33%|▎| 330/991 [1:26:11<2:42:29, 14.75s/batch, batch_loss=13, batch

Epoch 5/10:  33%|▎| 331/991 [1:26:11<2:42:02, 14.73s/batch, batch_loss=13, batch

Epoch 5/10:  33%|▎| 331/991 [1:26:26<2:42:02, 14.73s/batch, batch_loss=18.2, bat

Epoch 5/10:  34%|▎| 332/991 [1:26:26<2:41:47, 14.73s/batch, batch_loss=18.2, bat

Epoch 5/10:  34%|▎| 332/991 [1:26:40<2:41:47, 14.73s/batch, batch_loss=26.5, bat

Epoch 5/10:  34%|▎| 333/991 [1:26:40<2:39:35, 14.55s/batch, batch_loss=26.5, bat

Epoch 5/10:  34%|▎| 333/991 [1:26:55<2:39:35, 14.55s/batch, batch_loss=13.5, bat

Epoch 5/10:  34%|▎| 334/991 [1:26:55<2:39:39, 14.58s/batch, batch_loss=13.5, bat

Epoch 5/10:  34%|▎| 334/991 [1:27:10<2:39:39, 14.58s/batch, batch_loss=3.96, bat

Epoch 5/10:  34%|▎| 335/991 [1:27:10<2:40:20, 14.67s/batch, batch_loss=3.96, bat

Epoch 5/10:  34%|▎| 335/991 [1:27:24<2:40:20, 14.67s/batch, batch_loss=8.44e+3, 

Epoch 5/10:  34%|▎| 336/991 [1:27:24<2:39:49, 14.64s/batch, batch_loss=8.44e+3, 

Epoch 5/10:  34%|▎| 336/991 [1:27:39<2:39:49, 14.64s/batch, batch_loss=2.35e+3, 

Epoch 5/10:  34%|▎| 337/991 [1:27:39<2:39:18, 14.62s/batch, batch_loss=2.35e+3, 

Epoch 5/10:  34%|▎| 337/991 [1:27:54<2:39:18, 14.62s/batch, batch_loss=8.65, bat

Epoch 5/10:  34%|▎| 338/991 [1:27:54<2:40:07, 14.71s/batch, batch_loss=8.65, bat

Epoch 5/10:  34%|▎| 338/991 [1:28:07<2:40:07, 14.71s/batch, batch_loss=22, batch

Epoch 5/10:  34%|▎| 339/991 [1:28:07<2:36:04, 14.36s/batch, batch_loss=22, batch

Epoch 5/10:  34%|▎| 339/991 [1:28:21<2:36:04, 14.36s/batch, batch_loss=15, batch

Epoch 5/10:  34%|▎| 340/991 [1:28:21<2:35:17, 14.31s/batch, batch_loss=15, batch

Epoch 5/10:  34%|▎| 340/991 [1:28:36<2:35:17, 14.31s/batch, batch_loss=9.7, batc

Epoch 5/10:  34%|▎| 341/991 [1:28:36<2:34:31, 14.26s/batch, batch_loss=9.7, batc

Epoch 5/10:  34%|▎| 341/991 [1:28:50<2:34:31, 14.26s/batch, batch_loss=0.777, ba

Epoch 5/10:  35%|▎| 342/991 [1:28:50<2:34:35, 14.29s/batch, batch_loss=0.777, ba

Epoch 5/10:  35%|▎| 342/991 [1:29:05<2:34:35, 14.29s/batch, batch_loss=9.03, bat

Epoch 5/10:  35%|▎| 343/991 [1:29:05<2:35:56, 14.44s/batch, batch_loss=9.03, bat

Epoch 5/10:  35%|▎| 343/991 [1:29:19<2:35:56, 14.44s/batch, batch_loss=18.1, bat

Epoch 5/10:  35%|▎| 344/991 [1:29:19<2:36:58, 14.56s/batch, batch_loss=18.1, bat

Epoch 5/10:  35%|▎| 344/991 [1:29:34<2:36:58, 14.56s/batch, batch_loss=116, batc

Epoch 5/10:  35%|▎| 345/991 [1:29:34<2:37:36, 14.64s/batch, batch_loss=116, batc

Epoch 5/10:  35%|▎| 345/991 [1:29:50<2:37:36, 14.64s/batch, batch_loss=20, batch

Epoch 5/10:  35%|▎| 346/991 [1:29:50<2:41:29, 15.02s/batch, batch_loss=20, batch

Epoch 5/10:  35%|▎| 346/991 [1:30:05<2:41:29, 15.02s/batch, batch_loss=13.2, bat

Epoch 5/10:  35%|▎| 347/991 [1:30:05<2:41:27, 15.04s/batch, batch_loss=13.2, bat

Epoch 5/10:  35%|▎| 347/991 [1:30:20<2:41:27, 15.04s/batch, batch_loss=19.3, bat

Epoch 5/10:  35%|▎| 348/991 [1:30:20<2:40:22, 14.96s/batch, batch_loss=19.3, bat

Epoch 5/10:  35%|▎| 348/991 [1:30:36<2:40:22, 14.96s/batch, batch_loss=10.4, bat

Epoch 5/10:  35%|▎| 349/991 [1:30:36<2:44:26, 15.37s/batch, batch_loss=10.4, bat

Epoch 5/10:  35%|▎| 349/991 [1:30:51<2:44:26, 15.37s/batch, batch_loss=15.6, bat

Epoch 5/10:  35%|▎| 350/991 [1:30:51<2:41:40, 15.13s/batch, batch_loss=15.6, bat

Epoch 5/10:  35%|▎| 350/991 [1:31:07<2:41:40, 15.13s/batch, batch_loss=10.9, bat

Epoch 5/10:  35%|▎| 351/991 [1:31:07<2:43:29, 15.33s/batch, batch_loss=10.9, bat

Epoch 5/10:  35%|▎| 351/991 [1:31:23<2:43:29, 15.33s/batch, batch_loss=19, batch

Epoch 5/10:  36%|▎| 352/991 [1:31:23<2:44:36, 15.46s/batch, batch_loss=19, batch

Epoch 5/10:  36%|▎| 352/991 [1:31:39<2:44:36, 15.46s/batch, batch_loss=31.6, bat

Epoch 5/10:  36%|▎| 353/991 [1:31:39<2:47:07, 15.72s/batch, batch_loss=31.6, bat

Epoch 5/10:  36%|▎| 353/991 [1:31:55<2:47:07, 15.72s/batch, batch_loss=23.5, bat

Epoch 5/10:  36%|▎| 354/991 [1:31:55<2:47:30, 15.78s/batch, batch_loss=23.5, bat

Epoch 5/10:  36%|▎| 354/991 [1:32:11<2:47:30, 15.78s/batch, batch_loss=12.5, bat

Epoch 5/10:  36%|▎| 355/991 [1:32:11<2:48:33, 15.90s/batch, batch_loss=12.5, bat

Epoch 5/10:  36%|▎| 355/991 [1:32:27<2:48:33, 15.90s/batch, batch_loss=22.4, bat

Epoch 5/10:  36%|▎| 356/991 [1:32:27<2:48:32, 15.92s/batch, batch_loss=22.4, bat

Epoch 5/10:  36%|▎| 356/991 [1:32:41<2:48:32, 15.92s/batch, batch_loss=26.3, bat

Epoch 5/10:  36%|▎| 357/991 [1:32:41<2:43:28, 15.47s/batch, batch_loss=26.3, bat

Epoch 5/10:  36%|▎| 357/991 [1:32:56<2:43:28, 15.47s/batch, batch_loss=19.3, bat

Epoch 5/10:  36%|▎| 358/991 [1:32:56<2:40:22, 15.20s/batch, batch_loss=19.3, bat

Epoch 5/10:  36%|▎| 358/991 [1:33:10<2:40:22, 15.20s/batch, batch_loss=6.04, bat

Epoch 5/10:  36%|▎| 359/991 [1:33:10<2:35:43, 14.78s/batch, batch_loss=6.04, bat

Epoch 5/10:  36%|▎| 359/991 [1:33:25<2:35:43, 14.78s/batch, batch_loss=9.26, bat

Epoch 5/10:  36%|▎| 360/991 [1:33:25<2:36:03, 14.84s/batch, batch_loss=9.26, bat

Epoch 5/10:  36%|▎| 360/991 [1:33:39<2:36:03, 14.84s/batch, batch_loss=32.6, bat

Epoch 5/10:  36%|▎| 361/991 [1:33:39<2:35:19, 14.79s/batch, batch_loss=32.6, bat

Epoch 5/10:  36%|▎| 361/991 [1:33:55<2:35:19, 14.79s/batch, batch_loss=23.3, bat

Epoch 5/10:  37%|▎| 362/991 [1:33:55<2:36:29, 14.93s/batch, batch_loss=23.3, bat

Epoch 5/10:  37%|▎| 362/991 [1:34:09<2:36:29, 14.93s/batch, batch_loss=15.5, bat

Epoch 5/10:  37%|▎| 363/991 [1:34:09<2:34:51, 14.79s/batch, batch_loss=15.5, bat

Epoch 5/10:  37%|▎| 363/991 [1:34:23<2:34:51, 14.79s/batch, batch_loss=14, batch

Epoch 5/10:  37%|▎| 364/991 [1:34:23<2:30:28, 14.40s/batch, batch_loss=14, batch

Epoch 5/10:  37%|▎| 364/991 [1:34:37<2:30:28, 14.40s/batch, batch_loss=10.3, bat

Epoch 5/10:  37%|▎| 365/991 [1:34:37<2:31:02, 14.48s/batch, batch_loss=10.3, bat

Epoch 5/10:  37%|▎| 365/991 [1:34:52<2:31:02, 14.48s/batch, batch_loss=14, batch

Epoch 5/10:  37%|▎| 366/991 [1:34:52<2:33:09, 14.70s/batch, batch_loss=14, batch

Epoch 5/10:  37%|▎| 366/991 [1:35:07<2:33:09, 14.70s/batch, batch_loss=16.1, bat

Epoch 5/10:  37%|▎| 367/991 [1:35:07<2:33:03, 14.72s/batch, batch_loss=16.1, bat

Epoch 5/10:  37%|▎| 367/991 [1:35:22<2:33:03, 14.72s/batch, batch_loss=20.4, bat

Epoch 5/10:  37%|▎| 368/991 [1:35:22<2:31:55, 14.63s/batch, batch_loss=20.4, bat

Epoch 5/10:  37%|▎| 368/991 [1:35:36<2:31:55, 14.63s/batch, batch_loss=13.6, bat

Epoch 5/10:  37%|▎| 369/991 [1:35:36<2:29:36, 14.43s/batch, batch_loss=13.6, bat

Epoch 5/10:  37%|▎| 369/991 [1:35:51<2:29:36, 14.43s/batch, batch_loss=1.21e+4, 

Epoch 5/10:  37%|▎| 370/991 [1:35:51<2:33:23, 14.82s/batch, batch_loss=1.21e+4, 

Epoch 5/10:  37%|▎| 370/991 [1:36:06<2:33:23, 14.82s/batch, batch_loss=22.5, bat

Epoch 5/10:  37%|▎| 371/991 [1:36:06<2:33:22, 14.84s/batch, batch_loss=22.5, bat

Epoch 5/10:  37%|▎| 371/991 [1:36:21<2:33:22, 14.84s/batch, batch_loss=16.7, bat

Epoch 5/10:  38%|▍| 372/991 [1:36:21<2:31:47, 14.71s/batch, batch_loss=16.7, bat

Epoch 5/10:  38%|▍| 372/991 [1:36:36<2:31:47, 14.71s/batch, batch_loss=28.1, bat

Epoch 5/10:  38%|▍| 373/991 [1:36:36<2:32:53, 14.84s/batch, batch_loss=28.1, bat

Epoch 5/10:  38%|▍| 373/991 [1:36:52<2:32:53, 14.84s/batch, batch_loss=476, batc

Epoch 5/10:  38%|▍| 374/991 [1:36:52<2:35:34, 15.13s/batch, batch_loss=476, batc

Epoch 5/10:  38%|▍| 374/991 [1:37:07<2:35:34, 15.13s/batch, batch_loss=1.43e+3, 

Epoch 5/10:  38%|▍| 375/991 [1:37:07<2:36:49, 15.28s/batch, batch_loss=1.43e+3, 

Epoch 5/10:  38%|▍| 375/991 [1:37:22<2:36:49, 15.28s/batch, batch_loss=1.22e+3, 

Epoch 5/10:  38%|▍| 376/991 [1:37:22<2:35:56, 15.21s/batch, batch_loss=1.22e+3, 

Epoch 5/10:  38%|▍| 376/991 [1:37:37<2:35:56, 15.21s/batch, batch_loss=27.5, bat

Epoch 5/10:  38%|▍| 377/991 [1:37:37<2:33:45, 15.03s/batch, batch_loss=27.5, bat

Epoch 5/10:  38%|▍| 377/991 [1:37:53<2:33:45, 15.03s/batch, batch_loss=1.18e+3, 

Epoch 5/10:  38%|▍| 378/991 [1:37:53<2:37:48, 15.45s/batch, batch_loss=1.18e+3, 

Epoch 5/10:  38%|▍| 378/991 [1:38:09<2:37:48, 15.45s/batch, batch_loss=11.3, bat

Epoch 5/10:  38%|▍| 379/991 [1:38:09<2:37:11, 15.41s/batch, batch_loss=11.3, bat

Epoch 5/10:  38%|▍| 379/991 [1:38:24<2:37:11, 15.41s/batch, batch_loss=22.4, bat

Epoch 5/10:  38%|▍| 380/991 [1:38:24<2:35:54, 15.31s/batch, batch_loss=22.4, bat

Epoch 5/10:  38%|▍| 380/991 [1:38:39<2:35:54, 15.31s/batch, batch_loss=19.9, bat

Epoch 5/10:  38%|▍| 381/991 [1:38:39<2:34:19, 15.18s/batch, batch_loss=19.9, bat

Epoch 5/10:  38%|▍| 381/991 [1:38:54<2:34:19, 15.18s/batch, batch_loss=15.6, bat

Epoch 5/10:  39%|▍| 382/991 [1:38:54<2:34:28, 15.22s/batch, batch_loss=15.6, bat

Epoch 5/10:  39%|▍| 382/991 [1:39:08<2:34:28, 15.22s/batch, batch_loss=11.1, bat

Epoch 5/10:  39%|▍| 383/991 [1:39:08<2:32:04, 15.01s/batch, batch_loss=11.1, bat

Epoch 5/10:  39%|▍| 383/991 [1:39:24<2:32:04, 15.01s/batch, batch_loss=26.1, bat

Epoch 5/10:  39%|▍| 384/991 [1:39:24<2:33:24, 15.16s/batch, batch_loss=26.1, bat

Epoch 5/10:  39%|▍| 384/991 [1:39:39<2:33:24, 15.16s/batch, batch_loss=9.29, bat

Epoch 5/10:  39%|▍| 385/991 [1:39:39<2:31:58, 15.05s/batch, batch_loss=9.29, bat

Epoch 5/10:  39%|▍| 385/991 [1:39:54<2:31:58, 15.05s/batch, batch_loss=28.7, bat

Epoch 5/10:  39%|▍| 386/991 [1:39:54<2:31:06, 14.99s/batch, batch_loss=28.7, bat

Epoch 5/10:  39%|▍| 386/991 [1:40:09<2:31:06, 14.99s/batch, batch_loss=40.9, bat

Epoch 5/10:  39%|▍| 387/991 [1:40:09<2:30:52, 14.99s/batch, batch_loss=40.9, bat

Epoch 5/10:  39%|▍| 387/991 [1:40:23<2:30:52, 14.99s/batch, batch_loss=816, batc

Epoch 5/10:  39%|▍| 388/991 [1:40:23<2:28:25, 14.77s/batch, batch_loss=816, batc

Epoch 5/10:  39%|▍| 388/991 [1:40:37<2:28:25, 14.77s/batch, batch_loss=20.8, bat

Epoch 5/10:  39%|▍| 389/991 [1:40:37<2:27:25, 14.69s/batch, batch_loss=20.8, bat

Epoch 5/10:  39%|▍| 389/991 [1:40:52<2:27:25, 14.69s/batch, batch_loss=889, batc

Epoch 5/10:  39%|▍| 390/991 [1:40:52<2:26:59, 14.68s/batch, batch_loss=889, batc

Epoch 5/10:  39%|▍| 390/991 [1:41:06<2:26:59, 14.68s/batch, batch_loss=25.5, bat

Epoch 5/10:  39%|▍| 391/991 [1:41:06<2:24:35, 14.46s/batch, batch_loss=25.5, bat

Epoch 5/10:  39%|▍| 391/991 [1:41:20<2:24:35, 14.46s/batch, batch_loss=24.8, bat

Epoch 5/10:  40%|▍| 392/991 [1:41:20<2:22:18, 14.25s/batch, batch_loss=24.8, bat

Epoch 5/10:  40%|▍| 392/991 [1:41:34<2:22:18, 14.25s/batch, batch_loss=25.4, bat

Epoch 5/10:  40%|▍| 393/991 [1:41:34<2:21:47, 14.23s/batch, batch_loss=25.4, bat

Epoch 5/10:  40%|▍| 393/991 [1:41:48<2:21:47, 14.23s/batch, batch_loss=611, batc

Epoch 5/10:  40%|▍| 394/991 [1:41:48<2:21:57, 14.27s/batch, batch_loss=611, batc

Epoch 5/10:  40%|▍| 394/991 [1:42:03<2:21:57, 14.27s/batch, batch_loss=20.6, bat

Epoch 5/10:  40%|▍| 395/991 [1:42:03<2:22:54, 14.39s/batch, batch_loss=20.6, bat

Epoch 5/10:  40%|▍| 395/991 [1:42:17<2:22:54, 14.39s/batch, batch_loss=14.1, bat

Epoch 5/10:  40%|▍| 396/991 [1:42:17<2:22:41, 14.39s/batch, batch_loss=14.1, bat

Epoch 5/10:  40%|▍| 396/991 [1:42:32<2:22:41, 14.39s/batch, batch_loss=18.4, bat

Epoch 5/10:  40%|▍| 397/991 [1:42:32<2:24:24, 14.59s/batch, batch_loss=18.4, bat

Epoch 5/10:  40%|▍| 397/991 [1:42:48<2:24:24, 14.59s/batch, batch_loss=17.6, bat

Epoch 5/10:  40%|▍| 398/991 [1:42:48<2:27:08, 14.89s/batch, batch_loss=17.6, bat

Epoch 5/10:  40%|▍| 398/991 [1:43:03<2:27:08, 14.89s/batch, batch_loss=27.1, bat

Epoch 5/10:  40%|▍| 399/991 [1:43:03<2:26:00, 14.80s/batch, batch_loss=27.1, bat

Epoch 5/10:  40%|▍| 399/991 [1:43:17<2:26:00, 14.80s/batch, batch_loss=12.8, bat

Epoch 5/10:  40%|▍| 400/991 [1:43:17<2:26:13, 14.85s/batch, batch_loss=12.8, bat

Epoch 5/10:  40%|▍| 400/991 [1:43:32<2:26:13, 14.85s/batch, batch_loss=10.8, bat

Epoch 5/10:  40%|▍| 401/991 [1:43:32<2:25:28, 14.79s/batch, batch_loss=10.8, bat

Epoch 5/10:  40%|▍| 401/991 [1:43:47<2:25:28, 14.79s/batch, batch_loss=19.2, bat

Epoch 5/10:  41%|▍| 402/991 [1:43:47<2:26:20, 14.91s/batch, batch_loss=19.2, bat

Epoch 5/10:  41%|▍| 402/991 [1:44:02<2:26:20, 14.91s/batch, batch_loss=17, batch

Epoch 5/10:  41%|▍| 403/991 [1:44:02<2:26:12, 14.92s/batch, batch_loss=17, batch

Epoch 5/10:  41%|▍| 403/991 [1:44:17<2:26:12, 14.92s/batch, batch_loss=11.9, bat

Epoch 5/10:  41%|▍| 404/991 [1:44:17<2:24:45, 14.80s/batch, batch_loss=11.9, bat

Epoch 5/10:  41%|▍| 404/991 [1:44:32<2:24:45, 14.80s/batch, batch_loss=13.1, bat

Epoch 5/10:  41%|▍| 405/991 [1:44:32<2:25:15, 14.87s/batch, batch_loss=13.1, bat

Epoch 5/10:  41%|▍| 405/991 [1:44:47<2:25:15, 14.87s/batch, batch_loss=7.12, bat

Epoch 5/10:  41%|▍| 406/991 [1:44:47<2:26:08, 14.99s/batch, batch_loss=7.12, bat

Epoch 5/10:  41%|▍| 406/991 [1:45:03<2:26:08, 14.99s/batch, batch_loss=23.5, bat

Epoch 5/10:  41%|▍| 407/991 [1:45:03<2:29:44, 15.38s/batch, batch_loss=23.5, bat

Epoch 5/10:  41%|▍| 407/991 [1:45:19<2:29:44, 15.38s/batch, batch_loss=8.52, bat

Epoch 5/10:  41%|▍| 408/991 [1:45:19<2:31:37, 15.60s/batch, batch_loss=8.52, bat

Epoch 5/10:  41%|▍| 408/991 [1:45:35<2:31:37, 15.60s/batch, batch_loss=22.3, bat

Epoch 5/10:  41%|▍| 409/991 [1:45:35<2:31:02, 15.57s/batch, batch_loss=22.3, bat

Epoch 5/10:  41%|▍| 409/991 [1:45:50<2:31:02, 15.57s/batch, batch_loss=21.9, bat

Epoch 5/10:  41%|▍| 410/991 [1:45:50<2:28:01, 15.29s/batch, batch_loss=21.9, bat

Epoch 5/10:  41%|▍| 410/991 [1:46:05<2:28:01, 15.29s/batch, batch_loss=16.3, bat

Epoch 5/10:  41%|▍| 411/991 [1:46:05<2:29:14, 15.44s/batch, batch_loss=16.3, bat

Epoch 5/10:  41%|▍| 411/991 [1:46:21<2:29:14, 15.44s/batch, batch_loss=12.7, bat

Epoch 5/10:  42%|▍| 412/991 [1:46:21<2:28:47, 15.42s/batch, batch_loss=12.7, bat

Epoch 5/10:  42%|▍| 412/991 [1:46:37<2:28:47, 15.42s/batch, batch_loss=21.9, bat

Epoch 5/10:  42%|▍| 413/991 [1:46:37<2:31:23, 15.72s/batch, batch_loss=21.9, bat

Epoch 5/10:  42%|▍| 413/991 [1:46:52<2:31:23, 15.72s/batch, batch_loss=14.3, bat

Epoch 5/10:  42%|▍| 414/991 [1:46:52<2:29:49, 15.58s/batch, batch_loss=14.3, bat

Epoch 5/10:  42%|▍| 414/991 [1:47:07<2:29:49, 15.58s/batch, batch_loss=11.5, bat

Epoch 5/10:  42%|▍| 415/991 [1:47:07<2:27:52, 15.40s/batch, batch_loss=11.5, bat

Epoch 5/10:  42%|▍| 415/991 [1:47:23<2:27:52, 15.40s/batch, batch_loss=12, batch

Epoch 5/10:  42%|▍| 416/991 [1:47:23<2:29:20, 15.58s/batch, batch_loss=12, batch

Epoch 5/10:  42%|▍| 416/991 [1:47:40<2:29:20, 15.58s/batch, batch_loss=8.22, bat

Epoch 5/10:  42%|▍| 417/991 [1:47:40<2:30:48, 15.76s/batch, batch_loss=8.22, bat

Epoch 5/10:  42%|▍| 417/991 [1:47:56<2:30:48, 15.76s/batch, batch_loss=12.3, bat

Epoch 5/10:  42%|▍| 418/991 [1:47:56<2:31:14, 15.84s/batch, batch_loss=12.3, bat

Epoch 5/10:  42%|▍| 418/991 [1:48:12<2:31:14, 15.84s/batch, batch_loss=1.3e+3, b

Epoch 5/10:  42%|▍| 419/991 [1:48:12<2:32:29, 16.00s/batch, batch_loss=1.3e+3, b

Epoch 5/10:  42%|▍| 419/991 [1:48:28<2:32:29, 16.00s/batch, batch_loss=17.2, bat

Epoch 5/10:  42%|▍| 420/991 [1:48:28<2:31:35, 15.93s/batch, batch_loss=17.2, bat

Epoch 5/10:  42%|▍| 420/991 [1:48:43<2:31:35, 15.93s/batch, batch_loss=12.4, bat

Epoch 5/10:  42%|▍| 421/991 [1:48:43<2:30:13, 15.81s/batch, batch_loss=12.4, bat

Epoch 5/10:  42%|▍| 421/991 [1:48:58<2:30:13, 15.81s/batch, batch_loss=8.15, bat

Epoch 5/10:  43%|▍| 422/991 [1:48:58<2:26:59, 15.50s/batch, batch_loss=8.15, bat

Epoch 5/10:  43%|▍| 422/991 [1:49:12<2:26:59, 15.50s/batch, batch_loss=11.5, bat

Epoch 5/10:  43%|▍| 423/991 [1:49:12<2:22:50, 15.09s/batch, batch_loss=11.5, bat

Epoch 5/10:  43%|▍| 423/991 [1:49:26<2:22:50, 15.09s/batch, batch_loss=11.2, bat

Epoch 5/10:  43%|▍| 424/991 [1:49:26<2:18:52, 14.70s/batch, batch_loss=11.2, bat

Epoch 5/10:  43%|▍| 424/991 [1:49:41<2:18:52, 14.70s/batch, batch_loss=8.56, bat

Epoch 5/10:  43%|▍| 425/991 [1:49:41<2:18:25, 14.67s/batch, batch_loss=8.56, bat

Epoch 5/10:  43%|▍| 425/991 [1:49:56<2:18:25, 14.67s/batch, batch_loss=3.59, bat

Epoch 5/10:  43%|▍| 426/991 [1:49:56<2:19:51, 14.85s/batch, batch_loss=3.59, bat

Epoch 5/10:  43%|▍| 426/991 [1:50:10<2:19:51, 14.85s/batch, batch_loss=11.6, bat

Epoch 5/10:  43%|▍| 427/991 [1:50:10<2:18:03, 14.69s/batch, batch_loss=11.6, bat

Epoch 5/10:  43%|▍| 427/991 [1:50:26<2:18:03, 14.69s/batch, batch_loss=16.7, bat

Epoch 5/10:  43%|▍| 428/991 [1:50:26<2:20:35, 14.98s/batch, batch_loss=16.7, bat

Epoch 5/10:  43%|▍| 428/991 [1:50:42<2:20:35, 14.98s/batch, batch_loss=16.4, bat

Epoch 5/10:  43%|▍| 429/991 [1:50:42<2:22:39, 15.23s/batch, batch_loss=16.4, bat

Epoch 5/10:  43%|▍| 429/991 [1:50:56<2:22:39, 15.23s/batch, batch_loss=9.3e+3, b

Epoch 5/10:  43%|▍| 430/991 [1:50:56<2:19:35, 14.93s/batch, batch_loss=9.3e+3, b

Epoch 5/10:  43%|▍| 430/991 [1:51:11<2:19:35, 14.93s/batch, batch_loss=24.8, bat

Epoch 5/10:  43%|▍| 431/991 [1:51:11<2:19:43, 14.97s/batch, batch_loss=24.8, bat

Epoch 5/10:  43%|▍| 431/991 [1:51:26<2:19:43, 14.97s/batch, batch_loss=18.5, bat

Epoch 5/10:  44%|▍| 432/991 [1:51:26<2:19:00, 14.92s/batch, batch_loss=18.5, bat

Epoch 5/10:  44%|▍| 432/991 [1:51:41<2:19:00, 14.92s/batch, batch_loss=11.7, bat

Epoch 5/10:  44%|▍| 433/991 [1:51:41<2:18:48, 14.93s/batch, batch_loss=11.7, bat

Epoch 5/10:  44%|▍| 433/991 [1:51:54<2:18:48, 14.93s/batch, batch_loss=18.4, bat

Epoch 5/10:  44%|▍| 434/991 [1:51:54<2:13:40, 14.40s/batch, batch_loss=18.4, bat

Epoch 5/10:  44%|▍| 434/991 [1:52:07<2:13:40, 14.40s/batch, batch_loss=12.3, bat

Epoch 5/10:  44%|▍| 435/991 [1:52:07<2:09:55, 14.02s/batch, batch_loss=12.3, bat

Epoch 5/10:  44%|▍| 435/991 [1:52:22<2:09:55, 14.02s/batch, batch_loss=17.1, bat

Epoch 5/10:  44%|▍| 436/991 [1:52:22<2:11:44, 14.24s/batch, batch_loss=17.1, bat

Epoch 5/10:  44%|▍| 436/991 [1:52:37<2:11:44, 14.24s/batch, batch_loss=15.1, bat

Epoch 5/10:  44%|▍| 437/991 [1:52:37<2:15:05, 14.63s/batch, batch_loss=15.1, bat

Epoch 5/10:  44%|▍| 437/991 [1:52:53<2:15:05, 14.63s/batch, batch_loss=21.4, bat

Epoch 5/10:  44%|▍| 438/991 [1:52:53<2:17:22, 14.90s/batch, batch_loss=21.4, bat

Epoch 5/10:  44%|▍| 438/991 [1:53:08<2:17:22, 14.90s/batch, batch_loss=13.5, bat

Epoch 5/10:  44%|▍| 439/991 [1:53:08<2:18:45, 15.08s/batch, batch_loss=13.5, bat

Epoch 5/10:  44%|▍| 439/991 [1:53:25<2:18:45, 15.08s/batch, batch_loss=20.6, bat

Epoch 5/10:  44%|▍| 440/991 [1:53:25<2:21:33, 15.41s/batch, batch_loss=20.6, bat

Epoch 5/10:  44%|▍| 440/991 [1:53:40<2:21:33, 15.41s/batch, batch_loss=19.2, bat

Epoch 5/10:  45%|▍| 441/991 [1:53:40<2:22:44, 15.57s/batch, batch_loss=19.2, bat

Epoch 5/10:  45%|▍| 441/991 [1:53:56<2:22:44, 15.57s/batch, batch_loss=14.7, bat

Epoch 5/10:  45%|▍| 442/991 [1:53:56<2:23:39, 15.70s/batch, batch_loss=14.7, bat

Epoch 5/10:  45%|▍| 442/991 [1:54:12<2:23:39, 15.70s/batch, batch_loss=20.3, bat

Epoch 5/10:  45%|▍| 443/991 [1:54:12<2:22:24, 15.59s/batch, batch_loss=20.3, bat

Epoch 5/10:  45%|▍| 443/991 [1:54:27<2:22:24, 15.59s/batch, batch_loss=17.9, bat

Epoch 5/10:  45%|▍| 444/991 [1:54:27<2:21:53, 15.56s/batch, batch_loss=17.9, bat

Epoch 5/10:  45%|▍| 444/991 [1:54:44<2:21:53, 15.56s/batch, batch_loss=21.5, bat

Epoch 5/10:  45%|▍| 445/991 [1:54:44<2:25:14, 15.96s/batch, batch_loss=21.5, bat

Epoch 5/10:  45%|▍| 445/991 [1:54:59<2:25:14, 15.96s/batch, batch_loss=25.1, bat

Epoch 5/10:  45%|▍| 446/991 [1:54:59<2:22:58, 15.74s/batch, batch_loss=25.1, bat

Epoch 5/10:  45%|▍| 446/991 [1:55:14<2:22:58, 15.74s/batch, batch_loss=12.2, bat

Epoch 5/10:  45%|▍| 447/991 [1:55:14<2:20:07, 15.45s/batch, batch_loss=12.2, bat

Epoch 5/10:  45%|▍| 447/991 [1:55:30<2:20:07, 15.45s/batch, batch_loss=16.5, bat

Epoch 5/10:  45%|▍| 448/991 [1:55:30<2:20:57, 15.58s/batch, batch_loss=16.5, bat

Epoch 5/10:  45%|▍| 448/991 [1:55:45<2:20:57, 15.58s/batch, batch_loss=17.8, bat

Epoch 5/10:  45%|▍| 449/991 [1:55:45<2:18:06, 15.29s/batch, batch_loss=17.8, bat

Epoch 5/10:  45%|▍| 449/991 [1:56:00<2:18:06, 15.29s/batch, batch_loss=22.3, bat

Epoch 5/10:  45%|▍| 450/991 [1:56:00<2:19:14, 15.44s/batch, batch_loss=22.3, bat

Epoch 5/10:  45%|▍| 450/991 [1:56:16<2:19:14, 15.44s/batch, batch_loss=20.1, bat

Epoch 5/10:  46%|▍| 451/991 [1:56:16<2:17:57, 15.33s/batch, batch_loss=20.1, bat

Epoch 5/10:  46%|▍| 451/991 [1:56:31<2:17:57, 15.33s/batch, batch_loss=17.1, bat

Epoch 5/10:  46%|▍| 452/991 [1:56:31<2:17:00, 15.25s/batch, batch_loss=17.1, bat

Epoch 5/10:  46%|▍| 452/991 [1:56:46<2:17:00, 15.25s/batch, batch_loss=19.9, bat

Epoch 5/10:  46%|▍| 453/991 [1:56:46<2:17:13, 15.30s/batch, batch_loss=19.9, bat

Epoch 5/10:  46%|▍| 453/991 [1:57:02<2:17:13, 15.30s/batch, batch_loss=7.23e+3, 

Epoch 5/10:  46%|▍| 454/991 [1:57:02<2:17:33, 15.37s/batch, batch_loss=7.23e+3, 

Epoch 5/10:  46%|▍| 454/991 [1:57:16<2:17:33, 15.37s/batch, batch_loss=35.2, bat

Epoch 5/10:  46%|▍| 455/991 [1:57:16<2:15:20, 15.15s/batch, batch_loss=35.2, bat

Epoch 5/10:  46%|▍| 455/991 [1:57:31<2:15:20, 15.15s/batch, batch_loss=21.5, bat

Epoch 5/10:  46%|▍| 456/991 [1:57:31<2:14:56, 15.13s/batch, batch_loss=21.5, bat

Epoch 5/10:  46%|▍| 456/991 [1:57:46<2:14:56, 15.13s/batch, batch_loss=13.5, bat

Epoch 5/10:  46%|▍| 457/991 [1:57:46<2:12:55, 14.94s/batch, batch_loss=13.5, bat

Epoch 5/10:  46%|▍| 457/991 [1:58:01<2:12:55, 14.94s/batch, batch_loss=14.7, bat

Epoch 5/10:  46%|▍| 458/991 [1:58:01<2:12:20, 14.90s/batch, batch_loss=14.7, bat

Epoch 5/10:  46%|▍| 458/991 [1:58:15<2:12:20, 14.90s/batch, batch_loss=22.4, bat

Epoch 5/10:  46%|▍| 459/991 [1:58:15<2:11:10, 14.79s/batch, batch_loss=22.4, bat

Epoch 5/10:  46%|▍| 459/991 [1:58:30<2:11:10, 14.79s/batch, batch_loss=19.9, bat

Epoch 5/10:  46%|▍| 460/991 [1:58:30<2:11:45, 14.89s/batch, batch_loss=19.9, bat

Epoch 5/10:  46%|▍| 460/991 [1:58:45<2:11:45, 14.89s/batch, batch_loss=52.4, bat

Epoch 5/10:  47%|▍| 461/991 [1:58:45<2:11:10, 14.85s/batch, batch_loss=52.4, bat

Epoch 5/10:  47%|▍| 461/991 [1:59:00<2:11:10, 14.85s/batch, batch_loss=13.5, bat

Epoch 5/10:  47%|▍| 462/991 [1:59:00<2:11:14, 14.89s/batch, batch_loss=13.5, bat

Epoch 5/10:  47%|▍| 462/991 [1:59:14<2:11:14, 14.89s/batch, batch_loss=6.21e+4, 

Epoch 5/10:  47%|▍| 463/991 [1:59:14<2:09:30, 14.72s/batch, batch_loss=6.21e+4, 

Epoch 5/10:  47%|▍| 463/991 [1:59:29<2:09:30, 14.72s/batch, batch_loss=13.9, bat

Epoch 5/10:  47%|▍| 464/991 [1:59:29<2:10:23, 14.84s/batch, batch_loss=13.9, bat

Epoch 5/10:  47%|▍| 464/991 [1:59:45<2:10:23, 14.84s/batch, batch_loss=13.3, bat

Epoch 5/10:  47%|▍| 465/991 [1:59:45<2:11:04, 14.95s/batch, batch_loss=13.3, bat

Epoch 5/10:  47%|▍| 465/991 [2:00:00<2:11:04, 14.95s/batch, batch_loss=15.3, bat

Epoch 5/10:  47%|▍| 466/991 [2:00:00<2:11:55, 15.08s/batch, batch_loss=15.3, bat

Epoch 5/10:  47%|▍| 466/991 [2:00:14<2:11:55, 15.08s/batch, batch_loss=14.4, bat

Epoch 5/10:  47%|▍| 467/991 [2:00:14<2:09:45, 14.86s/batch, batch_loss=14.4, bat

Epoch 5/10:  47%|▍| 467/991 [2:00:29<2:09:45, 14.86s/batch, batch_loss=17.1, bat

Epoch 5/10:  47%|▍| 468/991 [2:00:29<2:09:07, 14.81s/batch, batch_loss=17.1, bat

Epoch 5/10:  47%|▍| 468/991 [2:00:45<2:09:07, 14.81s/batch, batch_loss=18.3, bat

Epoch 5/10:  47%|▍| 469/991 [2:00:45<2:11:51, 15.16s/batch, batch_loss=18.3, bat

Epoch 5/10:  47%|▍| 469/991 [2:01:00<2:11:51, 15.16s/batch, batch_loss=12.6, bat

Epoch 5/10:  47%|▍| 470/991 [2:01:00<2:12:22, 15.25s/batch, batch_loss=12.6, bat

Epoch 5/10:  47%|▍| 470/991 [2:01:16<2:12:22, 15.25s/batch, batch_loss=19.4, bat

Epoch 5/10:  48%|▍| 471/991 [2:01:16<2:12:35, 15.30s/batch, batch_loss=19.4, bat

Epoch 5/10:  48%|▍| 471/991 [2:01:31<2:12:35, 15.30s/batch, batch_loss=25.3, bat

Epoch 5/10:  48%|▍| 472/991 [2:01:31<2:12:13, 15.29s/batch, batch_loss=25.3, bat

Epoch 5/10:  48%|▍| 472/991 [2:01:46<2:12:13, 15.29s/batch, batch_loss=19, batch

Epoch 5/10:  48%|▍| 473/991 [2:01:46<2:11:45, 15.26s/batch, batch_loss=19, batch

Epoch 5/10:  48%|▍| 473/991 [2:02:01<2:11:45, 15.26s/batch, batch_loss=15.8, bat

Epoch 5/10:  48%|▍| 474/991 [2:02:01<2:10:35, 15.15s/batch, batch_loss=15.8, bat

Epoch 5/10:  48%|▍| 474/991 [2:02:17<2:10:35, 15.15s/batch, batch_loss=2.4e+3, b

Epoch 5/10:  48%|▍| 475/991 [2:02:17<2:11:18, 15.27s/batch, batch_loss=2.4e+3, b

Epoch 5/10:  48%|▍| 475/991 [2:02:32<2:11:18, 15.27s/batch, batch_loss=18.5, bat

Epoch 5/10:  48%|▍| 476/991 [2:02:32<2:10:17, 15.18s/batch, batch_loss=18.5, bat

Epoch 5/10:  48%|▍| 476/991 [2:02:47<2:10:17, 15.18s/batch, batch_loss=17.6, bat

Epoch 5/10:  48%|▍| 477/991 [2:02:47<2:09:08, 15.08s/batch, batch_loss=17.6, bat

Epoch 5/10:  48%|▍| 477/991 [2:03:02<2:09:08, 15.08s/batch, batch_loss=16.4, bat

Epoch 5/10:  48%|▍| 478/991 [2:03:02<2:10:06, 15.22s/batch, batch_loss=16.4, bat

Epoch 5/10:  48%|▍| 478/991 [2:03:17<2:10:06, 15.22s/batch, batch_loss=19.2, bat

Epoch 5/10:  48%|▍| 479/991 [2:03:17<2:08:57, 15.11s/batch, batch_loss=19.2, bat

Epoch 5/10:  48%|▍| 479/991 [2:03:33<2:08:57, 15.11s/batch, batch_loss=18.2, bat

Epoch 5/10:  48%|▍| 480/991 [2:03:33<2:10:23, 15.31s/batch, batch_loss=18.2, bat

Epoch 5/10:  48%|▍| 480/991 [2:03:47<2:10:23, 15.31s/batch, batch_loss=27, batch

Epoch 5/10:  49%|▍| 481/991 [2:03:47<2:08:01, 15.06s/batch, batch_loss=27, batch

Epoch 5/10:  49%|▍| 481/991 [2:04:02<2:08:01, 15.06s/batch, batch_loss=20.4, bat

Epoch 5/10:  49%|▍| 482/991 [2:04:02<2:07:41, 15.05s/batch, batch_loss=20.4, bat

Epoch 5/10:  49%|▍| 482/991 [2:04:17<2:07:41, 15.05s/batch, batch_loss=13.1, bat

Epoch 5/10:  49%|▍| 483/991 [2:04:17<2:06:18, 14.92s/batch, batch_loss=13.1, bat

Epoch 5/10:  49%|▍| 483/991 [2:04:32<2:06:18, 14.92s/batch, batch_loss=21.7, bat

Epoch 5/10:  49%|▍| 484/991 [2:04:32<2:06:50, 15.01s/batch, batch_loss=21.7, bat

Epoch 5/10:  49%|▍| 484/991 [2:04:46<2:06:50, 15.01s/batch, batch_loss=9.94, bat

Epoch 5/10:  49%|▍| 485/991 [2:04:46<2:04:06, 14.72s/batch, batch_loss=9.94, bat

Epoch 5/10:  49%|▍| 485/991 [2:05:01<2:04:06, 14.72s/batch, batch_loss=25.1, bat

Epoch 5/10:  49%|▍| 486/991 [2:05:01<2:03:44, 14.70s/batch, batch_loss=25.1, bat

Epoch 5/10:  49%|▍| 486/991 [2:05:15<2:03:44, 14.70s/batch, batch_loss=14.5, bat

Epoch 5/10:  49%|▍| 487/991 [2:05:15<2:02:51, 14.63s/batch, batch_loss=14.5, bat

Epoch 5/10:  49%|▍| 487/991 [2:05:30<2:02:51, 14.63s/batch, batch_loss=9.19, bat

Epoch 5/10:  49%|▍| 488/991 [2:05:30<2:03:03, 14.68s/batch, batch_loss=9.19, bat

Epoch 5/10:  49%|▍| 488/991 [2:05:45<2:03:03, 14.68s/batch, batch_loss=11.8, bat

Epoch 5/10:  49%|▍| 489/991 [2:05:45<2:03:46, 14.79s/batch, batch_loss=11.8, bat

Epoch 5/10:  49%|▍| 489/991 [2:06:00<2:03:46, 14.79s/batch, batch_loss=8.9, batc

Epoch 5/10:  49%|▍| 490/991 [2:06:00<2:02:36, 14.68s/batch, batch_loss=8.9, batc

Epoch 5/10:  49%|▍| 490/991 [2:06:15<2:02:36, 14.68s/batch, batch_loss=21.5, bat

Epoch 5/10:  50%|▍| 491/991 [2:06:15<2:03:12, 14.79s/batch, batch_loss=21.5, bat

Epoch 5/10:  50%|▍| 491/991 [2:06:29<2:03:12, 14.79s/batch, batch_loss=22.2, bat

Epoch 5/10:  50%|▍| 492/991 [2:06:29<2:02:28, 14.73s/batch, batch_loss=22.2, bat

Epoch 5/10:  50%|▍| 492/991 [2:06:44<2:02:28, 14.73s/batch, batch_loss=23.7, bat

Epoch 5/10:  50%|▍| 493/991 [2:06:44<2:01:42, 14.66s/batch, batch_loss=23.7, bat

Epoch 5/10:  50%|▍| 493/991 [2:06:58<2:01:42, 14.66s/batch, batch_loss=9.88, bat

Epoch 5/10:  50%|▍| 494/991 [2:06:58<2:00:49, 14.59s/batch, batch_loss=9.88, bat

Epoch 5/10:  50%|▍| 494/991 [2:07:12<2:00:49, 14.59s/batch, batch_loss=8.54e+4, 

Epoch 5/10:  50%|▍| 495/991 [2:07:12<1:57:38, 14.23s/batch, batch_loss=8.54e+4, 

Epoch 5/10:  50%|▍| 495/991 [2:07:27<1:57:38, 14.23s/batch, batch_loss=12.3, bat

Epoch 5/10:  50%|▌| 496/991 [2:07:27<1:59:25, 14.48s/batch, batch_loss=12.3, bat

Epoch 5/10:  50%|▌| 496/991 [2:07:41<1:59:25, 14.48s/batch, batch_loss=176, batc

Epoch 5/10:  50%|▌| 497/991 [2:07:41<1:59:46, 14.55s/batch, batch_loss=176, batc

Epoch 5/10:  50%|▌| 497/991 [2:07:56<1:59:46, 14.55s/batch, batch_loss=13.9, bat

Epoch 5/10:  50%|▌| 498/991 [2:07:56<2:00:03, 14.61s/batch, batch_loss=13.9, bat

Epoch 5/10:  50%|▌| 498/991 [2:08:11<2:00:03, 14.61s/batch, batch_loss=405, batc

Epoch 5/10:  50%|▌| 499/991 [2:08:11<1:59:27, 14.57s/batch, batch_loss=405, batc

Epoch 5/10:  50%|▌| 499/991 [2:08:25<1:59:27, 14.57s/batch, batch_loss=18, batch

Epoch 5/10:  50%|▌| 500/991 [2:08:25<1:58:31, 14.48s/batch, batch_loss=18, batch

Epoch 5/10:  50%|▌| 500/991 [2:08:40<1:58:31, 14.48s/batch, batch_loss=8.85, bat

Epoch 5/10:  51%|▌| 501/991 [2:08:40<2:00:54, 14.80s/batch, batch_loss=8.85, bat

Epoch 5/10:  51%|▌| 501/991 [2:08:55<2:00:54, 14.80s/batch, batch_loss=9.98, bat

Epoch 5/10:  51%|▌| 502/991 [2:08:55<2:00:07, 14.74s/batch, batch_loss=9.98, bat

Epoch 5/10:  51%|▌| 502/991 [2:09:10<2:00:07, 14.74s/batch, batch_loss=16.5, bat

Epoch 5/10:  51%|▌| 503/991 [2:09:10<2:01:07, 14.89s/batch, batch_loss=16.5, bat

Epoch 5/10:  51%|▌| 503/991 [2:09:26<2:01:07, 14.89s/batch, batch_loss=11.4, bat

Epoch 5/10:  51%|▌| 504/991 [2:09:26<2:03:08, 15.17s/batch, batch_loss=11.4, bat

Epoch 5/10:  51%|▌| 504/991 [2:09:41<2:03:08, 15.17s/batch, batch_loss=8.25, bat

Epoch 5/10:  51%|▌| 505/991 [2:09:41<2:03:08, 15.20s/batch, batch_loss=8.25, bat

Epoch 5/10:  51%|▌| 505/991 [2:09:57<2:03:08, 15.20s/batch, batch_loss=14.3, bat

Epoch 5/10:  51%|▌| 506/991 [2:09:57<2:03:23, 15.27s/batch, batch_loss=14.3, bat

Epoch 5/10:  51%|▌| 506/991 [2:10:12<2:03:23, 15.27s/batch, batch_loss=12.6, bat

Epoch 5/10:  51%|▌| 507/991 [2:10:12<2:02:30, 15.19s/batch, batch_loss=12.6, bat

Epoch 5/10:  51%|▌| 507/991 [2:10:27<2:02:30, 15.19s/batch, batch_loss=15.1, bat

Epoch 5/10:  51%|▌| 508/991 [2:10:27<2:01:45, 15.13s/batch, batch_loss=15.1, bat

Epoch 5/10:  51%|▌| 508/991 [2:10:42<2:01:45, 15.13s/batch, batch_loss=16.8, bat

Epoch 5/10:  51%|▌| 509/991 [2:10:42<2:01:34, 15.13s/batch, batch_loss=16.8, bat

Epoch 5/10:  51%|▌| 509/991 [2:10:58<2:01:34, 15.13s/batch, batch_loss=11.5, bat

Epoch 5/10:  51%|▌| 510/991 [2:10:58<2:04:08, 15.48s/batch, batch_loss=11.5, bat

Epoch 5/10:  51%|▌| 510/991 [2:11:14<2:04:08, 15.48s/batch, batch_loss=11.4, bat

Epoch 5/10:  52%|▌| 511/991 [2:11:14<2:04:00, 15.50s/batch, batch_loss=11.4, bat

Epoch 5/10:  52%|▌| 511/991 [2:11:30<2:04:00, 15.50s/batch, batch_loss=9.13, bat

Epoch 5/10:  52%|▌| 512/991 [2:11:30<2:04:53, 15.64s/batch, batch_loss=9.13, bat

Epoch 5/10:  52%|▌| 512/991 [2:11:45<2:04:53, 15.64s/batch, batch_loss=7.63, bat

Epoch 5/10:  52%|▌| 513/991 [2:11:45<2:03:44, 15.53s/batch, batch_loss=7.63, bat

Epoch 5/10:  52%|▌| 513/991 [2:12:00<2:03:44, 15.53s/batch, batch_loss=13.7, bat

Epoch 5/10:  52%|▌| 514/991 [2:12:00<2:03:11, 15.50s/batch, batch_loss=13.7, bat

Epoch 5/10:  52%|▌| 514/991 [2:12:16<2:03:11, 15.50s/batch, batch_loss=14.3, bat

Epoch 5/10:  52%|▌| 515/991 [2:12:16<2:04:14, 15.66s/batch, batch_loss=14.3, bat

Epoch 5/10:  52%|▌| 515/991 [2:12:31<2:04:14, 15.66s/batch, batch_loss=15.8, bat

Epoch 5/10:  52%|▌| 516/991 [2:12:31<2:01:46, 15.38s/batch, batch_loss=15.8, bat

Epoch 5/10:  52%|▌| 516/991 [2:12:46<2:01:46, 15.38s/batch, batch_loss=10.7, bat

Epoch 5/10:  52%|▌| 517/991 [2:12:46<2:00:06, 15.20s/batch, batch_loss=10.7, bat

Epoch 5/10:  52%|▌| 517/991 [2:13:01<2:00:06, 15.20s/batch, batch_loss=19, batch

Epoch 5/10:  52%|▌| 518/991 [2:13:01<2:00:03, 15.23s/batch, batch_loss=19, batch

Epoch 5/10:  52%|▌| 518/991 [2:13:17<2:00:03, 15.23s/batch, batch_loss=13.4, bat

Epoch 5/10:  52%|▌| 519/991 [2:13:17<2:00:06, 15.27s/batch, batch_loss=13.4, bat

Epoch 5/10:  52%|▌| 519/991 [2:13:32<2:00:06, 15.27s/batch, batch_loss=12.3, bat

Epoch 5/10:  52%|▌| 520/991 [2:13:32<1:59:28, 15.22s/batch, batch_loss=12.3, bat

Epoch 5/10:  52%|▌| 520/991 [2:13:47<1:59:28, 15.22s/batch, batch_loss=7.6, batc

Epoch 5/10:  53%|▌| 521/991 [2:13:47<1:59:51, 15.30s/batch, batch_loss=7.6, batc

Epoch 5/10:  53%|▌| 521/991 [2:14:02<1:59:51, 15.30s/batch, batch_loss=11.2, bat

Epoch 5/10:  53%|▌| 522/991 [2:14:02<1:57:44, 15.06s/batch, batch_loss=11.2, bat

Epoch 5/10:  53%|▌| 522/991 [2:14:17<1:57:44, 15.06s/batch, batch_loss=3.31, bat

Epoch 5/10:  53%|▌| 523/991 [2:14:17<1:57:55, 15.12s/batch, batch_loss=3.31, bat

Epoch 5/10:  53%|▌| 523/991 [2:14:32<1:57:55, 15.12s/batch, batch_loss=8.25, bat

Epoch 5/10:  53%|▌| 524/991 [2:14:32<1:57:01, 15.04s/batch, batch_loss=8.25, bat

Epoch 5/10:  53%|▌| 524/991 [2:14:46<1:57:01, 15.04s/batch, batch_loss=6.61, bat

Epoch 5/10:  53%|▌| 525/991 [2:14:46<1:55:38, 14.89s/batch, batch_loss=6.61, bat

Epoch 5/10:  53%|▌| 525/991 [2:15:01<1:55:38, 14.89s/batch, batch_loss=7.51, bat

Epoch 5/10:  53%|▌| 526/991 [2:15:01<1:55:23, 14.89s/batch, batch_loss=7.51, bat

Epoch 5/10:  53%|▌| 526/991 [2:15:16<1:55:23, 14.89s/batch, batch_loss=14.4, bat

Epoch 5/10:  53%|▌| 527/991 [2:15:16<1:55:52, 14.98s/batch, batch_loss=14.4, bat

Epoch 5/10:  53%|▌| 527/991 [2:15:31<1:55:52, 14.98s/batch, batch_loss=15, batch

Epoch 5/10:  53%|▌| 528/991 [2:15:31<1:55:37, 14.98s/batch, batch_loss=15, batch

Epoch 5/10:  53%|▌| 528/991 [2:15:46<1:55:37, 14.98s/batch, batch_loss=9.09, bat

Epoch 5/10:  53%|▌| 529/991 [2:15:46<1:53:46, 14.78s/batch, batch_loss=9.09, bat

Epoch 5/10:  53%|▌| 529/991 [2:16:00<1:53:46, 14.78s/batch, batch_loss=14.4, bat

Epoch 5/10:  53%|▌| 530/991 [2:16:00<1:52:02, 14.58s/batch, batch_loss=14.4, bat

Epoch 5/10:  53%|▌| 530/991 [2:16:14<1:52:02, 14.58s/batch, batch_loss=12.8, bat

Epoch 5/10:  54%|▌| 531/991 [2:16:14<1:51:19, 14.52s/batch, batch_loss=12.8, bat

Epoch 5/10:  54%|▌| 531/991 [2:16:29<1:51:19, 14.52s/batch, batch_loss=12.9, bat

Epoch 5/10:  54%|▌| 532/991 [2:16:29<1:51:08, 14.53s/batch, batch_loss=12.9, bat

Epoch 5/10:  54%|▌| 532/991 [2:16:44<1:51:08, 14.53s/batch, batch_loss=13.7, bat

Epoch 5/10:  54%|▌| 533/991 [2:16:44<1:53:28, 14.87s/batch, batch_loss=13.7, bat

Epoch 5/10:  54%|▌| 533/991 [2:17:00<1:53:28, 14.87s/batch, batch_loss=12.1, bat

Epoch 5/10:  54%|▌| 534/991 [2:17:00<1:56:03, 15.24s/batch, batch_loss=12.1, bat

Epoch 5/10:  54%|▌| 534/991 [2:17:16<1:56:03, 15.24s/batch, batch_loss=17.4, bat

Epoch 5/10:  54%|▌| 535/991 [2:17:16<1:55:44, 15.23s/batch, batch_loss=17.4, bat

Epoch 5/10:  54%|▌| 535/991 [2:17:31<1:55:44, 15.23s/batch, batch_loss=16.1, bat

Epoch 5/10:  54%|▌| 536/991 [2:17:31<1:55:47, 15.27s/batch, batch_loss=16.1, bat

Epoch 5/10:  54%|▌| 536/991 [2:17:47<1:55:47, 15.27s/batch, batch_loss=9.08, bat

Epoch 5/10:  54%|▌| 537/991 [2:17:47<1:55:59, 15.33s/batch, batch_loss=9.08, bat

Epoch 5/10:  54%|▌| 537/991 [2:18:05<1:55:59, 15.33s/batch, batch_loss=1.78e+3, 

Epoch 5/10:  54%|▌| 538/991 [2:18:05<2:02:30, 16.23s/batch, batch_loss=1.78e+3, 

Epoch 5/10:  54%|▌| 538/991 [2:18:20<2:02:30, 16.23s/batch, batch_loss=24.7, bat

Epoch 5/10:  54%|▌| 539/991 [2:18:20<1:59:58, 15.93s/batch, batch_loss=24.7, bat

Epoch 5/10:  54%|▌| 539/991 [2:18:35<1:59:58, 15.93s/batch, batch_loss=23.8, bat

Epoch 5/10:  54%|▌| 540/991 [2:18:35<1:57:29, 15.63s/batch, batch_loss=23.8, bat

Epoch 5/10:  54%|▌| 540/991 [2:18:50<1:57:29, 15.63s/batch, batch_loss=1.3e+4, b

Epoch 5/10:  55%|▌| 541/991 [2:18:50<1:55:08, 15.35s/batch, batch_loss=1.3e+4, b

Epoch 5/10:  55%|▌| 541/991 [2:19:05<1:55:08, 15.35s/batch, batch_loss=2.86e+3, 

Epoch 5/10:  55%|▌| 542/991 [2:19:05<1:55:49, 15.48s/batch, batch_loss=2.86e+3, 

Epoch 5/10:  55%|▌| 542/991 [2:19:21<1:55:49, 15.48s/batch, batch_loss=37.4, bat

Epoch 5/10:  55%|▌| 543/991 [2:19:21<1:55:14, 15.43s/batch, batch_loss=37.4, bat

Epoch 5/10:  55%|▌| 543/991 [2:19:36<1:55:14, 15.43s/batch, batch_loss=22.3, bat

Epoch 5/10:  55%|▌| 544/991 [2:19:36<1:53:34, 15.25s/batch, batch_loss=22.3, bat

Epoch 5/10:  55%|▌| 544/991 [2:19:51<1:53:34, 15.25s/batch, batch_loss=14.1, bat

Epoch 5/10:  55%|▌| 545/991 [2:19:51<1:53:19, 15.25s/batch, batch_loss=14.1, bat

Epoch 5/10:  55%|▌| 545/991 [2:20:06<1:53:19, 15.25s/batch, batch_loss=299, batc

Epoch 5/10:  55%|▌| 546/991 [2:20:06<1:53:07, 15.25s/batch, batch_loss=299, batc

Epoch 5/10:  55%|▌| 546/991 [2:20:21<1:53:07, 15.25s/batch, batch_loss=15.2, bat

Epoch 5/10:  55%|▌| 547/991 [2:20:21<1:52:34, 15.21s/batch, batch_loss=15.2, bat

Epoch 5/10:  55%|▌| 547/991 [2:20:36<1:52:34, 15.21s/batch, batch_loss=12.2, bat

Epoch 5/10:  55%|▌| 548/991 [2:20:36<1:51:49, 15.15s/batch, batch_loss=12.2, bat

Epoch 5/10:  55%|▌| 548/991 [2:20:52<1:51:49, 15.15s/batch, batch_loss=9.91, bat

Epoch 5/10:  55%|▌| 549/991 [2:20:52<1:52:00, 15.20s/batch, batch_loss=9.91, bat

Epoch 5/10:  55%|▌| 549/991 [2:21:07<1:52:00, 15.20s/batch, batch_loss=20.4, bat

Epoch 5/10:  55%|▌| 550/991 [2:21:07<1:52:53, 15.36s/batch, batch_loss=20.4, bat

Epoch 5/10:  55%|▌| 550/991 [2:21:24<1:52:53, 15.36s/batch, batch_loss=16.1, bat

Epoch 5/10:  56%|▌| 551/991 [2:21:24<1:55:44, 15.78s/batch, batch_loss=16.1, bat

Epoch 5/10:  56%|▌| 551/991 [2:21:42<1:55:44, 15.78s/batch, batch_loss=15.7, bat

Epoch 5/10:  56%|▌| 552/991 [2:21:42<1:59:20, 16.31s/batch, batch_loss=15.7, bat

Epoch 5/10:  56%|▌| 552/991 [2:21:58<1:59:20, 16.31s/batch, batch_loss=16.1, bat

Epoch 5/10:  56%|▌| 553/991 [2:21:58<1:58:44, 16.27s/batch, batch_loss=16.1, bat

Epoch 5/10:  56%|▌| 553/991 [2:22:13<1:58:44, 16.27s/batch, batch_loss=5.74e+3, 

Epoch 5/10:  56%|▌| 554/991 [2:22:13<1:55:39, 15.88s/batch, batch_loss=5.74e+3, 

Epoch 5/10:  56%|▌| 554/991 [2:22:29<1:55:39, 15.88s/batch, batch_loss=2.57e+3, 

Epoch 5/10:  56%|▌| 555/991 [2:22:29<1:56:15, 16.00s/batch, batch_loss=2.57e+3, 

Epoch 5/10:  56%|▌| 555/991 [2:22:47<1:56:15, 16.00s/batch, batch_loss=16, batch

Epoch 5/10:  56%|▌| 556/991 [2:22:47<1:59:15, 16.45s/batch, batch_loss=16, batch

Epoch 5/10:  56%|▌| 556/991 [2:23:04<1:59:15, 16.45s/batch, batch_loss=1.27e+4, 

Epoch 5/10:  56%|▌| 557/991 [2:23:04<2:00:07, 16.61s/batch, batch_loss=1.27e+4, 

Epoch 5/10:  56%|▌| 557/991 [2:23:20<2:00:07, 16.61s/batch, batch_loss=8.16, bat

Epoch 5/10:  56%|▌| 558/991 [2:23:20<1:59:08, 16.51s/batch, batch_loss=8.16, bat

Epoch 5/10:  56%|▌| 558/991 [2:23:37<1:59:08, 16.51s/batch, batch_loss=17.2, bat

Epoch 5/10:  56%|▌| 559/991 [2:23:37<2:00:23, 16.72s/batch, batch_loss=17.2, bat

Epoch 5/10:  56%|▌| 559/991 [2:23:55<2:00:23, 16.72s/batch, batch_loss=8.04, bat

Epoch 5/10:  57%|▌| 560/991 [2:23:55<2:02:12, 17.01s/batch, batch_loss=8.04, bat

Epoch 5/10:  57%|▌| 560/991 [2:24:11<2:02:12, 17.01s/batch, batch_loss=7.03, bat

Epoch 5/10:  57%|▌| 561/991 [2:24:11<2:01:21, 16.93s/batch, batch_loss=7.03, bat

Epoch 5/10:  57%|▌| 561/991 [2:24:28<2:01:21, 16.93s/batch, batch_loss=14.1, bat

Epoch 5/10:  57%|▌| 562/991 [2:24:28<1:59:34, 16.72s/batch, batch_loss=14.1, bat

Epoch 5/10:  57%|▌| 562/991 [2:24:44<1:59:34, 16.72s/batch, batch_loss=7.57, bat

Epoch 5/10:  57%|▌| 563/991 [2:24:44<1:57:30, 16.47s/batch, batch_loss=7.57, bat

Epoch 5/10:  57%|▌| 563/991 [2:24:59<1:57:30, 16.47s/batch, batch_loss=12, batch

Epoch 5/10:  57%|▌| 564/991 [2:24:59<1:54:54, 16.15s/batch, batch_loss=12, batch

Epoch 5/10:  57%|▌| 564/991 [2:25:14<1:54:54, 16.15s/batch, batch_loss=495, batc

Epoch 5/10:  57%|▌| 565/991 [2:25:14<1:53:06, 15.93s/batch, batch_loss=495, batc

Epoch 5/10:  57%|▌| 565/991 [2:25:31<1:53:06, 15.93s/batch, batch_loss=11.6, bat

Epoch 5/10:  57%|▌| 566/991 [2:25:31<1:54:20, 16.14s/batch, batch_loss=11.6, bat

Epoch 5/10:  57%|▌| 566/991 [2:25:47<1:54:20, 16.14s/batch, batch_loss=14.9, bat

Epoch 5/10:  57%|▌| 567/991 [2:25:47<1:54:07, 16.15s/batch, batch_loss=14.9, bat

Epoch 5/10:  57%|▌| 567/991 [2:26:02<1:54:07, 16.15s/batch, batch_loss=299, batc

Epoch 5/10:  57%|▌| 568/991 [2:26:02<1:50:53, 15.73s/batch, batch_loss=299, batc

Epoch 5/10:  57%|▌| 568/991 [2:26:17<1:50:53, 15.73s/batch, batch_loss=25.8, bat

Epoch 5/10:  57%|▌| 569/991 [2:26:17<1:49:31, 15.57s/batch, batch_loss=25.8, bat

Epoch 5/10:  57%|▌| 569/991 [2:26:32<1:49:31, 15.57s/batch, batch_loss=8.45e+3, 

Epoch 5/10:  58%|▌| 570/991 [2:26:32<1:47:03, 15.26s/batch, batch_loss=8.45e+3, 

Epoch 5/10:  58%|▌| 570/991 [2:26:47<1:47:03, 15.26s/batch, batch_loss=10.2, bat

Epoch 5/10:  58%|▌| 571/991 [2:26:47<1:47:09, 15.31s/batch, batch_loss=10.2, bat

Epoch 5/10:  58%|▌| 571/991 [2:27:02<1:47:09, 15.31s/batch, batch_loss=10.2, bat

Epoch 5/10:  58%|▌| 572/991 [2:27:02<1:45:34, 15.12s/batch, batch_loss=10.2, bat

Epoch 5/10:  58%|▌| 572/991 [2:27:16<1:45:34, 15.12s/batch, batch_loss=7.27, bat

Epoch 5/10:  58%|▌| 573/991 [2:27:16<1:43:27, 14.85s/batch, batch_loss=7.27, bat

Epoch 5/10:  58%|▌| 573/991 [2:27:31<1:43:27, 14.85s/batch, batch_loss=13.7, bat

Epoch 5/10:  58%|▌| 574/991 [2:27:31<1:43:00, 14.82s/batch, batch_loss=13.7, bat

Epoch 5/10:  58%|▌| 574/991 [2:27:48<1:43:00, 14.82s/batch, batch_loss=19.9, bat

Epoch 5/10:  58%|▌| 575/991 [2:27:48<1:46:59, 15.43s/batch, batch_loss=19.9, bat

Epoch 5/10:  58%|▌| 575/991 [2:28:03<1:46:59, 15.43s/batch, batch_loss=29.3, bat

Epoch 5/10:  58%|▌| 576/991 [2:28:03<1:46:30, 15.40s/batch, batch_loss=29.3, bat

Epoch 5/10:  58%|▌| 576/991 [2:28:17<1:46:30, 15.40s/batch, batch_loss=11.1, bat

Epoch 5/10:  58%|▌| 577/991 [2:28:17<1:44:18, 15.12s/batch, batch_loss=11.1, bat

Epoch 5/10:  58%|▌| 577/991 [2:28:32<1:44:18, 15.12s/batch, batch_loss=8.25, bat

Epoch 5/10:  58%|▌| 578/991 [2:28:32<1:43:18, 15.01s/batch, batch_loss=8.25, bat

Epoch 5/10:  58%|▌| 578/991 [2:28:47<1:43:18, 15.01s/batch, batch_loss=10.4, bat

Epoch 5/10:  58%|▌| 579/991 [2:28:47<1:43:23, 15.06s/batch, batch_loss=10.4, bat

Epoch 5/10:  58%|▌| 579/991 [2:29:02<1:43:23, 15.06s/batch, batch_loss=18, batch

Epoch 5/10:  59%|▌| 580/991 [2:29:02<1:42:51, 15.02s/batch, batch_loss=18, batch

Epoch 5/10:  59%|▌| 580/991 [2:29:16<1:42:51, 15.02s/batch, batch_loss=6.41, bat

Epoch 5/10:  59%|▌| 581/991 [2:29:16<1:40:38, 14.73s/batch, batch_loss=6.41, bat

Epoch 5/10:  59%|▌| 581/991 [2:29:33<1:40:38, 14.73s/batch, batch_loss=0.27, bat

Epoch 5/10:  59%|▌| 582/991 [2:29:33<1:44:35, 15.34s/batch, batch_loss=0.27, bat

Epoch 5/10:  59%|▌| 582/991 [2:29:48<1:44:35, 15.34s/batch, batch_loss=6.62e+3, 

Epoch 5/10:  59%|▌| 583/991 [2:29:48<1:42:51, 15.13s/batch, batch_loss=6.62e+3, 

Epoch 5/10:  59%|▌| 583/991 [2:30:03<1:42:51, 15.13s/batch, batch_loss=11.1, bat

Epoch 5/10:  59%|▌| 584/991 [2:30:03<1:42:39, 15.14s/batch, batch_loss=11.1, bat

Epoch 5/10:  59%|▌| 584/991 [2:30:18<1:42:39, 15.14s/batch, batch_loss=9.5, batc

Epoch 5/10:  59%|▌| 585/991 [2:30:18<1:42:18, 15.12s/batch, batch_loss=9.5, batc

Epoch 5/10:  59%|▌| 585/991 [2:30:33<1:42:18, 15.12s/batch, batch_loss=27, batch

Epoch 5/10:  59%|▌| 586/991 [2:30:33<1:42:33, 15.19s/batch, batch_loss=27, batch

Epoch 5/10:  59%|▌| 586/991 [2:30:48<1:42:33, 15.19s/batch, batch_loss=21.8, bat

Epoch 5/10:  59%|▌| 587/991 [2:30:48<1:41:57, 15.14s/batch, batch_loss=21.8, bat

Epoch 5/10:  59%|▌| 587/991 [2:31:03<1:41:57, 15.14s/batch, batch_loss=17.1, bat

Epoch 5/10:  59%|▌| 588/991 [2:31:03<1:40:47, 15.00s/batch, batch_loss=17.1, bat

Epoch 5/10:  59%|▌| 588/991 [2:31:17<1:40:47, 15.00s/batch, batch_loss=6.91, bat

Epoch 5/10:  59%|▌| 589/991 [2:31:17<1:39:27, 14.85s/batch, batch_loss=6.91, bat

Epoch 5/10:  59%|▌| 589/991 [2:31:32<1:39:27, 14.85s/batch, batch_loss=16.6, bat

Epoch 5/10:  60%|▌| 590/991 [2:31:32<1:37:47, 14.63s/batch, batch_loss=16.6, bat

Epoch 5/10:  60%|▌| 590/991 [2:31:47<1:37:47, 14.63s/batch, batch_loss=14.9, bat

Epoch 5/10:  60%|▌| 591/991 [2:31:47<1:38:31, 14.78s/batch, batch_loss=14.9, bat

Epoch 5/10:  60%|▌| 591/991 [2:32:01<1:38:31, 14.78s/batch, batch_loss=6.38, bat

Epoch 5/10:  60%|▌| 592/991 [2:32:01<1:38:10, 14.76s/batch, batch_loss=6.38, bat

Epoch 5/10:  60%|▌| 592/991 [2:32:17<1:38:10, 14.76s/batch, batch_loss=11.5, bat

Epoch 5/10:  60%|▌| 593/991 [2:32:17<1:38:31, 14.85s/batch, batch_loss=11.5, bat

Epoch 5/10:  60%|▌| 593/991 [2:32:31<1:38:31, 14.85s/batch, batch_loss=12.5, bat

Epoch 5/10:  60%|▌| 594/991 [2:32:31<1:37:32, 14.74s/batch, batch_loss=12.5, bat

Epoch 5/10:  60%|▌| 594/991 [2:32:47<1:37:32, 14.74s/batch, batch_loss=8, batch_

Epoch 5/10:  60%|▌| 595/991 [2:32:47<1:39:22, 15.06s/batch, batch_loss=8, batch_

Epoch 5/10:  60%|▌| 595/991 [2:33:02<1:39:22, 15.06s/batch, batch_loss=7.05, bat

Epoch 5/10:  60%|▌| 596/991 [2:33:02<1:39:43, 15.15s/batch, batch_loss=7.05, bat

Epoch 5/10:  60%|▌| 596/991 [2:33:19<1:39:43, 15.15s/batch, batch_loss=21.5, bat

Epoch 5/10:  60%|▌| 597/991 [2:33:19<1:43:43, 15.80s/batch, batch_loss=21.5, bat

Epoch 5/10:  60%|▌| 597/991 [2:33:36<1:43:43, 15.80s/batch, batch_loss=9.93, bat

Epoch 5/10:  60%|▌| 598/991 [2:33:36<1:44:18, 15.93s/batch, batch_loss=9.93, bat

Epoch 5/10:  60%|▌| 598/991 [2:33:52<1:44:18, 15.93s/batch, batch_loss=17.3, bat

Epoch 5/10:  60%|▌| 599/991 [2:33:52<1:45:27, 16.14s/batch, batch_loss=17.3, bat

Epoch 5/10:  60%|▌| 599/991 [2:34:08<1:45:27, 16.14s/batch, batch_loss=13.8, bat

Epoch 5/10:  61%|▌| 600/991 [2:34:08<1:44:58, 16.11s/batch, batch_loss=13.8, bat

Epoch 5/10:  61%|▌| 600/991 [2:34:24<1:44:58, 16.11s/batch, batch_loss=14.9, bat

Epoch 5/10:  61%|▌| 601/991 [2:34:24<1:44:20, 16.05s/batch, batch_loss=14.9, bat

Epoch 5/10:  61%|▌| 601/991 [2:34:40<1:44:20, 16.05s/batch, batch_loss=9.83, bat

Epoch 5/10:  61%|▌| 602/991 [2:34:40<1:44:15, 16.08s/batch, batch_loss=9.83, bat

Epoch 5/10:  61%|▌| 602/991 [2:34:55<1:44:15, 16.08s/batch, batch_loss=7.15, bat

Epoch 5/10:  61%|▌| 603/991 [2:34:55<1:41:57, 15.77s/batch, batch_loss=7.15, bat

Epoch 5/10:  61%|▌| 603/991 [2:35:11<1:41:57, 15.77s/batch, batch_loss=1.01e+4, 

Epoch 5/10:  61%|▌| 604/991 [2:35:11<1:40:39, 15.61s/batch, batch_loss=1.01e+4, 

Epoch 5/10:  61%|▌| 604/991 [2:35:25<1:40:39, 15.61s/batch, batch_loss=10.2, bat

Epoch 5/10:  61%|▌| 605/991 [2:35:25<1:38:19, 15.28s/batch, batch_loss=10.2, bat

Epoch 5/10:  61%|▌| 605/991 [2:35:41<1:38:19, 15.28s/batch, batch_loss=7.27, bat

Epoch 5/10:  61%|▌| 606/991 [2:35:41<1:39:33, 15.52s/batch, batch_loss=7.27, bat

Epoch 5/10:  61%|▌| 606/991 [2:35:57<1:39:33, 15.52s/batch, batch_loss=11, batch

Epoch 5/10:  61%|▌| 607/991 [2:35:57<1:40:26, 15.69s/batch, batch_loss=11, batch

Epoch 5/10:  61%|▌| 607/991 [2:36:13<1:40:26, 15.69s/batch, batch_loss=14.7, bat

Epoch 5/10:  61%|▌| 608/991 [2:36:13<1:40:04, 15.68s/batch, batch_loss=14.7, bat

Epoch 5/10:  61%|▌| 608/991 [2:36:28<1:40:04, 15.68s/batch, batch_loss=14.6, bat

Epoch 5/10:  61%|▌| 609/991 [2:36:28<1:38:52, 15.53s/batch, batch_loss=14.6, bat

Epoch 5/10:  61%|▌| 609/991 [2:36:43<1:38:52, 15.53s/batch, batch_loss=15, batch

Epoch 5/10:  62%|▌| 610/991 [2:36:43<1:38:05, 15.45s/batch, batch_loss=15, batch

Epoch 5/10:  62%|▌| 610/991 [2:36:58<1:38:05, 15.45s/batch, batch_loss=28.2, bat

Epoch 5/10:  62%|▌| 611/991 [2:36:58<1:35:59, 15.16s/batch, batch_loss=28.2, bat

Epoch 5/10:  62%|▌| 611/991 [2:37:13<1:35:59, 15.16s/batch, batch_loss=8.08, bat

Epoch 5/10:  62%|▌| 612/991 [2:37:13<1:35:39, 15.14s/batch, batch_loss=8.08, bat

Epoch 5/10:  62%|▌| 612/991 [2:37:30<1:35:39, 15.14s/batch, batch_loss=11.2, bat

Epoch 5/10:  62%|▌| 613/991 [2:37:30<1:39:14, 15.75s/batch, batch_loss=11.2, bat

Epoch 5/10:  62%|▌| 613/991 [2:37:46<1:39:14, 15.75s/batch, batch_loss=1.73e+4, 

Epoch 5/10:  62%|▌| 614/991 [2:37:46<1:38:46, 15.72s/batch, batch_loss=1.73e+4, 

Epoch 5/10:  62%|▌| 614/991 [2:38:01<1:38:46, 15.72s/batch, batch_loss=988, batc

Epoch 5/10:  62%|▌| 615/991 [2:38:01<1:37:40, 15.59s/batch, batch_loss=988, batc

Epoch 5/10:  62%|▌| 615/991 [2:38:16<1:37:40, 15.59s/batch, batch_loss=8.56, bat

Epoch 5/10:  62%|▌| 616/991 [2:38:16<1:36:51, 15.50s/batch, batch_loss=8.56, bat

Epoch 5/10:  62%|▌| 616/991 [2:38:31<1:36:51, 15.50s/batch, batch_loss=17.3, bat

Epoch 5/10:  62%|▌| 617/991 [2:38:31<1:35:36, 15.34s/batch, batch_loss=17.3, bat

Epoch 5/10:  62%|▌| 617/991 [2:38:49<1:35:36, 15.34s/batch, batch_loss=12.3, bat

Epoch 5/10:  62%|▌| 618/991 [2:38:49<1:39:27, 16.00s/batch, batch_loss=12.3, bat

Epoch 5/10:  62%|▌| 618/991 [2:39:05<1:39:27, 16.00s/batch, batch_loss=22.1, bat

Epoch 5/10:  62%|▌| 619/991 [2:39:05<1:39:24, 16.03s/batch, batch_loss=22.1, bat

Epoch 5/10:  62%|▌| 619/991 [2:39:20<1:39:24, 16.03s/batch, batch_loss=12.4, bat

Epoch 5/10:  63%|▋| 620/991 [2:39:20<1:37:05, 15.70s/batch, batch_loss=12.4, bat

Epoch 5/10:  63%|▋| 620/991 [2:39:35<1:37:05, 15.70s/batch, batch_loss=10.2, bat

Epoch 5/10:  63%|▋| 621/991 [2:39:35<1:34:48, 15.37s/batch, batch_loss=10.2, bat

Epoch 5/10:  63%|▋| 621/991 [2:39:50<1:34:48, 15.37s/batch, batch_loss=5.49e+3, 

Epoch 5/10:  63%|▋| 622/991 [2:39:50<1:34:13, 15.32s/batch, batch_loss=5.49e+3, 

Epoch 5/10:  63%|▋| 622/991 [2:40:05<1:34:13, 15.32s/batch, batch_loss=27.4, bat

Epoch 5/10:  63%|▋| 623/991 [2:40:05<1:33:31, 15.25s/batch, batch_loss=27.4, bat

Epoch 5/10:  63%|▋| 623/991 [2:40:20<1:33:31, 15.25s/batch, batch_loss=1.6e+4, b

Epoch 5/10:  63%|▋| 624/991 [2:40:20<1:33:07, 15.23s/batch, batch_loss=1.6e+4, b

Epoch 5/10:  63%|▋| 624/991 [2:40:35<1:33:07, 15.23s/batch, batch_loss=9.62, bat

Epoch 5/10:  63%|▋| 625/991 [2:40:35<1:32:14, 15.12s/batch, batch_loss=9.62, bat

Epoch 5/10:  63%|▋| 625/991 [2:40:51<1:32:14, 15.12s/batch, batch_loss=6.55, bat

Epoch 5/10:  63%|▋| 626/991 [2:40:51<1:32:58, 15.28s/batch, batch_loss=6.55, bat

Epoch 5/10:  63%|▋| 626/991 [2:41:06<1:32:58, 15.28s/batch, batch_loss=4.32e+3, 

Epoch 5/10:  63%|▋| 627/991 [2:41:06<1:33:50, 15.47s/batch, batch_loss=4.32e+3, 

Epoch 5/10:  63%|▋| 627/991 [2:41:22<1:33:50, 15.47s/batch, batch_loss=1.05e+3, 

Epoch 5/10:  63%|▋| 628/991 [2:41:22<1:33:27, 15.45s/batch, batch_loss=1.05e+3, 

Epoch 5/10:  63%|▋| 628/991 [2:41:36<1:33:27, 15.45s/batch, batch_loss=11.3, bat

Epoch 5/10:  63%|▋| 629/991 [2:41:36<1:31:22, 15.15s/batch, batch_loss=11.3, bat

Epoch 5/10:  63%|▋| 629/991 [2:41:50<1:31:22, 15.15s/batch, batch_loss=22, batch

Epoch 5/10:  64%|▋| 630/991 [2:41:50<1:29:20, 14.85s/batch, batch_loss=22, batch

Epoch 5/10:  64%|▋| 630/991 [2:42:05<1:29:20, 14.85s/batch, batch_loss=16, batch

Epoch 5/10:  64%|▋| 631/991 [2:42:05<1:28:50, 14.81s/batch, batch_loss=16, batch

Epoch 5/10:  64%|▋| 631/991 [2:42:19<1:28:50, 14.81s/batch, batch_loss=3.8, batc

Epoch 5/10:  64%|▋| 632/991 [2:42:19<1:27:16, 14.59s/batch, batch_loss=3.8, batc

Epoch 5/10:  64%|▋| 632/991 [2:42:35<1:27:16, 14.59s/batch, batch_loss=24.8, bat

Epoch 5/10:  64%|▋| 633/991 [2:42:35<1:28:54, 14.90s/batch, batch_loss=24.8, bat

Epoch 5/10:  64%|▋| 633/991 [2:42:49<1:28:54, 14.90s/batch, batch_loss=25.9, bat

Epoch 5/10:  64%|▋| 634/991 [2:42:49<1:27:36, 14.72s/batch, batch_loss=25.9, bat

Epoch 5/10:  64%|▋| 634/991 [2:43:04<1:27:36, 14.72s/batch, batch_loss=22.6, bat

Epoch 5/10:  64%|▋| 635/991 [2:43:04<1:26:47, 14.63s/batch, batch_loss=22.6, bat

Epoch 5/10:  64%|▋| 635/991 [2:43:18<1:26:47, 14.63s/batch, batch_loss=17.9, bat

Epoch 5/10:  64%|▋| 636/991 [2:43:18<1:26:25, 14.61s/batch, batch_loss=17.9, bat

Epoch 5/10:  64%|▋| 636/991 [2:43:33<1:26:25, 14.61s/batch, batch_loss=20, batch

Epoch 5/10:  64%|▋| 637/991 [2:43:33<1:27:23, 14.81s/batch, batch_loss=20, batch

Epoch 5/10:  64%|▋| 637/991 [2:43:49<1:27:23, 14.81s/batch, batch_loss=17, batch

Epoch 5/10:  64%|▋| 638/991 [2:43:49<1:28:11, 14.99s/batch, batch_loss=17, batch

Epoch 5/10:  64%|▋| 638/991 [2:44:04<1:28:11, 14.99s/batch, batch_loss=10.9, bat

Epoch 5/10:  64%|▋| 639/991 [2:44:04<1:28:42, 15.12s/batch, batch_loss=10.9, bat

Epoch 5/10:  64%|▋| 639/991 [2:44:20<1:28:42, 15.12s/batch, batch_loss=673, batc

Epoch 5/10:  65%|▋| 640/991 [2:44:20<1:29:52, 15.36s/batch, batch_loss=673, batc

Epoch 5/10:  65%|▋| 640/991 [2:44:35<1:29:52, 15.36s/batch, batch_loss=15.9, bat

Epoch 5/10:  65%|▋| 641/991 [2:44:35<1:29:15, 15.30s/batch, batch_loss=15.9, bat

Epoch 5/10:  65%|▋| 641/991 [2:44:51<1:29:15, 15.30s/batch, batch_loss=9.39, bat

Epoch 5/10:  65%|▋| 642/991 [2:44:51<1:28:41, 15.25s/batch, batch_loss=9.39, bat

Epoch 5/10:  65%|▋| 642/991 [2:45:08<1:28:41, 15.25s/batch, batch_loss=2.12e+4, 

Epoch 5/10:  65%|▋| 643/991 [2:45:08<1:32:06, 15.88s/batch, batch_loss=2.12e+4, 

Epoch 5/10:  65%|▋| 643/991 [2:45:23<1:32:06, 15.88s/batch, batch_loss=1.76e+4, 

Epoch 5/10:  65%|▋| 644/991 [2:45:23<1:30:16, 15.61s/batch, batch_loss=1.76e+4, 

Epoch 5/10:  65%|▋| 644/991 [2:45:37<1:30:16, 15.61s/batch, batch_loss=2.19e+3, 

Epoch 5/10:  65%|▋| 645/991 [2:45:37<1:28:19, 15.32s/batch, batch_loss=2.19e+3, 

Epoch 5/10:  65%|▋| 645/991 [2:45:53<1:28:19, 15.32s/batch, batch_loss=10.5, bat

Epoch 5/10:  65%|▋| 646/991 [2:45:53<1:28:40, 15.42s/batch, batch_loss=10.5, bat

Epoch 5/10:  65%|▋| 646/991 [2:46:08<1:28:40, 15.42s/batch, batch_loss=14.6, bat

Epoch 5/10:  65%|▋| 647/991 [2:46:08<1:27:42, 15.30s/batch, batch_loss=14.6, bat

Epoch 5/10:  65%|▋| 647/991 [2:46:22<1:27:42, 15.30s/batch, batch_loss=13.2, bat

Epoch 5/10:  65%|▋| 648/991 [2:46:22<1:25:25, 14.94s/batch, batch_loss=13.2, bat

Epoch 5/10:  65%|▋| 648/991 [2:46:37<1:25:25, 14.94s/batch, batch_loss=14.1, bat

Epoch 5/10:  65%|▋| 649/991 [2:46:37<1:25:25, 14.99s/batch, batch_loss=14.1, bat

Epoch 5/10:  65%|▋| 649/991 [2:46:53<1:25:25, 14.99s/batch, batch_loss=1.34e+4, 

Epoch 5/10:  66%|▋| 650/991 [2:46:53<1:25:45, 15.09s/batch, batch_loss=1.34e+4, 

Epoch 5/10:  66%|▋| 650/991 [2:47:07<1:25:45, 15.09s/batch, batch_loss=9.6, batc

Epoch 5/10:  66%|▋| 651/991 [2:47:07<1:23:20, 14.71s/batch, batch_loss=9.6, batc

Epoch 5/10:  66%|▋| 651/991 [2:47:22<1:23:20, 14.71s/batch, batch_loss=12.3, bat

Epoch 5/10:  66%|▋| 652/991 [2:47:22<1:23:43, 14.82s/batch, batch_loss=12.3, bat

Epoch 5/10:  66%|▋| 652/991 [2:47:35<1:23:43, 14.82s/batch, batch_loss=17.3, bat

Epoch 5/10:  66%|▋| 653/991 [2:47:35<1:21:31, 14.47s/batch, batch_loss=17.3, bat

Epoch 5/10:  66%|▋| 653/991 [2:47:50<1:21:31, 14.47s/batch, batch_loss=19.6, bat

Epoch 5/10:  66%|▋| 654/991 [2:47:50<1:22:23, 14.67s/batch, batch_loss=19.6, bat

Epoch 5/10:  66%|▋| 654/991 [2:48:06<1:22:23, 14.67s/batch, batch_loss=3.84e+3, 

Epoch 5/10:  66%|▋| 655/991 [2:48:06<1:23:06, 14.84s/batch, batch_loss=3.84e+3, 

Epoch 5/10:  66%|▋| 655/991 [2:48:20<1:23:06, 14.84s/batch, batch_loss=5.19e+3, 

Epoch 5/10:  66%|▋| 656/991 [2:48:20<1:22:33, 14.79s/batch, batch_loss=5.19e+3, 

Epoch 5/10:  66%|▋| 656/991 [2:48:35<1:22:33, 14.79s/batch, batch_loss=4.21e+3, 

Epoch 5/10:  66%|▋| 657/991 [2:48:35<1:21:45, 14.69s/batch, batch_loss=4.21e+3, 

Epoch 5/10:  66%|▋| 657/991 [2:48:50<1:21:45, 14.69s/batch, batch_loss=2.2e+4, b

Epoch 5/10:  66%|▋| 658/991 [2:48:50<1:22:32, 14.87s/batch, batch_loss=2.2e+4, b

Epoch 5/10:  66%|▋| 658/991 [2:49:06<1:22:32, 14.87s/batch, batch_loss=4.35, bat

Epoch 5/10:  66%|▋| 659/991 [2:49:06<1:23:57, 15.17s/batch, batch_loss=4.35, bat

Epoch 5/10:  66%|▋| 659/991 [2:49:21<1:23:57, 15.17s/batch, batch_loss=3.97, bat

Epoch 5/10:  67%|▋| 660/991 [2:49:21<1:23:01, 15.05s/batch, batch_loss=3.97, bat

Epoch 5/10:  67%|▋| 660/991 [2:49:37<1:23:01, 15.05s/batch, batch_loss=10.8, bat

Epoch 5/10:  67%|▋| 661/991 [2:49:37<1:24:42, 15.40s/batch, batch_loss=10.8, bat

Epoch 5/10:  67%|▋| 661/991 [2:49:53<1:24:42, 15.40s/batch, batch_loss=14.5, bat

Epoch 5/10:  67%|▋| 662/991 [2:49:53<1:25:16, 15.55s/batch, batch_loss=14.5, bat

Epoch 5/10:  67%|▋| 662/991 [2:50:08<1:25:16, 15.55s/batch, batch_loss=17.4, bat

Epoch 5/10:  67%|▋| 663/991 [2:50:08<1:24:23, 15.44s/batch, batch_loss=17.4, bat

Epoch 5/10:  67%|▋| 663/991 [2:50:24<1:24:23, 15.44s/batch, batch_loss=3.04e+3, 

Epoch 5/10:  67%|▋| 664/991 [2:50:24<1:25:04, 15.61s/batch, batch_loss=3.04e+3, 

Epoch 5/10:  67%|▋| 664/991 [2:50:41<1:25:04, 15.61s/batch, batch_loss=12.9, bat

Epoch 5/10:  67%|▋| 665/991 [2:50:41<1:27:12, 16.05s/batch, batch_loss=12.9, bat

Epoch 5/10:  67%|▋| 665/991 [2:50:56<1:27:12, 16.05s/batch, batch_loss=3.05e+3, 

Epoch 5/10:  67%|▋| 666/991 [2:50:56<1:25:33, 15.80s/batch, batch_loss=3.05e+3, 

Epoch 5/10:  67%|▋| 666/991 [2:51:11<1:25:33, 15.80s/batch, batch_loss=19, batch

Epoch 5/10:  67%|▋| 667/991 [2:51:11<1:24:02, 15.56s/batch, batch_loss=19, batch

Epoch 5/10:  67%|▋| 667/991 [2:51:29<1:24:02, 15.56s/batch, batch_loss=377, batc

Epoch 5/10:  67%|▋| 668/991 [2:51:29<1:26:27, 16.06s/batch, batch_loss=377, batc

Epoch 5/10:  67%|▋| 668/991 [2:51:44<1:26:27, 16.06s/batch, batch_loss=2.96e+3, 

Epoch 5/10:  68%|▋| 669/991 [2:51:44<1:25:33, 15.94s/batch, batch_loss=2.96e+3, 

Epoch 5/10:  68%|▋| 669/991 [2:51:58<1:25:33, 15.94s/batch, batch_loss=1.01e+3, 

Epoch 5/10:  68%|▋| 670/991 [2:51:58<1:21:43, 15.28s/batch, batch_loss=1.01e+3, 

Epoch 5/10:  68%|▋| 670/991 [2:52:12<1:21:43, 15.28s/batch, batch_loss=10.1, bat

Epoch 5/10:  68%|▋| 671/991 [2:52:12<1:19:31, 14.91s/batch, batch_loss=10.1, bat

Epoch 5/10:  68%|▋| 671/991 [2:52:27<1:19:31, 14.91s/batch, batch_loss=13.6, bat

Epoch 5/10:  68%|▋| 672/991 [2:52:27<1:19:53, 15.03s/batch, batch_loss=13.6, bat

Epoch 5/10:  68%|▋| 672/991 [2:52:42<1:19:53, 15.03s/batch, batch_loss=17.2, bat

Epoch 5/10:  68%|▋| 673/991 [2:52:42<1:19:32, 15.01s/batch, batch_loss=17.2, bat

Epoch 5/10:  68%|▋| 673/991 [2:52:56<1:19:32, 15.01s/batch, batch_loss=15.8, bat

Epoch 5/10:  68%|▋| 674/991 [2:52:56<1:17:18, 14.63s/batch, batch_loss=15.8, bat

Epoch 5/10:  68%|▋| 674/991 [2:53:11<1:17:18, 14.63s/batch, batch_loss=4.34, bat

Epoch 5/10:  68%|▋| 675/991 [2:53:11<1:18:21, 14.88s/batch, batch_loss=4.34, bat

Epoch 5/10:  68%|▋| 675/991 [2:53:29<1:18:21, 14.88s/batch, batch_loss=9.69, bat

Epoch 5/10:  68%|▋| 676/991 [2:53:29<1:22:47, 15.77s/batch, batch_loss=9.69, bat

Epoch 5/10:  68%|▋| 676/991 [2:53:45<1:22:47, 15.77s/batch, batch_loss=18.6, bat

Epoch 5/10:  68%|▋| 677/991 [2:53:45<1:21:58, 15.66s/batch, batch_loss=18.6, bat

Epoch 5/10:  68%|▋| 677/991 [2:53:59<1:21:58, 15.66s/batch, batch_loss=6.68, bat

Epoch 5/10:  68%|▋| 678/991 [2:53:59<1:20:14, 15.38s/batch, batch_loss=6.68, bat

Epoch 5/10:  68%|▋| 678/991 [2:54:14<1:20:14, 15.38s/batch, batch_loss=3.81e+3, 

Epoch 5/10:  69%|▋| 679/991 [2:54:14<1:19:03, 15.20s/batch, batch_loss=3.81e+3, 

Epoch 5/10:  69%|▋| 679/991 [2:54:29<1:19:03, 15.20s/batch, batch_loss=6.11e+3, 

Epoch 5/10:  69%|▋| 680/991 [2:54:29<1:17:29, 14.95s/batch, batch_loss=6.11e+3, 

Epoch 5/10:  69%|▋| 680/991 [2:54:46<1:17:29, 14.95s/batch, batch_loss=7.23e+4, 

Epoch 5/10:  69%|▋| 681/991 [2:54:46<1:21:34, 15.79s/batch, batch_loss=7.23e+4, 

Epoch 5/10:  69%|▋| 681/991 [2:55:02<1:21:34, 15.79s/batch, batch_loss=11.5, bat

Epoch 5/10:  69%|▋| 682/991 [2:55:02<1:20:34, 15.65s/batch, batch_loss=11.5, bat

Epoch 5/10:  69%|▋| 682/991 [2:55:16<1:20:34, 15.65s/batch, batch_loss=377, batc

Epoch 5/10:  69%|▋| 683/991 [2:55:16<1:18:49, 15.36s/batch, batch_loss=377, batc

Epoch 5/10:  69%|▋| 683/991 [2:55:32<1:18:49, 15.36s/batch, batch_loss=4.76, bat

Epoch 5/10:  69%|▋| 684/991 [2:55:32<1:19:25, 15.52s/batch, batch_loss=4.76, bat

Epoch 5/10:  69%|▋| 684/991 [2:55:47<1:19:25, 15.52s/batch, batch_loss=11.6, bat

Epoch 5/10:  69%|▋| 685/991 [2:55:47<1:18:36, 15.41s/batch, batch_loss=11.6, bat

Epoch 5/10:  69%|▋| 685/991 [2:56:02<1:18:36, 15.41s/batch, batch_loss=12.6, bat

Epoch 5/10:  69%|▋| 686/991 [2:56:02<1:17:48, 15.31s/batch, batch_loss=12.6, bat

Epoch 5/10:  69%|▋| 686/991 [2:56:17<1:17:48, 15.31s/batch, batch_loss=532, batc

Epoch 5/10:  69%|▋| 687/991 [2:56:17<1:16:03, 15.01s/batch, batch_loss=532, batc

Epoch 5/10:  69%|▋| 687/991 [2:56:31<1:16:03, 15.01s/batch, batch_loss=5.26, bat

Epoch 5/10:  69%|▋| 688/991 [2:56:31<1:15:18, 14.91s/batch, batch_loss=5.26, bat

Epoch 5/10:  69%|▋| 688/991 [2:56:47<1:15:18, 14.91s/batch, batch_loss=5.43, bat

Epoch 5/10:  70%|▋| 689/991 [2:56:47<1:16:05, 15.12s/batch, batch_loss=5.43, bat

Epoch 5/10:  70%|▋| 689/991 [2:57:02<1:16:05, 15.12s/batch, batch_loss=11.8, bat

Epoch 5/10:  70%|▋| 690/991 [2:57:02<1:15:46, 15.11s/batch, batch_loss=11.8, bat

Epoch 5/10:  70%|▋| 690/991 [2:57:20<1:15:46, 15.11s/batch, batch_loss=14.4, bat

Epoch 5/10:  70%|▋| 691/991 [2:57:20<1:19:05, 15.82s/batch, batch_loss=14.4, bat

Epoch 5/10:  70%|▋| 691/991 [2:57:36<1:19:05, 15.82s/batch, batch_loss=5.61, bat

Epoch 5/10:  70%|▋| 692/991 [2:57:36<1:19:16, 15.91s/batch, batch_loss=5.61, bat

Epoch 5/10:  70%|▋| 692/991 [2:57:52<1:19:16, 15.91s/batch, batch_loss=4.65e+3, 

Epoch 5/10:  70%|▋| 693/991 [2:57:52<1:19:13, 15.95s/batch, batch_loss=4.65e+3, 

Epoch 5/10:  70%|▋| 693/991 [2:58:07<1:19:13, 15.95s/batch, batch_loss=453, batc

Epoch 5/10:  70%|▋| 694/991 [2:58:07<1:18:02, 15.77s/batch, batch_loss=453, batc

Epoch 5/10:  70%|▋| 694/991 [2:58:23<1:18:02, 15.77s/batch, batch_loss=781, batc

Epoch 5/10:  70%|▋| 695/991 [2:58:23<1:17:54, 15.79s/batch, batch_loss=781, batc

Epoch 5/10:  70%|▋| 695/991 [2:58:38<1:17:54, 15.79s/batch, batch_loss=9.6, batc

Epoch 5/10:  70%|▋| 696/991 [2:58:38<1:16:31, 15.56s/batch, batch_loss=9.6, batc

Epoch 5/10:  70%|▋| 696/991 [2:58:53<1:16:31, 15.56s/batch, batch_loss=6.8e+3, b

Epoch 5/10:  70%|▋| 697/991 [2:58:53<1:15:22, 15.38s/batch, batch_loss=6.8e+3, b

Epoch 5/10:  70%|▋| 697/991 [2:59:08<1:15:22, 15.38s/batch, batch_loss=13.1, bat

Epoch 5/10:  70%|▋| 698/991 [2:59:08<1:14:29, 15.25s/batch, batch_loss=13.1, bat

Epoch 5/10:  70%|▋| 698/991 [2:59:23<1:14:29, 15.25s/batch, batch_loss=6.88, bat

Epoch 5/10:  71%|▋| 699/991 [2:59:23<1:14:25, 15.29s/batch, batch_loss=6.88, bat

Epoch 5/10:  71%|▋| 699/991 [2:59:41<1:14:25, 15.29s/batch, batch_loss=9.69, bat

Epoch 5/10:  71%|▋| 700/991 [2:59:41<1:18:26, 16.17s/batch, batch_loss=9.69, bat

Epoch 5/10:  71%|▋| 700/991 [2:59:57<1:18:26, 16.17s/batch, batch_loss=211, batc

Epoch 5/10:  71%|▋| 701/991 [2:59:57<1:17:50, 16.11s/batch, batch_loss=211, batc

Epoch 5/10:  71%|▋| 701/991 [3:00:12<1:17:50, 16.11s/batch, batch_loss=18.6, bat

Epoch 5/10:  71%|▋| 702/991 [3:00:12<1:15:23, 15.65s/batch, batch_loss=18.6, bat

Epoch 5/10:  71%|▋| 702/991 [3:00:26<1:15:23, 15.65s/batch, batch_loss=273, batc

Epoch 5/10:  71%|▋| 703/991 [3:00:26<1:13:14, 15.26s/batch, batch_loss=273, batc

Epoch 5/10:  71%|▋| 703/991 [3:00:42<1:13:14, 15.26s/batch, batch_loss=7.27, bat

Epoch 5/10:  71%|▋| 704/991 [3:00:42<1:13:53, 15.45s/batch, batch_loss=7.27, bat

Epoch 5/10:  71%|▋| 704/991 [3:00:58<1:13:53, 15.45s/batch, batch_loss=9.17, bat

Epoch 5/10:  71%|▋| 705/991 [3:00:58<1:13:49, 15.49s/batch, batch_loss=9.17, bat

Epoch 5/10:  71%|▋| 705/991 [3:01:14<1:13:49, 15.49s/batch, batch_loss=17.3, bat

Epoch 5/10:  71%|▋| 706/991 [3:01:14<1:14:25, 15.67s/batch, batch_loss=17.3, bat

Epoch 5/10:  71%|▋| 706/991 [3:01:30<1:14:25, 15.67s/batch, batch_loss=17.7, bat

Epoch 5/10:  71%|▋| 707/991 [3:01:30<1:14:24, 15.72s/batch, batch_loss=17.7, bat

Epoch 5/10:  71%|▋| 707/991 [3:01:45<1:14:24, 15.72s/batch, batch_loss=9.24, bat

Epoch 5/10:  71%|▋| 708/991 [3:01:45<1:13:53, 15.67s/batch, batch_loss=9.24, bat

Epoch 5/10:  71%|▋| 708/991 [3:02:01<1:13:53, 15.67s/batch, batch_loss=7.1, batc

Epoch 5/10:  72%|▋| 709/991 [3:02:01<1:14:09, 15.78s/batch, batch_loss=7.1, batc

Epoch 5/10:  72%|▋| 709/991 [3:02:20<1:14:09, 15.78s/batch, batch_loss=27.5, bat

Epoch 5/10:  72%|▋| 710/991 [3:02:20<1:17:19, 16.51s/batch, batch_loss=27.5, bat

Epoch 5/10:  72%|▋| 710/991 [3:02:38<1:17:19, 16.51s/batch, batch_loss=98.2, bat

Epoch 5/10:  72%|▋| 711/991 [3:02:38<1:19:17, 16.99s/batch, batch_loss=98.2, bat

Epoch 5/10:  72%|▋| 711/991 [3:02:54<1:19:17, 16.99s/batch, batch_loss=11.4, bat

Epoch 5/10:  72%|▋| 712/991 [3:02:54<1:18:08, 16.80s/batch, batch_loss=11.4, bat

Epoch 5/10:  72%|▋| 712/991 [3:03:10<1:18:08, 16.80s/batch, batch_loss=77.2, bat

Epoch 5/10:  72%|▋| 713/991 [3:03:10<1:16:28, 16.50s/batch, batch_loss=77.2, bat

Epoch 5/10:  72%|▋| 713/991 [3:03:26<1:16:28, 16.50s/batch, batch_loss=27.4, bat

Epoch 5/10:  72%|▋| 714/991 [3:03:26<1:16:05, 16.48s/batch, batch_loss=27.4, bat

Epoch 5/10:  72%|▋| 714/991 [3:03:43<1:16:05, 16.48s/batch, batch_loss=17.8, bat

Epoch 5/10:  72%|▋| 715/991 [3:03:43<1:16:29, 16.63s/batch, batch_loss=17.8, bat

Epoch 5/10:  72%|▋| 715/991 [3:04:00<1:16:29, 16.63s/batch, batch_loss=15.1, bat

Epoch 5/10:  72%|▋| 716/991 [3:04:00<1:16:21, 16.66s/batch, batch_loss=15.1, bat

Epoch 5/10:  72%|▋| 716/991 [3:04:16<1:16:21, 16.66s/batch, batch_loss=20.9, bat

Epoch 5/10:  72%|▋| 717/991 [3:04:16<1:15:10, 16.46s/batch, batch_loss=20.9, bat

Epoch 5/10:  72%|▋| 717/991 [3:04:32<1:15:10, 16.46s/batch, batch_loss=22.3, bat

Epoch 5/10:  72%|▋| 718/991 [3:04:32<1:14:18, 16.33s/batch, batch_loss=22.3, bat

Epoch 5/10:  72%|▋| 718/991 [3:04:51<1:14:18, 16.33s/batch, batch_loss=12.6, bat

Epoch 5/10:  73%|▋| 719/991 [3:04:51<1:18:01, 17.21s/batch, batch_loss=12.6, bat

Epoch 5/10:  73%|▋| 719/991 [3:05:07<1:18:01, 17.21s/batch, batch_loss=11.6, bat

Epoch 5/10:  73%|▋| 720/991 [3:05:07<1:15:21, 16.69s/batch, batch_loss=11.6, bat

Epoch 5/10:  73%|▋| 720/991 [3:05:22<1:15:21, 16.69s/batch, batch_loss=18.5, bat

Epoch 5/10:  73%|▋| 721/991 [3:05:22<1:13:24, 16.31s/batch, batch_loss=18.5, bat

Epoch 5/10:  73%|▋| 721/991 [3:05:37<1:13:24, 16.31s/batch, batch_loss=19.4, bat

Epoch 5/10:  73%|▋| 722/991 [3:05:37<1:10:51, 15.81s/batch, batch_loss=19.4, bat

Epoch 5/10:  73%|▋| 722/991 [3:05:52<1:10:51, 15.81s/batch, batch_loss=7.23e+3, 

Epoch 5/10:  73%|▋| 723/991 [3:05:52<1:09:32, 15.57s/batch, batch_loss=7.23e+3, 

Epoch 5/10:  73%|▋| 723/991 [3:06:07<1:09:32, 15.57s/batch, batch_loss=5.72, bat

Epoch 5/10:  73%|▋| 724/991 [3:06:07<1:08:47, 15.46s/batch, batch_loss=5.72, bat

Epoch 5/10:  73%|▋| 724/991 [3:06:22<1:08:47, 15.46s/batch, batch_loss=14.2, bat

Epoch 5/10:  73%|▋| 725/991 [3:06:22<1:08:20, 15.41s/batch, batch_loss=14.2, bat

Epoch 5/10:  73%|▋| 725/991 [3:06:40<1:08:20, 15.41s/batch, batch_loss=13.3, bat

Epoch 5/10:  73%|▋| 726/991 [3:06:40<1:11:27, 16.18s/batch, batch_loss=13.3, bat

Epoch 5/10:  73%|▋| 726/991 [3:06:56<1:11:27, 16.18s/batch, batch_loss=1.29e+4, 

Epoch 5/10:  73%|▋| 727/991 [3:06:56<1:11:03, 16.15s/batch, batch_loss=1.29e+4, 

Epoch 5/10:  73%|▋| 727/991 [3:07:12<1:11:03, 16.15s/batch, batch_loss=13.9, bat

Epoch 5/10:  73%|▋| 728/991 [3:07:12<1:10:38, 16.12s/batch, batch_loss=13.9, bat

Epoch 5/10:  73%|▋| 728/991 [3:07:28<1:10:38, 16.12s/batch, batch_loss=128, batc

Epoch 5/10:  74%|▋| 729/991 [3:07:28<1:09:56, 16.02s/batch, batch_loss=128, batc

Epoch 5/10:  74%|▋| 729/991 [3:07:43<1:09:56, 16.02s/batch, batch_loss=11.2, bat

Epoch 5/10:  74%|▋| 730/991 [3:07:43<1:08:23, 15.72s/batch, batch_loss=11.2, bat

Epoch 5/10:  74%|▋| 730/991 [3:07:58<1:08:23, 15.72s/batch, batch_loss=107, batc

Epoch 5/10:  74%|▋| 731/991 [3:07:58<1:07:20, 15.54s/batch, batch_loss=107, batc

Epoch 5/10:  74%|▋| 731/991 [3:08:15<1:07:20, 15.54s/batch, batch_loss=1.39e+4, 

Epoch 5/10:  74%|▋| 732/991 [3:08:15<1:08:00, 15.75s/batch, batch_loss=1.39e+4, 

Epoch 5/10:  74%|▋| 732/991 [3:08:30<1:08:00, 15.75s/batch, batch_loss=15.8, bat

Epoch 5/10:  74%|▋| 733/991 [3:08:30<1:07:34, 15.71s/batch, batch_loss=15.8, bat

Epoch 5/10:  74%|▋| 733/991 [3:08:45<1:07:34, 15.71s/batch, batch_loss=6.8e+3, b

Epoch 5/10:  74%|▋| 734/991 [3:08:45<1:06:30, 15.53s/batch, batch_loss=6.8e+3, b

Epoch 5/10:  74%|▋| 734/991 [3:09:01<1:06:30, 15.53s/batch, batch_loss=16.2, bat

Epoch 5/10:  74%|▋| 735/991 [3:09:01<1:06:53, 15.68s/batch, batch_loss=16.2, bat

Epoch 5/10:  74%|▋| 735/991 [3:09:17<1:06:53, 15.68s/batch, batch_loss=13.7, bat

Epoch 5/10:  74%|▋| 736/991 [3:09:17<1:06:56, 15.75s/batch, batch_loss=13.7, bat

Epoch 5/10:  74%|▋| 736/991 [3:09:34<1:06:56, 15.75s/batch, batch_loss=11, batch

Epoch 5/10:  74%|▋| 737/991 [3:09:34<1:07:19, 15.90s/batch, batch_loss=11, batch

Epoch 5/10:  74%|▋| 737/991 [3:09:50<1:07:19, 15.90s/batch, batch_loss=1.48e+3, 

Epoch 5/10:  74%|▋| 738/991 [3:09:50<1:07:15, 15.95s/batch, batch_loss=1.48e+3, 

Epoch 5/10:  74%|▋| 738/991 [3:10:06<1:07:15, 15.95s/batch, batch_loss=26.9, bat

Epoch 5/10:  75%|▋| 739/991 [3:10:06<1:07:31, 16.08s/batch, batch_loss=26.9, bat

Epoch 5/10:  75%|▋| 739/991 [3:10:22<1:07:31, 16.08s/batch, batch_loss=10.7, bat

Epoch 5/10:  75%|▋| 740/991 [3:10:22<1:07:21, 16.10s/batch, batch_loss=10.7, bat

Epoch 5/10:  75%|▋| 740/991 [3:10:37<1:07:21, 16.10s/batch, batch_loss=1.81e+4, 

Epoch 5/10:  75%|▋| 741/991 [3:10:37<1:05:56, 15.83s/batch, batch_loss=1.81e+4, 

Epoch 5/10:  75%|▋| 741/991 [3:10:53<1:05:56, 15.83s/batch, batch_loss=2.28e+3, 

Epoch 5/10:  75%|▋| 742/991 [3:10:53<1:05:26, 15.77s/batch, batch_loss=2.28e+3, 

Epoch 5/10:  75%|▋| 742/991 [3:11:09<1:05:26, 15.77s/batch, batch_loss=11.9, bat

Epoch 5/10:  75%|▋| 743/991 [3:11:09<1:05:08, 15.76s/batch, batch_loss=11.9, bat

Epoch 5/10:  75%|▋| 743/991 [3:11:24<1:05:08, 15.76s/batch, batch_loss=13.3, bat

Epoch 5/10:  75%|▊| 744/991 [3:11:24<1:03:58, 15.54s/batch, batch_loss=13.3, bat

Epoch 5/10:  75%|▊| 744/991 [3:11:40<1:03:58, 15.54s/batch, batch_loss=14, batch

Epoch 5/10:  75%|▊| 745/991 [3:11:40<1:04:41, 15.78s/batch, batch_loss=14, batch

Epoch 5/10:  75%|▊| 745/991 [3:11:56<1:04:41, 15.78s/batch, batch_loss=1.15e+3, 

Epoch 5/10:  75%|▊| 746/991 [3:11:56<1:04:36, 15.82s/batch, batch_loss=1.15e+3, 

Epoch 5/10:  75%|▊| 746/991 [3:12:11<1:04:36, 15.82s/batch, batch_loss=3.88e+3, 

Epoch 5/10:  75%|▊| 747/991 [3:12:11<1:03:53, 15.71s/batch, batch_loss=3.88e+3, 

Epoch 5/10:  75%|▊| 747/991 [3:12:27<1:03:53, 15.71s/batch, batch_loss=13.1, bat

Epoch 5/10:  75%|▊| 748/991 [3:12:27<1:03:28, 15.67s/batch, batch_loss=13.1, bat

Epoch 5/10:  75%|▊| 748/991 [3:12:45<1:03:28, 15.67s/batch, batch_loss=13.1, bat

Epoch 5/10:  76%|▊| 749/991 [3:12:45<1:05:35, 16.26s/batch, batch_loss=13.1, bat

Epoch 5/10:  76%|▊| 749/991 [3:12:59<1:05:35, 16.26s/batch, batch_loss=12.1, bat

Epoch 5/10:  76%|▊| 750/991 [3:12:59<1:03:07, 15.71s/batch, batch_loss=12.1, bat

Epoch 5/10:  76%|▊| 750/991 [3:13:14<1:03:07, 15.71s/batch, batch_loss=13.7, bat

Epoch 5/10:  76%|▊| 751/991 [3:13:14<1:02:03, 15.51s/batch, batch_loss=13.7, bat

Epoch 5/10:  76%|▊| 751/991 [3:13:30<1:02:03, 15.51s/batch, batch_loss=7.66, bat

Epoch 5/10:  76%|▊| 752/991 [3:13:30<1:01:57, 15.55s/batch, batch_loss=7.66, bat

Epoch 5/10:  76%|▊| 752/991 [3:13:45<1:01:57, 15.55s/batch, batch_loss=8.66, bat

Epoch 5/10:  76%|▊| 753/991 [3:13:45<1:01:53, 15.60s/batch, batch_loss=8.66, bat

Epoch 5/10:  76%|▊| 753/991 [3:14:00<1:01:53, 15.60s/batch, batch_loss=5.96, bat

Epoch 5/10:  76%|▊| 754/991 [3:14:00<1:00:35, 15.34s/batch, batch_loss=5.96, bat

Epoch 5/10:  76%|▊| 754/991 [3:14:15<1:00:35, 15.34s/batch, batch_loss=16.6, bat

Epoch 5/10:  76%|▊| 755/991 [3:14:15<59:38, 15.16s/batch, batch_loss=16.6, batch

Epoch 5/10:  76%|▊| 755/991 [3:14:30<59:38, 15.16s/batch, batch_loss=13.6, batch

Epoch 5/10:  76%|▊| 756/991 [3:14:30<59:07, 15.09s/batch, batch_loss=13.6, batch

Epoch 5/10:  76%|▊| 756/991 [3:14:45<59:07, 15.09s/batch, batch_loss=6, batch_in

Epoch 5/10:  76%|▊| 757/991 [3:14:45<58:53, 15.10s/batch, batch_loss=6, batch_in

Epoch 5/10:  76%|▊| 757/991 [3:15:01<58:53, 15.10s/batch, batch_loss=15.1, batch

Epoch 5/10:  76%|▊| 758/991 [3:15:01<59:31, 15.33s/batch, batch_loss=15.1, batch

Epoch 5/10:  76%|▊| 758/991 [3:15:17<59:31, 15.33s/batch, batch_loss=14, batch_i

Epoch 5/10:  77%|▊| 759/991 [3:15:17<59:47, 15.46s/batch, batch_loss=14, batch_i

Epoch 5/10:  77%|▊| 759/991 [3:15:34<59:47, 15.46s/batch, batch_loss=16.9, batch

Epoch 5/10:  77%|▊| 760/991 [3:15:34<1:02:12, 16.16s/batch, batch_loss=16.9, bat

Epoch 5/10:  77%|▊| 760/991 [3:15:49<1:02:12, 16.16s/batch, batch_loss=15.9, bat

Epoch 5/10:  77%|▊| 761/991 [3:15:49<1:00:05, 15.68s/batch, batch_loss=15.9, bat

Epoch 5/10:  77%|▊| 761/991 [3:16:04<1:00:05, 15.68s/batch, batch_loss=26.5, bat

Epoch 5/10:  77%|▊| 762/991 [3:16:04<58:52, 15.43s/batch, batch_loss=26.5, batch

Epoch 5/10:  77%|▊| 762/991 [3:16:19<58:52, 15.43s/batch, batch_loss=511, batch_

Epoch 5/10:  77%|▊| 763/991 [3:16:19<58:11, 15.32s/batch, batch_loss=511, batch_

Epoch 5/10:  77%|▊| 763/991 [3:16:34<58:11, 15.32s/batch, batch_loss=10.9, batch

Epoch 5/10:  77%|▊| 764/991 [3:16:34<57:43, 15.26s/batch, batch_loss=10.9, batch

Epoch 5/10:  77%|▊| 764/991 [3:16:49<57:43, 15.26s/batch, batch_loss=3.93, batch

Epoch 5/10:  77%|▊| 765/991 [3:16:49<57:05, 15.16s/batch, batch_loss=3.93, batch

Epoch 5/10:  77%|▊| 765/991 [3:17:05<57:05, 15.16s/batch, batch_loss=13.2, batch

Epoch 5/10:  77%|▊| 766/991 [3:17:05<57:23, 15.30s/batch, batch_loss=13.2, batch

Epoch 5/10:  77%|▊| 766/991 [3:17:20<57:23, 15.30s/batch, batch_loss=13.8, batch

Epoch 5/10:  77%|▊| 767/991 [3:17:20<56:48, 15.22s/batch, batch_loss=13.8, batch

Epoch 5/10:  77%|▊| 767/991 [3:17:34<56:48, 15.22s/batch, batch_loss=3.3, batch_

Epoch 5/10:  77%|▊| 768/991 [3:17:34<56:02, 15.08s/batch, batch_loss=3.3, batch_

Epoch 5/10:  77%|▊| 768/991 [3:17:49<56:02, 15.08s/batch, batch_loss=2.12, batch

Epoch 5/10:  78%|▊| 769/991 [3:17:49<55:42, 15.06s/batch, batch_loss=2.12, batch

Epoch 5/10:  78%|▊| 769/991 [3:18:04<55:42, 15.06s/batch, batch_loss=14, batch_i

Epoch 5/10:  78%|▊| 770/991 [3:18:04<54:45, 14.87s/batch, batch_loss=14, batch_i

Epoch 5/10:  78%|▊| 770/991 [3:18:21<54:45, 14.87s/batch, batch_loss=2.75e+3, ba

Epoch 5/10:  78%|▊| 771/991 [3:18:21<57:17, 15.63s/batch, batch_loss=2.75e+3, ba

Epoch 5/10:  78%|▊| 771/991 [3:18:36<57:17, 15.63s/batch, batch_loss=5.65, batch

Epoch 5/10:  78%|▊| 772/991 [3:18:36<56:24, 15.45s/batch, batch_loss=5.65, batch

Epoch 5/10:  78%|▊| 772/991 [3:18:51<56:24, 15.45s/batch, batch_loss=1.97, batch

Epoch 5/10:  78%|▊| 773/991 [3:18:51<54:56, 15.12s/batch, batch_loss=1.97, batch

Epoch 5/10:  78%|▊| 773/991 [3:19:05<54:56, 15.12s/batch, batch_loss=7.42, batch

Epoch 5/10:  78%|▊| 774/991 [3:19:05<54:17, 15.01s/batch, batch_loss=7.42, batch

Epoch 5/10:  78%|▊| 774/991 [3:19:20<54:17, 15.01s/batch, batch_loss=8.96, batch

Epoch 5/10:  78%|▊| 775/991 [3:19:20<53:49, 14.95s/batch, batch_loss=8.96, batch

Epoch 5/10:  78%|▊| 775/991 [3:19:35<53:49, 14.95s/batch, batch_loss=258, batch_

Epoch 5/10:  78%|▊| 776/991 [3:19:35<53:24, 14.91s/batch, batch_loss=258, batch_

Epoch 5/10:  78%|▊| 776/991 [3:19:50<53:24, 14.91s/batch, batch_loss=0.478, batc

Epoch 5/10:  78%|▊| 777/991 [3:19:50<53:25, 14.98s/batch, batch_loss=0.478, batc

Epoch 5/10:  78%|▊| 777/991 [3:20:05<53:25, 14.98s/batch, batch_loss=0.713, batc

Epoch 5/10:  79%|▊| 778/991 [3:20:05<53:07, 14.96s/batch, batch_loss=0.713, batc

Epoch 5/10:  79%|▊| 778/991 [3:20:20<53:07, 14.96s/batch, batch_loss=5.4, batch_

Epoch 5/10:  79%|▊| 779/991 [3:20:20<52:29, 14.86s/batch, batch_loss=5.4, batch_

Epoch 5/10:  79%|▊| 779/991 [3:20:34<52:29, 14.86s/batch, batch_loss=2.98, batch

Epoch 5/10:  79%|▊| 780/991 [3:20:34<51:58, 14.78s/batch, batch_loss=2.98, batch

Epoch 5/10:  79%|▊| 780/991 [3:20:51<51:58, 14.78s/batch, batch_loss=3.88, batch

Epoch 5/10:  79%|▊| 781/991 [3:20:51<54:08, 15.47s/batch, batch_loss=3.88, batch

Epoch 5/10:  79%|▊| 781/991 [3:21:06<54:08, 15.47s/batch, batch_loss=2.51e+4, ba

Epoch 5/10:  79%|▊| 782/991 [3:21:06<53:04, 15.23s/batch, batch_loss=2.51e+4, ba

Epoch 5/10:  79%|▊| 782/991 [3:21:22<53:04, 15.23s/batch, batch_loss=19.2, batch

Epoch 5/10:  79%|▊| 783/991 [3:21:22<53:13, 15.35s/batch, batch_loss=19.2, batch

Epoch 5/10:  79%|▊| 783/991 [3:21:37<53:13, 15.35s/batch, batch_loss=12.7, batch

Epoch 5/10:  79%|▊| 784/991 [3:21:37<53:29, 15.50s/batch, batch_loss=12.7, batch

Epoch 5/10:  79%|▊| 784/991 [3:21:52<53:29, 15.50s/batch, batch_loss=13.8, batch

Epoch 5/10:  79%|▊| 785/991 [3:21:52<51:47, 15.09s/batch, batch_loss=13.8, batch

Epoch 5/10:  79%|▊| 785/991 [3:22:06<51:47, 15.09s/batch, batch_loss=8.75, batch

Epoch 5/10:  79%|▊| 786/991 [3:22:06<50:42, 14.84s/batch, batch_loss=8.75, batch

Epoch 5/10:  79%|▊| 786/991 [3:22:20<50:42, 14.84s/batch, batch_loss=2.48e+4, ba

Epoch 5/10:  79%|▊| 787/991 [3:22:20<50:13, 14.77s/batch, batch_loss=2.48e+4, ba

Epoch 5/10:  79%|▊| 787/991 [3:22:37<50:13, 14.77s/batch, batch_loss=684, batch_

Epoch 5/10:  80%|▊| 788/991 [3:22:37<51:38, 15.27s/batch, batch_loss=684, batch_

Epoch 5/10:  80%|▊| 788/991 [3:22:53<51:38, 15.27s/batch, batch_loss=18.3, batch

Epoch 5/10:  80%|▊| 789/991 [3:22:53<52:31, 15.60s/batch, batch_loss=18.3, batch

Epoch 5/10:  80%|▊| 789/991 [3:23:08<52:31, 15.60s/batch, batch_loss=13.4, batch

Epoch 5/10:  80%|▊| 790/991 [3:23:08<51:45, 15.45s/batch, batch_loss=13.4, batch

Epoch 5/10:  80%|▊| 790/991 [3:23:23<51:45, 15.45s/batch, batch_loss=12.4, batch

Epoch 5/10:  80%|▊| 791/991 [3:23:23<50:39, 15.20s/batch, batch_loss=12.4, batch

Epoch 5/10:  80%|▊| 791/991 [3:23:39<50:39, 15.20s/batch, batch_loss=1.04e+4, ba

Epoch 5/10:  80%|▊| 792/991 [3:23:39<51:34, 15.55s/batch, batch_loss=1.04e+4, ba

Epoch 5/10:  80%|▊| 792/991 [3:23:55<51:34, 15.55s/batch, batch_loss=8.86, batch

Epoch 5/10:  80%|▊| 793/991 [3:23:55<51:29, 15.60s/batch, batch_loss=8.86, batch

Epoch 5/10:  80%|▊| 793/991 [3:24:11<51:29, 15.60s/batch, batch_loss=1.98, batch

Epoch 5/10:  80%|▊| 794/991 [3:24:11<51:22, 15.64s/batch, batch_loss=1.98, batch

Epoch 5/10:  80%|▊| 794/991 [3:24:27<51:22, 15.64s/batch, batch_loss=6.96, batch

Epoch 5/10:  80%|▊| 795/991 [3:24:27<51:19, 15.71s/batch, batch_loss=6.96, batch

Epoch 5/10:  80%|▊| 795/991 [3:24:41<51:19, 15.71s/batch, batch_loss=11.3, batch

Epoch 5/10:  80%|▊| 796/991 [3:24:41<49:58, 15.38s/batch, batch_loss=11.3, batch

Epoch 5/10:  80%|▊| 796/991 [3:24:56<49:58, 15.38s/batch, batch_loss=20.7, batch

Epoch 5/10:  80%|▊| 797/991 [3:24:56<49:18, 15.25s/batch, batch_loss=20.7, batch

Epoch 5/10:  80%|▊| 797/991 [3:25:13<49:18, 15.25s/batch, batch_loss=336, batch_

Epoch 5/10:  81%|▊| 798/991 [3:25:13<50:50, 15.81s/batch, batch_loss=336, batch_

Epoch 5/10:  81%|▊| 798/991 [3:25:28<50:50, 15.81s/batch, batch_loss=9.21, batch

Epoch 5/10:  81%|▊| 799/991 [3:25:28<49:15, 15.39s/batch, batch_loss=9.21, batch

Epoch 5/10:  81%|▊| 799/991 [3:25:43<49:15, 15.39s/batch, batch_loss=15.9, batch

Epoch 5/10:  81%|▊| 800/991 [3:25:43<48:44, 15.31s/batch, batch_loss=15.9, batch

Epoch 5/10:  81%|▊| 800/991 [3:25:58<48:44, 15.31s/batch, batch_loss=11.3, batch

Epoch 5/10:  81%|▊| 801/991 [3:25:58<48:02, 15.17s/batch, batch_loss=11.3, batch

Epoch 5/10:  81%|▊| 801/991 [3:26:13<48:02, 15.17s/batch, batch_loss=16.5, batch

Epoch 5/10:  81%|▊| 802/991 [3:26:13<47:29, 15.08s/batch, batch_loss=16.5, batch

Epoch 5/10:  81%|▊| 802/991 [3:26:27<47:29, 15.08s/batch, batch_loss=6.87, batch

Epoch 5/10:  81%|▊| 803/991 [3:26:27<46:45, 14.93s/batch, batch_loss=6.87, batch

Epoch 5/10:  81%|▊| 803/991 [3:26:42<46:45, 14.93s/batch, batch_loss=13.1, batch

Epoch 5/10:  81%|▊| 804/991 [3:26:42<46:35, 14.95s/batch, batch_loss=13.1, batch

Epoch 5/10:  81%|▊| 804/991 [3:26:58<46:35, 14.95s/batch, batch_loss=5.31, batch

Epoch 5/10:  81%|▊| 805/991 [3:26:58<46:57, 15.15s/batch, batch_loss=5.31, batch

Epoch 5/10:  81%|▊| 805/991 [3:27:15<46:57, 15.15s/batch, batch_loss=10.6, batch

Epoch 5/10:  81%|▊| 806/991 [3:27:15<48:35, 15.76s/batch, batch_loss=10.6, batch

Epoch 5/10:  81%|▊| 806/991 [3:27:30<48:35, 15.76s/batch, batch_loss=8.88, batch

Epoch 5/10:  81%|▊| 807/991 [3:27:30<47:13, 15.40s/batch, batch_loss=8.88, batch

Epoch 5/10:  81%|▊| 807/991 [3:27:44<47:13, 15.40s/batch, batch_loss=15.9, batch

Epoch 5/10:  82%|▊| 808/991 [3:27:44<46:17, 15.18s/batch, batch_loss=15.9, batch

Epoch 5/10:  82%|▊| 808/991 [3:27:59<46:17, 15.18s/batch, batch_loss=1.21e+4, ba

Epoch 5/10:  82%|▊| 809/991 [3:27:59<45:36, 15.03s/batch, batch_loss=1.21e+4, ba

Epoch 5/10:  82%|▊| 809/991 [3:28:14<45:36, 15.03s/batch, batch_loss=13.4, batch

Epoch 5/10:  82%|▊| 810/991 [3:28:14<45:00, 14.92s/batch, batch_loss=13.4, batch

Epoch 5/10:  82%|▊| 810/991 [3:28:29<45:00, 14.92s/batch, batch_loss=6.33, batch

Epoch 5/10:  82%|▊| 811/991 [3:28:29<45:10, 15.06s/batch, batch_loss=6.33, batch

Epoch 5/10:  82%|▊| 811/991 [3:28:44<45:10, 15.06s/batch, batch_loss=6.52, batch

Epoch 5/10:  82%|▊| 812/991 [3:28:44<45:20, 15.20s/batch, batch_loss=6.52, batch

Epoch 5/10:  82%|▊| 812/991 [3:29:03<45:20, 15.20s/batch, batch_loss=6.68, batch

Epoch 5/10:  82%|▊| 813/991 [3:29:03<47:52, 16.14s/batch, batch_loss=6.68, batch

Epoch 5/10:  82%|▊| 813/991 [3:29:18<47:52, 16.14s/batch, batch_loss=11.5, batch

Epoch 5/10:  82%|▊| 814/991 [3:29:18<47:13, 16.01s/batch, batch_loss=11.5, batch

Epoch 5/10:  82%|▊| 814/991 [3:29:34<47:13, 16.01s/batch, batch_loss=6.27, batch

Epoch 5/10:  82%|▊| 815/991 [3:29:34<46:17, 15.78s/batch, batch_loss=6.27, batch

Epoch 5/10:  82%|▊| 815/991 [3:29:50<46:17, 15.78s/batch, batch_loss=90.1, batch

Epoch 5/10:  82%|▊| 816/991 [3:29:50<46:12, 15.84s/batch, batch_loss=90.1, batch

Epoch 5/10:  82%|▊| 816/991 [3:30:04<46:12, 15.84s/batch, batch_loss=351, batch_

Epoch 5/10:  82%|▊| 817/991 [3:30:04<44:25, 15.32s/batch, batch_loss=351, batch_

Epoch 5/10:  82%|▊| 817/991 [3:30:19<44:25, 15.32s/batch, batch_loss=362, batch_

Epoch 5/10:  83%|▊| 818/991 [3:30:19<43:43, 15.16s/batch, batch_loss=362, batch_

Epoch 5/10:  83%|▊| 818/991 [3:30:34<43:43, 15.16s/batch, batch_loss=12.6, batch

Epoch 5/10:  83%|▊| 819/991 [3:30:34<43:19, 15.12s/batch, batch_loss=12.6, batch

Epoch 5/10:  83%|▊| 819/991 [3:30:49<43:19, 15.12s/batch, batch_loss=6.95, batch

Epoch 5/10:  83%|▊| 820/991 [3:30:49<43:20, 15.21s/batch, batch_loss=6.95, batch

Epoch 5/10:  83%|▊| 820/991 [3:31:04<43:20, 15.21s/batch, batch_loss=6.92, batch

Epoch 5/10:  83%|▊| 821/991 [3:31:04<42:53, 15.14s/batch, batch_loss=6.92, batch

Epoch 5/10:  83%|▊| 821/991 [3:31:20<42:53, 15.14s/batch, batch_loss=8.33, batch

Epoch 5/10:  83%|▊| 822/991 [3:31:20<43:36, 15.48s/batch, batch_loss=8.33, batch

Epoch 5/10:  83%|▊| 822/991 [3:31:36<43:36, 15.48s/batch, batch_loss=152, batch_

Epoch 5/10:  83%|▊| 823/991 [3:31:36<43:36, 15.58s/batch, batch_loss=152, batch_

Epoch 5/10:  83%|▊| 823/991 [3:31:51<43:36, 15.58s/batch, batch_loss=6.77, batch

Epoch 5/10:  83%|▊| 824/991 [3:31:51<42:28, 15.26s/batch, batch_loss=6.77, batch

Epoch 5/10:  83%|▊| 824/991 [3:32:06<42:28, 15.26s/batch, batch_loss=13.4, batch

Epoch 5/10:  83%|▊| 825/991 [3:32:06<42:35, 15.40s/batch, batch_loss=13.4, batch

Epoch 5/10:  83%|▊| 825/991 [3:32:21<42:35, 15.40s/batch, batch_loss=2.6e+3, bat

Epoch 5/10:  83%|▊| 826/991 [3:32:21<42:02, 15.29s/batch, batch_loss=2.6e+3, bat

Epoch 5/10:  83%|▊| 826/991 [3:32:36<42:02, 15.29s/batch, batch_loss=21.3, batch

Epoch 5/10:  83%|▊| 827/991 [3:32:36<41:21, 15.13s/batch, batch_loss=21.3, batch

Epoch 5/10:  83%|▊| 827/991 [3:32:51<41:21, 15.13s/batch, batch_loss=15.4, batch

Epoch 5/10:  84%|▊| 828/991 [3:32:51<41:14, 15.18s/batch, batch_loss=15.4, batch

Epoch 5/10:  84%|▊| 828/991 [3:33:09<41:14, 15.18s/batch, batch_loss=6.5, batch_

Epoch 5/10:  84%|▊| 829/991 [3:33:09<42:51, 15.87s/batch, batch_loss=6.5, batch_

Epoch 5/10:  84%|▊| 829/991 [3:33:24<42:51, 15.87s/batch, batch_loss=12, batch_i

Epoch 5/10:  84%|▊| 830/991 [3:33:24<42:06, 15.70s/batch, batch_loss=12, batch_i

Epoch 5/10:  84%|▊| 830/991 [3:33:40<42:06, 15.70s/batch, batch_loss=9.15, batch

Epoch 5/10:  84%|▊| 831/991 [3:33:40<42:15, 15.85s/batch, batch_loss=9.15, batch

Epoch 5/10:  84%|▊| 831/991 [3:33:56<42:15, 15.85s/batch, batch_loss=15, batch_i

Epoch 5/10:  84%|▊| 832/991 [3:33:56<41:59, 15.85s/batch, batch_loss=15, batch_i

Epoch 5/10:  84%|▊| 832/991 [3:34:12<41:59, 15.85s/batch, batch_loss=216, batch_

Epoch 5/10:  84%|▊| 833/991 [3:34:12<41:49, 15.88s/batch, batch_loss=216, batch_

Epoch 5/10:  84%|▊| 833/991 [3:34:28<41:49, 15.88s/batch, batch_loss=15.3, batch

Epoch 5/10:  84%|▊| 834/991 [3:34:28<41:23, 15.82s/batch, batch_loss=15.3, batch

Epoch 5/10:  84%|▊| 834/991 [3:34:45<41:23, 15.82s/batch, batch_loss=10.9, batch

Epoch 5/10:  84%|▊| 835/991 [3:34:45<41:45, 16.06s/batch, batch_loss=10.9, batch

Epoch 5/10:  84%|▊| 835/991 [3:35:01<41:45, 16.06s/batch, batch_loss=3.27e+3, ba

Epoch 5/10:  84%|▊| 836/991 [3:35:01<41:57, 16.24s/batch, batch_loss=3.27e+3, ba

Epoch 5/10:  84%|▊| 836/991 [3:35:17<41:57, 16.24s/batch, batch_loss=4.89e+3, ba

Epoch 5/10:  84%|▊| 837/991 [3:35:17<41:32, 16.19s/batch, batch_loss=4.89e+3, ba

Epoch 5/10:  84%|▊| 837/991 [3:35:33<41:32, 16.19s/batch, batch_loss=14, batch_i

Epoch 5/10:  85%|▊| 838/991 [3:35:33<40:45, 15.98s/batch, batch_loss=14, batch_i

Epoch 5/10:  85%|▊| 838/991 [3:35:48<40:45, 15.98s/batch, batch_loss=4.19, batch

Epoch 5/10:  85%|▊| 839/991 [3:35:48<40:15, 15.89s/batch, batch_loss=4.19, batch

Epoch 5/10:  85%|▊| 839/991 [3:36:04<40:15, 15.89s/batch, batch_loss=3.84, batch

Epoch 5/10:  85%|▊| 840/991 [3:36:04<39:31, 15.70s/batch, batch_loss=3.84, batch

Epoch 5/10:  85%|▊| 840/991 [3:36:19<39:31, 15.70s/batch, batch_loss=14.5, batch

Epoch 5/10:  85%|▊| 841/991 [3:36:19<39:17, 15.72s/batch, batch_loss=14.5, batch

Epoch 5/10:  85%|▊| 841/991 [3:36:35<39:17, 15.72s/batch, batch_loss=14.9, batch

Epoch 5/10:  85%|▊| 842/991 [3:36:35<39:04, 15.73s/batch, batch_loss=14.9, batch

Epoch 5/10:  85%|▊| 842/991 [3:36:51<39:04, 15.73s/batch, batch_loss=7.95, batch

Epoch 5/10:  85%|▊| 843/991 [3:36:51<38:49, 15.74s/batch, batch_loss=7.95, batch

Epoch 5/10:  85%|▊| 843/991 [3:37:09<38:49, 15.74s/batch, batch_loss=1.69e+3, ba

Epoch 5/10:  85%|▊| 844/991 [3:37:09<40:27, 16.51s/batch, batch_loss=1.69e+3, ba

Epoch 5/10:  85%|▊| 844/991 [3:37:24<40:27, 16.51s/batch, batch_loss=13.9, batch

Epoch 5/10:  85%|▊| 845/991 [3:37:24<39:10, 16.10s/batch, batch_loss=13.9, batch

Epoch 5/10:  85%|▊| 845/991 [3:37:40<39:10, 16.10s/batch, batch_loss=1.18e+4, ba

Epoch 5/10:  85%|▊| 846/991 [3:37:40<38:48, 16.06s/batch, batch_loss=1.18e+4, ba

Epoch 5/10:  85%|▊| 846/991 [3:37:56<38:48, 16.06s/batch, batch_loss=18.4, batch

Epoch 5/10:  85%|▊| 847/991 [3:37:56<38:09, 15.90s/batch, batch_loss=18.4, batch

Epoch 5/10:  85%|▊| 847/991 [3:38:12<38:09, 15.90s/batch, batch_loss=29.3, batch

Epoch 5/10:  86%|▊| 848/991 [3:38:12<38:07, 16.00s/batch, batch_loss=29.3, batch

Epoch 5/10:  86%|▊| 848/991 [3:38:28<38:07, 16.00s/batch, batch_loss=1.01e+3, ba

Epoch 5/10:  86%|▊| 849/991 [3:38:28<38:01, 16.07s/batch, batch_loss=1.01e+3, ba

Epoch 5/10:  86%|▊| 849/991 [3:38:48<38:01, 16.07s/batch, batch_loss=7.38, batch

Epoch 5/10:  86%|▊| 850/991 [3:38:48<40:26, 17.21s/batch, batch_loss=7.38, batch

Epoch 5/10:  86%|▊| 850/991 [3:39:04<40:26, 17.21s/batch, batch_loss=13.8, batch

Epoch 5/10:  86%|▊| 851/991 [3:39:04<39:08, 16.78s/batch, batch_loss=13.8, batch

Epoch 5/10:  86%|▊| 851/991 [3:39:19<39:08, 16.78s/batch, batch_loss=15, batch_i

Epoch 5/10:  86%|▊| 852/991 [3:39:19<37:52, 16.35s/batch, batch_loss=15, batch_i

Epoch 5/10:  86%|▊| 852/991 [3:39:36<37:52, 16.35s/batch, batch_loss=7.66e+3, ba

Epoch 5/10:  86%|▊| 853/991 [3:39:36<37:31, 16.32s/batch, batch_loss=7.66e+3, ba

Epoch 5/10:  86%|▊| 853/991 [3:39:51<37:31, 16.32s/batch, batch_loss=15.6, batch

Epoch 5/10:  86%|▊| 854/991 [3:39:51<36:40, 16.06s/batch, batch_loss=15.6, batch

Epoch 5/10:  86%|▊| 854/991 [3:40:10<36:40, 16.06s/batch, batch_loss=7.14, batch

Epoch 5/10:  86%|▊| 855/991 [3:40:10<38:05, 16.80s/batch, batch_loss=7.14, batch

Epoch 5/10:  86%|▊| 855/991 [3:40:26<38:05, 16.80s/batch, batch_loss=8.14, batch

Epoch 5/10:  86%|▊| 856/991 [3:40:26<37:49, 16.81s/batch, batch_loss=8.14, batch

Epoch 5/10:  86%|▊| 856/991 [3:40:42<37:49, 16.81s/batch, batch_loss=9.12, batch

Epoch 5/10:  86%|▊| 857/991 [3:40:42<36:47, 16.47s/batch, batch_loss=9.12, batch

Epoch 5/10:  86%|▊| 857/991 [3:40:59<36:47, 16.47s/batch, batch_loss=17.2, batch

Epoch 5/10:  87%|▊| 858/991 [3:40:59<36:32, 16.49s/batch, batch_loss=17.2, batch

Epoch 5/10:  87%|▊| 858/991 [3:41:16<36:32, 16.49s/batch, batch_loss=17.8, batch

Epoch 5/10:  87%|▊| 859/991 [3:41:16<36:33, 16.62s/batch, batch_loss=17.8, batch

Epoch 5/10:  87%|▊| 859/991 [3:41:33<36:33, 16.62s/batch, batch_loss=18.7, batch

Epoch 5/10:  87%|▊| 860/991 [3:41:33<36:35, 16.76s/batch, batch_loss=18.7, batch

Epoch 5/10:  87%|▊| 860/991 [3:41:50<36:35, 16.76s/batch, batch_loss=9.24, batch

Epoch 5/10:  87%|▊| 861/991 [3:41:50<36:23, 16.80s/batch, batch_loss=9.24, batch

Epoch 5/10:  87%|▊| 861/991 [3:42:06<36:23, 16.80s/batch, batch_loss=17.8, batch

Epoch 5/10:  87%|▊| 862/991 [3:42:06<36:13, 16.85s/batch, batch_loss=17.8, batch

Epoch 5/10:  87%|▊| 862/991 [3:42:23<36:13, 16.85s/batch, batch_loss=25.6, batch

Epoch 5/10:  87%|▊| 863/991 [3:42:23<35:39, 16.71s/batch, batch_loss=25.6, batch

Epoch 5/10:  87%|▊| 863/991 [3:42:39<35:39, 16.71s/batch, batch_loss=13.3, batch

Epoch 5/10:  87%|▊| 864/991 [3:42:39<35:11, 16.62s/batch, batch_loss=13.3, batch

Epoch 5/10:  87%|▊| 864/991 [3:42:57<35:11, 16.62s/batch, batch_loss=15.5, batch

Epoch 5/10:  87%|▊| 865/991 [3:42:57<35:29, 16.90s/batch, batch_loss=15.5, batch

Epoch 5/10:  87%|▊| 865/991 [3:43:13<35:29, 16.90s/batch, batch_loss=19.4, batch

Epoch 5/10:  87%|▊| 866/991 [3:43:13<34:49, 16.72s/batch, batch_loss=19.4, batch

Epoch 5/10:  87%|▊| 866/991 [3:43:30<34:49, 16.72s/batch, batch_loss=20.8, batch

Epoch 5/10:  87%|▊| 867/991 [3:43:30<34:52, 16.87s/batch, batch_loss=20.8, batch

Epoch 5/10:  87%|▊| 867/991 [3:43:47<34:52, 16.87s/batch, batch_loss=20.1, batch

Epoch 5/10:  88%|▉| 868/991 [3:43:47<34:21, 16.76s/batch, batch_loss=20.1, batch

Epoch 5/10:  88%|▉| 868/991 [3:44:04<34:21, 16.76s/batch, batch_loss=11.1, batch

Epoch 5/10:  88%|▉| 869/991 [3:44:04<34:24, 16.93s/batch, batch_loss=11.1, batch

Epoch 5/10:  88%|▉| 869/991 [3:44:20<34:24, 16.93s/batch, batch_loss=14.4, batch

Epoch 5/10:  88%|▉| 870/991 [3:44:20<33:36, 16.66s/batch, batch_loss=14.4, batch

Epoch 5/10:  88%|▉| 870/991 [3:44:37<33:36, 16.66s/batch, batch_loss=7.87, batch

Epoch 5/10:  88%|▉| 871/991 [3:44:37<33:15, 16.63s/batch, batch_loss=7.87, batch

Epoch 5/10:  88%|▉| 871/991 [3:44:53<33:15, 16.63s/batch, batch_loss=17.8, batch

Epoch 5/10:  88%|▉| 872/991 [3:44:53<33:00, 16.64s/batch, batch_loss=17.8, batch

Epoch 5/10:  88%|▉| 872/991 [3:45:10<33:00, 16.64s/batch, batch_loss=13.2, batch

Epoch 5/10:  88%|▉| 873/991 [3:45:10<32:53, 16.72s/batch, batch_loss=13.2, batch

Epoch 5/10:  88%|▉| 873/991 [3:45:26<32:53, 16.72s/batch, batch_loss=7.41, batch

Epoch 5/10:  88%|▉| 874/991 [3:45:26<31:41, 16.25s/batch, batch_loss=7.41, batch

Epoch 5/10:  88%|▉| 874/991 [3:45:41<31:41, 16.25s/batch, batch_loss=15, batch_i

Epoch 5/10:  88%|▉| 875/991 [3:45:41<31:15, 16.17s/batch, batch_loss=15, batch_i

Epoch 5/10:  88%|▉| 875/991 [3:45:57<31:15, 16.17s/batch, batch_loss=18.7, batch

Epoch 5/10:  88%|▉| 876/991 [3:45:57<30:42, 16.02s/batch, batch_loss=18.7, batch

Epoch 5/10:  88%|▉| 876/991 [3:46:14<30:42, 16.02s/batch, batch_loss=15.4, batch

Epoch 5/10:  88%|▉| 877/991 [3:46:14<30:38, 16.13s/batch, batch_loss=15.4, batch

Epoch 5/10:  88%|▉| 877/991 [3:46:31<30:38, 16.13s/batch, batch_loss=22.9, batch

Epoch 5/10:  89%|▉| 878/991 [3:46:31<30:56, 16.43s/batch, batch_loss=22.9, batch

Epoch 5/10:  89%|▉| 878/991 [3:46:47<30:56, 16.43s/batch, batch_loss=17.8, batch

Epoch 5/10:  89%|▉| 879/991 [3:46:47<30:43, 16.46s/batch, batch_loss=17.8, batch

Epoch 5/10:  89%|▉| 879/991 [3:47:04<30:43, 16.46s/batch, batch_loss=13, batch_i

Epoch 5/10:  89%|▉| 880/991 [3:47:04<30:25, 16.44s/batch, batch_loss=13, batch_i

Epoch 5/10:  89%|▉| 880/991 [3:47:20<30:25, 16.44s/batch, batch_loss=5.12e+3, ba

Epoch 5/10:  89%|▉| 881/991 [3:47:20<30:11, 16.47s/batch, batch_loss=5.12e+3, ba

Epoch 5/10:  89%|▉| 881/991 [3:47:37<30:11, 16.47s/batch, batch_loss=15.2, batch

Epoch 5/10:  89%|▉| 882/991 [3:47:37<30:05, 16.57s/batch, batch_loss=15.2, batch

Epoch 5/10:  89%|▉| 882/991 [3:47:54<30:05, 16.57s/batch, batch_loss=17, batch_i

Epoch 5/10:  89%|▉| 883/991 [3:47:54<30:04, 16.71s/batch, batch_loss=17, batch_i

Epoch 5/10:  89%|▉| 883/991 [3:48:10<30:04, 16.71s/batch, batch_loss=8.2, batch_

Epoch 5/10:  89%|▉| 884/991 [3:48:10<29:40, 16.64s/batch, batch_loss=8.2, batch_

Epoch 5/10:  89%|▉| 884/991 [3:48:26<29:40, 16.64s/batch, batch_loss=13.2, batch

Epoch 5/10:  89%|▉| 885/991 [3:48:26<28:42, 16.25s/batch, batch_loss=13.2, batch

Epoch 5/10:  89%|▉| 885/991 [3:48:43<28:42, 16.25s/batch, batch_loss=15.2, batch

Epoch 5/10:  89%|▉| 886/991 [3:48:43<28:52, 16.50s/batch, batch_loss=15.2, batch

Epoch 5/10:  89%|▉| 886/991 [3:49:00<28:52, 16.50s/batch, batch_loss=1.93e+4, ba

Epoch 5/10:  90%|▉| 887/991 [3:49:00<28:40, 16.54s/batch, batch_loss=1.93e+4, ba

Epoch 5/10:  90%|▉| 887/991 [3:49:16<28:40, 16.54s/batch, batch_loss=17.3, batch

Epoch 5/10:  90%|▉| 888/991 [3:49:16<28:21, 16.52s/batch, batch_loss=17.3, batch

Epoch 5/10:  90%|▉| 888/991 [3:49:33<28:21, 16.52s/batch, batch_loss=18.3, batch

Epoch 5/10:  90%|▉| 889/991 [3:49:33<28:13, 16.60s/batch, batch_loss=18.3, batch

Epoch 5/10:  90%|▉| 889/991 [3:49:50<28:13, 16.60s/batch, batch_loss=12, batch_i

Epoch 5/10:  90%|▉| 890/991 [3:49:50<28:09, 16.73s/batch, batch_loss=12, batch_i

Epoch 5/10:  90%|▉| 890/991 [3:50:06<28:09, 16.73s/batch, batch_loss=14.9, batch

Epoch 5/10:  90%|▉| 891/991 [3:50:06<27:44, 16.65s/batch, batch_loss=14.9, batch

Epoch 5/10:  90%|▉| 891/991 [3:50:23<27:44, 16.65s/batch, batch_loss=14.5, batch

Epoch 5/10:  90%|▉| 892/991 [3:50:23<27:16, 16.54s/batch, batch_loss=14.5, batch

Epoch 5/10:  90%|▉| 892/991 [3:50:39<27:16, 16.54s/batch, batch_loss=3.7e+3, bat

Epoch 5/10:  90%|▉| 893/991 [3:50:39<27:01, 16.55s/batch, batch_loss=3.7e+3, bat

Epoch 5/10:  90%|▉| 893/991 [3:50:55<27:01, 16.55s/batch, batch_loss=8.72, batch

Epoch 5/10:  90%|▉| 894/991 [3:50:55<26:31, 16.41s/batch, batch_loss=8.72, batch

Epoch 5/10:  90%|▉| 894/991 [3:51:15<26:31, 16.41s/batch, batch_loss=13.6, batch

Epoch 5/10:  90%|▉| 895/991 [3:51:15<27:42, 17.31s/batch, batch_loss=13.6, batch

Epoch 5/10:  90%|▉| 895/991 [3:51:30<27:42, 17.31s/batch, batch_loss=12.1, batch

Epoch 5/10:  90%|▉| 896/991 [3:51:30<26:38, 16.83s/batch, batch_loss=12.1, batch

Epoch 5/10:  90%|▉| 896/991 [3:51:47<26:38, 16.83s/batch, batch_loss=16.5, batch

Epoch 5/10:  91%|▉| 897/991 [3:51:47<26:15, 16.76s/batch, batch_loss=16.5, batch

Epoch 5/10:  91%|▉| 897/991 [3:52:01<26:15, 16.76s/batch, batch_loss=18.8, batch

Epoch 5/10:  91%|▉| 898/991 [3:52:01<24:54, 16.06s/batch, batch_loss=18.8, batch

Epoch 5/10:  91%|▉| 898/991 [3:52:16<24:54, 16.06s/batch, batch_loss=17.4, batch

Epoch 5/10:  91%|▉| 899/991 [3:52:16<24:08, 15.74s/batch, batch_loss=17.4, batch

Epoch 5/10:  91%|▉| 899/991 [3:52:33<24:08, 15.74s/batch, batch_loss=17.1, batch

Epoch 5/10:  91%|▉| 900/991 [3:52:33<24:26, 16.12s/batch, batch_loss=17.1, batch

Epoch 5/10:  91%|▉| 900/991 [3:52:51<24:26, 16.12s/batch, batch_loss=15.2, batch

Epoch 5/10:  91%|▉| 901/991 [3:52:51<25:03, 16.71s/batch, batch_loss=15.2, batch

Epoch 5/10:  91%|▉| 901/991 [3:53:09<25:03, 16.71s/batch, batch_loss=11.6, batch

Epoch 5/10:  91%|▉| 902/991 [3:53:09<25:08, 16.95s/batch, batch_loss=11.6, batch

Epoch 5/10:  91%|▉| 902/991 [3:53:29<25:08, 16.95s/batch, batch_loss=7.39, batch

Epoch 5/10:  91%|▉| 903/991 [3:53:29<26:15, 17.90s/batch, batch_loss=7.39, batch

Epoch 5/10:  91%|▉| 903/991 [3:53:48<26:15, 17.90s/batch, batch_loss=8.34, batch

Epoch 5/10:  91%|▉| 904/991 [3:53:48<26:15, 18.11s/batch, batch_loss=8.34, batch

Epoch 5/10:  91%|▉| 904/991 [3:54:04<26:15, 18.11s/batch, batch_loss=21.3, batch

Epoch 5/10:  91%|▉| 905/991 [3:54:04<25:23, 17.72s/batch, batch_loss=21.3, batch

Epoch 5/10:  91%|▉| 905/991 [3:54:22<25:23, 17.72s/batch, batch_loss=17.7, batch

Epoch 5/10:  91%|▉| 906/991 [3:54:22<25:01, 17.66s/batch, batch_loss=17.7, batch

Epoch 5/10:  91%|▉| 906/991 [3:54:42<25:01, 17.66s/batch, batch_loss=18.5, batch

Epoch 5/10:  92%|▉| 907/991 [3:54:42<25:48, 18.43s/batch, batch_loss=18.5, batch

Epoch 5/10:  92%|▉| 907/991 [3:54:59<25:48, 18.43s/batch, batch_loss=13.1, batch

Epoch 5/10:  92%|▉| 908/991 [3:54:59<24:52, 17.98s/batch, batch_loss=13.1, batch

Epoch 5/10:  92%|▉| 908/991 [3:55:15<24:52, 17.98s/batch, batch_loss=5.4, batch_

Epoch 5/10:  92%|▉| 909/991 [3:55:15<23:30, 17.20s/batch, batch_loss=5.4, batch_

Epoch 5/10:  92%|▉| 909/991 [3:55:32<23:30, 17.20s/batch, batch_loss=683, batch_

Epoch 5/10:  92%|▉| 910/991 [3:55:32<23:15, 17.23s/batch, batch_loss=683, batch_

Epoch 5/10:  92%|▉| 910/991 [3:55:50<23:15, 17.23s/batch, batch_loss=1.02e+3, ba

Epoch 5/10:  92%|▉| 911/991 [3:55:50<23:24, 17.55s/batch, batch_loss=1.02e+3, ba

Epoch 5/10:  92%|▉| 911/991 [3:56:07<23:24, 17.55s/batch, batch_loss=23.1, batch

Epoch 5/10:  92%|▉| 912/991 [3:56:07<22:54, 17.39s/batch, batch_loss=23.1, batch

Epoch 5/10:  92%|▉| 912/991 [3:56:23<22:54, 17.39s/batch, batch_loss=20.4, batch

Epoch 5/10:  92%|▉| 913/991 [3:56:23<21:59, 16.91s/batch, batch_loss=20.4, batch

Epoch 5/10:  92%|▉| 913/991 [3:56:41<21:59, 16.91s/batch, batch_loss=18.9, batch

Epoch 5/10:  92%|▉| 914/991 [3:56:41<21:58, 17.12s/batch, batch_loss=18.9, batch

Epoch 5/10:  92%|▉| 914/991 [3:57:01<21:58, 17.12s/batch, batch_loss=17.9, batch

Epoch 5/10:  92%|▉| 915/991 [3:57:01<23:04, 18.21s/batch, batch_loss=17.9, batch

Epoch 5/10:  92%|▉| 915/991 [3:57:18<23:04, 18.21s/batch, batch_loss=14, batch_i

Epoch 5/10:  92%|▉| 916/991 [3:57:18<22:09, 17.72s/batch, batch_loss=14, batch_i

Epoch 5/10:  92%|▉| 916/991 [3:57:33<22:09, 17.72s/batch, batch_loss=7.1, batch_

Epoch 5/10:  93%|▉| 917/991 [3:57:33<21:01, 17.05s/batch, batch_loss=7.1, batch_

Epoch 5/10:  93%|▉| 917/991 [3:57:50<21:01, 17.05s/batch, batch_loss=11.4, batch

Epoch 5/10:  93%|▉| 918/991 [3:57:50<20:31, 16.86s/batch, batch_loss=11.4, batch

Epoch 5/10:  93%|▉| 918/991 [3:58:06<20:31, 16.86s/batch, batch_loss=11.3, batch

Epoch 5/10:  93%|▉| 919/991 [3:58:06<19:56, 16.62s/batch, batch_loss=11.3, batch

Epoch 5/10:  93%|▉| 919/991 [3:58:23<19:56, 16.62s/batch, batch_loss=13, batch_i

Epoch 5/10:  93%|▉| 920/991 [3:58:23<19:50, 16.77s/batch, batch_loss=13, batch_i

Epoch 5/10:  93%|▉| 920/991 [3:58:40<19:50, 16.77s/batch, batch_loss=15.5, batch

Epoch 5/10:  93%|▉| 921/991 [3:58:40<19:36, 16.81s/batch, batch_loss=15.5, batch

Epoch 5/10:  93%|▉| 921/991 [3:59:04<19:36, 16.81s/batch, batch_loss=19.7, batch

Epoch 5/10:  93%|▉| 922/991 [3:59:04<21:44, 18.91s/batch, batch_loss=19.7, batch

Epoch 5/10:  93%|▉| 922/991 [3:59:23<21:44, 18.91s/batch, batch_loss=6.01, batch

Epoch 5/10:  93%|▉| 923/991 [3:59:23<21:24, 18.89s/batch, batch_loss=6.01, batch

Epoch 5/10:  93%|▉| 923/991 [3:59:40<21:24, 18.89s/batch, batch_loss=10.5, batch

Epoch 5/10:  93%|▉| 924/991 [3:59:40<20:38, 18.48s/batch, batch_loss=10.5, batch

Epoch 5/10:  93%|▉| 924/991 [3:59:57<20:38, 18.48s/batch, batch_loss=10.6, batch

Epoch 5/10:  93%|▉| 925/991 [3:59:57<19:47, 17.99s/batch, batch_loss=10.6, batch

Epoch 5/10:  93%|▉| 925/991 [4:00:14<19:47, 17.99s/batch, batch_loss=3e+4, batch

Epoch 5/10:  93%|▉| 926/991 [4:00:14<19:06, 17.63s/batch, batch_loss=3e+4, batch

Epoch 5/10:  93%|▉| 926/991 [4:00:31<19:06, 17.63s/batch, batch_loss=6.59, batch

Epoch 5/10:  94%|▉| 927/991 [4:00:31<18:39, 17.50s/batch, batch_loss=6.59, batch

Epoch 5/10:  94%|▉| 927/991 [4:00:48<18:39, 17.50s/batch, batch_loss=853, batch_

Epoch 5/10:  94%|▉| 928/991 [4:00:48<18:18, 17.44s/batch, batch_loss=853, batch_

Epoch 5/10:  94%|▉| 928/991 [4:01:09<18:18, 17.44s/batch, batch_loss=10.6, batch

Epoch 5/10:  94%|▉| 929/991 [4:01:09<18:59, 18.37s/batch, batch_loss=10.6, batch

Epoch 5/10:  94%|▉| 929/991 [4:01:26<18:59, 18.37s/batch, batch_loss=8.55, batch

Epoch 5/10:  94%|▉| 930/991 [4:01:26<18:13, 17.92s/batch, batch_loss=8.55, batch

Epoch 5/10:  94%|▉| 930/991 [4:01:43<18:13, 17.92s/batch, batch_loss=11.2, batch

Epoch 5/10:  94%|▉| 931/991 [4:01:43<17:44, 17.74s/batch, batch_loss=11.2, batch

Epoch 5/10:  94%|▉| 931/991 [4:02:00<17:44, 17.74s/batch, batch_loss=9.2, batch_

Epoch 5/10:  94%|▉| 932/991 [4:02:00<17:14, 17.54s/batch, batch_loss=9.2, batch_

Epoch 5/10:  94%|▉| 932/991 [4:02:17<17:14, 17.54s/batch, batch_loss=10.2, batch

Epoch 5/10:  94%|▉| 933/991 [4:02:17<16:48, 17.39s/batch, batch_loss=10.2, batch

Epoch 5/10:  94%|▉| 933/991 [4:02:35<16:48, 17.39s/batch, batch_loss=2.13, batch

Epoch 5/10:  94%|▉| 934/991 [4:02:35<16:36, 17.48s/batch, batch_loss=2.13, batch

Epoch 5/10:  94%|▉| 934/991 [4:02:55<16:36, 17.48s/batch, batch_loss=1.73, batch

Epoch 5/10:  94%|▉| 935/991 [4:02:55<17:04, 18.30s/batch, batch_loss=1.73, batch

Epoch 5/10:  94%|▉| 935/991 [4:03:10<17:04, 18.30s/batch, batch_loss=164, batch_

Epoch 5/10:  94%|▉| 936/991 [4:03:10<16:01, 17.48s/batch, batch_loss=164, batch_

Epoch 5/10:  94%|▉| 936/991 [4:03:27<16:01, 17.48s/batch, batch_loss=36.9, batch

Epoch 5/10:  95%|▉| 937/991 [4:03:27<15:30, 17.23s/batch, batch_loss=36.9, batch

Epoch 5/10:  95%|▉| 937/991 [4:03:43<15:30, 17.23s/batch, batch_loss=9.52, batch

Epoch 5/10:  95%|▉| 938/991 [4:03:43<14:50, 16.79s/batch, batch_loss=9.52, batch

Epoch 5/10:  95%|▉| 938/991 [4:04:00<14:50, 16.79s/batch, batch_loss=8.66, batch

Epoch 5/10:  95%|▉| 939/991 [4:04:00<14:34, 16.83s/batch, batch_loss=8.66, batch

Epoch 5/10:  95%|▉| 939/991 [4:04:16<14:34, 16.83s/batch, batch_loss=423, batch_

Epoch 5/10:  95%|▉| 940/991 [4:04:16<14:13, 16.73s/batch, batch_loss=423, batch_

Epoch 5/10:  95%|▉| 940/991 [4:04:32<14:13, 16.73s/batch, batch_loss=17.7, batch

Epoch 5/10:  95%|▉| 941/991 [4:04:32<13:39, 16.39s/batch, batch_loss=17.7, batch

Epoch 5/10:  95%|▉| 941/991 [4:04:47<13:39, 16.39s/batch, batch_loss=13.1, batch

Epoch 5/10:  95%|▉| 942/991 [4:04:47<13:03, 16.00s/batch, batch_loss=13.1, batch

Epoch 5/10:  95%|▉| 942/991 [4:05:07<13:03, 16.00s/batch, batch_loss=9.87, batch

Epoch 5/10:  95%|▉| 943/991 [4:05:07<13:45, 17.20s/batch, batch_loss=9.87, batch

Epoch 5/10:  95%|▉| 943/991 [4:05:23<13:45, 17.20s/batch, batch_loss=14.4, batch

Epoch 5/10:  95%|▉| 944/991 [4:05:23<13:17, 16.97s/batch, batch_loss=14.4, batch

Epoch 5/10:  95%|▉| 944/991 [4:05:39<13:17, 16.97s/batch, batch_loss=1.93, batch

Epoch 5/10:  95%|▉| 945/991 [4:05:39<12:37, 16.46s/batch, batch_loss=1.93, batch

Epoch 5/10:  95%|▉| 945/991 [4:05:56<12:37, 16.46s/batch, batch_loss=11.5, batch

Epoch 5/10:  95%|▉| 946/991 [4:05:56<12:30, 16.68s/batch, batch_loss=11.5, batch

Epoch 5/10:  95%|▉| 946/991 [4:06:12<12:30, 16.68s/batch, batch_loss=13.9, batch

Epoch 5/10:  96%|▉| 947/991 [4:06:12<12:00, 16.37s/batch, batch_loss=13.9, batch

Epoch 5/10:  96%|▉| 947/991 [4:06:27<12:00, 16.37s/batch, batch_loss=9.9, batch_

Epoch 5/10:  96%|▉| 948/991 [4:06:27<11:36, 16.20s/batch, batch_loss=9.9, batch_

Epoch 5/10:  96%|▉| 948/991 [4:06:46<11:36, 16.20s/batch, batch_loss=6.15, batch

Epoch 5/10:  96%|▉| 949/991 [4:06:46<11:56, 17.07s/batch, batch_loss=6.15, batch

Epoch 5/10:  96%|▉| 949/991 [4:07:03<11:56, 17.07s/batch, batch_loss=8.03, batch

Epoch 5/10:  96%|▉| 950/991 [4:07:03<11:36, 17.00s/batch, batch_loss=8.03, batch

Epoch 5/10:  96%|▉| 950/991 [4:07:21<11:36, 17.00s/batch, batch_loss=15.9, batch

Epoch 5/10:  96%|▉| 951/991 [4:07:21<11:25, 17.13s/batch, batch_loss=15.9, batch

Epoch 5/10:  96%|▉| 951/991 [4:07:38<11:25, 17.13s/batch, batch_loss=15.9, batch

Epoch 5/10:  96%|▉| 952/991 [4:07:38<11:12, 17.24s/batch, batch_loss=15.9, batch

Epoch 5/10:  96%|▉| 952/991 [4:07:55<11:12, 17.24s/batch, batch_loss=7.12, batch

Epoch 5/10:  96%|▉| 953/991 [4:07:55<10:54, 17.23s/batch, batch_loss=7.12, batch

Epoch 5/10:  96%|▉| 953/991 [4:08:12<10:54, 17.23s/batch, batch_loss=331, batch_

Epoch 5/10:  96%|▉| 954/991 [4:08:12<10:31, 17.06s/batch, batch_loss=331, batch_

Epoch 5/10:  96%|▉| 954/991 [4:08:28<10:31, 17.06s/batch, batch_loss=11, batch_i

Epoch 5/10:  96%|▉| 955/991 [4:08:28<10:00, 16.69s/batch, batch_loss=11, batch_i

Epoch 5/10:  96%|▉| 955/991 [4:08:45<10:00, 16.69s/batch, batch_loss=13, batch_i

Epoch 5/10:  96%|▉| 956/991 [4:08:45<09:48, 16.83s/batch, batch_loss=13, batch_i

Epoch 5/10:  96%|▉| 956/991 [4:09:02<09:48, 16.83s/batch, batch_loss=13.9, batch

Epoch 5/10:  97%|▉| 957/991 [4:09:02<09:33, 16.87s/batch, batch_loss=13.9, batch

Epoch 5/10:  97%|▉| 957/991 [4:09:19<09:33, 16.87s/batch, batch_loss=11.7, batch

Epoch 5/10:  97%|▉| 958/991 [4:09:19<09:22, 17.05s/batch, batch_loss=11.7, batch

Epoch 5/10:  97%|▉| 958/991 [4:09:37<09:22, 17.05s/batch, batch_loss=7.46, batch

Epoch 5/10:  97%|▉| 959/991 [4:09:37<09:13, 17.29s/batch, batch_loss=7.46, batch

Epoch 5/10:  97%|▉| 959/991 [4:09:54<09:13, 17.29s/batch, batch_loss=11, batch_i

Epoch 5/10:  97%|▉| 960/991 [4:09:54<08:48, 17.04s/batch, batch_loss=11, batch_i

Epoch 5/10:  97%|▉| 960/991 [4:10:11<08:48, 17.04s/batch, batch_loss=14.1, batch

Epoch 5/10:  97%|▉| 961/991 [4:10:11<08:28, 16.94s/batch, batch_loss=14.1, batch

Epoch 5/10:  97%|▉| 961/991 [4:10:27<08:28, 16.94s/batch, batch_loss=4.74, batch

Epoch 5/10:  97%|▉| 962/991 [4:10:27<08:08, 16.84s/batch, batch_loss=4.74, batch

Epoch 5/10:  97%|▉| 962/991 [4:10:43<08:08, 16.84s/batch, batch_loss=5.85, batch

Epoch 5/10:  97%|▉| 963/991 [4:10:43<07:42, 16.51s/batch, batch_loss=5.85, batch

Epoch 5/10:  97%|▉| 963/991 [4:10:59<07:42, 16.51s/batch, batch_loss=9.44e+3, ba

Epoch 5/10:  97%|▉| 964/991 [4:10:59<07:23, 16.41s/batch, batch_loss=9.44e+3, ba

Epoch 5/10:  97%|▉| 964/991 [4:11:14<07:23, 16.41s/batch, batch_loss=19.1, batch

Epoch 5/10:  97%|▉| 965/991 [4:11:14<06:54, 15.96s/batch, batch_loss=19.1, batch

Epoch 5/10:  97%|▉| 965/991 [4:11:30<06:54, 15.96s/batch, batch_loss=15.9, batch

Epoch 5/10:  97%|▉| 966/991 [4:11:30<06:42, 16.11s/batch, batch_loss=15.9, batch

Epoch 5/10:  97%|▉| 966/991 [4:11:47<06:42, 16.11s/batch, batch_loss=2.41e+4, ba

Epoch 5/10:  98%|▉| 967/991 [4:11:47<06:27, 16.13s/batch, batch_loss=2.41e+4, ba

Epoch 5/10:  98%|▉| 967/991 [4:12:03<06:27, 16.13s/batch, batch_loss=407, batch_

Epoch 5/10:  98%|▉| 968/991 [4:12:03<06:14, 16.30s/batch, batch_loss=407, batch_

Epoch 5/10:  98%|▉| 968/991 [4:12:20<06:14, 16.30s/batch, batch_loss=19.4, batch

Epoch 5/10:  98%|▉| 969/991 [4:12:20<06:02, 16.48s/batch, batch_loss=19.4, batch

Epoch 5/10:  98%|▉| 969/991 [4:12:37<06:02, 16.48s/batch, batch_loss=1.13, batch

Epoch 5/10:  98%|▉| 970/991 [4:12:37<05:50, 16.68s/batch, batch_loss=1.13, batch

Epoch 5/10:  98%|▉| 970/991 [4:12:55<05:50, 16.68s/batch, batch_loss=10.3, batch

Epoch 5/10:  98%|▉| 971/991 [4:12:55<05:36, 16.84s/batch, batch_loss=10.3, batch

Epoch 5/10:  98%|▉| 971/991 [4:13:14<05:36, 16.84s/batch, batch_loss=27.2, batch

Epoch 5/10:  98%|▉| 972/991 [4:13:14<05:36, 17.69s/batch, batch_loss=27.2, batch

Epoch 5/10:  98%|▉| 972/991 [4:13:31<05:36, 17.69s/batch, batch_loss=24.4, batch

Epoch 5/10:  98%|▉| 973/991 [4:13:31<05:11, 17.33s/batch, batch_loss=24.4, batch

Epoch 5/10:  98%|▉| 973/991 [4:13:48<05:11, 17.33s/batch, batch_loss=13.5, batch

Epoch 5/10:  98%|▉| 974/991 [4:13:48<04:53, 17.27s/batch, batch_loss=13.5, batch

Epoch 5/10:  98%|▉| 974/991 [4:14:03<04:53, 17.27s/batch, batch_loss=9.62, batch

Epoch 5/10:  98%|▉| 975/991 [4:14:03<04:28, 16.76s/batch, batch_loss=9.62, batch

Epoch 5/10:  98%|▉| 975/991 [4:14:18<04:28, 16.76s/batch, batch_loss=23.8, batch

Epoch 5/10:  98%|▉| 976/991 [4:14:18<04:03, 16.23s/batch, batch_loss=23.8, batch

Epoch 5/10:  98%|▉| 976/991 [4:14:33<04:03, 16.23s/batch, batch_loss=6.29, batch

Epoch 5/10:  99%|▉| 977/991 [4:14:33<03:39, 15.71s/batch, batch_loss=6.29, batch

Epoch 5/10:  99%|▉| 977/991 [4:14:46<03:39, 15.71s/batch, batch_loss=4.34, batch

Epoch 5/10:  99%|▉| 978/991 [4:14:46<03:14, 14.95s/batch, batch_loss=4.34, batch

Epoch 5/10:  99%|▉| 978/991 [4:15:04<03:14, 14.95s/batch, batch_loss=0.894, batc

Epoch 5/10:  99%|▉| 979/991 [4:15:04<03:08, 15.71s/batch, batch_loss=0.894, batc

Epoch 5/10:  99%|▉| 979/991 [4:15:18<03:08, 15.71s/batch, batch_loss=0.64, batch

Epoch 5/10:  99%|▉| 980/991 [4:15:18<02:49, 15.39s/batch, batch_loss=0.64, batch

Epoch 5/10:  99%|▉| 980/991 [4:15:32<02:49, 15.39s/batch, batch_loss=0.519, batc

Epoch 5/10:  99%|▉| 981/991 [4:15:32<02:28, 14.82s/batch, batch_loss=0.519, batc

Epoch 5/10:  99%|▉| 981/991 [4:15:46<02:28, 14.82s/batch, batch_loss=0.384, batc

Epoch 5/10:  99%|▉| 982/991 [4:15:46<02:10, 14.55s/batch, batch_loss=0.384, batc

Epoch 5/10:  99%|▉| 982/991 [4:15:59<02:10, 14.55s/batch, batch_loss=0.322, batc

Epoch 5/10:  99%|▉| 983/991 [4:15:59<01:54, 14.34s/batch, batch_loss=0.322, batc

Epoch 5/10:  99%|▉| 983/991 [4:16:14<01:54, 14.34s/batch, batch_loss=0.259, batc

Epoch 5/10:  99%|▉| 984/991 [4:16:14<01:40, 14.32s/batch, batch_loss=0.259, batc

Epoch 5/10:  99%|▉| 984/991 [4:16:28<01:40, 14.32s/batch, batch_loss=0.207, batc

Epoch 5/10:  99%|▉| 985/991 [4:16:28<01:25, 14.20s/batch, batch_loss=0.207, batc

Epoch 5/10:  99%|▉| 985/991 [4:16:42<01:25, 14.20s/batch, batch_loss=0.14, batch

Epoch 5/10:  99%|▉| 986/991 [4:16:42<01:11, 14.23s/batch, batch_loss=0.14, batch

Epoch 5/10:  99%|▉| 986/991 [4:16:56<01:11, 14.23s/batch, batch_loss=0.103, batc

Epoch 5/10: 100%|▉| 987/991 [4:16:56<00:56, 14.20s/batch, batch_loss=0.103, batc

Epoch 5/10: 100%|▉| 987/991 [4:17:13<00:56, 14.20s/batch, batch_loss=0.094, batc

Epoch 5/10: 100%|▉| 988/991 [4:17:13<00:44, 14.91s/batch, batch_loss=0.094, batc

Epoch 5/10: 100%|▉| 988/991 [4:17:27<00:44, 14.91s/batch, batch_loss=0.0989, bat

Epoch 5/10: 100%|▉| 989/991 [4:17:27<00:29, 14.69s/batch, batch_loss=0.0989, bat

Epoch 5/10: 100%|▉| 989/991 [4:17:41<00:29, 14.69s/batch, batch_loss=0.107, batc

Epoch 5/10: 100%|▉| 990/991 [4:17:41<00:14, 14.49s/batch, batch_loss=0.107, batc

Epoch 5/10: 100%|▉| 990/991 [4:17:53<00:14, 14.49s/batch, batch_loss=0.115, batc

Epoch 5/10: 100%|█| 991/991 [4:17:53<00:00, 13.85s/batch, batch_loss=0.115, batc

Epoch 5/10: 100%|█| 991/991 [4:17:53<00:00, 15.61s/batch, batch_loss=0.115, batc




Epoch 5, Loss: 986.9840


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:15<?, ?batch/s, batch_loss=30.1, batch_index=1, ba

Validation:   0%| | 1/743 [00:15<3:11:46, 15.51s/batch, batch_loss=30.1, batch_i

Validation:   0%| | 1/743 [00:30<3:11:46, 15.51s/batch, batch_loss=32.8, batch_i

Validation:   0%| | 2/743 [00:30<3:08:57, 15.30s/batch, batch_loss=32.8, batch_i

Validation:   0%| | 2/743 [00:46<3:08:57, 15.30s/batch, batch_loss=19.3, batch_i

Validation:   0%| | 3/743 [00:46<3:09:33, 15.37s/batch, batch_loss=19.3, batch_i

Validation:   0%| | 3/743 [01:00<3:09:33, 15.37s/batch, batch_loss=19.5, batch_i

Validation:   1%| | 4/743 [01:00<3:02:54, 14.85s/batch, batch_loss=19.5, batch_i

Validation:   1%| | 4/743 [01:15<3:02:54, 14.85s/batch, batch_loss=32, batch_ind

Validation:   1%| | 5/743 [01:15<3:04:18, 14.98s/batch, batch_loss=32, batch_ind

Validation:   1%| | 5/743 [01:31<3:04:18, 14.98s/batch, batch_loss=37.2, batch_i

Validation:   1%| | 6/743 [01:31<3:07:45, 15.29s/batch, batch_loss=37.2, batch_i

Validation:   1%| | 6/743 [01:46<3:07:45, 15.29s/batch, batch_loss=557, batch_in

Validation:   1%| | 7/743 [01:46<3:09:16, 15.43s/batch, batch_loss=557, batch_in

Validation:   1%| | 7/743 [02:02<3:09:16, 15.43s/batch, batch_loss=25.1, batch_i

Validation:   1%| | 8/743 [02:02<3:10:23, 15.54s/batch, batch_loss=25.1, batch_i

Validation:   1%| | 8/743 [02:19<3:10:23, 15.54s/batch, batch_loss=22.4, batch_i

Validation:   1%| | 9/743 [02:19<3:13:44, 15.84s/batch, batch_loss=22.4, batch_i

Validation:   1%| | 9/743 [02:35<3:13:44, 15.84s/batch, batch_loss=25.7, batch_i

Validation:   1%| | 10/743 [02:35<3:14:45, 15.94s/batch, batch_loss=25.7, batch_

Validation:   1%| | 10/743 [02:53<3:14:45, 15.94s/batch, batch_loss=17.1, batch_

Validation:   1%| | 11/743 [02:53<3:22:02, 16.56s/batch, batch_loss=17.1, batch_

Validation:   1%| | 11/743 [03:08<3:22:02, 16.56s/batch, batch_loss=2.18e+3, bat

Validation:   2%| | 12/743 [03:08<3:16:27, 16.13s/batch, batch_loss=2.18e+3, bat

Validation:   2%| | 12/743 [03:24<3:16:27, 16.13s/batch, batch_loss=22.3, batch_

Validation:   2%| | 13/743 [03:24<3:15:43, 16.09s/batch, batch_loss=22.3, batch_

Validation:   2%| | 13/743 [03:39<3:15:43, 16.09s/batch, batch_loss=18.5, batch_

Validation:   2%| | 14/743 [03:39<3:12:24, 15.84s/batch, batch_loss=18.5, batch_

Validation:   2%| | 14/743 [03:54<3:12:24, 15.84s/batch, batch_loss=31.6, batch_

Validation:   2%| | 15/743 [03:54<3:06:36, 15.38s/batch, batch_loss=31.6, batch_

Validation:   2%| | 15/743 [04:08<3:06:36, 15.38s/batch, batch_loss=27.1, batch_

Validation:   2%| | 16/743 [04:08<3:03:11, 15.12s/batch, batch_loss=27.1, batch_

Validation:   2%| | 16/743 [04:22<3:03:11, 15.12s/batch, batch_loss=16.6, batch_

Validation:   2%| | 17/743 [04:22<3:00:01, 14.88s/batch, batch_loss=16.6, batch_

Validation:   2%| | 17/743 [04:37<3:00:01, 14.88s/batch, batch_loss=4.58e+3, bat

Validation:   2%| | 18/743 [04:37<3:00:17, 14.92s/batch, batch_loss=4.58e+3, bat

Validation:   2%| | 18/743 [04:53<3:00:17, 14.92s/batch, batch_loss=19.6, batch_

Validation:   3%| | 19/743 [04:53<3:01:23, 15.03s/batch, batch_loss=19.6, batch_

Validation:   3%| | 19/743 [05:08<3:01:23, 15.03s/batch, batch_loss=25.5, batch_

Validation:   3%| | 20/743 [05:08<3:01:18, 15.05s/batch, batch_loss=25.5, batch_

Validation:   3%| | 20/743 [05:27<3:01:18, 15.05s/batch, batch_loss=961, batch_i

Validation:   3%| | 21/743 [05:27<3:14:46, 16.19s/batch, batch_loss=961, batch_i

Validation:   3%| | 21/743 [05:42<3:14:46, 16.19s/batch, batch_loss=26.4, batch_

Validation:   3%| | 22/743 [05:42<3:11:58, 15.98s/batch, batch_loss=26.4, batch_

Validation:   3%| | 22/743 [05:58<3:11:58, 15.98s/batch, batch_loss=13.3, batch_

Validation:   3%| | 23/743 [05:58<3:09:32, 15.80s/batch, batch_loss=13.3, batch_

Validation:   3%| | 23/743 [06:13<3:09:32, 15.80s/batch, batch_loss=21.1, batch_

Validation:   3%| | 24/743 [06:13<3:08:04, 15.70s/batch, batch_loss=21.1, batch_

Validation:   3%| | 24/743 [06:29<3:08:04, 15.70s/batch, batch_loss=25.9, batch_

Validation:   3%| | 25/743 [06:29<3:07:12, 15.64s/batch, batch_loss=25.9, batch_

Validation:   3%| | 25/743 [06:43<3:07:12, 15.64s/batch, batch_loss=38.2, batch_

Validation:   3%| | 26/743 [06:43<3:02:13, 15.25s/batch, batch_loss=38.2, batch_

Validation:   3%| | 26/743 [06:59<3:02:13, 15.25s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [06:59<3:06:13, 15.61s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [07:18<3:06:13, 15.61s/batch, batch_loss=24, batch_in

Validation:   4%| | 28/743 [07:18<3:16:12, 16.46s/batch, batch_loss=24, batch_in

Validation:   4%| | 28/743 [07:33<3:16:12, 16.46s/batch, batch_loss=16.7, batch_

Validation:   4%| | 29/743 [07:33<3:11:27, 16.09s/batch, batch_loss=16.7, batch_

Validation:   4%| | 29/743 [07:49<3:11:27, 16.09s/batch, batch_loss=1.19e+4, bat

Validation:   4%| | 30/743 [07:49<3:10:49, 16.06s/batch, batch_loss=1.19e+4, bat

Validation:   4%| | 30/743 [08:04<3:10:49, 16.06s/batch, batch_loss=30.4, batch_

Validation:   4%| | 31/743 [08:04<3:08:22, 15.87s/batch, batch_loss=30.4, batch_

Validation:   4%| | 31/743 [08:19<3:08:22, 15.87s/batch, batch_loss=31.8, batch_

Validation:   4%| | 32/743 [08:19<3:05:03, 15.62s/batch, batch_loss=31.8, batch_

Validation:   4%| | 32/743 [08:35<3:05:03, 15.62s/batch, batch_loss=30.4, batch_

Validation:   4%| | 33/743 [08:35<3:05:09, 15.65s/batch, batch_loss=30.4, batch_

Validation:   4%| | 33/743 [08:51<3:05:09, 15.65s/batch, batch_loss=18.7, batch_

Validation:   5%| | 34/743 [08:51<3:04:45, 15.64s/batch, batch_loss=18.7, batch_

Validation:   5%| | 34/743 [09:07<3:04:45, 15.64s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [09:07<3:07:00, 15.85s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [09:26<3:07:00, 15.85s/batch, batch_loss=21.1, batch_

Validation:   5%| | 36/743 [09:26<3:16:22, 16.67s/batch, batch_loss=21.1, batch_

Validation:   5%| | 36/743 [09:41<3:16:22, 16.67s/batch, batch_loss=167, batch_i

Validation:   5%| | 37/743 [09:41<3:12:59, 16.40s/batch, batch_loss=167, batch_i

Validation:   5%| | 37/743 [09:57<3:12:59, 16.40s/batch, batch_loss=6.52e+3, bat

Validation:   5%| | 38/743 [09:57<3:10:32, 16.22s/batch, batch_loss=6.52e+3, bat

Validation:   5%| | 38/743 [10:13<3:10:32, 16.22s/batch, batch_loss=20.1, batch_

Validation:   5%| | 39/743 [10:13<3:07:24, 15.97s/batch, batch_loss=20.1, batch_

Validation:   5%| | 39/743 [10:27<3:07:24, 15.97s/batch, batch_loss=27.8, batch_

Validation:   5%| | 40/743 [10:27<3:03:03, 15.62s/batch, batch_loss=27.8, batch_

Validation:   5%| | 40/743 [10:42<3:03:03, 15.62s/batch, batch_loss=19.3, batch_

Validation:   6%| | 41/743 [10:42<3:00:19, 15.41s/batch, batch_loss=19.3, batch_

Validation:   6%| | 41/743 [10:59<3:00:19, 15.41s/batch, batch_loss=18.1, batch_

Validation:   6%| | 42/743 [10:59<3:04:30, 15.79s/batch, batch_loss=18.1, batch_

Validation:   6%| | 42/743 [11:17<3:04:30, 15.79s/batch, batch_loss=19.1, batch_

Validation:   6%| | 43/743 [11:17<3:10:10, 16.30s/batch, batch_loss=19.1, batch_

Validation:   6%| | 43/743 [11:31<3:10:10, 16.30s/batch, batch_loss=19.2, batch_

Validation:   6%| | 44/743 [11:31<3:04:18, 15.82s/batch, batch_loss=19.2, batch_

Validation:   6%| | 44/743 [11:46<3:04:18, 15.82s/batch, batch_loss=25.5, batch_

Validation:   6%| | 45/743 [11:46<3:01:21, 15.59s/batch, batch_loss=25.5, batch_

Validation:   6%| | 45/743 [12:01<3:01:21, 15.59s/batch, batch_loss=16.5, batch_

Validation:   6%| | 46/743 [12:01<2:57:56, 15.32s/batch, batch_loss=16.5, batch_

Validation:   6%| | 46/743 [12:16<2:57:56, 15.32s/batch, batch_loss=26, batch_in

Validation:   6%| | 47/743 [12:16<2:57:38, 15.31s/batch, batch_loss=26, batch_in

Validation:   6%| | 47/743 [12:33<2:57:38, 15.31s/batch, batch_loss=26.9, batch_

Validation:   6%| | 48/743 [12:33<3:02:28, 15.75s/batch, batch_loss=26.9, batch_

Validation:   6%| | 48/743 [12:49<3:02:28, 15.75s/batch, batch_loss=23.5, batch_

Validation:   7%| | 49/743 [12:49<3:04:08, 15.92s/batch, batch_loss=23.5, batch_

Validation:   7%| | 49/743 [13:04<3:04:08, 15.92s/batch, batch_loss=21.5, batch_

Validation:   7%| | 50/743 [13:04<3:00:21, 15.62s/batch, batch_loss=21.5, batch_

Validation:   7%| | 50/743 [13:21<3:00:21, 15.62s/batch, batch_loss=23, batch_in

Validation:   7%| | 51/743 [13:21<3:03:04, 15.87s/batch, batch_loss=23, batch_in

Validation:   7%| | 51/743 [13:36<3:03:04, 15.87s/batch, batch_loss=26.6, batch_

Validation:   7%| | 52/743 [13:36<3:02:03, 15.81s/batch, batch_loss=26.6, batch_

Validation:   7%| | 52/743 [13:52<3:02:03, 15.81s/batch, batch_loss=41.5, batch_

Validation:   7%| | 53/743 [13:52<3:01:22, 15.77s/batch, batch_loss=41.5, batch_

Validation:   7%| | 53/743 [14:07<3:01:22, 15.77s/batch, batch_loss=20.9, batch_

Validation:   7%| | 54/743 [14:07<2:57:19, 15.44s/batch, batch_loss=20.9, batch_

Validation:   7%| | 54/743 [14:22<2:57:19, 15.44s/batch, batch_loss=24, batch_in

Validation:   7%| | 55/743 [14:22<2:55:23, 15.30s/batch, batch_loss=24, batch_in

Validation:   7%| | 55/743 [14:37<2:55:23, 15.30s/batch, batch_loss=25.5, batch_

Validation:   8%| | 56/743 [14:37<2:54:58, 15.28s/batch, batch_loss=25.5, batch_

Validation:   8%| | 56/743 [14:52<2:54:58, 15.28s/batch, batch_loss=20.1, batch_

Validation:   8%| | 57/743 [14:52<2:54:14, 15.24s/batch, batch_loss=20.1, batch_

Validation:   8%| | 57/743 [15:10<2:54:14, 15.24s/batch, batch_loss=28.8, batch_

Validation:   8%| | 58/743 [15:10<3:03:14, 16.05s/batch, batch_loss=28.8, batch_

Validation:   8%| | 58/743 [15:26<3:03:14, 16.05s/batch, batch_loss=114, batch_i

Validation:   8%| | 59/743 [15:26<3:01:12, 15.90s/batch, batch_loss=114, batch_i

Validation:   8%| | 59/743 [15:41<3:01:12, 15.90s/batch, batch_loss=6.15e+3, bat

Validation:   8%| | 60/743 [15:41<2:58:52, 15.71s/batch, batch_loss=6.15e+3, bat

Validation:   8%| | 60/743 [15:57<2:58:52, 15.71s/batch, batch_loss=11.1, batch_

Validation:   8%| | 61/743 [15:57<3:00:46, 15.90s/batch, batch_loss=11.1, batch_

Validation:   8%| | 61/743 [16:14<3:00:46, 15.90s/batch, batch_loss=15.1, batch_

Validation:   8%| | 62/743 [16:14<3:01:53, 16.03s/batch, batch_loss=15.1, batch_

Validation:   8%| | 62/743 [16:29<3:01:53, 16.03s/batch, batch_loss=30.3, batch_

Validation:   8%| | 63/743 [16:29<2:58:46, 15.77s/batch, batch_loss=30.3, batch_

Validation:   8%| | 63/743 [16:44<2:58:46, 15.77s/batch, batch_loss=17.4, batch_

Validation:   9%| | 64/743 [16:44<2:56:26, 15.59s/batch, batch_loss=17.4, batch_

Validation:   9%| | 64/743 [16:58<2:56:26, 15.59s/batch, batch_loss=19.1, batch_

Validation:   9%| | 65/743 [16:58<2:52:55, 15.30s/batch, batch_loss=19.1, batch_

Validation:   9%| | 65/743 [17:13<2:52:55, 15.30s/batch, batch_loss=1.28e+3, bat

Validation:   9%| | 66/743 [17:13<2:50:39, 15.13s/batch, batch_loss=1.28e+3, bat

Validation:   9%| | 66/743 [17:27<2:50:39, 15.13s/batch, batch_loss=15.3, batch_

Validation:   9%| | 67/743 [17:27<2:45:26, 14.68s/batch, batch_loss=15.3, batch_

Validation:   9%| | 67/743 [17:42<2:45:26, 14.68s/batch, batch_loss=20.2, batch_

Validation:   9%| | 68/743 [17:42<2:46:29, 14.80s/batch, batch_loss=20.2, batch_

Validation:   9%| | 68/743 [17:57<2:46:29, 14.80s/batch, batch_loss=15.9, batch_

Validation:   9%| | 69/743 [17:57<2:47:20, 14.90s/batch, batch_loss=15.9, batch_

Validation:   9%| | 69/743 [18:12<2:47:20, 14.90s/batch, batch_loss=20.1, batch_

Validation:   9%| | 70/743 [18:12<2:46:39, 14.86s/batch, batch_loss=20.1, batch_

Validation:   9%| | 70/743 [18:26<2:46:39, 14.86s/batch, batch_loss=13.7, batch_

Validation:  10%| | 71/743 [18:26<2:44:37, 14.70s/batch, batch_loss=13.7, batch_

Validation:  10%| | 71/743 [18:42<2:44:37, 14.70s/batch, batch_loss=15.4, batch_

Validation:  10%| | 72/743 [18:42<2:48:09, 15.04s/batch, batch_loss=15.4, batch_

Validation:  10%| | 72/743 [18:58<2:48:09, 15.04s/batch, batch_loss=22.8, batch_

Validation:  10%| | 73/743 [18:58<2:52:51, 15.48s/batch, batch_loss=22.8, batch_

Validation:  10%| | 73/743 [19:19<2:52:51, 15.48s/batch, batch_loss=34.7, batch_

Validation:  10%| | 74/743 [19:19<3:10:11, 17.06s/batch, batch_loss=34.7, batch_

Validation:  10%| | 74/743 [19:35<3:10:11, 17.06s/batch, batch_loss=19.2, batch_

Validation:  10%| | 75/743 [19:35<3:05:01, 16.62s/batch, batch_loss=19.2, batch_

Validation:  10%| | 75/743 [19:50<3:05:01, 16.62s/batch, batch_loss=18.3, batch_

Validation:  10%| | 76/743 [19:50<3:00:32, 16.24s/batch, batch_loss=18.3, batch_

Validation:  10%| | 76/743 [20:06<3:00:32, 16.24s/batch, batch_loss=23.1, batch_

Validation:  10%| | 77/743 [20:06<2:57:27, 15.99s/batch, batch_loss=23.1, batch_

Validation:  10%| | 77/743 [20:21<2:57:27, 15.99s/batch, batch_loss=23.6, batch_

Validation:  10%| | 78/743 [20:21<2:55:11, 15.81s/batch, batch_loss=23.6, batch_

Validation:  10%| | 78/743 [20:37<2:55:11, 15.81s/batch, batch_loss=19.8, batch_

Validation:  11%| | 79/743 [20:37<2:56:27, 15.95s/batch, batch_loss=19.8, batch_

Validation:  11%| | 79/743 [20:56<2:56:27, 15.95s/batch, batch_loss=11.2, batch_

Validation:  11%| | 80/743 [20:56<3:06:12, 16.85s/batch, batch_loss=11.2, batch_

Validation:  11%| | 80/743 [21:11<3:06:12, 16.85s/batch, batch_loss=157, batch_i

Validation:  11%| | 81/743 [21:11<3:00:44, 16.38s/batch, batch_loss=157, batch_i

Validation:  11%| | 81/743 [21:27<3:00:44, 16.38s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [21:27<2:56:45, 16.05s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [21:43<2:56:45, 16.05s/batch, batch_loss=43.3, batch_

Validation:  11%| | 83/743 [21:43<2:56:24, 16.04s/batch, batch_loss=43.3, batch_

Validation:  11%| | 83/743 [21:59<2:56:24, 16.04s/batch, batch_loss=28.8, batch_

Validation:  11%| | 84/743 [21:59<2:57:30, 16.16s/batch, batch_loss=28.8, batch_

Validation:  11%| | 84/743 [22:16<2:57:30, 16.16s/batch, batch_loss=33.5, batch_

Validation:  11%| | 85/743 [22:16<2:59:43, 16.39s/batch, batch_loss=33.5, batch_

Validation:  11%| | 85/743 [22:37<2:59:43, 16.39s/batch, batch_loss=31.4, batch_

Validation:  12%| | 86/743 [22:37<3:12:43, 17.60s/batch, batch_loss=31.4, batch_

Validation:  12%| | 86/743 [22:53<3:12:43, 17.60s/batch, batch_loss=41.8, batch_

Validation:  12%| | 87/743 [22:53<3:08:30, 17.24s/batch, batch_loss=41.8, batch_

Validation:  12%| | 87/743 [23:11<3:08:30, 17.24s/batch, batch_loss=27.1, batch_

Validation:  12%| | 88/743 [23:11<3:09:36, 17.37s/batch, batch_loss=27.1, batch_

Validation:  12%| | 88/743 [23:27<3:09:36, 17.37s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [23:27<3:06:35, 17.12s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [23:43<3:06:35, 17.12s/batch, batch_loss=9.19, batch_

Validation:  12%| | 90/743 [23:43<3:00:31, 16.59s/batch, batch_loss=9.19, batch_

Validation:  12%| | 90/743 [23:57<3:00:31, 16.59s/batch, batch_loss=33.6, batch_

Validation:  12%| | 91/743 [23:57<2:52:42, 15.89s/batch, batch_loss=33.6, batch_

Validation:  12%| | 91/743 [24:13<2:52:42, 15.89s/batch, batch_loss=30.2, batch_

Validation:  12%| | 92/743 [24:13<2:52:22, 15.89s/batch, batch_loss=30.2, batch_

Validation:  12%| | 92/743 [24:28<2:52:22, 15.89s/batch, batch_loss=29.4, batch_

Validation:  13%|▏| 93/743 [24:28<2:50:00, 15.69s/batch, batch_loss=29.4, batch_

Validation:  13%|▏| 93/743 [24:44<2:50:00, 15.69s/batch, batch_loss=62.5, batch_

Validation:  13%|▏| 94/743 [24:44<2:51:29, 15.85s/batch, batch_loss=62.5, batch_

Validation:  13%|▏| 94/743 [25:01<2:51:29, 15.85s/batch, batch_loss=19.3, batch_

Validation:  13%|▏| 95/743 [25:01<2:54:44, 16.18s/batch, batch_loss=19.3, batch_

Validation:  13%|▏| 95/743 [25:19<2:54:44, 16.18s/batch, batch_loss=34.3, batch_

Validation:  13%|▏| 96/743 [25:19<2:58:42, 16.57s/batch, batch_loss=34.3, batch_

Validation:  13%|▏| 96/743 [25:35<2:58:42, 16.57s/batch, batch_loss=44.9, batch_

Validation:  13%|▏| 97/743 [25:35<2:57:29, 16.49s/batch, batch_loss=44.9, batch_

Validation:  13%|▏| 97/743 [25:52<2:57:29, 16.49s/batch, batch_loss=32.2, batch_

Validation:  13%|▏| 98/743 [25:52<2:57:59, 16.56s/batch, batch_loss=32.2, batch_

Validation:  13%|▏| 98/743 [26:08<2:57:59, 16.56s/batch, batch_loss=47.2, batch_

Validation:  13%|▏| 99/743 [26:08<2:57:08, 16.50s/batch, batch_loss=47.2, batch_

Validation:  13%|▏| 99/743 [26:25<2:57:08, 16.50s/batch, batch_loss=20.6, batch_

Validation:  13%|▏| 100/743 [26:25<2:57:19, 16.55s/batch, batch_loss=20.6, batch

Validation:  13%|▏| 100/743 [26:40<2:57:19, 16.55s/batch, batch_loss=39.1, batch

Validation:  14%|▏| 101/743 [26:40<2:54:20, 16.29s/batch, batch_loss=39.1, batch

Validation:  14%|▏| 101/743 [26:56<2:54:20, 16.29s/batch, batch_loss=20.1, batch

Validation:  14%|▏| 102/743 [26:56<2:51:50, 16.08s/batch, batch_loss=20.1, batch

Validation:  14%|▏| 102/743 [27:12<2:51:50, 16.08s/batch, batch_loss=3.42e+3, ba

Validation:  14%|▏| 103/743 [27:12<2:50:27, 15.98s/batch, batch_loss=3.42e+3, ba

Validation:  14%|▏| 103/743 [27:27<2:50:27, 15.98s/batch, batch_loss=16.8, batch

Validation:  14%|▏| 104/743 [27:27<2:49:40, 15.93s/batch, batch_loss=16.8, batch

Validation:  14%|▏| 104/743 [27:43<2:49:40, 15.93s/batch, batch_loss=14.1, batch

Validation:  14%|▏| 105/743 [27:43<2:49:31, 15.94s/batch, batch_loss=14.1, batch

Validation:  14%|▏| 105/743 [28:00<2:49:31, 15.94s/batch, batch_loss=26.3, batch

Validation:  14%|▏| 106/743 [28:00<2:49:42, 15.99s/batch, batch_loss=26.3, batch

Validation:  14%|▏| 106/743 [28:16<2:49:42, 15.99s/batch, batch_loss=714, batch_

Validation:  14%|▏| 107/743 [28:16<2:50:23, 16.07s/batch, batch_loss=714, batch_

Validation:  14%|▏| 107/743 [28:31<2:50:23, 16.07s/batch, batch_loss=1.61e+3, ba

Validation:  15%|▏| 108/743 [28:31<2:46:50, 15.76s/batch, batch_loss=1.61e+3, ba

Validation:  15%|▏| 108/743 [28:47<2:46:50, 15.76s/batch, batch_loss=201, batch_

Validation:  15%|▏| 109/743 [28:47<2:48:05, 15.91s/batch, batch_loss=201, batch_

Validation:  15%|▏| 109/743 [29:04<2:48:05, 15.91s/batch, batch_loss=36.2, batch

Validation:  15%|▏| 110/743 [29:04<2:52:10, 16.32s/batch, batch_loss=36.2, batch

Validation:  15%|▏| 110/743 [29:20<2:52:10, 16.32s/batch, batch_loss=22.9, batch

Validation:  15%|▏| 111/743 [29:20<2:50:10, 16.16s/batch, batch_loss=22.9, batch

Validation:  15%|▏| 111/743 [29:36<2:50:10, 16.16s/batch, batch_loss=29.2, batch

Validation:  15%|▏| 112/743 [29:36<2:48:01, 15.98s/batch, batch_loss=29.2, batch

Validation:  15%|▏| 112/743 [29:52<2:48:01, 15.98s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [29:52<2:48:33, 16.05s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [30:08<2:48:33, 16.05s/batch, batch_loss=31.5, batch

Validation:  15%|▏| 114/743 [30:08<2:49:17, 16.15s/batch, batch_loss=31.5, batch

Validation:  15%|▏| 114/743 [30:23<2:49:17, 16.15s/batch, batch_loss=35.1, batch

Validation:  15%|▏| 115/743 [30:23<2:44:43, 15.74s/batch, batch_loss=35.1, batch

Validation:  15%|▏| 115/743 [30:40<2:44:43, 15.74s/batch, batch_loss=18.8, batch

Validation:  16%|▏| 116/743 [30:40<2:48:45, 16.15s/batch, batch_loss=18.8, batch

Validation:  16%|▏| 116/743 [30:56<2:48:45, 16.15s/batch, batch_loss=46.4, batch

Validation:  16%|▏| 117/743 [30:56<2:48:21, 16.14s/batch, batch_loss=46.4, batch

Validation:  16%|▏| 117/743 [31:13<2:48:21, 16.14s/batch, batch_loss=47, batch_i

Validation:  16%|▏| 118/743 [31:13<2:49:48, 16.30s/batch, batch_loss=47, batch_i

Validation:  16%|▏| 118/743 [31:28<2:49:48, 16.30s/batch, batch_loss=26, batch_i

Validation:  16%|▏| 119/743 [31:28<2:46:43, 16.03s/batch, batch_loss=26, batch_i

Validation:  16%|▏| 119/743 [31:45<2:46:43, 16.03s/batch, batch_loss=36.1, batch

Validation:  16%|▏| 120/743 [31:45<2:47:25, 16.12s/batch, batch_loss=36.1, batch

Validation:  16%|▏| 120/743 [32:01<2:47:25, 16.12s/batch, batch_loss=19.1, batch

Validation:  16%|▏| 121/743 [32:01<2:47:46, 16.18s/batch, batch_loss=19.1, batch

Validation:  16%|▏| 121/743 [32:17<2:47:46, 16.18s/batch, batch_loss=11.5, batch

Validation:  16%|▏| 122/743 [32:17<2:47:32, 16.19s/batch, batch_loss=11.5, batch

Validation:  16%|▏| 122/743 [32:33<2:47:32, 16.19s/batch, batch_loss=16.3, batch

Validation:  17%|▏| 123/743 [32:33<2:47:19, 16.19s/batch, batch_loss=16.3, batch

Validation:  17%|▏| 123/743 [32:49<2:47:19, 16.19s/batch, batch_loss=18.3, batch

Validation:  17%|▏| 124/743 [32:49<2:46:06, 16.10s/batch, batch_loss=18.3, batch

Validation:  17%|▏| 124/743 [33:05<2:46:06, 16.10s/batch, batch_loss=45.1, batch

Validation:  17%|▏| 125/743 [33:05<2:45:49, 16.10s/batch, batch_loss=45.1, batch

Validation:  17%|▏| 125/743 [33:26<2:45:49, 16.10s/batch, batch_loss=22.6, batch

Validation:  17%|▏| 126/743 [33:26<2:59:09, 17.42s/batch, batch_loss=22.6, batch

Validation:  17%|▏| 126/743 [33:42<2:59:09, 17.42s/batch, batch_loss=19.7, batch

Validation:  17%|▏| 127/743 [33:42<2:54:00, 16.95s/batch, batch_loss=19.7, batch

Validation:  17%|▏| 127/743 [33:59<2:54:00, 16.95s/batch, batch_loss=41.7, batch

Validation:  17%|▏| 128/743 [33:59<2:54:19, 17.01s/batch, batch_loss=41.7, batch

Validation:  17%|▏| 128/743 [34:15<2:54:19, 17.01s/batch, batch_loss=21.5, batch

Validation:  17%|▏| 129/743 [34:15<2:51:10, 16.73s/batch, batch_loss=21.5, batch

Validation:  17%|▏| 129/743 [34:32<2:51:10, 16.73s/batch, batch_loss=31.8, batch

Validation:  17%|▏| 130/743 [34:32<2:50:21, 16.68s/batch, batch_loss=31.8, batch

Validation:  17%|▏| 130/743 [34:48<2:50:21, 16.68s/batch, batch_loss=24.2, batch

Validation:  18%|▏| 131/743 [34:48<2:50:48, 16.75s/batch, batch_loss=24.2, batch

Validation:  18%|▏| 131/743 [35:07<2:50:48, 16.75s/batch, batch_loss=27.3, batch

Validation:  18%|▏| 132/743 [35:07<2:56:16, 17.31s/batch, batch_loss=27.3, batch

Validation:  18%|▏| 132/743 [35:27<2:56:16, 17.31s/batch, batch_loss=46.3, batch

Validation:  18%|▏| 133/743 [35:27<3:04:39, 18.16s/batch, batch_loss=46.3, batch

Validation:  18%|▏| 133/743 [35:44<3:04:39, 18.16s/batch, batch_loss=29.4, batch

Validation:  18%|▏| 134/743 [35:44<3:01:01, 17.83s/batch, batch_loss=29.4, batch

Validation:  18%|▏| 134/743 [36:00<3:01:01, 17.83s/batch, batch_loss=50.3, batch

Validation:  18%|▏| 135/743 [36:00<2:53:46, 17.15s/batch, batch_loss=50.3, batch

Validation:  18%|▏| 135/743 [36:16<2:53:46, 17.15s/batch, batch_loss=28.5, batch

Validation:  18%|▏| 136/743 [36:16<2:51:13, 16.92s/batch, batch_loss=28.5, batch

Validation:  18%|▏| 136/743 [36:32<2:51:13, 16.92s/batch, batch_loss=35.5, batch

Validation:  18%|▏| 137/743 [36:32<2:47:25, 16.58s/batch, batch_loss=35.5, batch

Validation:  18%|▏| 137/743 [36:52<2:47:25, 16.58s/batch, batch_loss=10.8, batch

Validation:  19%|▏| 138/743 [36:52<2:58:22, 17.69s/batch, batch_loss=10.8, batch

Validation:  19%|▏| 138/743 [37:10<2:58:22, 17.69s/batch, batch_loss=257, batch_

Validation:  19%|▏| 139/743 [37:10<2:56:39, 17.55s/batch, batch_loss=257, batch_

Validation:  19%|▏| 139/743 [37:26<2:56:39, 17.55s/batch, batch_loss=23.7, batch

Validation:  19%|▏| 140/743 [37:26<2:53:11, 17.23s/batch, batch_loss=23.7, batch

Validation:  19%|▏| 140/743 [37:42<2:53:11, 17.23s/batch, batch_loss=23.8, batch

Validation:  19%|▏| 141/743 [37:42<2:48:37, 16.81s/batch, batch_loss=23.8, batch

Validation:  19%|▏| 141/743 [37:58<2:48:37, 16.81s/batch, batch_loss=22.2, batch

Validation:  19%|▏| 142/743 [37:58<2:45:51, 16.56s/batch, batch_loss=22.2, batch

Validation:  19%|▏| 142/743 [38:14<2:45:51, 16.56s/batch, batch_loss=22.6, batch

Validation:  19%|▏| 143/743 [38:14<2:43:18, 16.33s/batch, batch_loss=22.6, batch

Validation:  19%|▏| 143/743 [38:30<2:43:18, 16.33s/batch, batch_loss=27.2, batch

Validation:  19%|▏| 144/743 [38:30<2:43:52, 16.41s/batch, batch_loss=27.2, batch

Validation:  19%|▏| 144/743 [38:47<2:43:52, 16.41s/batch, batch_loss=21.3, batch

Validation:  20%|▏| 145/743 [38:47<2:44:17, 16.48s/batch, batch_loss=21.3, batch

Validation:  20%|▏| 145/743 [39:07<2:44:17, 16.48s/batch, batch_loss=25.1, batch

Validation:  20%|▏| 146/743 [39:07<2:53:25, 17.43s/batch, batch_loss=25.1, batch

Validation:  20%|▏| 146/743 [39:23<2:53:25, 17.43s/batch, batch_loss=25.6, batch

Validation:  20%|▏| 147/743 [39:23<2:50:26, 17.16s/batch, batch_loss=25.6, batch

Validation:  20%|▏| 147/743 [39:39<2:50:26, 17.16s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [39:39<2:46:01, 16.74s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [39:54<2:46:01, 16.74s/batch, batch_loss=41.1, batch

Validation:  20%|▏| 149/743 [39:54<2:41:44, 16.34s/batch, batch_loss=41.1, batch

Validation:  20%|▏| 149/743 [40:10<2:41:44, 16.34s/batch, batch_loss=33.7, batch

Validation:  20%|▏| 150/743 [40:10<2:41:22, 16.33s/batch, batch_loss=33.7, batch

Validation:  20%|▏| 150/743 [40:27<2:41:22, 16.33s/batch, batch_loss=21.4, batch

Validation:  20%|▏| 151/743 [40:27<2:41:57, 16.41s/batch, batch_loss=21.4, batch

Validation:  20%|▏| 151/743 [40:44<2:41:57, 16.41s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [40:44<2:44:19, 16.68s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [41:05<2:44:19, 16.68s/batch, batch_loss=20.1, batch

Validation:  21%|▏| 153/743 [41:05<2:55:56, 17.89s/batch, batch_loss=20.1, batch

Validation:  21%|▏| 153/743 [41:21<2:55:56, 17.89s/batch, batch_loss=27, batch_i

Validation:  21%|▏| 154/743 [41:21<2:49:28, 17.26s/batch, batch_loss=27, batch_i

Validation:  21%|▏| 154/743 [41:37<2:49:28, 17.26s/batch, batch_loss=34.7, batch

Validation:  21%|▏| 155/743 [41:37<2:45:07, 16.85s/batch, batch_loss=34.7, batch

Validation:  21%|▏| 155/743 [41:53<2:45:07, 16.85s/batch, batch_loss=22.6, batch

Validation:  21%|▏| 156/743 [41:53<2:43:16, 16.69s/batch, batch_loss=22.6, batch

Validation:  21%|▏| 156/743 [42:09<2:43:16, 16.69s/batch, batch_loss=42.4, batch

Validation:  21%|▏| 157/743 [42:09<2:41:58, 16.58s/batch, batch_loss=42.4, batch

Validation:  21%|▏| 157/743 [42:26<2:41:58, 16.58s/batch, batch_loss=37.2, batch

Validation:  21%|▏| 158/743 [42:26<2:40:19, 16.44s/batch, batch_loss=37.2, batch

Validation:  21%|▏| 158/743 [42:41<2:40:19, 16.44s/batch, batch_loss=40.8, batch

Validation:  21%|▏| 159/743 [42:41<2:36:13, 16.05s/batch, batch_loss=40.8, batch

Validation:  21%|▏| 159/743 [42:56<2:36:13, 16.05s/batch, batch_loss=20.7, batch

Validation:  22%|▏| 160/743 [42:56<2:32:40, 15.71s/batch, batch_loss=20.7, batch

Validation:  22%|▏| 160/743 [43:11<2:32:40, 15.71s/batch, batch_loss=31.3, batch

Validation:  22%|▏| 161/743 [43:11<2:31:50, 15.65s/batch, batch_loss=31.3, batch

Validation:  22%|▏| 161/743 [43:26<2:31:50, 15.65s/batch, batch_loss=36.9, batch

Validation:  22%|▏| 162/743 [43:26<2:30:27, 15.54s/batch, batch_loss=36.9, batch

Validation:  22%|▏| 162/743 [43:42<2:30:27, 15.54s/batch, batch_loss=18.7, batch

Validation:  22%|▏| 163/743 [43:42<2:30:01, 15.52s/batch, batch_loss=18.7, batch

Validation:  22%|▏| 163/743 [44:00<2:30:01, 15.52s/batch, batch_loss=18.2, batch

Validation:  22%|▏| 164/743 [44:00<2:38:01, 16.38s/batch, batch_loss=18.2, batch

Validation:  22%|▏| 164/743 [44:17<2:38:01, 16.38s/batch, batch_loss=21.9, batch

Validation:  22%|▏| 165/743 [44:17<2:37:42, 16.37s/batch, batch_loss=21.9, batch

Validation:  22%|▏| 165/743 [44:34<2:37:42, 16.37s/batch, batch_loss=21.4, batch

Validation:  22%|▏| 166/743 [44:34<2:40:50, 16.72s/batch, batch_loss=21.4, batch

Validation:  22%|▏| 166/743 [44:51<2:40:50, 16.72s/batch, batch_loss=21.3, batch

Validation:  22%|▏| 167/743 [44:51<2:39:35, 16.62s/batch, batch_loss=21.3, batch

Validation:  22%|▏| 167/743 [45:06<2:39:35, 16.62s/batch, batch_loss=29.5, batch

Validation:  23%|▏| 168/743 [45:06<2:36:11, 16.30s/batch, batch_loss=29.5, batch

Validation:  23%|▏| 168/743 [45:23<2:36:11, 16.30s/batch, batch_loss=35.3, batch

Validation:  23%|▏| 169/743 [45:23<2:36:34, 16.37s/batch, batch_loss=35.3, batch

Validation:  23%|▏| 169/743 [45:39<2:36:34, 16.37s/batch, batch_loss=34.1, batch

Validation:  23%|▏| 170/743 [45:39<2:35:26, 16.28s/batch, batch_loss=34.1, batch

Validation:  23%|▏| 170/743 [45:55<2:35:26, 16.28s/batch, batch_loss=35.2, batch

Validation:  23%|▏| 171/743 [45:55<2:36:32, 16.42s/batch, batch_loss=35.2, batch

Validation:  23%|▏| 171/743 [46:13<2:36:32, 16.42s/batch, batch_loss=25.8, batch

Validation:  23%|▏| 172/743 [46:13<2:40:34, 16.87s/batch, batch_loss=25.8, batch

Validation:  23%|▏| 172/743 [46:30<2:40:34, 16.87s/batch, batch_loss=33.8, batch

Validation:  23%|▏| 173/743 [46:30<2:39:34, 16.80s/batch, batch_loss=33.8, batch

Validation:  23%|▏| 173/743 [46:46<2:39:34, 16.80s/batch, batch_loss=20.3, batch

Validation:  23%|▏| 174/743 [46:46<2:38:12, 16.68s/batch, batch_loss=20.3, batch

Validation:  23%|▏| 174/743 [47:02<2:38:12, 16.68s/batch, batch_loss=37.9, batch

Validation:  24%|▏| 175/743 [47:02<2:35:57, 16.47s/batch, batch_loss=37.9, batch

Validation:  24%|▏| 175/743 [47:18<2:35:57, 16.47s/batch, batch_loss=29.5, batch

Validation:  24%|▏| 176/743 [47:18<2:34:17, 16.33s/batch, batch_loss=29.5, batch

Validation:  24%|▏| 176/743 [47:36<2:34:17, 16.33s/batch, batch_loss=21.9, batch

Validation:  24%|▏| 177/743 [47:36<2:38:27, 16.80s/batch, batch_loss=21.9, batch

Validation:  24%|▏| 177/743 [47:53<2:38:27, 16.80s/batch, batch_loss=33.4, batch

Validation:  24%|▏| 178/743 [47:53<2:37:32, 16.73s/batch, batch_loss=33.4, batch

Validation:  24%|▏| 178/743 [48:10<2:37:32, 16.73s/batch, batch_loss=30.6, batch

Validation:  24%|▏| 179/743 [48:10<2:38:23, 16.85s/batch, batch_loss=30.6, batch

Validation:  24%|▏| 179/743 [48:26<2:38:23, 16.85s/batch, batch_loss=7.25e+3, ba

Validation:  24%|▏| 180/743 [48:26<2:34:28, 16.46s/batch, batch_loss=7.25e+3, ba

Validation:  24%|▏| 180/743 [48:41<2:34:28, 16.46s/batch, batch_loss=23.8, batch

Validation:  24%|▏| 181/743 [48:41<2:31:42, 16.20s/batch, batch_loss=23.8, batch

Validation:  24%|▏| 181/743 [49:01<2:31:42, 16.20s/batch, batch_loss=35.9, batch

Validation:  24%|▏| 182/743 [49:01<2:41:05, 17.23s/batch, batch_loss=35.9, batch

Validation:  24%|▏| 182/743 [49:17<2:41:05, 17.23s/batch, batch_loss=25.6, batch

Validation:  25%|▏| 183/743 [49:17<2:37:22, 16.86s/batch, batch_loss=25.6, batch

Validation:  25%|▏| 183/743 [49:33<2:37:22, 16.86s/batch, batch_loss=16.2, batch

Validation:  25%|▏| 184/743 [49:33<2:34:31, 16.59s/batch, batch_loss=16.2, batch

Validation:  25%|▏| 184/743 [49:49<2:34:31, 16.59s/batch, batch_loss=22.7, batch

Validation:  25%|▏| 185/743 [49:49<2:32:51, 16.44s/batch, batch_loss=22.7, batch

Validation:  25%|▏| 185/743 [50:05<2:32:51, 16.44s/batch, batch_loss=36.1, batch

Validation:  25%|▎| 186/743 [50:05<2:31:39, 16.34s/batch, batch_loss=36.1, batch

Validation:  25%|▎| 186/743 [50:21<2:31:39, 16.34s/batch, batch_loss=33.3, batch

Validation:  25%|▎| 187/743 [50:21<2:29:27, 16.13s/batch, batch_loss=33.3, batch

Validation:  25%|▎| 187/743 [50:36<2:29:27, 16.13s/batch, batch_loss=19.7, batch

Validation:  25%|▎| 188/743 [50:36<2:25:54, 15.77s/batch, batch_loss=19.7, batch

Validation:  25%|▎| 188/743 [50:52<2:25:54, 15.77s/batch, batch_loss=21.4, batch

Validation:  25%|▎| 189/743 [50:52<2:27:14, 15.95s/batch, batch_loss=21.4, batch

Validation:  25%|▎| 189/743 [51:08<2:27:14, 15.95s/batch, batch_loss=995, batch_

Validation:  26%|▎| 190/743 [51:08<2:27:30, 16.01s/batch, batch_loss=995, batch_

Validation:  26%|▎| 190/743 [51:23<2:27:30, 16.01s/batch, batch_loss=28.5, batch

Validation:  26%|▎| 191/743 [51:23<2:23:32, 15.60s/batch, batch_loss=28.5, batch

Validation:  26%|▎| 191/743 [51:39<2:23:32, 15.60s/batch, batch_loss=23.4, batch

Validation:  26%|▎| 192/743 [51:39<2:24:37, 15.75s/batch, batch_loss=23.4, batch

Validation:  26%|▎| 192/743 [51:55<2:24:37, 15.75s/batch, batch_loss=25.1, batch

Validation:  26%|▎| 193/743 [51:55<2:26:51, 16.02s/batch, batch_loss=25.1, batch

Validation:  26%|▎| 193/743 [52:11<2:26:51, 16.02s/batch, batch_loss=23.7, batch

Validation:  26%|▎| 194/743 [52:11<2:26:25, 16.00s/batch, batch_loss=23.7, batch

Validation:  26%|▎| 194/743 [52:28<2:26:25, 16.00s/batch, batch_loss=24.3, batch

Validation:  26%|▎| 195/743 [52:28<2:27:12, 16.12s/batch, batch_loss=24.3, batch

Validation:  26%|▎| 195/743 [52:43<2:27:12, 16.12s/batch, batch_loss=26, batch_i

Validation:  26%|▎| 196/743 [52:43<2:25:39, 15.98s/batch, batch_loss=26, batch_i

Validation:  26%|▎| 196/743 [52:59<2:25:39, 15.98s/batch, batch_loss=12.5, batch

Validation:  27%|▎| 197/743 [52:59<2:23:09, 15.73s/batch, batch_loss=12.5, batch

Validation:  27%|▎| 197/743 [53:13<2:23:09, 15.73s/batch, batch_loss=22.5, batch

Validation:  27%|▎| 198/743 [53:13<2:20:34, 15.48s/batch, batch_loss=22.5, batch

Validation:  27%|▎| 198/743 [53:28<2:20:34, 15.48s/batch, batch_loss=23.5, batch

Validation:  27%|▎| 199/743 [53:28<2:18:58, 15.33s/batch, batch_loss=23.5, batch

Validation:  27%|▎| 199/743 [53:43<2:18:58, 15.33s/batch, batch_loss=304, batch_

Validation:  27%|▎| 200/743 [53:43<2:15:44, 15.00s/batch, batch_loss=304, batch_

Validation:  27%|▎| 200/743 [53:58<2:15:44, 15.00s/batch, batch_loss=65.2, batch

Validation:  27%|▎| 201/743 [53:58<2:17:02, 15.17s/batch, batch_loss=65.2, batch

Validation:  27%|▎| 201/743 [54:14<2:17:02, 15.17s/batch, batch_loss=26.3, batch

Validation:  27%|▎| 202/743 [54:14<2:19:37, 15.48s/batch, batch_loss=26.3, batch

Validation:  27%|▎| 202/743 [54:31<2:19:37, 15.48s/batch, batch_loss=34.3, batch

Validation:  27%|▎| 203/743 [54:31<2:21:29, 15.72s/batch, batch_loss=34.3, batch

Validation:  27%|▎| 203/743 [54:45<2:21:29, 15.72s/batch, batch_loss=23.4, batch

Validation:  27%|▎| 204/743 [54:45<2:18:29, 15.42s/batch, batch_loss=23.4, batch

Validation:  27%|▎| 204/743 [55:00<2:18:29, 15.42s/batch, batch_loss=22.4, batch

Validation:  28%|▎| 205/743 [55:00<2:15:40, 15.13s/batch, batch_loss=22.4, batch

Validation:  28%|▎| 205/743 [55:17<2:15:40, 15.13s/batch, batch_loss=15.1, batch

Validation:  28%|▎| 206/743 [55:17<2:20:58, 15.75s/batch, batch_loss=15.1, batch

Validation:  28%|▎| 206/743 [55:32<2:20:58, 15.75s/batch, batch_loss=25.1, batch

Validation:  28%|▎| 207/743 [55:32<2:19:02, 15.56s/batch, batch_loss=25.1, batch

Validation:  28%|▎| 207/743 [55:48<2:19:02, 15.56s/batch, batch_loss=27.1, batch

Validation:  28%|▎| 208/743 [55:48<2:19:00, 15.59s/batch, batch_loss=27.1, batch

Validation:  28%|▎| 208/743 [56:04<2:19:00, 15.59s/batch, batch_loss=12.8, batch

Validation:  28%|▎| 209/743 [56:04<2:19:24, 15.66s/batch, batch_loss=12.8, batch

Validation:  28%|▎| 209/743 [56:20<2:19:24, 15.66s/batch, batch_loss=14.9, batch

Validation:  28%|▎| 210/743 [56:20<2:19:33, 15.71s/batch, batch_loss=14.9, batch

Validation:  28%|▎| 210/743 [56:35<2:19:33, 15.71s/batch, batch_loss=21.6, batch

Validation:  28%|▎| 211/743 [56:35<2:19:48, 15.77s/batch, batch_loss=21.6, batch

Validation:  28%|▎| 211/743 [56:52<2:19:48, 15.77s/batch, batch_loss=20.2, batch

Validation:  29%|▎| 212/743 [56:52<2:20:28, 15.87s/batch, batch_loss=20.2, batch

Validation:  29%|▎| 212/743 [57:11<2:20:28, 15.87s/batch, batch_loss=549, batch_

Validation:  29%|▎| 213/743 [57:11<2:30:43, 17.06s/batch, batch_loss=549, batch_

Validation:  29%|▎| 213/743 [57:27<2:30:43, 17.06s/batch, batch_loss=17.9, batch

Validation:  29%|▎| 214/743 [57:27<2:25:29, 16.50s/batch, batch_loss=17.9, batch

Validation:  29%|▎| 214/743 [57:43<2:25:29, 16.50s/batch, batch_loss=32.2, batch

Validation:  29%|▎| 215/743 [57:43<2:24:53, 16.46s/batch, batch_loss=32.2, batch

Validation:  29%|▎| 215/743 [57:58<2:24:53, 16.46s/batch, batch_loss=2.58e+3, ba

Validation:  29%|▎| 216/743 [57:58<2:21:41, 16.13s/batch, batch_loss=2.58e+3, ba

Validation:  29%|▎| 216/743 [58:13<2:21:41, 16.13s/batch, batch_loss=23.7, batch

Validation:  29%|▎| 217/743 [58:13<2:18:40, 15.82s/batch, batch_loss=23.7, batch

Validation:  29%|▎| 217/743 [58:28<2:18:40, 15.82s/batch, batch_loss=19.9, batch

Validation:  29%|▎| 218/743 [58:28<2:15:37, 15.50s/batch, batch_loss=19.9, batch

Validation:  29%|▎| 218/743 [58:43<2:15:37, 15.50s/batch, batch_loss=34.7, batch

Validation:  29%|▎| 219/743 [58:43<2:13:10, 15.25s/batch, batch_loss=34.7, batch

Validation:  29%|▎| 219/743 [58:57<2:13:10, 15.25s/batch, batch_loss=42.5, batch

Validation:  30%|▎| 220/743 [58:57<2:11:12, 15.05s/batch, batch_loss=42.5, batch

Validation:  30%|▎| 220/743 [59:14<2:11:12, 15.05s/batch, batch_loss=28.4, batch

Validation:  30%|▎| 221/743 [59:14<2:15:30, 15.58s/batch, batch_loss=28.4, batch

Validation:  30%|▎| 221/743 [59:28<2:15:30, 15.58s/batch, batch_loss=25, batch_i

Validation:  30%|▎| 222/743 [59:28<2:10:41, 15.05s/batch, batch_loss=25, batch_i

Validation:  30%|▎| 222/743 [59:42<2:10:41, 15.05s/batch, batch_loss=18.3, batch

Validation:  30%|▎| 223/743 [59:42<2:08:22, 14.81s/batch, batch_loss=18.3, batch

Validation:  30%|▎| 223/743 [59:56<2:08:22, 14.81s/batch, batch_loss=14.5, batch

Validation:  30%|▎| 224/743 [59:56<2:06:24, 14.61s/batch, batch_loss=14.5, batch

Validation:  30%|▎| 224/743 [1:00:11<2:06:24, 14.61s/batch, batch_loss=4.92e+3, 

Validation:  30%|▎| 225/743 [1:00:11<2:05:25, 14.53s/batch, batch_loss=4.92e+3, 

Validation:  30%|▎| 225/743 [1:00:25<2:05:25, 14.53s/batch, batch_loss=28.3, bat

Validation:  30%|▎| 226/743 [1:00:25<2:03:46, 14.36s/batch, batch_loss=28.3, bat

Validation:  30%|▎| 226/743 [1:00:39<2:03:46, 14.36s/batch, batch_loss=23.6, bat

Validation:  31%|▎| 227/743 [1:00:39<2:02:52, 14.29s/batch, batch_loss=23.6, bat

Validation:  31%|▎| 227/743 [1:00:53<2:02:52, 14.29s/batch, batch_loss=28.9, bat

Validation:  31%|▎| 228/743 [1:00:53<2:02:04, 14.22s/batch, batch_loss=28.9, bat

Validation:  31%|▎| 228/743 [1:01:07<2:02:04, 14.22s/batch, batch_loss=23.7, bat

Validation:  31%|▎| 229/743 [1:01:07<2:02:12, 14.27s/batch, batch_loss=23.7, bat

Validation:  31%|▎| 229/743 [1:01:20<2:02:12, 14.27s/batch, batch_loss=27.6, bat

Validation:  31%|▎| 230/743 [1:01:20<1:59:16, 13.95s/batch, batch_loss=27.6, bat

Validation:  31%|▎| 230/743 [1:01:34<1:59:16, 13.95s/batch, batch_loss=3.22e+4, 

Validation:  31%|▎| 231/743 [1:01:34<1:58:43, 13.91s/batch, batch_loss=3.22e+4, 

Validation:  31%|▎| 231/743 [1:01:48<1:58:43, 13.91s/batch, batch_loss=25.5, bat

Validation:  31%|▎| 232/743 [1:01:48<1:59:02, 13.98s/batch, batch_loss=25.5, bat

Validation:  31%|▎| 232/743 [1:02:03<1:59:02, 13.98s/batch, batch_loss=16.3, bat

Validation:  31%|▎| 233/743 [1:02:03<2:00:49, 14.22s/batch, batch_loss=16.3, bat

Validation:  31%|▎| 233/743 [1:02:18<2:00:49, 14.22s/batch, batch_loss=18.4, bat

Validation:  31%|▎| 234/743 [1:02:18<2:01:15, 14.29s/batch, batch_loss=18.4, bat

Validation:  31%|▎| 234/743 [1:02:32<2:01:15, 14.29s/batch, batch_loss=21.9, bat

Validation:  32%|▎| 235/743 [1:02:32<2:02:03, 14.42s/batch, batch_loss=21.9, bat

Validation:  32%|▎| 235/743 [1:02:51<2:02:03, 14.42s/batch, batch_loss=4.21, bat

Validation:  32%|▎| 236/743 [1:02:51<2:13:14, 15.77s/batch, batch_loss=4.21, bat

Validation:  32%|▎| 236/743 [1:03:06<2:13:14, 15.77s/batch, batch_loss=26.9, bat

Validation:  32%|▎| 237/743 [1:03:06<2:09:00, 15.30s/batch, batch_loss=26.9, bat

Validation:  32%|▎| 237/743 [1:03:20<2:09:00, 15.30s/batch, batch_loss=22.8, bat

Validation:  32%|▎| 238/743 [1:03:20<2:05:44, 14.94s/batch, batch_loss=22.8, bat

Validation:  32%|▎| 238/743 [1:03:34<2:05:44, 14.94s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:03:34<2:04:01, 14.76s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:03:47<2:04:01, 14.76s/batch, batch_loss=31.9, bat

Validation:  32%|▎| 240/743 [1:03:47<2:00:30, 14.38s/batch, batch_loss=31.9, bat

Validation:  32%|▎| 240/743 [1:04:00<2:00:30, 14.38s/batch, batch_loss=23.6, bat

Validation:  32%|▎| 241/743 [1:04:00<1:55:45, 13.84s/batch, batch_loss=23.6, bat

Validation:  32%|▎| 241/743 [1:04:12<1:55:45, 13.84s/batch, batch_loss=243, batc

Validation:  33%|▎| 242/743 [1:04:12<1:51:47, 13.39s/batch, batch_loss=243, batc

Validation:  33%|▎| 242/743 [1:04:25<1:51:47, 13.39s/batch, batch_loss=17.2, bat

Validation:  33%|▎| 243/743 [1:04:25<1:49:27, 13.14s/batch, batch_loss=17.2, bat

Validation:  33%|▎| 243/743 [1:04:38<1:49:27, 13.14s/batch, batch_loss=24.2, bat

Validation:  33%|▎| 244/743 [1:04:38<1:49:00, 13.11s/batch, batch_loss=24.2, bat

Validation:  33%|▎| 244/743 [1:04:52<1:49:00, 13.11s/batch, batch_loss=25.7, bat

Validation:  33%|▎| 245/743 [1:04:52<1:51:36, 13.45s/batch, batch_loss=25.7, bat

Validation:  33%|▎| 245/743 [1:05:17<1:51:36, 13.45s/batch, batch_loss=8.39, bat

Validation:  33%|▎| 246/743 [1:05:17<2:20:49, 17.00s/batch, batch_loss=8.39, bat

Validation:  33%|▎| 246/743 [1:05:35<2:20:49, 17.00s/batch, batch_loss=25, batch

Validation:  33%|▎| 247/743 [1:05:35<2:21:08, 17.07s/batch, batch_loss=25, batch

Validation:  33%|▎| 247/743 [1:05:49<2:21:08, 17.07s/batch, batch_loss=59, batch

Validation:  33%|▎| 248/743 [1:05:49<2:13:41, 16.20s/batch, batch_loss=59, batch

Validation:  33%|▎| 248/743 [1:06:03<2:13:41, 16.20s/batch, batch_loss=17.4, bat

Validation:  34%|▎| 249/743 [1:06:03<2:08:41, 15.63s/batch, batch_loss=17.4, bat

Validation:  34%|▎| 249/743 [1:06:17<2:08:41, 15.63s/batch, batch_loss=31.2, bat

Validation:  34%|▎| 250/743 [1:06:17<2:04:36, 15.17s/batch, batch_loss=31.2, bat

Validation:  34%|▎| 250/743 [1:06:31<2:04:36, 15.17s/batch, batch_loss=25.9, bat

Validation:  34%|▎| 251/743 [1:06:31<2:01:02, 14.76s/batch, batch_loss=25.9, bat

Validation:  34%|▎| 251/743 [1:06:46<2:01:02, 14.76s/batch, batch_loss=28.6, bat

Validation:  34%|▎| 252/743 [1:06:46<2:00:19, 14.70s/batch, batch_loss=28.6, bat

Validation:  34%|▎| 252/743 [1:06:59<2:00:19, 14.70s/batch, batch_loss=53.3, bat

Validation:  34%|▎| 253/743 [1:06:59<1:57:20, 14.37s/batch, batch_loss=53.3, bat

Validation:  34%|▎| 253/743 [1:07:16<1:57:20, 14.37s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:07:16<2:02:28, 15.03s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:07:29<2:02:28, 15.03s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:07:29<1:58:44, 14.60s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:07:44<1:58:44, 14.60s/batch, batch_loss=23.4, bat

Validation:  34%|▎| 256/743 [1:07:44<1:57:31, 14.48s/batch, batch_loss=23.4, bat

Validation:  34%|▎| 256/743 [1:07:58<1:57:31, 14.48s/batch, batch_loss=32.1, bat

Validation:  35%|▎| 257/743 [1:07:58<1:57:18, 14.48s/batch, batch_loss=32.1, bat

Validation:  35%|▎| 257/743 [1:08:12<1:57:18, 14.48s/batch, batch_loss=19.3, bat

Validation:  35%|▎| 258/743 [1:08:12<1:55:22, 14.27s/batch, batch_loss=19.3, bat

Validation:  35%|▎| 258/743 [1:08:26<1:55:22, 14.27s/batch, batch_loss=5.96, bat

Validation:  35%|▎| 259/743 [1:08:26<1:55:46, 14.35s/batch, batch_loss=5.96, bat

Validation:  35%|▎| 259/743 [1:08:41<1:55:46, 14.35s/batch, batch_loss=4.32, bat

Validation:  35%|▎| 260/743 [1:08:41<1:54:56, 14.28s/batch, batch_loss=4.32, bat

Validation:  35%|▎| 260/743 [1:08:54<1:54:56, 14.28s/batch, batch_loss=9.19, bat

Validation:  35%|▎| 261/743 [1:08:54<1:53:25, 14.12s/batch, batch_loss=9.19, bat

Validation:  35%|▎| 261/743 [1:09:11<1:53:25, 14.12s/batch, batch_loss=41.4, bat

Validation:  35%|▎| 262/743 [1:09:11<1:59:18, 14.88s/batch, batch_loss=41.4, bat

Validation:  35%|▎| 262/743 [1:09:25<1:59:18, 14.88s/batch, batch_loss=2.69e+3, 

Validation:  35%|▎| 263/743 [1:09:25<1:57:59, 14.75s/batch, batch_loss=2.69e+3, 

Validation:  35%|▎| 263/743 [1:09:40<1:57:59, 14.75s/batch, batch_loss=13.9, bat

Validation:  36%|▎| 264/743 [1:09:40<1:58:20, 14.82s/batch, batch_loss=13.9, bat

Validation:  36%|▎| 264/743 [1:09:55<1:58:20, 14.82s/batch, batch_loss=26.2, bat

Validation:  36%|▎| 265/743 [1:09:55<1:56:48, 14.66s/batch, batch_loss=26.2, bat

Validation:  36%|▎| 265/743 [1:10:08<1:56:48, 14.66s/batch, batch_loss=36.8, bat

Validation:  36%|▎| 266/743 [1:10:08<1:53:56, 14.33s/batch, batch_loss=36.8, bat

Validation:  36%|▎| 266/743 [1:10:22<1:53:56, 14.33s/batch, batch_loss=51.6, bat

Validation:  36%|▎| 267/743 [1:10:22<1:52:15, 14.15s/batch, batch_loss=51.6, bat

Validation:  36%|▎| 267/743 [1:10:36<1:52:15, 14.15s/batch, batch_loss=2.99e+3, 

Validation:  36%|▎| 268/743 [1:10:36<1:51:51, 14.13s/batch, batch_loss=2.99e+3, 

Validation:  36%|▎| 268/743 [1:10:50<1:51:51, 14.13s/batch, batch_loss=44.6, bat

Validation:  36%|▎| 269/743 [1:10:50<1:51:16, 14.09s/batch, batch_loss=44.6, bat

Validation:  36%|▎| 269/743 [1:11:04<1:51:16, 14.09s/batch, batch_loss=30.9, bat

Validation:  36%|▎| 270/743 [1:11:04<1:51:20, 14.12s/batch, batch_loss=30.9, bat

Validation:  36%|▎| 270/743 [1:11:21<1:51:20, 14.12s/batch, batch_loss=58.7, bat

Validation:  36%|▎| 271/743 [1:11:21<1:58:21, 15.05s/batch, batch_loss=58.7, bat

Validation:  36%|▎| 271/743 [1:11:36<1:58:21, 15.05s/batch, batch_loss=1.06e+3, 

Validation:  37%|▎| 272/743 [1:11:36<1:56:21, 14.82s/batch, batch_loss=1.06e+3, 

Validation:  37%|▎| 272/743 [1:11:50<1:56:21, 14.82s/batch, batch_loss=27.8, bat

Validation:  37%|▎| 273/743 [1:11:50<1:53:46, 14.52s/batch, batch_loss=27.8, bat

Validation:  37%|▎| 273/743 [1:12:03<1:53:46, 14.52s/batch, batch_loss=31, batch

Validation:  37%|▎| 274/743 [1:12:03<1:52:09, 14.35s/batch, batch_loss=31, batch

Validation:  37%|▎| 274/743 [1:12:18<1:52:09, 14.35s/batch, batch_loss=21.3, bat

Validation:  37%|▎| 275/743 [1:12:18<1:51:51, 14.34s/batch, batch_loss=21.3, bat

Validation:  37%|▎| 275/743 [1:12:33<1:51:51, 14.34s/batch, batch_loss=27.5, bat

Validation:  37%|▎| 276/743 [1:12:33<1:52:35, 14.46s/batch, batch_loss=27.5, bat

Validation:  37%|▎| 276/743 [1:12:47<1:52:35, 14.46s/batch, batch_loss=30.4, bat

Validation:  37%|▎| 277/743 [1:12:47<1:51:51, 14.40s/batch, batch_loss=30.4, bat

Validation:  37%|▎| 277/743 [1:13:02<1:51:51, 14.40s/batch, batch_loss=32, batch

Validation:  37%|▎| 278/743 [1:13:02<1:53:37, 14.66s/batch, batch_loss=32, batch

Validation:  37%|▎| 278/743 [1:13:16<1:53:37, 14.66s/batch, batch_loss=9.49, bat

Validation:  38%|▍| 279/743 [1:13:16<1:52:26, 14.54s/batch, batch_loss=9.49, bat

Validation:  38%|▍| 279/743 [1:13:30<1:52:26, 14.54s/batch, batch_loss=16.5, bat

Validation:  38%|▍| 280/743 [1:13:30<1:50:28, 14.32s/batch, batch_loss=16.5, bat

Validation:  38%|▍| 280/743 [1:13:45<1:50:28, 14.32s/batch, batch_loss=26.6, bat

Validation:  38%|▍| 281/743 [1:13:45<1:50:29, 14.35s/batch, batch_loss=26.6, bat

Validation:  38%|▍| 281/743 [1:13:58<1:50:29, 14.35s/batch, batch_loss=49.1, bat

Validation:  38%|▍| 282/743 [1:13:58<1:47:55, 14.05s/batch, batch_loss=49.1, bat

Validation:  38%|▍| 282/743 [1:14:13<1:47:55, 14.05s/batch, batch_loss=26, batch

Validation:  38%|▍| 283/743 [1:14:13<1:49:46, 14.32s/batch, batch_loss=26, batch

Validation:  38%|▍| 283/743 [1:14:27<1:49:46, 14.32s/batch, batch_loss=25.3, bat

Validation:  38%|▍| 284/743 [1:14:27<1:49:40, 14.34s/batch, batch_loss=25.3, bat

Validation:  38%|▍| 284/743 [1:14:42<1:49:40, 14.34s/batch, batch_loss=18.9, bat

Validation:  38%|▍| 285/743 [1:14:42<1:50:20, 14.46s/batch, batch_loss=18.9, bat

Validation:  38%|▍| 285/743 [1:14:57<1:50:20, 14.46s/batch, batch_loss=26.7, bat

Validation:  38%|▍| 286/743 [1:14:57<1:50:39, 14.53s/batch, batch_loss=26.7, bat

Validation:  38%|▍| 286/743 [1:15:13<1:50:39, 14.53s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:15:13<1:53:22, 14.92s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:15:28<1:53:22, 14.92s/batch, batch_loss=35.9, bat

Validation:  39%|▍| 288/743 [1:15:28<1:54:44, 15.13s/batch, batch_loss=35.9, bat

Validation:  39%|▍| 288/743 [1:15:42<1:54:44, 15.13s/batch, batch_loss=34.3, bat

Validation:  39%|▍| 289/743 [1:15:42<1:52:18, 14.84s/batch, batch_loss=34.3, bat

Validation:  39%|▍| 289/743 [1:15:56<1:52:18, 14.84s/batch, batch_loss=493, batc

Validation:  39%|▍| 290/743 [1:15:56<1:50:02, 14.57s/batch, batch_loss=493, batc

Validation:  39%|▍| 290/743 [1:16:10<1:50:02, 14.57s/batch, batch_loss=1.52e+3, 

Validation:  39%|▍| 291/743 [1:16:10<1:47:37, 14.29s/batch, batch_loss=1.52e+3, 

Validation:  39%|▍| 291/743 [1:16:24<1:47:37, 14.29s/batch, batch_loss=1.21e+3, 

Validation:  39%|▍| 292/743 [1:16:24<1:47:12, 14.26s/batch, batch_loss=1.21e+3, 

Validation:  39%|▍| 292/743 [1:16:38<1:47:12, 14.26s/batch, batch_loss=39, batch

Validation:  39%|▍| 293/743 [1:16:38<1:45:41, 14.09s/batch, batch_loss=39, batch

Validation:  39%|▍| 293/743 [1:16:52<1:45:41, 14.09s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:16:52<1:46:44, 14.26s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:17:06<1:46:44, 14.26s/batch, batch_loss=30.8, bat

Validation:  40%|▍| 295/743 [1:17:06<1:45:50, 14.17s/batch, batch_loss=30.8, bat

Validation:  40%|▍| 295/743 [1:17:20<1:45:50, 14.17s/batch, batch_loss=20.1, bat

Validation:  40%|▍| 296/743 [1:17:20<1:43:34, 13.90s/batch, batch_loss=20.1, bat

Validation:  40%|▍| 296/743 [1:17:34<1:43:34, 13.90s/batch, batch_loss=17.3, bat

Validation:  40%|▍| 297/743 [1:17:34<1:43:18, 13.90s/batch, batch_loss=17.3, bat

Validation:  40%|▍| 297/743 [1:17:47<1:43:18, 13.90s/batch, batch_loss=33.5, bat

Validation:  40%|▍| 298/743 [1:17:47<1:42:59, 13.89s/batch, batch_loss=33.5, bat

Validation:  40%|▍| 298/743 [1:18:01<1:42:59, 13.89s/batch, batch_loss=47, batch

Validation:  40%|▍| 299/743 [1:18:01<1:42:08, 13.80s/batch, batch_loss=47, batch

Validation:  40%|▍| 299/743 [1:18:15<1:42:08, 13.80s/batch, batch_loss=39.5, bat

Validation:  40%|▍| 300/743 [1:18:15<1:41:52, 13.80s/batch, batch_loss=39.5, bat

Validation:  40%|▍| 300/743 [1:18:28<1:41:52, 13.80s/batch, batch_loss=855, batc

Validation:  41%|▍| 301/743 [1:18:28<1:41:06, 13.73s/batch, batch_loss=855, batc

Validation:  41%|▍| 301/743 [1:18:42<1:41:06, 13.73s/batch, batch_loss=16.8, bat

Validation:  41%|▍| 302/743 [1:18:42<1:40:50, 13.72s/batch, batch_loss=16.8, bat

Validation:  41%|▍| 302/743 [1:18:56<1:40:50, 13.72s/batch, batch_loss=26.1, bat

Validation:  41%|▍| 303/743 [1:18:56<1:41:16, 13.81s/batch, batch_loss=26.1, bat

Validation:  41%|▍| 303/743 [1:19:13<1:41:16, 13.81s/batch, batch_loss=25.1, bat

Validation:  41%|▍| 304/743 [1:19:13<1:47:33, 14.70s/batch, batch_loss=25.1, bat

Validation:  41%|▍| 304/743 [1:19:26<1:47:33, 14.70s/batch, batch_loss=18.9, bat

Validation:  41%|▍| 305/743 [1:19:26<1:44:38, 14.33s/batch, batch_loss=18.9, bat

Validation:  41%|▍| 305/743 [1:19:40<1:44:38, 14.33s/batch, batch_loss=30.6, bat

Validation:  41%|▍| 306/743 [1:19:40<1:43:11, 14.17s/batch, batch_loss=30.6, bat

Validation:  41%|▍| 306/743 [1:19:55<1:43:11, 14.17s/batch, batch_loss=39.9, bat

Validation:  41%|▍| 307/743 [1:19:55<1:43:33, 14.25s/batch, batch_loss=39.9, bat

Validation:  41%|▍| 307/743 [1:20:09<1:43:33, 14.25s/batch, batch_loss=921, batc

Validation:  41%|▍| 308/743 [1:20:09<1:43:08, 14.23s/batch, batch_loss=921, batc

Validation:  41%|▍| 308/743 [1:20:23<1:43:08, 14.23s/batch, batch_loss=50.8, bat

Validation:  42%|▍| 309/743 [1:20:23<1:42:03, 14.11s/batch, batch_loss=50.8, bat

Validation:  42%|▍| 309/743 [1:20:37<1:42:03, 14.11s/batch, batch_loss=21.8, bat

Validation:  42%|▍| 310/743 [1:20:37<1:43:02, 14.28s/batch, batch_loss=21.8, bat

Validation:  42%|▍| 310/743 [1:20:54<1:43:02, 14.28s/batch, batch_loss=25.3, bat

Validation:  42%|▍| 311/743 [1:20:54<1:48:05, 15.01s/batch, batch_loss=25.3, bat

Validation:  42%|▍| 311/743 [1:21:08<1:48:05, 15.01s/batch, batch_loss=25.7, bat

Validation:  42%|▍| 312/743 [1:21:08<1:45:49, 14.73s/batch, batch_loss=25.7, bat

Validation:  42%|▍| 312/743 [1:21:23<1:45:49, 14.73s/batch, batch_loss=12, batch

Validation:  42%|▍| 313/743 [1:21:23<1:45:43, 14.75s/batch, batch_loss=12, batch

Validation:  42%|▍| 313/743 [1:21:37<1:45:43, 14.75s/batch, batch_loss=11.8, bat

Validation:  42%|▍| 314/743 [1:21:37<1:45:10, 14.71s/batch, batch_loss=11.8, bat

Validation:  42%|▍| 314/743 [1:21:52<1:45:10, 14.71s/batch, batch_loss=29.4, bat

Validation:  42%|▍| 315/743 [1:21:52<1:44:32, 14.66s/batch, batch_loss=29.4, bat

Validation:  42%|▍| 315/743 [1:22:06<1:44:32, 14.66s/batch, batch_loss=24, batch

Validation:  43%|▍| 316/743 [1:22:06<1:42:25, 14.39s/batch, batch_loss=24, batch

Validation:  43%|▍| 316/743 [1:22:19<1:42:25, 14.39s/batch, batch_loss=27.6, bat

Validation:  43%|▍| 317/743 [1:22:19<1:40:44, 14.19s/batch, batch_loss=27.6, bat

Validation:  43%|▍| 317/743 [1:22:33<1:40:44, 14.19s/batch, batch_loss=32.2, bat

Validation:  43%|▍| 318/743 [1:22:33<1:39:35, 14.06s/batch, batch_loss=32.2, bat

Validation:  43%|▍| 318/743 [1:22:46<1:39:35, 14.06s/batch, batch_loss=30.8, bat

Validation:  43%|▍| 319/743 [1:22:46<1:37:08, 13.75s/batch, batch_loss=30.8, bat

Validation:  43%|▍| 319/743 [1:23:01<1:37:08, 13.75s/batch, batch_loss=33.5, bat

Validation:  43%|▍| 320/743 [1:23:01<1:39:22, 14.09s/batch, batch_loss=33.5, bat

Validation:  43%|▍| 320/743 [1:23:15<1:39:22, 14.09s/batch, batch_loss=24.5, bat

Validation:  43%|▍| 321/743 [1:23:15<1:38:40, 14.03s/batch, batch_loss=24.5, bat

Validation:  43%|▍| 321/743 [1:23:29<1:38:40, 14.03s/batch, batch_loss=30.8, bat

Validation:  43%|▍| 322/743 [1:23:29<1:38:53, 14.09s/batch, batch_loss=30.8, bat

Validation:  43%|▍| 322/743 [1:23:44<1:38:53, 14.09s/batch, batch_loss=27.8, bat

Validation:  43%|▍| 323/743 [1:23:44<1:39:48, 14.26s/batch, batch_loss=27.8, bat

Validation:  43%|▍| 323/743 [1:23:59<1:39:48, 14.26s/batch, batch_loss=304, batc

Validation:  44%|▍| 324/743 [1:23:59<1:41:02, 14.47s/batch, batch_loss=304, batc

Validation:  44%|▍| 324/743 [1:24:13<1:41:02, 14.47s/batch, batch_loss=34.5, bat

Validation:  44%|▍| 325/743 [1:24:13<1:39:48, 14.33s/batch, batch_loss=34.5, bat

Validation:  44%|▍| 325/743 [1:24:28<1:39:48, 14.33s/batch, batch_loss=33.9, bat

Validation:  44%|▍| 326/743 [1:24:28<1:40:21, 14.44s/batch, batch_loss=33.9, bat

Validation:  44%|▍| 326/743 [1:24:42<1:40:21, 14.44s/batch, batch_loss=24.3, bat

Validation:  44%|▍| 327/743 [1:24:42<1:39:04, 14.29s/batch, batch_loss=24.3, bat

Validation:  44%|▍| 327/743 [1:24:56<1:39:04, 14.29s/batch, batch_loss=26.1, bat

Validation:  44%|▍| 328/743 [1:24:56<1:39:11, 14.34s/batch, batch_loss=26.1, bat

Validation:  44%|▍| 328/743 [1:25:11<1:39:11, 14.34s/batch, batch_loss=17.2, bat

Validation:  44%|▍| 329/743 [1:25:11<1:39:54, 14.48s/batch, batch_loss=17.2, bat

Validation:  44%|▍| 329/743 [1:25:27<1:39:54, 14.48s/batch, batch_loss=26.8, bat

Validation:  44%|▍| 330/743 [1:25:27<1:44:07, 15.13s/batch, batch_loss=26.8, bat

Validation:  44%|▍| 330/743 [1:25:42<1:44:07, 15.13s/batch, batch_loss=39.2, bat

Validation:  45%|▍| 331/743 [1:25:42<1:41:51, 14.83s/batch, batch_loss=39.2, bat

Validation:  45%|▍| 331/743 [1:25:57<1:41:51, 14.83s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:25:57<1:41:51, 14.87s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:26:12<1:41:51, 14.87s/batch, batch_loss=34, batch

Validation:  45%|▍| 333/743 [1:26:12<1:42:00, 14.93s/batch, batch_loss=34, batch

Validation:  45%|▍| 333/743 [1:26:26<1:42:00, 14.93s/batch, batch_loss=34.1, bat

Validation:  45%|▍| 334/743 [1:26:26<1:40:12, 14.70s/batch, batch_loss=34.1, bat

Validation:  45%|▍| 334/743 [1:26:40<1:40:12, 14.70s/batch, batch_loss=36.5, bat

Validation:  45%|▍| 335/743 [1:26:40<1:39:00, 14.56s/batch, batch_loss=36.5, bat

Validation:  45%|▍| 335/743 [1:26:55<1:39:00, 14.56s/batch, batch_loss=20.5, bat

Validation:  45%|▍| 336/743 [1:26:55<1:39:10, 14.62s/batch, batch_loss=20.5, bat

Validation:  45%|▍| 336/743 [1:27:09<1:39:10, 14.62s/batch, batch_loss=22.7, bat

Validation:  45%|▍| 337/743 [1:27:09<1:37:51, 14.46s/batch, batch_loss=22.7, bat

Validation:  45%|▍| 337/743 [1:27:24<1:37:51, 14.46s/batch, batch_loss=58.1, bat

Validation:  45%|▍| 338/743 [1:27:24<1:38:24, 14.58s/batch, batch_loss=58.1, bat

Validation:  45%|▍| 338/743 [1:27:38<1:38:24, 14.58s/batch, batch_loss=52.6, bat

Validation:  46%|▍| 339/743 [1:27:38<1:37:58, 14.55s/batch, batch_loss=52.6, bat

Validation:  46%|▍| 339/743 [1:27:53<1:37:58, 14.55s/batch, batch_loss=51.9, bat

Validation:  46%|▍| 340/743 [1:27:53<1:39:13, 14.77s/batch, batch_loss=51.9, bat

Validation:  46%|▍| 340/743 [1:28:09<1:39:13, 14.77s/batch, batch_loss=30.8, bat

Validation:  46%|▍| 341/743 [1:28:09<1:39:44, 14.89s/batch, batch_loss=30.8, bat

Validation:  46%|▍| 341/743 [1:28:22<1:39:44, 14.89s/batch, batch_loss=35.7, bat

Validation:  46%|▍| 342/743 [1:28:22<1:37:01, 14.52s/batch, batch_loss=35.7, bat

Validation:  46%|▍| 342/743 [1:28:37<1:37:01, 14.52s/batch, batch_loss=53.8, bat

Validation:  46%|▍| 343/743 [1:28:37<1:36:20, 14.45s/batch, batch_loss=53.8, bat

Validation:  46%|▍| 343/743 [1:28:51<1:36:20, 14.45s/batch, batch_loss=35.9, bat

Validation:  46%|▍| 344/743 [1:28:51<1:35:49, 14.41s/batch, batch_loss=35.9, bat

Validation:  46%|▍| 344/743 [1:29:05<1:35:49, 14.41s/batch, batch_loss=25.4, bat

Validation:  46%|▍| 345/743 [1:29:05<1:34:37, 14.27s/batch, batch_loss=25.4, bat

Validation:  46%|▍| 345/743 [1:29:19<1:34:37, 14.27s/batch, batch_loss=33, batch

Validation:  47%|▍| 346/743 [1:29:19<1:33:57, 14.20s/batch, batch_loss=33, batch

Validation:  47%|▍| 346/743 [1:29:33<1:33:57, 14.20s/batch, batch_loss=36.4, bat

Validation:  47%|▍| 347/743 [1:29:33<1:33:57, 14.24s/batch, batch_loss=36.4, bat

Validation:  47%|▍| 347/743 [1:29:47<1:33:57, 14.24s/batch, batch_loss=46.6, bat

Validation:  47%|▍| 348/743 [1:29:47<1:32:33, 14.06s/batch, batch_loss=46.6, bat

Validation:  47%|▍| 348/743 [1:30:01<1:32:33, 14.06s/batch, batch_loss=37.6, bat

Validation:  47%|▍| 349/743 [1:30:01<1:32:23, 14.07s/batch, batch_loss=37.6, bat

Validation:  47%|▍| 349/743 [1:30:15<1:32:23, 14.07s/batch, batch_loss=37.5, bat

Validation:  47%|▍| 350/743 [1:30:15<1:32:41, 14.15s/batch, batch_loss=37.5, bat

Validation:  47%|▍| 350/743 [1:30:30<1:32:41, 14.15s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:30:30<1:33:02, 14.24s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:30:44<1:33:02, 14.24s/batch, batch_loss=41.8, bat

Validation:  47%|▍| 352/743 [1:30:44<1:32:35, 14.21s/batch, batch_loss=41.8, bat

Validation:  47%|▍| 352/743 [1:30:58<1:32:35, 14.21s/batch, batch_loss=19.6, bat

Validation:  48%|▍| 353/743 [1:30:58<1:32:34, 14.24s/batch, batch_loss=19.6, bat

Validation:  48%|▍| 353/743 [1:31:12<1:32:34, 14.24s/batch, batch_loss=36, batch

Validation:  48%|▍| 354/743 [1:31:12<1:31:58, 14.19s/batch, batch_loss=36, batch

Validation:  48%|▍| 354/743 [1:31:26<1:31:58, 14.19s/batch, batch_loss=38, batch

Validation:  48%|▍| 355/743 [1:31:26<1:31:38, 14.17s/batch, batch_loss=38, batch

Validation:  48%|▍| 355/743 [1:31:41<1:31:38, 14.17s/batch, batch_loss=75.3, bat

Validation:  48%|▍| 356/743 [1:31:41<1:32:54, 14.41s/batch, batch_loss=75.3, bat

Validation:  48%|▍| 356/743 [1:31:55<1:32:54, 14.41s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:31:55<1:31:48, 14.27s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:32:09<1:31:48, 14.27s/batch, batch_loss=25.7, bat

Validation:  48%|▍| 358/743 [1:32:09<1:31:00, 14.18s/batch, batch_loss=25.7, bat

Validation:  48%|▍| 358/743 [1:32:24<1:31:00, 14.18s/batch, batch_loss=17.7, bat

Validation:  48%|▍| 359/743 [1:32:24<1:32:05, 14.39s/batch, batch_loss=17.7, bat

Validation:  48%|▍| 359/743 [1:32:39<1:32:05, 14.39s/batch, batch_loss=33.4, bat

Validation:  48%|▍| 360/743 [1:32:39<1:32:53, 14.55s/batch, batch_loss=33.4, bat

Validation:  48%|▍| 360/743 [1:32:53<1:32:53, 14.55s/batch, batch_loss=24.8, bat

Validation:  49%|▍| 361/743 [1:32:53<1:31:11, 14.32s/batch, batch_loss=24.8, bat

Validation:  49%|▍| 361/743 [1:33:07<1:31:11, 14.32s/batch, batch_loss=18.4, bat

Validation:  49%|▍| 362/743 [1:33:07<1:30:47, 14.30s/batch, batch_loss=18.4, bat

Validation:  49%|▍| 362/743 [1:33:24<1:30:47, 14.30s/batch, batch_loss=57.4, bat

Validation:  49%|▍| 363/743 [1:33:24<1:35:00, 15.00s/batch, batch_loss=57.4, bat

Validation:  49%|▍| 363/743 [1:33:38<1:35:00, 15.00s/batch, batch_loss=29.1, bat

Validation:  49%|▍| 364/743 [1:33:38<1:33:25, 14.79s/batch, batch_loss=29.1, bat

Validation:  49%|▍| 364/743 [1:33:53<1:33:25, 14.79s/batch, batch_loss=29.9, bat

Validation:  49%|▍| 365/743 [1:33:53<1:32:35, 14.70s/batch, batch_loss=29.9, bat

Validation:  49%|▍| 365/743 [1:34:06<1:32:35, 14.70s/batch, batch_loss=19.7, bat

Validation:  49%|▍| 366/743 [1:34:06<1:29:14, 14.20s/batch, batch_loss=19.7, bat

Validation:  49%|▍| 366/743 [1:34:18<1:29:14, 14.20s/batch, batch_loss=24.5, bat

Validation:  49%|▍| 367/743 [1:34:18<1:25:23, 13.63s/batch, batch_loss=24.5, bat

Validation:  49%|▍| 367/743 [1:34:33<1:25:23, 13.63s/batch, batch_loss=4.87e+3, 

Validation:  50%|▍| 368/743 [1:34:33<1:27:06, 13.94s/batch, batch_loss=4.87e+3, 

Validation:  50%|▍| 368/743 [1:34:47<1:27:06, 13.94s/batch, batch_loss=30.6, bat

Validation:  50%|▍| 369/743 [1:34:47<1:28:22, 14.18s/batch, batch_loss=30.6, bat

Validation:  50%|▍| 369/743 [1:35:02<1:28:22, 14.18s/batch, batch_loss=33, batch

Validation:  50%|▍| 370/743 [1:35:02<1:29:02, 14.32s/batch, batch_loss=33, batch

Validation:  50%|▍| 370/743 [1:35:20<1:29:02, 14.32s/batch, batch_loss=41.4, bat

Validation:  50%|▍| 371/743 [1:35:20<1:34:56, 15.31s/batch, batch_loss=41.4, bat

Validation:  50%|▍| 371/743 [1:35:34<1:34:56, 15.31s/batch, batch_loss=27.6, bat

Validation:  50%|▌| 372/743 [1:35:34<1:32:53, 15.02s/batch, batch_loss=27.6, bat

Validation:  50%|▌| 372/743 [1:35:48<1:32:53, 15.02s/batch, batch_loss=52.7, bat

Validation:  50%|▌| 373/743 [1:35:48<1:31:01, 14.76s/batch, batch_loss=52.7, bat

Validation:  50%|▌| 373/743 [1:36:03<1:31:01, 14.76s/batch, batch_loss=20.4, bat

Validation:  50%|▌| 374/743 [1:36:03<1:30:20, 14.69s/batch, batch_loss=20.4, bat

Validation:  50%|▌| 374/743 [1:36:17<1:30:20, 14.69s/batch, batch_loss=10.3, bat

Validation:  50%|▌| 375/743 [1:36:17<1:29:32, 14.60s/batch, batch_loss=10.3, bat

Validation:  50%|▌| 375/743 [1:36:32<1:29:32, 14.60s/batch, batch_loss=27.6, bat

Validation:  51%|▌| 376/743 [1:36:32<1:29:20, 14.61s/batch, batch_loss=27.6, bat

Validation:  51%|▌| 376/743 [1:36:48<1:29:20, 14.61s/batch, batch_loss=19.1, bat

Validation:  51%|▌| 377/743 [1:36:48<1:32:19, 15.13s/batch, batch_loss=19.1, bat

Validation:  51%|▌| 377/743 [1:37:02<1:32:19, 15.13s/batch, batch_loss=33.8, bat

Validation:  51%|▌| 378/743 [1:37:02<1:29:38, 14.73s/batch, batch_loss=33.8, bat

Validation:  51%|▌| 378/743 [1:37:16<1:29:38, 14.73s/batch, batch_loss=10, batch

Validation:  51%|▌| 379/743 [1:37:16<1:28:46, 14.63s/batch, batch_loss=10, batch

Validation:  51%|▌| 379/743 [1:37:30<1:28:46, 14.63s/batch, batch_loss=14.4, bat

Validation:  51%|▌| 380/743 [1:37:30<1:27:26, 14.45s/batch, batch_loss=14.4, bat

Validation:  51%|▌| 380/743 [1:37:45<1:27:26, 14.45s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:37:45<1:27:26, 14.49s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:37:59<1:27:26, 14.49s/batch, batch_loss=913, batc

Validation:  51%|▌| 382/743 [1:37:59<1:26:32, 14.38s/batch, batch_loss=913, batc

Validation:  51%|▌| 382/743 [1:38:13<1:26:32, 14.38s/batch, batch_loss=217, batc

Validation:  52%|▌| 383/743 [1:38:13<1:26:20, 14.39s/batch, batch_loss=217, batc

Validation:  52%|▌| 383/743 [1:38:28<1:26:20, 14.39s/batch, batch_loss=291, batc

Validation:  52%|▌| 384/743 [1:38:28<1:25:50, 14.35s/batch, batch_loss=291, batc

Validation:  52%|▌| 384/743 [1:38:42<1:25:50, 14.35s/batch, batch_loss=28.2, bat

Validation:  52%|▌| 385/743 [1:38:42<1:25:34, 14.34s/batch, batch_loss=28.2, bat

Validation:  52%|▌| 385/743 [1:38:59<1:25:34, 14.34s/batch, batch_loss=13.4, bat

Validation:  52%|▌| 386/743 [1:38:59<1:30:25, 15.20s/batch, batch_loss=13.4, bat

Validation:  52%|▌| 386/743 [1:39:13<1:30:25, 15.20s/batch, batch_loss=11.9, bat

Validation:  52%|▌| 387/743 [1:39:13<1:28:23, 14.90s/batch, batch_loss=11.9, bat

Validation:  52%|▌| 387/743 [1:39:27<1:28:23, 14.90s/batch, batch_loss=33.9, bat

Validation:  52%|▌| 388/743 [1:39:27<1:26:52, 14.68s/batch, batch_loss=33.9, bat

Validation:  52%|▌| 388/743 [1:39:43<1:26:52, 14.68s/batch, batch_loss=29, batch

Validation:  52%|▌| 389/743 [1:39:43<1:27:23, 14.81s/batch, batch_loss=29, batch

Validation:  52%|▌| 389/743 [1:39:57<1:27:23, 14.81s/batch, batch_loss=29.4, bat

Validation:  52%|▌| 390/743 [1:39:57<1:26:09, 14.64s/batch, batch_loss=29.4, bat

Validation:  52%|▌| 390/743 [1:40:11<1:26:09, 14.64s/batch, batch_loss=16.7, bat

Validation:  53%|▌| 391/743 [1:40:11<1:24:58, 14.48s/batch, batch_loss=16.7, bat

Validation:  53%|▌| 391/743 [1:40:25<1:24:58, 14.48s/batch, batch_loss=26.4, bat

Validation:  53%|▌| 392/743 [1:40:25<1:24:37, 14.47s/batch, batch_loss=26.4, bat

Validation:  53%|▌| 392/743 [1:40:40<1:24:37, 14.47s/batch, batch_loss=27, batch

Validation:  53%|▌| 393/743 [1:40:40<1:25:07, 14.59s/batch, batch_loss=27, batch

Validation:  53%|▌| 393/743 [1:40:58<1:25:07, 14.59s/batch, batch_loss=31.7, bat

Validation:  53%|▌| 394/743 [1:40:58<1:29:55, 15.46s/batch, batch_loss=31.7, bat

Validation:  53%|▌| 394/743 [1:41:12<1:29:55, 15.46s/batch, batch_loss=21.2, bat

Validation:  53%|▌| 395/743 [1:41:12<1:26:46, 14.96s/batch, batch_loss=21.2, bat

Validation:  53%|▌| 395/743 [1:41:26<1:26:46, 14.96s/batch, batch_loss=27.4, bat

Validation:  53%|▌| 396/743 [1:41:26<1:26:19, 14.93s/batch, batch_loss=27.4, bat

Validation:  53%|▌| 396/743 [1:41:41<1:26:19, 14.93s/batch, batch_loss=23.1, bat

Validation:  53%|▌| 397/743 [1:41:41<1:26:26, 14.99s/batch, batch_loss=23.1, bat

Validation:  53%|▌| 397/743 [1:41:56<1:26:26, 14.99s/batch, batch_loss=48.2, bat

Validation:  54%|▌| 398/743 [1:41:56<1:25:09, 14.81s/batch, batch_loss=48.2, bat

Validation:  54%|▌| 398/743 [1:42:10<1:25:09, 14.81s/batch, batch_loss=22.3, bat

Validation:  54%|▌| 399/743 [1:42:10<1:24:06, 14.67s/batch, batch_loss=22.3, bat

Validation:  54%|▌| 399/743 [1:42:25<1:24:06, 14.67s/batch, batch_loss=31.3, bat

Validation:  54%|▌| 400/743 [1:42:25<1:23:39, 14.63s/batch, batch_loss=31.3, bat

Validation:  54%|▌| 400/743 [1:42:41<1:23:39, 14.63s/batch, batch_loss=31.8, bat

Validation:  54%|▌| 401/743 [1:42:41<1:25:26, 14.99s/batch, batch_loss=31.8, bat

Validation:  54%|▌| 401/743 [1:42:58<1:25:26, 14.99s/batch, batch_loss=11.1, bat

Validation:  54%|▌| 402/743 [1:42:58<1:29:07, 15.68s/batch, batch_loss=11.1, bat

Validation:  54%|▌| 402/743 [1:43:12<1:29:07, 15.68s/batch, batch_loss=29.4, bat

Validation:  54%|▌| 403/743 [1:43:12<1:26:33, 15.27s/batch, batch_loss=29.4, bat

Validation:  54%|▌| 403/743 [1:43:28<1:26:33, 15.27s/batch, batch_loss=24.9, bat

Validation:  54%|▌| 404/743 [1:43:28<1:26:22, 15.29s/batch, batch_loss=24.9, bat

Validation:  54%|▌| 404/743 [1:43:42<1:26:22, 15.29s/batch, batch_loss=15.9, bat

Validation:  55%|▌| 405/743 [1:43:42<1:24:26, 14.99s/batch, batch_loss=15.9, bat

Validation:  55%|▌| 405/743 [1:43:57<1:24:26, 14.99s/batch, batch_loss=23.4, bat

Validation:  55%|▌| 406/743 [1:43:57<1:24:08, 14.98s/batch, batch_loss=23.4, bat

Validation:  55%|▌| 406/743 [1:44:12<1:24:08, 14.98s/batch, batch_loss=24.1, bat

Validation:  55%|▌| 407/743 [1:44:12<1:23:49, 14.97s/batch, batch_loss=24.1, bat

Validation:  55%|▌| 407/743 [1:44:26<1:23:49, 14.97s/batch, batch_loss=29.3, bat

Validation:  55%|▌| 408/743 [1:44:26<1:22:09, 14.71s/batch, batch_loss=29.3, bat

Validation:  55%|▌| 408/743 [1:44:39<1:22:09, 14.71s/batch, batch_loss=14.7, bat

Validation:  55%|▌| 409/743 [1:44:39<1:19:34, 14.29s/batch, batch_loss=14.7, bat

Validation:  55%|▌| 409/743 [1:44:54<1:19:34, 14.29s/batch, batch_loss=21, batch

Validation:  55%|▌| 410/743 [1:44:54<1:19:27, 14.32s/batch, batch_loss=21, batch

Validation:  55%|▌| 410/743 [1:45:08<1:19:27, 14.32s/batch, batch_loss=25.6, bat

Validation:  55%|▌| 411/743 [1:45:08<1:19:00, 14.28s/batch, batch_loss=25.6, bat

Validation:  55%|▌| 411/743 [1:45:23<1:19:00, 14.28s/batch, batch_loss=23.9, bat

Validation:  55%|▌| 412/743 [1:45:23<1:19:44, 14.46s/batch, batch_loss=23.9, bat

Validation:  55%|▌| 412/743 [1:45:37<1:19:44, 14.46s/batch, batch_loss=1.95e+3, 

Validation:  56%|▌| 413/743 [1:45:37<1:19:22, 14.43s/batch, batch_loss=1.95e+3, 

Validation:  56%|▌| 413/743 [1:45:51<1:19:22, 14.43s/batch, batch_loss=27.7, bat

Validation:  56%|▌| 414/743 [1:45:51<1:18:51, 14.38s/batch, batch_loss=27.7, bat

Validation:  56%|▌| 414/743 [1:46:05<1:18:51, 14.38s/batch, batch_loss=33.6, bat

Validation:  56%|▌| 415/743 [1:46:05<1:17:47, 14.23s/batch, batch_loss=33.6, bat

Validation:  56%|▌| 415/743 [1:46:18<1:17:47, 14.23s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [1:46:18<1:15:38, 13.88s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [1:46:32<1:15:38, 13.88s/batch, batch_loss=27.8, bat

Validation:  56%|▌| 417/743 [1:46:32<1:15:54, 13.97s/batch, batch_loss=27.8, bat

Validation:  56%|▌| 417/743 [1:46:47<1:15:54, 13.97s/batch, batch_loss=21.6, bat

Validation:  56%|▌| 418/743 [1:46:47<1:16:20, 14.09s/batch, batch_loss=21.6, bat

Validation:  56%|▌| 418/743 [1:47:04<1:16:20, 14.09s/batch, batch_loss=22.1, bat

Validation:  56%|▌| 419/743 [1:47:04<1:20:31, 14.91s/batch, batch_loss=22.1, bat

Validation:  56%|▌| 419/743 [1:47:18<1:20:31, 14.91s/batch, batch_loss=26.1, bat

Validation:  57%|▌| 420/743 [1:47:18<1:19:35, 14.78s/batch, batch_loss=26.1, bat

Validation:  57%|▌| 420/743 [1:47:32<1:19:35, 14.78s/batch, batch_loss=37.2, bat

Validation:  57%|▌| 421/743 [1:47:32<1:18:36, 14.65s/batch, batch_loss=37.2, bat

Validation:  57%|▌| 421/743 [1:47:48<1:18:36, 14.65s/batch, batch_loss=17.3, bat

Validation:  57%|▌| 422/743 [1:47:48<1:19:46, 14.91s/batch, batch_loss=17.3, bat

Validation:  57%|▌| 422/743 [1:48:03<1:19:46, 14.91s/batch, batch_loss=25.8, bat

Validation:  57%|▌| 423/743 [1:48:03<1:19:26, 14.89s/batch, batch_loss=25.8, bat

Validation:  57%|▌| 423/743 [1:48:18<1:19:26, 14.89s/batch, batch_loss=335, batc

Validation:  57%|▌| 424/743 [1:48:18<1:19:16, 14.91s/batch, batch_loss=335, batc

Validation:  57%|▌| 424/743 [1:48:32<1:19:16, 14.91s/batch, batch_loss=22.9, bat

Validation:  57%|▌| 425/743 [1:48:32<1:18:33, 14.82s/batch, batch_loss=22.9, bat

Validation:  57%|▌| 425/743 [1:48:49<1:18:33, 14.82s/batch, batch_loss=28.3, bat

Validation:  57%|▌| 426/743 [1:48:49<1:21:41, 15.46s/batch, batch_loss=28.3, bat

Validation:  57%|▌| 426/743 [1:49:03<1:21:41, 15.46s/batch, batch_loss=26.1, bat

Validation:  57%|▌| 427/743 [1:49:03<1:19:05, 15.02s/batch, batch_loss=26.1, bat

Validation:  57%|▌| 427/743 [1:49:18<1:19:05, 15.02s/batch, batch_loss=5.29e+3, 

Validation:  58%|▌| 428/743 [1:49:18<1:18:21, 14.93s/batch, batch_loss=5.29e+3, 

Validation:  58%|▌| 428/743 [1:49:32<1:18:21, 14.93s/batch, batch_loss=27.9, bat

Validation:  58%|▌| 429/743 [1:49:32<1:16:26, 14.61s/batch, batch_loss=27.9, bat

Validation:  58%|▌| 429/743 [1:49:46<1:16:26, 14.61s/batch, batch_loss=5.36e+3, 

Validation:  58%|▌| 430/743 [1:49:46<1:15:29, 14.47s/batch, batch_loss=5.36e+3, 

Validation:  58%|▌| 430/743 [1:50:01<1:15:29, 14.47s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:50:01<1:15:26, 14.51s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:50:15<1:15:26, 14.51s/batch, batch_loss=973, batc

Validation:  58%|▌| 432/743 [1:50:15<1:14:48, 14.43s/batch, batch_loss=973, batc

Validation:  58%|▌| 432/743 [1:50:30<1:14:48, 14.43s/batch, batch_loss=24.9, bat

Validation:  58%|▌| 433/743 [1:50:30<1:15:23, 14.59s/batch, batch_loss=24.9, bat

Validation:  58%|▌| 433/743 [1:50:44<1:15:23, 14.59s/batch, batch_loss=19.9, bat

Validation:  58%|▌| 434/743 [1:50:44<1:14:57, 14.55s/batch, batch_loss=19.9, bat

Validation:  58%|▌| 434/743 [1:50:58<1:14:57, 14.55s/batch, batch_loss=25.7, bat

Validation:  59%|▌| 435/743 [1:50:58<1:14:03, 14.43s/batch, batch_loss=25.7, bat

Validation:  59%|▌| 435/743 [1:51:12<1:14:03, 14.43s/batch, batch_loss=25, batch

Validation:  59%|▌| 436/743 [1:51:12<1:12:14, 14.12s/batch, batch_loss=25, batch

Validation:  59%|▌| 436/743 [1:51:26<1:12:14, 14.12s/batch, batch_loss=29.7, bat

Validation:  59%|▌| 437/743 [1:51:26<1:12:40, 14.25s/batch, batch_loss=29.7, bat

Validation:  59%|▌| 437/743 [1:51:40<1:12:40, 14.25s/batch, batch_loss=1.02e+3, 

Validation:  59%|▌| 438/743 [1:51:40<1:11:20, 14.03s/batch, batch_loss=1.02e+3, 

Validation:  59%|▌| 438/743 [1:51:57<1:11:20, 14.03s/batch, batch_loss=933, batc

Validation:  59%|▌| 439/743 [1:51:57<1:15:42, 14.94s/batch, batch_loss=933, batc

Validation:  59%|▌| 439/743 [1:52:12<1:15:42, 14.94s/batch, batch_loss=27.1, bat

Validation:  59%|▌| 440/743 [1:52:12<1:14:54, 14.83s/batch, batch_loss=27.1, bat

Validation:  59%|▌| 440/743 [1:52:25<1:14:54, 14.83s/batch, batch_loss=29.3, bat

Validation:  59%|▌| 441/743 [1:52:25<1:13:03, 14.51s/batch, batch_loss=29.3, bat

Validation:  59%|▌| 441/743 [1:52:39<1:13:03, 14.51s/batch, batch_loss=22.5, bat

Validation:  59%|▌| 442/743 [1:52:39<1:11:58, 14.35s/batch, batch_loss=22.5, bat

Validation:  59%|▌| 442/743 [1:52:54<1:11:58, 14.35s/batch, batch_loss=20.9, bat

Validation:  60%|▌| 443/743 [1:52:54<1:11:54, 14.38s/batch, batch_loss=20.9, bat

Validation:  60%|▌| 443/743 [1:53:08<1:11:54, 14.38s/batch, batch_loss=22.8, bat

Validation:  60%|▌| 444/743 [1:53:08<1:11:17, 14.31s/batch, batch_loss=22.8, bat

Validation:  60%|▌| 444/743 [1:53:22<1:11:17, 14.31s/batch, batch_loss=21.6, bat

Validation:  60%|▌| 445/743 [1:53:22<1:10:21, 14.17s/batch, batch_loss=21.6, bat

Validation:  60%|▌| 445/743 [1:53:36<1:10:21, 14.17s/batch, batch_loss=26.1, bat

Validation:  60%|▌| 446/743 [1:53:36<1:10:14, 14.19s/batch, batch_loss=26.1, bat

Validation:  60%|▌| 446/743 [1:53:50<1:10:14, 14.19s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:53:50<1:09:58, 14.18s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:54:04<1:09:58, 14.18s/batch, batch_loss=11.2, bat

Validation:  60%|▌| 448/743 [1:54:04<1:09:12, 14.08s/batch, batch_loss=11.2, bat

Validation:  60%|▌| 448/743 [1:54:18<1:09:12, 14.08s/batch, batch_loss=12.5, bat

Validation:  60%|▌| 449/743 [1:54:18<1:09:03, 14.09s/batch, batch_loss=12.5, bat

Validation:  60%|▌| 449/743 [1:54:32<1:09:03, 14.09s/batch, batch_loss=22.6, bat

Validation:  61%|▌| 450/743 [1:54:32<1:08:46, 14.08s/batch, batch_loss=22.6, bat

Validation:  61%|▌| 450/743 [1:54:46<1:08:46, 14.08s/batch, batch_loss=15.9, bat

Validation:  61%|▌| 451/743 [1:54:46<1:08:53, 14.16s/batch, batch_loss=15.9, bat

Validation:  61%|▌| 451/743 [1:55:00<1:08:53, 14.16s/batch, batch_loss=31.8, bat

Validation:  61%|▌| 452/743 [1:55:00<1:07:42, 13.96s/batch, batch_loss=31.8, bat

Validation:  61%|▌| 452/743 [1:55:17<1:07:42, 13.96s/batch, batch_loss=22.2, bat

Validation:  61%|▌| 453/743 [1:55:17<1:11:21, 14.76s/batch, batch_loss=22.2, bat

Validation:  61%|▌| 453/743 [1:55:31<1:11:21, 14.76s/batch, batch_loss=14.9, bat

Validation:  61%|▌| 454/743 [1:55:31<1:10:09, 14.57s/batch, batch_loss=14.9, bat

Validation:  61%|▌| 454/743 [1:55:45<1:10:09, 14.57s/batch, batch_loss=17.3, bat

Validation:  61%|▌| 455/743 [1:55:45<1:09:57, 14.57s/batch, batch_loss=17.3, bat

Validation:  61%|▌| 455/743 [1:55:59<1:09:57, 14.57s/batch, batch_loss=18.8, bat

Validation:  61%|▌| 456/743 [1:55:59<1:08:58, 14.42s/batch, batch_loss=18.8, bat

Validation:  61%|▌| 456/743 [1:56:14<1:08:58, 14.42s/batch, batch_loss=19, batch

Validation:  62%|▌| 457/743 [1:56:14<1:08:35, 14.39s/batch, batch_loss=19, batch

Validation:  62%|▌| 457/743 [1:56:29<1:08:35, 14.39s/batch, batch_loss=26.3, bat

Validation:  62%|▌| 458/743 [1:56:29<1:09:14, 14.58s/batch, batch_loss=26.3, bat

Validation:  62%|▌| 458/743 [1:56:43<1:09:14, 14.58s/batch, batch_loss=28.1, bat

Validation:  62%|▌| 459/743 [1:56:43<1:08:33, 14.48s/batch, batch_loss=28.1, bat

Validation:  62%|▌| 459/743 [1:56:58<1:08:33, 14.48s/batch, batch_loss=35.7, bat

Validation:  62%|▌| 460/743 [1:56:58<1:09:16, 14.69s/batch, batch_loss=35.7, bat

Validation:  62%|▌| 460/743 [1:57:16<1:09:16, 14.69s/batch, batch_loss=27.7, bat

Validation:  62%|▌| 461/743 [1:57:16<1:13:08, 15.56s/batch, batch_loss=27.7, bat

Validation:  62%|▌| 461/743 [1:57:29<1:13:08, 15.56s/batch, batch_loss=24.5, bat

Validation:  62%|▌| 462/743 [1:57:29<1:09:52, 14.92s/batch, batch_loss=24.5, bat

Validation:  62%|▌| 462/743 [1:57:43<1:09:52, 14.92s/batch, batch_loss=20.2, bat

Validation:  62%|▌| 463/743 [1:57:43<1:08:37, 14.70s/batch, batch_loss=20.2, bat

Validation:  62%|▌| 463/743 [1:57:58<1:08:37, 14.70s/batch, batch_loss=1.36e+4, 

Validation:  62%|▌| 464/743 [1:57:58<1:08:49, 14.80s/batch, batch_loss=1.36e+4, 

Validation:  62%|▌| 464/743 [1:58:12<1:08:49, 14.80s/batch, batch_loss=30.7, bat

Validation:  63%|▋| 465/743 [1:58:12<1:07:22, 14.54s/batch, batch_loss=30.7, bat

Validation:  63%|▋| 465/743 [1:58:26<1:07:22, 14.54s/batch, batch_loss=25.4, bat

Validation:  63%|▋| 466/743 [1:58:26<1:06:19, 14.37s/batch, batch_loss=25.4, bat

Validation:  63%|▋| 466/743 [1:58:41<1:06:19, 14.37s/batch, batch_loss=32.7, bat

Validation:  63%|▋| 467/743 [1:58:41<1:06:12, 14.39s/batch, batch_loss=32.7, bat

Validation:  63%|▋| 467/743 [1:58:55<1:06:12, 14.39s/batch, batch_loss=26.6, bat

Validation:  63%|▋| 468/743 [1:58:55<1:06:27, 14.50s/batch, batch_loss=26.6, bat

Validation:  63%|▋| 468/743 [1:59:13<1:06:27, 14.50s/batch, batch_loss=21.7, bat

Validation:  63%|▋| 469/743 [1:59:13<1:09:43, 15.27s/batch, batch_loss=21.7, bat

Validation:  63%|▋| 469/743 [1:59:27<1:09:43, 15.27s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [1:59:27<1:08:24, 15.03s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [1:59:41<1:08:24, 15.03s/batch, batch_loss=14.6, bat

Validation:  63%|▋| 471/743 [1:59:41<1:07:16, 14.84s/batch, batch_loss=14.6, bat

Validation:  63%|▋| 471/743 [1:59:55<1:07:16, 14.84s/batch, batch_loss=23.8, bat

Validation:  64%|▋| 472/743 [1:59:55<1:06:01, 14.62s/batch, batch_loss=23.8, bat

Validation:  64%|▋| 472/743 [2:00:10<1:06:01, 14.62s/batch, batch_loss=599, batc

Validation:  64%|▋| 473/743 [2:00:10<1:06:02, 14.67s/batch, batch_loss=599, batc

Validation:  64%|▋| 473/743 [2:00:25<1:06:02, 14.67s/batch, batch_loss=24.6, bat

Validation:  64%|▋| 474/743 [2:00:25<1:05:33, 14.62s/batch, batch_loss=24.6, bat

Validation:  64%|▋| 474/743 [2:00:40<1:05:33, 14.62s/batch, batch_loss=29, batch

Validation:  64%|▋| 475/743 [2:00:40<1:05:55, 14.76s/batch, batch_loss=29, batch

Validation:  64%|▋| 475/743 [2:00:54<1:05:55, 14.76s/batch, batch_loss=12.3, bat

Validation:  64%|▋| 476/743 [2:00:54<1:05:05, 14.63s/batch, batch_loss=12.3, bat

Validation:  64%|▋| 476/743 [2:01:08<1:05:05, 14.63s/batch, batch_loss=16.8, bat

Validation:  64%|▋| 477/743 [2:01:08<1:04:22, 14.52s/batch, batch_loss=16.8, bat

Validation:  64%|▋| 477/743 [2:01:22<1:04:22, 14.52s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [2:01:22<1:03:25, 14.36s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [2:01:37<1:03:25, 14.36s/batch, batch_loss=2.09e+4, 

Validation:  64%|▋| 479/743 [2:01:37<1:03:17, 14.38s/batch, batch_loss=2.09e+4, 

Validation:  64%|▋| 479/743 [2:01:51<1:03:17, 14.38s/batch, batch_loss=19.5, bat

Validation:  65%|▋| 480/743 [2:01:51<1:02:26, 14.25s/batch, batch_loss=19.5, bat

Validation:  65%|▋| 480/743 [2:02:05<1:02:26, 14.25s/batch, batch_loss=21.9, bat

Validation:  65%|▋| 481/743 [2:02:05<1:01:35, 14.10s/batch, batch_loss=21.9, bat

Validation:  65%|▋| 481/743 [2:02:18<1:01:35, 14.10s/batch, batch_loss=6.97e+3, 

Validation:  65%|▋| 482/743 [2:02:18<1:00:16, 13.85s/batch, batch_loss=6.97e+3, 

Validation:  65%|▋| 482/743 [2:02:32<1:00:16, 13.85s/batch, batch_loss=27.7, bat

Validation:  65%|▋| 483/743 [2:02:32<59:55, 13.83s/batch, batch_loss=27.7, batch

Validation:  65%|▋| 483/743 [2:02:48<59:55, 13.83s/batch, batch_loss=2.31e+4, ba

Validation:  65%|▋| 484/743 [2:02:48<1:02:53, 14.57s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:03:03<1:02:53, 14.57s/batch, batch_loss=3.12e+4, 

Validation:  65%|▋| 485/743 [2:03:03<1:02:56, 14.64s/batch, batch_loss=3.12e+4, 

Validation:  65%|▋| 485/743 [2:03:16<1:02:56, 14.64s/batch, batch_loss=22.2, bat

Validation:  65%|▋| 486/743 [2:03:16<1:01:31, 14.36s/batch, batch_loss=22.2, bat

Validation:  65%|▋| 486/743 [2:03:31<1:01:31, 14.36s/batch, batch_loss=46.4, bat

Validation:  66%|▋| 487/743 [2:03:31<1:01:07, 14.33s/batch, batch_loss=46.4, bat

Validation:  66%|▋| 487/743 [2:03:45<1:01:07, 14.33s/batch, batch_loss=39.4, bat

Validation:  66%|▋| 488/743 [2:03:45<1:00:44, 14.29s/batch, batch_loss=39.4, bat

Validation:  66%|▋| 488/743 [2:03:58<1:00:44, 14.29s/batch, batch_loss=16.2, bat

Validation:  66%|▋| 489/743 [2:03:58<58:23, 13.79s/batch, batch_loss=16.2, batch

Validation:  66%|▋| 489/743 [2:04:11<58:23, 13.79s/batch, batch_loss=36.5, batch

Validation:  66%|▋| 490/743 [2:04:11<57:25, 13.62s/batch, batch_loss=36.5, batch

Validation:  66%|▋| 490/743 [2:04:24<57:25, 13.62s/batch, batch_loss=34.1, batch

Validation:  66%|▋| 491/743 [2:04:24<56:42, 13.50s/batch, batch_loss=34.1, batch

Validation:  66%|▋| 491/743 [2:04:39<56:42, 13.50s/batch, batch_loss=1.04e+3, ba

Validation:  66%|▋| 492/743 [2:04:39<58:52, 14.07s/batch, batch_loss=1.04e+3, ba

Validation:  66%|▋| 492/743 [2:04:54<58:52, 14.07s/batch, batch_loss=1.44e+4, ba

Validation:  66%|▋| 493/743 [2:04:54<59:38, 14.32s/batch, batch_loss=1.44e+4, ba

Validation:  66%|▋| 493/743 [2:05:10<59:38, 14.32s/batch, batch_loss=16.2, batch

Validation:  66%|▋| 494/743 [2:05:10<1:00:52, 14.67s/batch, batch_loss=16.2, bat

Validation:  66%|▋| 494/743 [2:05:24<1:00:52, 14.67s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:05:24<59:42, 14.45s/batch, batch_loss=1.18e+4, ba

Validation:  67%|▋| 495/743 [2:05:37<59:42, 14.45s/batch, batch_loss=26.8, batch

Validation:  67%|▋| 496/743 [2:05:37<58:09, 14.13s/batch, batch_loss=26.8, batch

Validation:  67%|▋| 496/743 [2:05:52<58:09, 14.13s/batch, batch_loss=18.8, batch

Validation:  67%|▋| 497/743 [2:05:52<58:21, 14.23s/batch, batch_loss=18.8, batch

Validation:  67%|▋| 497/743 [2:06:06<58:21, 14.23s/batch, batch_loss=22.7, batch

Validation:  67%|▋| 498/743 [2:06:06<58:54, 14.43s/batch, batch_loss=22.7, batch

Validation:  67%|▋| 498/743 [2:06:21<58:54, 14.43s/batch, batch_loss=17.5, batch

Validation:  67%|▋| 499/743 [2:06:21<58:28, 14.38s/batch, batch_loss=17.5, batch

Validation:  67%|▋| 499/743 [2:06:35<58:28, 14.38s/batch, batch_loss=2.51e+4, ba

Validation:  67%|▋| 500/743 [2:06:35<58:16, 14.39s/batch, batch_loss=2.51e+4, ba

Validation:  67%|▋| 500/743 [2:06:50<58:16, 14.39s/batch, batch_loss=28, batch_i

Validation:  67%|▋| 501/743 [2:06:50<58:13, 14.44s/batch, batch_loss=28, batch_i

Validation:  67%|▋| 501/743 [2:07:06<58:13, 14.44s/batch, batch_loss=3.17e+3, ba

Validation:  68%|▋| 502/743 [2:07:06<1:00:51, 15.15s/batch, batch_loss=3.17e+3, 

Validation:  68%|▋| 502/743 [2:07:21<1:00:51, 15.15s/batch, batch_loss=21.3, bat

Validation:  68%|▋| 503/743 [2:07:21<1:00:16, 15.07s/batch, batch_loss=21.3, bat

Validation:  68%|▋| 503/743 [2:07:36<1:00:16, 15.07s/batch, batch_loss=24.1, bat

Validation:  68%|▋| 504/743 [2:07:36<59:20, 14.90s/batch, batch_loss=24.1, batch

Validation:  68%|▋| 504/743 [2:07:50<59:20, 14.90s/batch, batch_loss=25.1, batch

Validation:  68%|▋| 505/743 [2:07:50<58:21, 14.71s/batch, batch_loss=25.1, batch

Validation:  68%|▋| 505/743 [2:08:04<58:21, 14.71s/batch, batch_loss=2.85e+3, ba

Validation:  68%|▋| 506/743 [2:08:04<57:19, 14.51s/batch, batch_loss=2.85e+3, ba

Validation:  68%|▋| 506/743 [2:08:20<57:19, 14.51s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:08:20<58:08, 14.78s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:08:35<58:08, 14.78s/batch, batch_loss=8.43e+3, ba

Validation:  68%|▋| 508/743 [2:08:35<58:24, 14.91s/batch, batch_loss=8.43e+3, ba

Validation:  68%|▋| 508/743 [2:08:49<58:24, 14.91s/batch, batch_loss=8.48e+3, ba

Validation:  69%|▋| 509/743 [2:08:49<57:28, 14.74s/batch, batch_loss=8.48e+3, ba

Validation:  69%|▋| 509/743 [2:09:03<57:28, 14.74s/batch, batch_loss=23.4, batch

Validation:  69%|▋| 510/743 [2:09:03<56:03, 14.43s/batch, batch_loss=23.4, batch

Validation:  69%|▋| 510/743 [2:09:20<56:03, 14.43s/batch, batch_loss=30.9, batch

Validation:  69%|▋| 511/743 [2:09:20<58:29, 15.13s/batch, batch_loss=30.9, batch

Validation:  69%|▋| 511/743 [2:09:34<58:29, 15.13s/batch, batch_loss=21.4, batch

Validation:  69%|▋| 512/743 [2:09:34<57:12, 14.86s/batch, batch_loss=21.4, batch

Validation:  69%|▋| 512/743 [2:09:48<57:12, 14.86s/batch, batch_loss=29, batch_i

Validation:  69%|▋| 513/743 [2:09:48<56:08, 14.65s/batch, batch_loss=29, batch_i

Validation:  69%|▋| 513/743 [2:10:02<56:08, 14.65s/batch, batch_loss=18.3, batch

Validation:  69%|▋| 514/743 [2:10:02<55:19, 14.49s/batch, batch_loss=18.3, batch

Validation:  69%|▋| 514/743 [2:10:17<55:19, 14.49s/batch, batch_loss=19.1, batch

Validation:  69%|▋| 515/743 [2:10:17<55:18, 14.56s/batch, batch_loss=19.1, batch

Validation:  69%|▋| 515/743 [2:10:31<55:18, 14.56s/batch, batch_loss=16.6, batch

Validation:  69%|▋| 516/743 [2:10:31<55:05, 14.56s/batch, batch_loss=16.6, batch

Validation:  69%|▋| 516/743 [2:10:45<55:05, 14.56s/batch, batch_loss=6.15e+4, ba

Validation:  70%|▋| 517/743 [2:10:45<54:01, 14.34s/batch, batch_loss=6.15e+4, ba

Validation:  70%|▋| 517/743 [2:11:00<54:01, 14.34s/batch, batch_loss=518, batch_

Validation:  70%|▋| 518/743 [2:11:00<54:18, 14.48s/batch, batch_loss=518, batch_

Validation:  70%|▋| 518/743 [2:11:16<54:18, 14.48s/batch, batch_loss=18, batch_i

Validation:  70%|▋| 519/743 [2:11:16<55:46, 14.94s/batch, batch_loss=18, batch_i

Validation:  70%|▋| 519/743 [2:11:30<55:46, 14.94s/batch, batch_loss=28.5, batch

Validation:  70%|▋| 520/743 [2:11:30<54:35, 14.69s/batch, batch_loss=28.5, batch

Validation:  70%|▋| 520/743 [2:11:44<54:35, 14.69s/batch, batch_loss=22.6, batch

Validation:  70%|▋| 521/743 [2:11:44<53:24, 14.43s/batch, batch_loss=22.6, batch

Validation:  70%|▋| 521/743 [2:11:58<53:24, 14.43s/batch, batch_loss=22.1, batch

Validation:  70%|▋| 522/743 [2:11:58<52:31, 14.26s/batch, batch_loss=22.1, batch

Validation:  70%|▋| 522/743 [2:12:12<52:31, 14.26s/batch, batch_loss=444, batch_

Validation:  70%|▋| 523/743 [2:12:12<52:17, 14.26s/batch, batch_loss=444, batch_

Validation:  70%|▋| 523/743 [2:12:27<52:17, 14.26s/batch, batch_loss=31.6, batch

Validation:  71%|▋| 524/743 [2:12:27<52:34, 14.40s/batch, batch_loss=31.6, batch

Validation:  71%|▋| 524/743 [2:12:42<52:34, 14.40s/batch, batch_loss=25.8, batch

Validation:  71%|▋| 525/743 [2:12:42<52:59, 14.58s/batch, batch_loss=25.8, batch

Validation:  71%|▋| 525/743 [2:12:57<52:59, 14.58s/batch, batch_loss=15.1, batch

Validation:  71%|▋| 526/743 [2:12:57<53:00, 14.66s/batch, batch_loss=15.1, batch

Validation:  71%|▋| 526/743 [2:13:12<53:00, 14.66s/batch, batch_loss=3.76e+3, ba

Validation:  71%|▋| 527/743 [2:13:12<53:02, 14.73s/batch, batch_loss=3.76e+3, ba

Validation:  71%|▋| 527/743 [2:13:26<53:02, 14.73s/batch, batch_loss=521, batch_

Validation:  71%|▋| 528/743 [2:13:26<52:17, 14.60s/batch, batch_loss=521, batch_

Validation:  71%|▋| 528/743 [2:13:41<52:17, 14.60s/batch, batch_loss=6.53e+3, ba

Validation:  71%|▋| 529/743 [2:13:41<52:17, 14.66s/batch, batch_loss=6.53e+3, ba

Validation:  71%|▋| 529/743 [2:13:55<52:17, 14.66s/batch, batch_loss=211, batch_

Validation:  71%|▋| 530/743 [2:13:55<51:51, 14.61s/batch, batch_loss=211, batch_

Validation:  71%|▋| 530/743 [2:14:10<51:51, 14.61s/batch, batch_loss=59.5, batch

Validation:  71%|▋| 531/743 [2:14:10<51:30, 14.58s/batch, batch_loss=59.5, batch

Validation:  71%|▋| 531/743 [2:14:23<51:30, 14.58s/batch, batch_loss=258, batch_

Validation:  72%|▋| 532/743 [2:14:23<50:13, 14.28s/batch, batch_loss=258, batch_

Validation:  72%|▋| 532/743 [2:14:37<50:13, 14.28s/batch, batch_loss=14.7, batch

Validation:  72%|▋| 533/743 [2:14:37<49:43, 14.21s/batch, batch_loss=14.7, batch

Validation:  72%|▋| 533/743 [2:14:51<49:43, 14.21s/batch, batch_loss=21.5, batch

Validation:  72%|▋| 534/743 [2:14:51<49:08, 14.11s/batch, batch_loss=21.5, batch

Validation:  72%|▋| 534/743 [2:15:05<49:08, 14.11s/batch, batch_loss=17.1, batch

Validation:  72%|▋| 535/743 [2:15:05<48:54, 14.11s/batch, batch_loss=17.1, batch

Validation:  72%|▋| 535/743 [2:15:22<48:54, 14.11s/batch, batch_loss=25.2, batch

Validation:  72%|▋| 536/743 [2:15:22<51:09, 14.83s/batch, batch_loss=25.2, batch

Validation:  72%|▋| 536/743 [2:15:36<51:09, 14.83s/batch, batch_loss=21.9, batch

Validation:  72%|▋| 537/743 [2:15:36<50:37, 14.74s/batch, batch_loss=21.9, batch

Validation:  72%|▋| 537/743 [2:15:51<50:37, 14.74s/batch, batch_loss=26, batch_i

Validation:  72%|▋| 538/743 [2:15:51<49:57, 14.62s/batch, batch_loss=26, batch_i

Validation:  72%|▋| 538/743 [2:16:05<49:57, 14.62s/batch, batch_loss=262, batch_

Validation:  73%|▋| 539/743 [2:16:05<49:47, 14.65s/batch, batch_loss=262, batch_

Validation:  73%|▋| 539/743 [2:16:19<49:47, 14.65s/batch, batch_loss=21, batch_i

Validation:  73%|▋| 540/743 [2:16:19<48:57, 14.47s/batch, batch_loss=21, batch_i

Validation:  73%|▋| 540/743 [2:16:34<48:57, 14.47s/batch, batch_loss=42.5, batch

Validation:  73%|▋| 541/743 [2:16:34<49:04, 14.58s/batch, batch_loss=42.5, batch

Validation:  73%|▋| 541/743 [2:16:48<49:04, 14.58s/batch, batch_loss=1.99e+3, ba

Validation:  73%|▋| 542/743 [2:16:48<47:31, 14.18s/batch, batch_loss=1.99e+3, ba

Validation:  73%|▋| 542/743 [2:17:02<47:31, 14.18s/batch, batch_loss=22.3, batch

Validation:  73%|▋| 543/743 [2:17:02<47:48, 14.34s/batch, batch_loss=22.3, batch

Validation:  73%|▋| 543/743 [2:17:17<47:48, 14.34s/batch, batch_loss=1.1e+4, bat

Validation:  73%|▋| 544/743 [2:17:17<48:09, 14.52s/batch, batch_loss=1.1e+4, bat

Validation:  73%|▋| 544/743 [2:17:31<48:09, 14.52s/batch, batch_loss=2.76e+3, ba

Validation:  73%|▋| 545/743 [2:17:31<47:36, 14.43s/batch, batch_loss=2.76e+3, ba

Validation:  73%|▋| 545/743 [2:17:46<47:36, 14.43s/batch, batch_loss=16.2, batch

Validation:  73%|▋| 546/743 [2:17:46<47:18, 14.41s/batch, batch_loss=16.2, batch

Validation:  73%|▋| 546/743 [2:18:00<47:18, 14.41s/batch, batch_loss=268, batch_

Validation:  74%|▋| 547/743 [2:18:00<46:49, 14.33s/batch, batch_loss=268, batch_

Validation:  74%|▋| 547/743 [2:18:14<46:49, 14.33s/batch, batch_loss=36.2, batch

Validation:  74%|▋| 548/743 [2:18:14<46:46, 14.39s/batch, batch_loss=36.2, batch

Validation:  74%|▋| 548/743 [2:18:29<46:46, 14.39s/batch, batch_loss=4.14e+3, ba

Validation:  74%|▋| 549/743 [2:18:29<46:27, 14.37s/batch, batch_loss=4.14e+3, ba

Validation:  74%|▋| 549/743 [2:18:43<46:27, 14.37s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:18:43<46:30, 14.46s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:18:58<46:30, 14.46s/batch, batch_loss=19.5, batch

Validation:  74%|▋| 551/743 [2:18:58<46:51, 14.64s/batch, batch_loss=19.5, batch

Validation:  74%|▋| 551/743 [2:19:15<46:51, 14.64s/batch, batch_loss=6.74e+3, ba

Validation:  74%|▋| 552/743 [2:19:15<48:31, 15.24s/batch, batch_loss=6.74e+3, ba

Validation:  74%|▋| 552/743 [2:19:30<48:31, 15.24s/batch, batch_loss=39.1, batch

Validation:  74%|▋| 553/743 [2:19:30<47:37, 15.04s/batch, batch_loss=39.1, batch

Validation:  74%|▋| 553/743 [2:19:44<47:37, 15.04s/batch, batch_loss=34.1, batch

Validation:  75%|▋| 554/743 [2:19:44<46:39, 14.81s/batch, batch_loss=34.1, batch

Validation:  75%|▋| 554/743 [2:19:58<46:39, 14.81s/batch, batch_loss=2.48e+3, ba

Validation:  75%|▋| 555/743 [2:19:58<46:03, 14.70s/batch, batch_loss=2.48e+3, ba

Validation:  75%|▋| 555/743 [2:20:13<46:03, 14.70s/batch, batch_loss=33.4, batch

Validation:  75%|▋| 556/743 [2:20:13<45:35, 14.63s/batch, batch_loss=33.4, batch

Validation:  75%|▋| 556/743 [2:20:27<45:35, 14.63s/batch, batch_loss=15, batch_i

Validation:  75%|▋| 557/743 [2:20:27<44:56, 14.50s/batch, batch_loss=15, batch_i

Validation:  75%|▋| 557/743 [2:20:41<44:56, 14.50s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:20:41<44:31, 14.44s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:20:59<44:31, 14.44s/batch, batch_loss=3.64e+3, ba

Validation:  75%|▊| 559/743 [2:20:59<47:05, 15.35s/batch, batch_loss=3.64e+3, ba

Validation:  75%|▊| 559/743 [2:21:13<47:05, 15.35s/batch, batch_loss=2.95e+3, ba

Validation:  75%|▊| 560/743 [2:21:13<45:37, 14.96s/batch, batch_loss=2.95e+3, ba

Validation:  75%|▊| 560/743 [2:21:28<45:37, 14.96s/batch, batch_loss=20.7, batch

Validation:  76%|▊| 561/743 [2:21:28<45:14, 14.91s/batch, batch_loss=20.7, batch

Validation:  76%|▊| 561/743 [2:21:42<45:14, 14.91s/batch, batch_loss=32.1, batch

Validation:  76%|▊| 562/743 [2:21:42<44:11, 14.65s/batch, batch_loss=32.1, batch

Validation:  76%|▊| 562/743 [2:21:56<44:11, 14.65s/batch, batch_loss=28.5, batch

Validation:  76%|▊| 563/743 [2:21:56<43:46, 14.59s/batch, batch_loss=28.5, batch

Validation:  76%|▊| 563/743 [2:22:11<43:46, 14.59s/batch, batch_loss=1.12e+3, ba

Validation:  76%|▊| 564/743 [2:22:11<43:44, 14.66s/batch, batch_loss=1.12e+3, ba

Validation:  76%|▊| 564/743 [2:22:26<43:44, 14.66s/batch, batch_loss=3.71e+3, ba

Validation:  76%|▊| 565/743 [2:22:26<43:29, 14.66s/batch, batch_loss=3.71e+3, ba

Validation:  76%|▊| 565/743 [2:22:40<43:29, 14.66s/batch, batch_loss=17.2, batch

Validation:  76%|▊| 566/743 [2:22:40<43:03, 14.59s/batch, batch_loss=17.2, batch

Validation:  76%|▊| 566/743 [2:22:55<43:03, 14.59s/batch, batch_loss=21.1, batch

Validation:  76%|▊| 567/743 [2:22:55<42:53, 14.62s/batch, batch_loss=21.1, batch

Validation:  76%|▊| 567/743 [2:23:09<42:53, 14.62s/batch, batch_loss=16.6, batch

Validation:  76%|▊| 568/743 [2:23:09<42:31, 14.58s/batch, batch_loss=16.6, batch

Validation:  76%|▊| 568/743 [2:23:24<42:31, 14.58s/batch, batch_loss=24.3, batch

Validation:  77%|▊| 569/743 [2:23:24<42:16, 14.57s/batch, batch_loss=24.3, batch

Validation:  77%|▊| 569/743 [2:23:38<42:16, 14.57s/batch, batch_loss=24.3, batch

Validation:  77%|▊| 570/743 [2:23:38<41:58, 14.56s/batch, batch_loss=24.3, batch

Validation:  77%|▊| 570/743 [2:23:53<41:58, 14.56s/batch, batch_loss=19.6, batch

Validation:  77%|▊| 571/743 [2:23:53<41:24, 14.45s/batch, batch_loss=19.6, batch

Validation:  77%|▊| 571/743 [2:24:06<41:24, 14.45s/batch, batch_loss=30.5, batch

Validation:  77%|▊| 572/743 [2:24:06<40:45, 14.30s/batch, batch_loss=30.5, batch

Validation:  77%|▊| 572/743 [2:24:21<40:45, 14.30s/batch, batch_loss=24.5, batch

Validation:  77%|▊| 573/743 [2:24:21<40:37, 14.34s/batch, batch_loss=24.5, batch

Validation:  77%|▊| 573/743 [2:24:35<40:37, 14.34s/batch, batch_loss=29.9, batch

Validation:  77%|▊| 574/743 [2:24:35<40:17, 14.30s/batch, batch_loss=29.9, batch

Validation:  77%|▊| 574/743 [2:24:50<40:17, 14.30s/batch, batch_loss=24.5, batch

Validation:  77%|▊| 575/743 [2:24:50<40:09, 14.34s/batch, batch_loss=24.5, batch

Validation:  77%|▊| 575/743 [2:25:04<40:09, 14.34s/batch, batch_loss=23.2, batch

Validation:  78%|▊| 576/743 [2:25:04<40:20, 14.49s/batch, batch_loss=23.2, batch

Validation:  78%|▊| 576/743 [2:25:19<40:20, 14.49s/batch, batch_loss=30.7, batch

Validation:  78%|▊| 577/743 [2:25:19<40:03, 14.48s/batch, batch_loss=30.7, batch

Validation:  78%|▊| 577/743 [2:25:33<40:03, 14.48s/batch, batch_loss=27.2, batch

Validation:  78%|▊| 578/743 [2:25:33<39:39, 14.42s/batch, batch_loss=27.2, batch

Validation:  78%|▊| 578/743 [2:25:48<39:39, 14.42s/batch, batch_loss=321, batch_

Validation:  78%|▊| 579/743 [2:25:48<39:30, 14.45s/batch, batch_loss=321, batch_

Validation:  78%|▊| 579/743 [2:26:02<39:30, 14.45s/batch, batch_loss=10.5, batch

Validation:  78%|▊| 580/743 [2:26:02<38:56, 14.33s/batch, batch_loss=10.5, batch

Validation:  78%|▊| 580/743 [2:26:16<38:56, 14.33s/batch, batch_loss=15.7, batch

Validation:  78%|▊| 581/743 [2:26:16<38:20, 14.20s/batch, batch_loss=15.7, batch

Validation:  78%|▊| 581/743 [2:26:30<38:20, 14.20s/batch, batch_loss=25.1, batch

Validation:  78%|▊| 582/743 [2:26:30<38:33, 14.37s/batch, batch_loss=25.1, batch

Validation:  78%|▊| 582/743 [2:26:45<38:33, 14.37s/batch, batch_loss=2.39e+3, ba

Validation:  78%|▊| 583/743 [2:26:45<38:38, 14.49s/batch, batch_loss=2.39e+3, ba

Validation:  78%|▊| 583/743 [2:27:00<38:38, 14.49s/batch, batch_loss=7.16, batch

Validation:  79%|▊| 584/743 [2:27:00<38:41, 14.60s/batch, batch_loss=7.16, batch

Validation:  79%|▊| 584/743 [2:27:14<38:41, 14.60s/batch, batch_loss=25.4, batch

Validation:  79%|▊| 585/743 [2:27:14<38:14, 14.52s/batch, batch_loss=25.4, batch

Validation:  79%|▊| 585/743 [2:27:29<38:14, 14.52s/batch, batch_loss=558, batch_

Validation:  79%|▊| 586/743 [2:27:29<38:11, 14.60s/batch, batch_loss=558, batch_

Validation:  79%|▊| 586/743 [2:27:44<38:11, 14.60s/batch, batch_loss=12.9, batch

Validation:  79%|▊| 587/743 [2:27:44<37:58, 14.61s/batch, batch_loss=12.9, batch

Validation:  79%|▊| 587/743 [2:27:58<37:58, 14.61s/batch, batch_loss=402, batch_

Validation:  79%|▊| 588/743 [2:27:58<37:25, 14.48s/batch, batch_loss=402, batch_

Validation:  79%|▊| 588/743 [2:28:12<37:25, 14.48s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:28:12<36:56, 14.39s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:28:26<36:56, 14.39s/batch, batch_loss=29.3, batch

Validation:  79%|▊| 590/743 [2:28:26<36:37, 14.37s/batch, batch_loss=29.3, batch

Validation:  79%|▊| 590/743 [2:28:41<36:37, 14.37s/batch, batch_loss=26.8, batch

Validation:  80%|▊| 591/743 [2:28:41<36:16, 14.32s/batch, batch_loss=26.8, batch

Validation:  80%|▊| 591/743 [2:28:55<36:16, 14.32s/batch, batch_loss=22.6, batch

Validation:  80%|▊| 592/743 [2:28:55<35:58, 14.30s/batch, batch_loss=22.6, batch

Validation:  80%|▊| 592/743 [2:29:10<35:58, 14.30s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:29:10<36:22, 14.55s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:29:24<36:22, 14.55s/batch, batch_loss=7.24, batch

Validation:  80%|▊| 594/743 [2:29:24<35:46, 14.41s/batch, batch_loss=7.24, batch

Validation:  80%|▊| 594/743 [2:29:39<35:46, 14.41s/batch, batch_loss=6.26, batch

Validation:  80%|▊| 595/743 [2:29:39<36:10, 14.67s/batch, batch_loss=6.26, batch

Validation:  80%|▊| 595/743 [2:29:54<36:10, 14.67s/batch, batch_loss=13.8, batch

Validation:  80%|▊| 596/743 [2:29:54<35:45, 14.60s/batch, batch_loss=13.8, batch

Validation:  80%|▊| 596/743 [2:30:09<35:45, 14.60s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:30:09<36:12, 14.88s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:30:23<36:12, 14.88s/batch, batch_loss=21.3, batch

Validation:  80%|▊| 598/743 [2:30:23<35:06, 14.53s/batch, batch_loss=21.3, batch

Validation:  80%|▊| 598/743 [2:30:38<35:06, 14.53s/batch, batch_loss=20.2, batch

Validation:  81%|▊| 599/743 [2:30:38<35:05, 14.62s/batch, batch_loss=20.2, batch

Validation:  81%|▊| 599/743 [2:30:54<35:05, 14.62s/batch, batch_loss=44.3, batch

Validation:  81%|▊| 600/743 [2:30:54<35:40, 14.97s/batch, batch_loss=44.3, batch

Validation:  81%|▊| 600/743 [2:31:09<35:40, 14.97s/batch, batch_loss=21.4, batch

Validation:  81%|▊| 601/743 [2:31:09<35:21, 14.94s/batch, batch_loss=21.4, batch

Validation:  81%|▊| 601/743 [2:31:23<35:21, 14.94s/batch, batch_loss=29.9, batch

Validation:  81%|▊| 602/743 [2:31:23<35:04, 14.93s/batch, batch_loss=29.9, batch

Validation:  81%|▊| 602/743 [2:31:38<35:04, 14.93s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:31:38<34:39, 14.85s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:31:53<34:39, 14.85s/batch, batch_loss=28.2, batch

Validation:  81%|▊| 604/743 [2:31:53<34:08, 14.74s/batch, batch_loss=28.2, batch

Validation:  81%|▊| 604/743 [2:32:07<34:08, 14.74s/batch, batch_loss=32.3, batch

Validation:  81%|▊| 605/743 [2:32:07<33:32, 14.58s/batch, batch_loss=32.3, batch

Validation:  81%|▊| 605/743 [2:32:22<33:32, 14.58s/batch, batch_loss=257, batch_

Validation:  82%|▊| 606/743 [2:32:22<33:31, 14.68s/batch, batch_loss=257, batch_

Validation:  82%|▊| 606/743 [2:32:37<33:31, 14.68s/batch, batch_loss=36.7, batch

Validation:  82%|▊| 607/743 [2:32:37<33:30, 14.78s/batch, batch_loss=36.7, batch

Validation:  82%|▊| 607/743 [2:32:52<33:30, 14.78s/batch, batch_loss=31.9, batch

Validation:  82%|▊| 608/743 [2:32:52<33:42, 14.98s/batch, batch_loss=31.9, batch

Validation:  82%|▊| 608/743 [2:33:08<33:42, 14.98s/batch, batch_loss=35.6, batch

Validation:  82%|▊| 609/743 [2:33:08<33:49, 15.14s/batch, batch_loss=35.6, batch

Validation:  82%|▊| 609/743 [2:33:26<33:49, 15.14s/batch, batch_loss=22.1, batch

Validation:  82%|▊| 610/743 [2:33:26<35:43, 16.12s/batch, batch_loss=22.1, batch

Validation:  82%|▊| 610/743 [2:33:42<35:43, 16.12s/batch, batch_loss=29.2, batch

Validation:  82%|▊| 611/743 [2:33:42<35:19, 16.05s/batch, batch_loss=29.2, batch

Validation:  82%|▊| 611/743 [2:33:57<35:19, 16.05s/batch, batch_loss=16.6, batch

Validation:  82%|▊| 612/743 [2:33:57<34:16, 15.70s/batch, batch_loss=16.6, batch

Validation:  82%|▊| 612/743 [2:34:11<34:16, 15.70s/batch, batch_loss=22.4, batch

Validation:  83%|▊| 613/743 [2:34:11<32:53, 15.18s/batch, batch_loss=22.4, batch

Validation:  83%|▊| 613/743 [2:34:25<32:53, 15.18s/batch, batch_loss=5.64e+3, ba

Validation:  83%|▊| 614/743 [2:34:25<31:45, 14.77s/batch, batch_loss=5.64e+3, ba

Validation:  83%|▊| 614/743 [2:34:40<31:45, 14.77s/batch, batch_loss=15.4, batch

Validation:  83%|▊| 615/743 [2:34:40<32:08, 15.06s/batch, batch_loss=15.4, batch

Validation:  83%|▊| 615/743 [2:34:58<32:08, 15.06s/batch, batch_loss=21.9, batch

Validation:  83%|▊| 616/743 [2:34:58<33:28, 15.81s/batch, batch_loss=21.9, batch

Validation:  83%|▊| 616/743 [2:35:13<33:28, 15.81s/batch, batch_loss=11.9, batch

Validation:  83%|▊| 617/743 [2:35:13<32:43, 15.58s/batch, batch_loss=11.9, batch

Validation:  83%|▊| 617/743 [2:35:28<32:43, 15.58s/batch, batch_loss=12.3, batch

Validation:  83%|▊| 618/743 [2:35:28<32:09, 15.44s/batch, batch_loss=12.3, batch

Validation:  83%|▊| 618/743 [2:35:44<32:09, 15.44s/batch, batch_loss=344, batch_

Validation:  83%|▊| 619/743 [2:35:44<32:11, 15.58s/batch, batch_loss=344, batch_

Validation:  83%|▊| 619/743 [2:35:59<32:11, 15.58s/batch, batch_loss=16.5, batch

Validation:  83%|▊| 620/743 [2:35:59<31:37, 15.43s/batch, batch_loss=16.5, batch

Validation:  83%|▊| 620/743 [2:36:14<31:37, 15.43s/batch, batch_loss=15, batch_i

Validation:  84%|▊| 621/743 [2:36:14<30:58, 15.24s/batch, batch_loss=15, batch_i

Validation:  84%|▊| 621/743 [2:36:29<30:58, 15.24s/batch, batch_loss=22.9, batch

Validation:  84%|▊| 622/743 [2:36:29<30:30, 15.13s/batch, batch_loss=22.9, batch

Validation:  84%|▊| 622/743 [2:36:46<30:30, 15.13s/batch, batch_loss=202, batch_

Validation:  84%|▊| 623/743 [2:36:46<31:45, 15.88s/batch, batch_loss=202, batch_

Validation:  84%|▊| 623/743 [2:37:01<31:45, 15.88s/batch, batch_loss=21.4, batch

Validation:  84%|▊| 624/743 [2:37:01<30:53, 15.58s/batch, batch_loss=21.4, batch

Validation:  84%|▊| 624/743 [2:37:16<30:53, 15.58s/batch, batch_loss=2.42e+3, ba

Validation:  84%|▊| 625/743 [2:37:16<30:00, 15.26s/batch, batch_loss=2.42e+3, ba

Validation:  84%|▊| 625/743 [2:37:31<30:00, 15.26s/batch, batch_loss=33.7, batch

Validation:  84%|▊| 626/743 [2:37:31<29:29, 15.12s/batch, batch_loss=33.7, batch

Validation:  84%|▊| 626/743 [2:37:46<29:29, 15.12s/batch, batch_loss=26.5, batch

Validation:  84%|▊| 627/743 [2:37:46<29:09, 15.08s/batch, batch_loss=26.5, batch

Validation:  84%|▊| 627/743 [2:38:00<29:09, 15.08s/batch, batch_loss=27.1, batch

Validation:  85%|▊| 628/743 [2:38:00<28:40, 14.96s/batch, batch_loss=27.1, batch

Validation:  85%|▊| 628/743 [2:38:15<28:40, 14.96s/batch, batch_loss=19, batch_i

Validation:  85%|▊| 629/743 [2:38:15<28:31, 15.01s/batch, batch_loss=19, batch_i

Validation:  85%|▊| 629/743 [2:38:31<28:31, 15.01s/batch, batch_loss=36.1, batch

Validation:  85%|▊| 630/743 [2:38:31<28:24, 15.08s/batch, batch_loss=36.1, batch

Validation:  85%|▊| 630/743 [2:38:46<28:24, 15.08s/batch, batch_loss=248, batch_

Validation:  85%|▊| 631/743 [2:38:46<28:06, 15.06s/batch, batch_loss=248, batch_

Validation:  85%|▊| 631/743 [2:39:01<28:06, 15.06s/batch, batch_loss=30.4, batch

Validation:  85%|▊| 632/743 [2:39:01<27:52, 15.07s/batch, batch_loss=30.4, batch

Validation:  85%|▊| 632/743 [2:39:18<27:52, 15.07s/batch, batch_loss=26, batch_i

Validation:  85%|▊| 633/743 [2:39:18<28:52, 15.75s/batch, batch_loss=26, batch_i

Validation:  85%|▊| 633/743 [2:39:33<28:52, 15.75s/batch, batch_loss=14.5, batch

Validation:  85%|▊| 634/743 [2:39:33<28:07, 15.48s/batch, batch_loss=14.5, batch

Validation:  85%|▊| 634/743 [2:39:48<28:07, 15.48s/batch, batch_loss=12.4, batch

Validation:  85%|▊| 635/743 [2:39:48<27:29, 15.27s/batch, batch_loss=12.4, batch

Validation:  85%|▊| 635/743 [2:40:03<27:29, 15.27s/batch, batch_loss=816, batch_

Validation:  86%|▊| 636/743 [2:40:03<27:00, 15.15s/batch, batch_loss=816, batch_

Validation:  86%|▊| 636/743 [2:40:16<27:00, 15.15s/batch, batch_loss=738, batch_

Validation:  86%|▊| 637/743 [2:40:16<26:02, 14.74s/batch, batch_loss=738, batch_

Validation:  86%|▊| 637/743 [2:40:31<26:02, 14.74s/batch, batch_loss=23.3, batch

Validation:  86%|▊| 638/743 [2:40:31<25:52, 14.79s/batch, batch_loss=23.3, batch

Validation:  86%|▊| 638/743 [2:40:46<25:52, 14.79s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:40:46<25:39, 14.80s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:41:01<25:39, 14.80s/batch, batch_loss=34.7, batch

Validation:  86%|▊| 640/743 [2:41:01<25:29, 14.85s/batch, batch_loss=34.7, batch

Validation:  86%|▊| 640/743 [2:41:17<25:29, 14.85s/batch, batch_loss=50.7, batch

Validation:  86%|▊| 641/743 [2:41:17<25:38, 15.08s/batch, batch_loss=50.7, batch

Validation:  86%|▊| 641/743 [2:41:34<25:38, 15.08s/batch, batch_loss=52, batch_i

Validation:  86%|▊| 642/743 [2:41:34<26:27, 15.72s/batch, batch_loss=52, batch_i

Validation:  86%|▊| 642/743 [2:41:49<26:27, 15.72s/batch, batch_loss=1.08e+3, ba

Validation:  87%|▊| 643/743 [2:41:49<25:55, 15.55s/batch, batch_loss=1.08e+3, ba

Validation:  87%|▊| 643/743 [2:42:04<25:55, 15.55s/batch, batch_loss=27.3, batch

Validation:  87%|▊| 644/743 [2:42:04<25:17, 15.33s/batch, batch_loss=27.3, batch

Validation:  87%|▊| 644/743 [2:42:19<25:17, 15.33s/batch, batch_loss=22.3, batch

Validation:  87%|▊| 645/743 [2:42:19<24:50, 15.21s/batch, batch_loss=22.3, batch

Validation:  87%|▊| 645/743 [2:42:33<24:50, 15.21s/batch, batch_loss=6.26e+3, ba

Validation:  87%|▊| 646/743 [2:42:33<24:14, 15.00s/batch, batch_loss=6.26e+3, ba

Validation:  87%|▊| 646/743 [2:42:49<24:14, 15.00s/batch, batch_loss=30.1, batch

Validation:  87%|▊| 647/743 [2:42:49<24:16, 15.17s/batch, batch_loss=30.1, batch

Validation:  87%|▊| 647/743 [2:43:04<24:16, 15.17s/batch, batch_loss=11.5, batch

Validation:  87%|▊| 648/743 [2:43:04<23:52, 15.08s/batch, batch_loss=11.5, batch

Validation:  87%|▊| 648/743 [2:43:21<23:52, 15.08s/batch, batch_loss=17.6, batch

Validation:  87%|▊| 649/743 [2:43:21<24:31, 15.66s/batch, batch_loss=17.6, batch

Validation:  87%|▊| 649/743 [2:43:35<24:31, 15.66s/batch, batch_loss=24.1, batch

Validation:  87%|▊| 650/743 [2:43:35<23:38, 15.25s/batch, batch_loss=24.1, batch

Validation:  87%|▊| 650/743 [2:43:50<23:38, 15.25s/batch, batch_loss=36.3, batch

Validation:  88%|▉| 651/743 [2:43:50<23:15, 15.17s/batch, batch_loss=36.3, batch

Validation:  88%|▉| 651/743 [2:44:05<23:15, 15.17s/batch, batch_loss=45.2, batch

Validation:  88%|▉| 652/743 [2:44:05<22:42, 14.97s/batch, batch_loss=45.2, batch

Validation:  88%|▉| 652/743 [2:44:18<22:42, 14.97s/batch, batch_loss=20.8, batch

Validation:  88%|▉| 653/743 [2:44:18<21:57, 14.64s/batch, batch_loss=20.8, batch

Validation:  88%|▉| 653/743 [2:44:33<21:57, 14.64s/batch, batch_loss=24.7, batch

Validation:  88%|▉| 654/743 [2:44:33<21:43, 14.64s/batch, batch_loss=24.7, batch

Validation:  88%|▉| 654/743 [2:44:48<21:43, 14.64s/batch, batch_loss=32.8, batch

Validation:  88%|▉| 655/743 [2:44:48<21:42, 14.80s/batch, batch_loss=32.8, batch

Validation:  88%|▉| 655/743 [2:45:03<21:42, 14.80s/batch, batch_loss=26.5, batch

Validation:  88%|▉| 656/743 [2:45:03<21:21, 14.73s/batch, batch_loss=26.5, batch

Validation:  88%|▉| 656/743 [2:45:18<21:21, 14.73s/batch, batch_loss=21.8, batch

Validation:  88%|▉| 657/743 [2:45:18<21:19, 14.88s/batch, batch_loss=21.8, batch

Validation:  88%|▉| 657/743 [2:45:33<21:19, 14.88s/batch, batch_loss=26, batch_i

Validation:  89%|▉| 658/743 [2:45:33<21:16, 15.02s/batch, batch_loss=26, batch_i

Validation:  89%|▉| 658/743 [2:45:49<21:16, 15.02s/batch, batch_loss=25.2, batch

Validation:  89%|▉| 659/743 [2:45:49<21:06, 15.07s/batch, batch_loss=25.2, batch

Validation:  89%|▉| 659/743 [2:46:04<21:06, 15.07s/batch, batch_loss=27.8, batch

Validation:  89%|▉| 660/743 [2:46:04<20:53, 15.10s/batch, batch_loss=27.8, batch

Validation:  89%|▉| 660/743 [2:46:19<20:53, 15.10s/batch, batch_loss=28.4, batch

Validation:  89%|▉| 661/743 [2:46:19<20:48, 15.23s/batch, batch_loss=28.4, batch

Validation:  89%|▉| 661/743 [2:46:34<20:48, 15.23s/batch, batch_loss=17, batch_i

Validation:  89%|▉| 662/743 [2:46:34<20:32, 15.22s/batch, batch_loss=17, batch_i

Validation:  89%|▉| 662/743 [2:46:50<20:32, 15.22s/batch, batch_loss=3.57e+3, ba

Validation:  89%|▉| 663/743 [2:46:50<20:17, 15.22s/batch, batch_loss=3.57e+3, ba

Validation:  89%|▉| 663/743 [2:47:05<20:17, 15.22s/batch, batch_loss=34, batch_i

Validation:  89%|▉| 664/743 [2:47:05<19:54, 15.12s/batch, batch_loss=34, batch_i

Validation:  89%|▉| 664/743 [2:47:19<19:54, 15.12s/batch, batch_loss=21.4, batch

Validation:  90%|▉| 665/743 [2:47:19<19:26, 14.95s/batch, batch_loss=21.4, batch

Validation:  90%|▉| 665/743 [2:47:37<19:26, 14.95s/batch, batch_loss=20.7, batch

Validation:  90%|▉| 666/743 [2:47:37<20:22, 15.88s/batch, batch_loss=20.7, batch

Validation:  90%|▉| 666/743 [2:47:52<20:22, 15.88s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:47:52<19:44, 15.59s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:48:08<19:44, 15.59s/batch, batch_loss=27.7, batch

Validation:  90%|▉| 668/743 [2:48:08<19:29, 15.60s/batch, batch_loss=27.7, batch

Validation:  90%|▉| 668/743 [2:48:23<19:29, 15.60s/batch, batch_loss=39.9, batch

Validation:  90%|▉| 669/743 [2:48:23<19:12, 15.58s/batch, batch_loss=39.9, batch

Validation:  90%|▉| 669/743 [2:48:38<19:12, 15.58s/batch, batch_loss=39.5, batch

Validation:  90%|▉| 670/743 [2:48:38<18:30, 15.22s/batch, batch_loss=39.5, batch

Validation:  90%|▉| 670/743 [2:48:55<18:30, 15.22s/batch, batch_loss=3.13e+3, ba

Validation:  90%|▉| 671/743 [2:48:55<19:01, 15.86s/batch, batch_loss=3.13e+3, ba

Validation:  90%|▉| 671/743 [2:49:10<19:01, 15.86s/batch, batch_loss=30.2, batch

Validation:  90%|▉| 672/743 [2:49:10<18:29, 15.63s/batch, batch_loss=30.2, batch

Validation:  90%|▉| 672/743 [2:49:25<18:29, 15.63s/batch, batch_loss=38.5, batch

Validation:  91%|▉| 673/743 [2:49:25<17:56, 15.38s/batch, batch_loss=38.5, batch

Validation:  91%|▉| 673/743 [2:49:41<17:56, 15.38s/batch, batch_loss=20.4, batch

Validation:  91%|▉| 674/743 [2:49:41<17:48, 15.49s/batch, batch_loss=20.4, batch

Validation:  91%|▉| 674/743 [2:49:57<17:48, 15.49s/batch, batch_loss=42.3, batch

Validation:  91%|▉| 675/743 [2:49:57<17:53, 15.78s/batch, batch_loss=42.3, batch

Validation:  91%|▉| 675/743 [2:50:12<17:53, 15.78s/batch, batch_loss=36.9, batch

Validation:  91%|▉| 676/743 [2:50:12<17:28, 15.65s/batch, batch_loss=36.9, batch

Validation:  91%|▉| 676/743 [2:50:28<17:28, 15.65s/batch, batch_loss=56.3, batch

Validation:  91%|▉| 677/743 [2:50:28<17:21, 15.78s/batch, batch_loss=56.3, batch

Validation:  91%|▉| 677/743 [2:50:44<17:21, 15.78s/batch, batch_loss=27.3, batch

Validation:  91%|▉| 678/743 [2:50:44<16:51, 15.56s/batch, batch_loss=27.3, batch

Validation:  91%|▉| 678/743 [2:50:58<16:51, 15.56s/batch, batch_loss=23.5, batch

Validation:  91%|▉| 679/743 [2:50:58<16:17, 15.27s/batch, batch_loss=23.5, batch

Validation:  91%|▉| 679/743 [2:51:13<16:17, 15.27s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 680/743 [2:51:13<15:52, 15.12s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 680/743 [2:51:27<15:52, 15.12s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 681/743 [2:51:27<15:20, 14.84s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 681/743 [2:51:42<15:20, 14.84s/batch, batch_loss=32.4, batch

Validation:  92%|▉| 682/743 [2:51:42<15:07, 14.88s/batch, batch_loss=32.4, batch

Validation:  92%|▉| 682/743 [2:51:57<15:07, 14.88s/batch, batch_loss=42, batch_i

Validation:  92%|▉| 683/743 [2:51:57<14:45, 14.76s/batch, batch_loss=42, batch_i

Validation:  92%|▉| 683/743 [2:52:12<14:45, 14.76s/batch, batch_loss=21.2, batch

Validation:  92%|▉| 684/743 [2:52:12<14:39, 14.90s/batch, batch_loss=21.2, batch

Validation:  92%|▉| 684/743 [2:52:26<14:39, 14.90s/batch, batch_loss=17.6, batch

Validation:  92%|▉| 685/743 [2:52:26<14:20, 14.83s/batch, batch_loss=17.6, batch

Validation:  92%|▉| 685/743 [2:52:41<14:20, 14.83s/batch, batch_loss=1.67e+3, ba

Validation:  92%|▉| 686/743 [2:52:41<13:57, 14.69s/batch, batch_loss=1.67e+3, ba

Validation:  92%|▉| 686/743 [2:52:56<13:57, 14.69s/batch, batch_loss=30.8, batch

Validation:  92%|▉| 687/743 [2:52:56<13:49, 14.81s/batch, batch_loss=30.8, batch

Validation:  92%|▉| 687/743 [2:53:10<13:49, 14.81s/batch, batch_loss=22.3, batch

Validation:  93%|▉| 688/743 [2:53:10<13:30, 14.74s/batch, batch_loss=22.3, batch

Validation:  93%|▉| 688/743 [2:53:25<13:30, 14.74s/batch, batch_loss=33.7, batch

Validation:  93%|▉| 689/743 [2:53:25<13:10, 14.63s/batch, batch_loss=33.7, batch

Validation:  93%|▉| 689/743 [2:53:41<13:10, 14.63s/batch, batch_loss=28.5, batch

Validation:  93%|▉| 690/743 [2:53:41<13:16, 15.04s/batch, batch_loss=28.5, batch

Validation:  93%|▉| 690/743 [2:53:56<13:16, 15.04s/batch, batch_loss=13.8, batch

Validation:  93%|▉| 691/743 [2:53:56<13:07, 15.15s/batch, batch_loss=13.8, batch

Validation:  93%|▉| 691/743 [2:54:11<13:07, 15.15s/batch, batch_loss=31.9, batch

Validation:  93%|▉| 692/743 [2:54:11<12:47, 15.06s/batch, batch_loss=31.9, batch

Validation:  93%|▉| 692/743 [2:54:26<12:47, 15.06s/batch, batch_loss=35.9, batch

Validation:  93%|▉| 693/743 [2:54:26<12:30, 15.01s/batch, batch_loss=35.9, batch

Validation:  93%|▉| 693/743 [2:54:40<12:30, 15.01s/batch, batch_loss=47.8, batch

Validation:  93%|▉| 694/743 [2:54:40<11:59, 14.68s/batch, batch_loss=47.8, batch

Validation:  93%|▉| 694/743 [2:54:54<11:59, 14.68s/batch, batch_loss=3.13e+3, ba

Validation:  94%|▉| 695/743 [2:54:54<11:40, 14.59s/batch, batch_loss=3.13e+3, ba

Validation:  94%|▉| 695/743 [2:55:09<11:40, 14.59s/batch, batch_loss=14.3, batch

Validation:  94%|▉| 696/743 [2:55:09<11:24, 14.56s/batch, batch_loss=14.3, batch

Validation:  94%|▉| 696/743 [2:55:24<11:24, 14.56s/batch, batch_loss=34.3, batch

Validation:  94%|▉| 697/743 [2:55:24<11:19, 14.77s/batch, batch_loss=34.3, batch

Validation:  94%|▉| 697/743 [2:55:41<11:19, 14.77s/batch, batch_loss=754, batch_

Validation:  94%|▉| 698/743 [2:55:41<11:33, 15.40s/batch, batch_loss=754, batch_

Validation:  94%|▉| 698/743 [2:55:55<11:33, 15.40s/batch, batch_loss=12.1, batch

Validation:  94%|▉| 699/743 [2:55:55<10:59, 14.99s/batch, batch_loss=12.1, batch

Validation:  94%|▉| 699/743 [2:56:09<10:59, 14.99s/batch, batch_loss=937, batch_

Validation:  94%|▉| 700/743 [2:56:09<10:38, 14.85s/batch, batch_loss=937, batch_

Validation:  94%|▉| 700/743 [2:56:24<10:38, 14.85s/batch, batch_loss=9.5, batch_

Validation:  94%|▉| 701/743 [2:56:24<10:20, 14.78s/batch, batch_loss=9.5, batch_

Validation:  94%|▉| 701/743 [2:56:39<10:20, 14.78s/batch, batch_loss=17.9, batch

Validation:  94%|▉| 702/743 [2:56:39<10:02, 14.70s/batch, batch_loss=17.9, batch

Validation:  94%|▉| 702/743 [2:56:54<10:02, 14.70s/batch, batch_loss=188, batch_

Validation:  95%|▉| 703/743 [2:56:54<09:55, 14.88s/batch, batch_loss=188, batch_

Validation:  95%|▉| 703/743 [2:57:08<09:55, 14.88s/batch, batch_loss=477, batch_

Validation:  95%|▉| 704/743 [2:57:08<09:25, 14.51s/batch, batch_loss=477, batch_

Validation:  95%|▉| 704/743 [2:57:22<09:25, 14.51s/batch, batch_loss=18, batch_i

Validation:  95%|▉| 705/743 [2:57:22<09:10, 14.48s/batch, batch_loss=18, batch_i

Validation:  95%|▉| 705/743 [2:57:38<09:10, 14.48s/batch, batch_loss=32, batch_i

Validation:  95%|▉| 706/743 [2:57:38<09:16, 15.03s/batch, batch_loss=32, batch_i

Validation:  95%|▉| 706/743 [2:57:52<09:16, 15.03s/batch, batch_loss=411, batch_

Validation:  95%|▉| 707/743 [2:57:52<08:48, 14.69s/batch, batch_loss=411, batch_

Validation:  95%|▉| 707/743 [2:58:07<08:48, 14.69s/batch, batch_loss=20.7, batch

Validation:  95%|▉| 708/743 [2:58:07<08:30, 14.59s/batch, batch_loss=20.7, batch

Validation:  95%|▉| 708/743 [2:58:21<08:30, 14.59s/batch, batch_loss=40.8, batch

Validation:  95%|▉| 709/743 [2:58:21<08:19, 14.70s/batch, batch_loss=40.8, batch

Validation:  95%|▉| 709/743 [2:58:35<08:19, 14.70s/batch, batch_loss=27.2, batch

Validation:  96%|▉| 710/743 [2:58:35<07:57, 14.47s/batch, batch_loss=27.2, batch

Validation:  96%|▉| 710/743 [2:58:50<07:57, 14.47s/batch, batch_loss=20.7, batch

Validation:  96%|▉| 711/743 [2:58:50<07:43, 14.48s/batch, batch_loss=20.7, batch

Validation:  96%|▉| 711/743 [2:59:05<07:43, 14.48s/batch, batch_loss=36.6, batch

Validation:  96%|▉| 712/743 [2:59:05<07:31, 14.56s/batch, batch_loss=36.6, batch

Validation:  96%|▉| 712/743 [2:59:18<07:31, 14.56s/batch, batch_loss=32.1, batch

Validation:  96%|▉| 713/743 [2:59:18<07:09, 14.32s/batch, batch_loss=32.1, batch

Validation:  96%|▉| 713/743 [2:59:36<07:09, 14.32s/batch, batch_loss=15.4, batch

Validation:  96%|▉| 714/743 [2:59:36<07:19, 15.15s/batch, batch_loss=15.4, batch

Validation:  96%|▉| 714/743 [2:59:50<07:19, 15.15s/batch, batch_loss=18.6, batch

Validation:  96%|▉| 715/743 [2:59:50<06:55, 14.85s/batch, batch_loss=18.6, batch

Validation:  96%|▉| 715/743 [3:00:04<06:55, 14.85s/batch, batch_loss=43.9, batch

Validation:  96%|▉| 716/743 [3:00:04<06:36, 14.69s/batch, batch_loss=43.9, batch

Validation:  96%|▉| 716/743 [3:00:19<06:36, 14.69s/batch, batch_loss=392, batch_

Validation:  97%|▉| 717/743 [3:00:19<06:23, 14.75s/batch, batch_loss=392, batch_

Validation:  97%|▉| 717/743 [3:00:34<06:23, 14.75s/batch, batch_loss=21.7, batch

Validation:  97%|▉| 718/743 [3:00:34<06:08, 14.73s/batch, batch_loss=21.7, batch

Validation:  97%|▉| 718/743 [3:00:48<06:08, 14.73s/batch, batch_loss=18.3, batch

Validation:  97%|▉| 719/743 [3:00:48<05:50, 14.59s/batch, batch_loss=18.3, batch

Validation:  97%|▉| 719/743 [3:01:02<05:50, 14.59s/batch, batch_loss=17.5, batch

Validation:  97%|▉| 720/743 [3:01:02<05:30, 14.37s/batch, batch_loss=17.5, batch

Validation:  97%|▉| 720/743 [3:01:16<05:30, 14.37s/batch, batch_loss=28.4, batch

Validation:  97%|▉| 721/743 [3:01:16<05:13, 14.26s/batch, batch_loss=28.4, batch

Validation:  97%|▉| 721/743 [3:01:30<05:13, 14.26s/batch, batch_loss=33, batch_i

Validation:  97%|▉| 722/743 [3:01:30<05:02, 14.41s/batch, batch_loss=33, batch_i

Validation:  97%|▉| 722/743 [3:01:45<05:02, 14.41s/batch, batch_loss=5.28e+3, ba

Validation:  97%|▉| 723/743 [3:01:45<04:51, 14.58s/batch, batch_loss=5.28e+3, ba

Validation:  97%|▉| 723/743 [3:02:00<04:51, 14.58s/batch, batch_loss=36.2, batch

Validation:  97%|▉| 724/743 [3:02:00<04:35, 14.50s/batch, batch_loss=36.2, batch

Validation:  97%|▉| 724/743 [3:02:14<04:35, 14.50s/batch, batch_loss=22.8, batch

Validation:  98%|▉| 725/743 [3:02:14<04:19, 14.44s/batch, batch_loss=22.8, batch

Validation:  98%|▉| 725/743 [3:02:29<04:19, 14.44s/batch, batch_loss=27.6, batch

Validation:  98%|▉| 726/743 [3:02:29<04:05, 14.46s/batch, batch_loss=27.6, batch

Validation:  98%|▉| 726/743 [3:02:44<04:05, 14.46s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:02:44<03:54, 14.67s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:02:59<03:54, 14.67s/batch, batch_loss=42.4, batch

Validation:  98%|▉| 728/743 [3:02:59<03:43, 14.87s/batch, batch_loss=42.4, batch

Validation:  98%|▉| 728/743 [3:03:14<03:43, 14.87s/batch, batch_loss=32.5, batch

Validation:  98%|▉| 729/743 [3:03:14<03:28, 14.91s/batch, batch_loss=32.5, batch

Validation:  98%|▉| 729/743 [3:03:28<03:28, 14.91s/batch, batch_loss=32.2, batch

Validation:  98%|▉| 730/743 [3:03:28<03:11, 14.73s/batch, batch_loss=32.2, batch

Validation:  98%|▉| 730/743 [3:03:43<03:11, 14.73s/batch, batch_loss=22.6, batch

Validation:  98%|▉| 731/743 [3:03:43<02:56, 14.71s/batch, batch_loss=22.6, batch

Validation:  98%|▉| 731/743 [3:03:56<02:56, 14.71s/batch, batch_loss=11.6, batch

Validation:  99%|▉| 732/743 [3:03:56<02:36, 14.24s/batch, batch_loss=11.6, batch

Validation:  99%|▉| 732/743 [3:04:09<02:36, 14.24s/batch, batch_loss=26.4, batch

Validation:  99%|▉| 733/743 [3:04:09<02:17, 13.72s/batch, batch_loss=26.4, batch

Validation:  99%|▉| 733/743 [3:04:21<02:17, 13.72s/batch, batch_loss=6.08, batch

Validation:  99%|▉| 734/743 [3:04:21<02:00, 13.37s/batch, batch_loss=6.08, batch

Validation:  99%|▉| 734/743 [3:04:35<02:00, 13.37s/batch, batch_loss=9.94, batch

Validation:  99%|▉| 735/743 [3:04:35<01:48, 13.59s/batch, batch_loss=9.94, batch

Validation:  99%|▉| 735/743 [3:04:49<01:48, 13.59s/batch, batch_loss=1.24, batch

Validation:  99%|▉| 736/743 [3:04:49<01:35, 13.67s/batch, batch_loss=1.24, batch

Validation:  99%|▉| 736/743 [3:05:02<01:35, 13.67s/batch, batch_loss=0.159, batc

Validation:  99%|▉| 737/743 [3:05:02<01:20, 13.41s/batch, batch_loss=0.159, batc

Validation:  99%|▉| 737/743 [3:05:15<01:20, 13.41s/batch, batch_loss=0.159, batc

Validation:  99%|▉| 738/743 [3:05:15<01:06, 13.40s/batch, batch_loss=0.159, batc

Validation:  99%|▉| 738/743 [3:05:29<01:06, 13.40s/batch, batch_loss=0.159, batc

Validation:  99%|▉| 739/743 [3:05:29<00:54, 13.63s/batch, batch_loss=0.159, batc

Validation:  99%|▉| 739/743 [3:05:42<00:54, 13.63s/batch, batch_loss=0.159, batc

Validation: 100%|▉| 740/743 [3:05:42<00:39, 13.28s/batch, batch_loss=0.159, batc

Validation: 100%|▉| 740/743 [3:05:54<00:39, 13.28s/batch, batch_loss=0.159, batc

Validation: 100%|▉| 741/743 [3:05:54<00:26, 13.02s/batch, batch_loss=0.159, batc

Validation: 100%|▉| 741/743 [3:06:07<00:26, 13.02s/batch, batch_loss=0.159, batc

Validation: 100%|▉| 742/743 [3:06:07<00:12, 12.79s/batch, batch_loss=0.159, batc

Validation: 100%|▉| 742/743 [3:06:18<00:12, 12.79s/batch, batch_loss=0.155, batc

Validation: 100%|█| 743/743 [3:06:18<00:00, 12.51s/batch, batch_loss=0.155, batc

Validation: 100%|█| 743/743 [3:06:18<00:00, 15.05s/batch, batch_loss=0.155, batc




Val Loss: 1305.9390


Epoch 6/10:   0%|                                    | 0/991 [00:00<?, ?batch/s]

Epoch 6/10:   0%| | 0/991 [00:14<?, ?batch/s, batch_loss=31.5, batch_index=1, ba

Epoch 6/10:   0%| | 1/991 [00:14<3:58:31, 14.46s/batch, batch_loss=31.5, batch_i

Epoch 6/10:   0%| | 1/991 [00:29<3:58:31, 14.46s/batch, batch_loss=36.8, batch_i

Epoch 6/10:   0%| | 2/991 [00:29<4:02:22, 14.70s/batch, batch_loss=36.8, batch_i

Epoch 6/10:   0%| | 2/991 [00:44<4:02:22, 14.70s/batch, batch_loss=15.2, batch_i

Epoch 6/10:   0%| | 3/991 [00:44<4:02:57, 14.75s/batch, batch_loss=15.2, batch_i

Epoch 6/10:   0%| | 3/991 [01:01<4:02:57, 14.75s/batch, batch_loss=15.3, batch_i

Epoch 6/10:   0%| | 4/991 [01:01<4:16:31, 15.59s/batch, batch_loss=15.3, batch_i

Epoch 6/10:   0%| | 4/991 [01:15<4:16:31, 15.59s/batch, batch_loss=36.4, batch_i

Epoch 6/10:   1%| | 5/991 [01:15<4:11:19, 15.29s/batch, batch_loss=36.4, batch_i

Epoch 6/10:   1%| | 5/991 [01:29<4:11:19, 15.29s/batch, batch_loss=27.5, batch_i

Epoch 6/10:   1%| | 6/991 [01:29<4:02:57, 14.80s/batch, batch_loss=27.5, batch_i

Epoch 6/10:   1%| | 6/991 [01:44<4:02:57, 14.80s/batch, batch_loss=30.8, batch_i

Epoch 6/10:   1%| | 7/991 [01:44<4:04:22, 14.90s/batch, batch_loss=30.8, batch_i

Epoch 6/10:   1%| | 7/991 [01:58<4:04:22, 14.90s/batch, batch_loss=604, batch_in

Epoch 6/10:   1%| | 8/991 [01:58<3:59:36, 14.62s/batch, batch_loss=604, batch_in

Epoch 6/10:   1%| | 8/991 [02:14<3:59:36, 14.62s/batch, batch_loss=17.9, batch_i

Epoch 6/10:   1%| | 9/991 [02:14<4:04:14, 14.92s/batch, batch_loss=17.9, batch_i

Epoch 6/10:   1%| | 9/991 [02:29<4:04:14, 14.92s/batch, batch_loss=26.8, batch_i

Epoch 6/10:   1%| | 10/991 [02:29<4:05:22, 15.01s/batch, batch_loss=26.8, batch_

Epoch 6/10:   1%| | 10/991 [02:48<4:05:22, 15.01s/batch, batch_loss=16.2, batch_

Epoch 6/10:   1%| | 11/991 [02:48<4:23:40, 16.14s/batch, batch_loss=16.2, batch_

Epoch 6/10:   1%| | 11/991 [03:03<4:23:40, 16.14s/batch, batch_loss=1.99e+3, bat

Epoch 6/10:   1%| | 12/991 [03:03<4:16:50, 15.74s/batch, batch_loss=1.99e+3, bat

Epoch 6/10:   1%| | 12/991 [03:17<4:16:50, 15.74s/batch, batch_loss=21.6, batch_

Epoch 6/10:   1%| | 13/991 [03:17<4:11:57, 15.46s/batch, batch_loss=21.6, batch_

Epoch 6/10:   1%| | 13/991 [03:33<4:11:57, 15.46s/batch, batch_loss=13.6, batch_

Epoch 6/10:   1%| | 14/991 [03:33<4:12:41, 15.52s/batch, batch_loss=13.6, batch_

Epoch 6/10:   1%| | 14/991 [03:49<4:12:41, 15.52s/batch, batch_loss=17.6, batch_

Epoch 6/10:   2%| | 15/991 [03:49<4:14:13, 15.63s/batch, batch_loss=17.6, batch_

Epoch 6/10:   2%| | 15/991 [04:03<4:14:13, 15.63s/batch, batch_loss=16.4, batch_

Epoch 6/10:   2%| | 16/991 [04:03<4:06:17, 15.16s/batch, batch_loss=16.4, batch_

Epoch 6/10:   2%| | 16/991 [04:18<4:06:17, 15.16s/batch, batch_loss=17.1, batch_

Epoch 6/10:   2%| | 17/991 [04:18<4:04:37, 15.07s/batch, batch_loss=17.1, batch_

Epoch 6/10:   2%| | 17/991 [04:33<4:04:37, 15.07s/batch, batch_loss=11.6, batch_

Epoch 6/10:   2%| | 18/991 [04:33<4:04:55, 15.10s/batch, batch_loss=11.6, batch_

Epoch 6/10:   2%| | 18/991 [04:50<4:04:55, 15.10s/batch, batch_loss=8.99e+3, bat

Epoch 6/10:   2%| | 19/991 [04:50<4:13:24, 15.64s/batch, batch_loss=8.99e+3, bat

Epoch 6/10:   2%| | 19/991 [05:05<4:13:24, 15.64s/batch, batch_loss=11.5, batch_

Epoch 6/10:   2%| | 20/991 [05:05<4:12:11, 15.58s/batch, batch_loss=11.5, batch_

Epoch 6/10:   2%| | 20/991 [05:20<4:12:11, 15.58s/batch, batch_loss=20.7, batch_

Epoch 6/10:   2%| | 21/991 [05:20<4:06:15, 15.23s/batch, batch_loss=20.7, batch_

Epoch 6/10:   2%| | 21/991 [05:35<4:06:15, 15.23s/batch, batch_loss=1.08e+3, bat

Epoch 6/10:   2%| | 22/991 [05:35<4:04:42, 15.15s/batch, batch_loss=1.08e+3, bat

Epoch 6/10:   2%| | 22/991 [05:51<4:04:42, 15.15s/batch, batch_loss=8.73, batch_

Epoch 6/10:   2%| | 23/991 [05:51<4:07:54, 15.37s/batch, batch_loss=8.73, batch_

Epoch 6/10:   2%| | 23/991 [06:05<4:07:54, 15.37s/batch, batch_loss=13.4, batch_

Epoch 6/10:   2%| | 24/991 [06:05<4:03:38, 15.12s/batch, batch_loss=13.4, batch_

Epoch 6/10:   2%| | 24/991 [06:20<4:03:38, 15.12s/batch, batch_loss=13.1, batch_

Epoch 6/10:   3%| | 25/991 [06:20<4:00:45, 14.95s/batch, batch_loss=13.1, batch_

Epoch 6/10:   3%| | 25/991 [06:34<4:00:45, 14.95s/batch, batch_loss=22, batch_in

Epoch 6/10:   3%| | 26/991 [06:34<3:57:14, 14.75s/batch, batch_loss=22, batch_in

Epoch 6/10:   3%| | 26/991 [06:48<3:57:14, 14.75s/batch, batch_loss=16.7, batch_

Epoch 6/10:   3%| | 27/991 [06:48<3:54:46, 14.61s/batch, batch_loss=16.7, batch_

Epoch 6/10:   3%| | 27/991 [07:04<3:54:46, 14.61s/batch, batch_loss=1.12e+3, bat

Epoch 6/10:   3%| | 28/991 [07:04<3:59:58, 14.95s/batch, batch_loss=1.12e+3, bat

Epoch 6/10:   3%| | 28/991 [07:19<3:59:58, 14.95s/batch, batch_loss=11.6, batch_

Epoch 6/10:   3%| | 29/991 [07:19<3:59:31, 14.94s/batch, batch_loss=11.6, batch_

Epoch 6/10:   3%| | 29/991 [07:34<3:59:31, 14.94s/batch, batch_loss=11.1, batch_

Epoch 6/10:   3%| | 30/991 [07:34<3:59:24, 14.95s/batch, batch_loss=11.1, batch_

Epoch 6/10:   3%| | 30/991 [07:49<3:59:24, 14.95s/batch, batch_loss=8.45, batch_

Epoch 6/10:   3%| | 31/991 [07:49<3:57:38, 14.85s/batch, batch_loss=8.45, batch_

Epoch 6/10:   3%| | 31/991 [08:03<3:57:38, 14.85s/batch, batch_loss=1.3e+4, batc

Epoch 6/10:   3%| | 32/991 [08:03<3:55:19, 14.72s/batch, batch_loss=1.3e+4, batc

Epoch 6/10:   3%| | 32/991 [08:18<3:55:19, 14.72s/batch, batch_loss=13.6, batch_

Epoch 6/10:   3%| | 33/991 [08:18<3:58:17, 14.92s/batch, batch_loss=13.6, batch_

Epoch 6/10:   3%| | 33/991 [08:33<3:58:17, 14.92s/batch, batch_loss=9.05, batch_

Epoch 6/10:   3%| | 34/991 [08:33<3:58:56, 14.98s/batch, batch_loss=9.05, batch_

Epoch 6/10:   3%| | 34/991 [08:48<3:58:56, 14.98s/batch, batch_loss=13.7, batch_

Epoch 6/10:   4%| | 35/991 [08:48<3:56:16, 14.83s/batch, batch_loss=13.7, batch_

Epoch 6/10:   4%| | 35/991 [09:05<3:56:16, 14.83s/batch, batch_loss=10.4, batch_

Epoch 6/10:   4%| | 36/991 [09:05<4:07:00, 15.52s/batch, batch_loss=10.4, batch_

Epoch 6/10:   4%| | 36/991 [09:20<4:07:00, 15.52s/batch, batch_loss=11.9, batch_

Epoch 6/10:   4%| | 37/991 [09:20<4:03:12, 15.30s/batch, batch_loss=11.9, batch_

Epoch 6/10:   4%| | 37/991 [09:35<4:03:12, 15.30s/batch, batch_loss=7.61, batch_

Epoch 6/10:   4%| | 38/991 [09:35<4:03:07, 15.31s/batch, batch_loss=7.61, batch_

Epoch 6/10:   4%| | 38/991 [09:50<4:03:07, 15.31s/batch, batch_loss=1.73e+3, bat

Epoch 6/10:   4%| | 39/991 [09:50<4:00:17, 15.14s/batch, batch_loss=1.73e+3, bat

Epoch 6/10:   4%| | 39/991 [10:05<4:00:17, 15.14s/batch, batch_loss=13.1, batch_

Epoch 6/10:   4%| | 40/991 [10:05<3:59:48, 15.13s/batch, batch_loss=13.1, batch_

Epoch 6/10:   4%| | 40/991 [10:20<3:59:48, 15.13s/batch, batch_loss=6.22e+3, bat

Epoch 6/10:   4%| | 41/991 [10:20<3:58:53, 15.09s/batch, batch_loss=6.22e+3, bat

Epoch 6/10:   4%| | 41/991 [10:35<3:58:53, 15.09s/batch, batch_loss=15.4, batch_

Epoch 6/10:   4%| | 42/991 [10:35<3:57:03, 14.99s/batch, batch_loss=15.4, batch_

Epoch 6/10:   4%| | 42/991 [10:49<3:57:03, 14.99s/batch, batch_loss=9.63, batch_

Epoch 6/10:   4%| | 43/991 [10:49<3:55:21, 14.90s/batch, batch_loss=9.63, batch_

Epoch 6/10:   4%| | 43/991 [11:05<3:55:21, 14.90s/batch, batch_loss=14.4, batch_

Epoch 6/10:   4%| | 44/991 [11:05<3:59:13, 15.16s/batch, batch_loss=14.4, batch_

Epoch 6/10:   4%| | 44/991 [11:19<3:59:13, 15.16s/batch, batch_loss=15.9, batch_

Epoch 6/10:   5%| | 45/991 [11:19<3:53:33, 14.81s/batch, batch_loss=15.9, batch_

Epoch 6/10:   5%| | 45/991 [11:34<3:53:33, 14.81s/batch, batch_loss=14.4, batch_

Epoch 6/10:   5%| | 46/991 [11:34<3:54:27, 14.89s/batch, batch_loss=14.4, batch_

Epoch 6/10:   5%| | 46/991 [11:49<3:54:27, 14.89s/batch, batch_loss=6.42, batch_

Epoch 6/10:   5%| | 47/991 [11:49<3:55:24, 14.96s/batch, batch_loss=6.42, batch_

Epoch 6/10:   5%| | 47/991 [12:05<3:55:24, 14.96s/batch, batch_loss=12.6, batch_

Epoch 6/10:   5%| | 48/991 [12:05<3:56:49, 15.07s/batch, batch_loss=12.6, batch_

Epoch 6/10:   5%| | 48/991 [12:18<3:56:49, 15.07s/batch, batch_loss=11.8, batch_

Epoch 6/10:   5%| | 49/991 [12:18<3:48:51, 14.58s/batch, batch_loss=11.8, batch_

Epoch 6/10:   5%| | 49/991 [12:32<3:48:51, 14.58s/batch, batch_loss=16.2, batch_

Epoch 6/10:   5%| | 50/991 [12:32<3:46:21, 14.43s/batch, batch_loss=16.2, batch_

Epoch 6/10:   5%| | 50/991 [12:50<3:46:21, 14.43s/batch, batch_loss=9.24, batch_

Epoch 6/10:   5%| | 51/991 [12:50<4:00:27, 15.35s/batch, batch_loss=9.24, batch_

Epoch 6/10:   5%| | 51/991 [13:04<4:00:27, 15.35s/batch, batch_loss=12.8, batch_

Epoch 6/10:   5%| | 52/991 [13:04<3:57:01, 15.15s/batch, batch_loss=12.8, batch_

Epoch 6/10:   5%| | 52/991 [13:20<3:57:01, 15.15s/batch, batch_loss=13.3, batch_

Epoch 6/10:   5%| | 53/991 [13:20<3:57:12, 15.17s/batch, batch_loss=13.3, batch_

Epoch 6/10:   5%| | 53/991 [13:35<3:57:12, 15.17s/batch, batch_loss=9.19, batch_

Epoch 6/10:   5%| | 54/991 [13:35<3:58:09, 15.25s/batch, batch_loss=9.19, batch_

Epoch 6/10:   5%| | 54/991 [13:50<3:58:09, 15.25s/batch, batch_loss=10.6, batch_

Epoch 6/10:   6%| | 55/991 [13:50<3:57:08, 15.20s/batch, batch_loss=10.6, batch_

Epoch 6/10:   6%| | 55/991 [14:06<3:57:08, 15.20s/batch, batch_loss=11, batch_in

Epoch 6/10:   6%| | 56/991 [14:06<3:58:13, 15.29s/batch, batch_loss=11, batch_in

Epoch 6/10:   6%| | 56/991 [14:21<3:58:13, 15.29s/batch, batch_loss=7.71, batch_

Epoch 6/10:   6%| | 57/991 [14:21<3:56:11, 15.17s/batch, batch_loss=7.71, batch_

Epoch 6/10:   6%| | 57/991 [14:36<3:56:11, 15.17s/batch, batch_loss=17.2, batch_

Epoch 6/10:   6%| | 58/991 [14:36<3:59:01, 15.37s/batch, batch_loss=17.2, batch_

Epoch 6/10:   6%| | 58/991 [14:51<3:59:01, 15.37s/batch, batch_loss=9.69, batch_

Epoch 6/10:   6%| | 59/991 [14:51<3:54:08, 15.07s/batch, batch_loss=9.69, batch_

Epoch 6/10:   6%| | 59/991 [15:05<3:54:08, 15.07s/batch, batch_loss=19.7, batch_

Epoch 6/10:   6%| | 60/991 [15:05<3:51:34, 14.92s/batch, batch_loss=19.7, batch_

Epoch 6/10:   6%| | 60/991 [15:23<3:51:34, 14.92s/batch, batch_loss=11, batch_in

Epoch 6/10:   6%| | 61/991 [15:23<4:06:00, 15.87s/batch, batch_loss=11, batch_in

Epoch 6/10:   6%| | 61/991 [15:38<4:06:00, 15.87s/batch, batch_loss=10.9, batch_

Epoch 6/10:   6%| | 62/991 [15:38<4:00:46, 15.55s/batch, batch_loss=10.9, batch_

Epoch 6/10:   6%| | 62/991 [15:54<4:00:46, 15.55s/batch, batch_loss=418, batch_i

Epoch 6/10:   6%| | 63/991 [15:54<3:59:42, 15.50s/batch, batch_loss=418, batch_i

Epoch 6/10:   6%| | 63/991 [16:09<3:59:42, 15.50s/batch, batch_loss=797, batch_i

Epoch 6/10:   6%| | 64/991 [16:09<3:57:29, 15.37s/batch, batch_loss=797, batch_i

Epoch 6/10:   6%| | 64/991 [16:24<3:57:29, 15.37s/batch, batch_loss=2.55e+3, bat

Epoch 6/10:   7%| | 65/991 [16:24<3:54:44, 15.21s/batch, batch_loss=2.55e+3, bat

Epoch 6/10:   7%| | 65/991 [16:38<3:54:44, 15.21s/batch, batch_loss=4.18, batch_

Epoch 6/10:   7%| | 66/991 [16:38<3:51:27, 15.01s/batch, batch_loss=4.18, batch_

Epoch 6/10:   7%| | 66/991 [16:53<3:51:27, 15.01s/batch, batch_loss=10.7, batch_

Epoch 6/10:   7%| | 67/991 [16:53<3:50:46, 14.99s/batch, batch_loss=10.7, batch_

Epoch 6/10:   7%| | 67/991 [17:08<3:50:46, 14.99s/batch, batch_loss=8.17, batch_

Epoch 6/10:   7%| | 68/991 [17:08<3:50:30, 14.98s/batch, batch_loss=8.17, batch_

Epoch 6/10:   7%| | 68/991 [17:23<3:50:30, 14.98s/batch, batch_loss=22, batch_in

Epoch 6/10:   7%| | 69/991 [17:23<3:50:12, 14.98s/batch, batch_loss=22, batch_in

Epoch 6/10:   7%| | 69/991 [17:37<3:50:12, 14.98s/batch, batch_loss=8.16, batch_

Epoch 6/10:   7%| | 70/991 [17:37<3:47:36, 14.83s/batch, batch_loss=8.16, batch_

Epoch 6/10:   7%| | 70/991 [17:53<3:47:36, 14.83s/batch, batch_loss=12.1, batch_

Epoch 6/10:   7%| | 71/991 [17:53<3:48:49, 14.92s/batch, batch_loss=12.1, batch_

Epoch 6/10:   7%| | 71/991 [18:08<3:48:49, 14.92s/batch, batch_loss=13.4, batch_

Epoch 6/10:   7%| | 72/991 [18:08<3:51:29, 15.11s/batch, batch_loss=13.4, batch_

Epoch 6/10:   7%| | 72/991 [18:24<3:51:29, 15.11s/batch, batch_loss=24.1, batch_

Epoch 6/10:   7%| | 73/991 [18:24<3:52:57, 15.23s/batch, batch_loss=24.1, batch_

Epoch 6/10:   7%| | 73/991 [18:39<3:52:57, 15.23s/batch, batch_loss=1.74e+3, bat

Epoch 6/10:   7%| | 74/991 [18:39<3:52:20, 15.20s/batch, batch_loss=1.74e+3, bat

Epoch 6/10:   7%| | 74/991 [18:54<3:52:20, 15.20s/batch, batch_loss=15.7, batch_

Epoch 6/10:   8%| | 75/991 [18:54<3:50:44, 15.11s/batch, batch_loss=15.7, batch_

Epoch 6/10:   8%| | 75/991 [19:09<3:50:44, 15.11s/batch, batch_loss=10.8, batch_

Epoch 6/10:   8%| | 76/991 [19:09<3:50:35, 15.12s/batch, batch_loss=10.8, batch_

Epoch 6/10:   8%| | 76/991 [19:24<3:50:35, 15.12s/batch, batch_loss=10.8, batch_

Epoch 6/10:   8%| | 77/991 [19:24<3:51:01, 15.17s/batch, batch_loss=10.8, batch_

Epoch 6/10:   8%| | 77/991 [19:39<3:51:01, 15.17s/batch, batch_loss=13, batch_in

Epoch 6/10:   8%| | 78/991 [19:39<3:50:50, 15.17s/batch, batch_loss=13, batch_in

Epoch 6/10:   8%| | 78/991 [19:53<3:50:50, 15.17s/batch, batch_loss=8.63, batch_

Epoch 6/10:   8%| | 79/991 [19:53<3:45:20, 14.82s/batch, batch_loss=8.63, batch_

Epoch 6/10:   8%| | 79/991 [20:09<3:45:20, 14.82s/batch, batch_loss=9.28, batch_

Epoch 6/10:   8%| | 80/991 [20:09<3:47:48, 15.00s/batch, batch_loss=9.28, batch_

Epoch 6/10:   8%| | 80/991 [20:24<3:47:48, 15.00s/batch, batch_loss=14, batch_in

Epoch 6/10:   8%| | 81/991 [20:24<3:49:40, 15.14s/batch, batch_loss=14, batch_in

Epoch 6/10:   8%| | 81/991 [20:40<3:49:40, 15.14s/batch, batch_loss=12.4, batch_

Epoch 6/10:   8%| | 82/991 [20:40<3:52:09, 15.32s/batch, batch_loss=12.4, batch_

Epoch 6/10:   8%| | 82/991 [20:55<3:52:09, 15.32s/batch, batch_loss=8.07, batch_

Epoch 6/10:   8%| | 83/991 [20:55<3:51:52, 15.32s/batch, batch_loss=8.07, batch_

Epoch 6/10:   8%| | 83/991 [21:09<3:51:52, 15.32s/batch, batch_loss=9.69, batch_

Epoch 6/10:   8%| | 84/991 [21:09<3:46:29, 14.98s/batch, batch_loss=9.69, batch_

Epoch 6/10:   8%| | 84/991 [21:24<3:46:29, 14.98s/batch, batch_loss=8.97, batch_

Epoch 6/10:   9%| | 85/991 [21:24<3:44:26, 14.86s/batch, batch_loss=8.97, batch_

Epoch 6/10:   9%| | 85/991 [21:39<3:44:26, 14.86s/batch, batch_loss=11.2, batch_

Epoch 6/10:   9%| | 86/991 [21:39<3:45:49, 14.97s/batch, batch_loss=11.2, batch_

Epoch 6/10:   9%| | 86/991 [21:54<3:45:49, 14.97s/batch, batch_loss=9.65, batch_

Epoch 6/10:   9%| | 87/991 [21:54<3:46:07, 15.01s/batch, batch_loss=9.65, batch_

Epoch 6/10:   9%| | 87/991 [22:09<3:46:07, 15.01s/batch, batch_loss=9.19, batch_

Epoch 6/10:   9%| | 88/991 [22:09<3:44:20, 14.91s/batch, batch_loss=9.19, batch_

Epoch 6/10:   9%| | 88/991 [22:24<3:44:20, 14.91s/batch, batch_loss=6.01, batch_

Epoch 6/10:   9%| | 89/991 [22:24<3:45:01, 14.97s/batch, batch_loss=6.01, batch_

Epoch 6/10:   9%| | 89/991 [22:40<3:45:01, 14.97s/batch, batch_loss=247, batch_i

Epoch 6/10:   9%| | 90/991 [22:40<3:48:44, 15.23s/batch, batch_loss=247, batch_i

Epoch 6/10:   9%| | 90/991 [22:56<3:48:44, 15.23s/batch, batch_loss=1.54e+3, bat

Epoch 6/10:   9%| | 91/991 [22:56<3:50:30, 15.37s/batch, batch_loss=1.54e+3, bat

Epoch 6/10:   9%| | 91/991 [23:11<3:50:30, 15.37s/batch, batch_loss=17.3, batch_

Epoch 6/10:   9%| | 92/991 [23:11<3:49:41, 15.33s/batch, batch_loss=17.3, batch_

Epoch 6/10:   9%| | 92/991 [23:27<3:49:41, 15.33s/batch, batch_loss=19.7, batch_

Epoch 6/10:   9%| | 93/991 [23:27<3:52:47, 15.55s/batch, batch_loss=19.7, batch_

Epoch 6/10:   9%| | 93/991 [23:42<3:52:47, 15.55s/batch, batch_loss=18.4, batch_

Epoch 6/10:   9%| | 94/991 [23:42<3:51:34, 15.49s/batch, batch_loss=18.4, batch_

Epoch 6/10:   9%| | 94/991 [23:57<3:51:34, 15.49s/batch, batch_loss=17.6, batch_

Epoch 6/10:  10%| | 95/991 [23:57<3:49:23, 15.36s/batch, batch_loss=17.6, batch_

Epoch 6/10:  10%| | 95/991 [24:12<3:49:23, 15.36s/batch, batch_loss=17.9, batch_

Epoch 6/10:  10%| | 96/991 [24:12<3:47:26, 15.25s/batch, batch_loss=17.9, batch_

Epoch 6/10:  10%| | 96/991 [24:27<3:47:26, 15.25s/batch, batch_loss=16.7, batch_

Epoch 6/10:  10%| | 97/991 [24:27<3:45:16, 15.12s/batch, batch_loss=16.7, batch_

Epoch 6/10:  10%| | 97/991 [24:42<3:45:16, 15.12s/batch, batch_loss=20.3, batch_

Epoch 6/10:  10%| | 98/991 [24:42<3:41:37, 14.89s/batch, batch_loss=20.3, batch_

Epoch 6/10:  10%| | 98/991 [24:56<3:41:37, 14.89s/batch, batch_loss=15.9, batch_

Epoch 6/10:  10%| | 99/991 [24:56<3:39:35, 14.77s/batch, batch_loss=15.9, batch_

Epoch 6/10:  10%| | 99/991 [25:11<3:39:35, 14.77s/batch, batch_loss=18.7, batch_

Epoch 6/10:  10%| | 100/991 [25:11<3:41:47, 14.94s/batch, batch_loss=18.7, batch

Epoch 6/10:  10%| | 100/991 [25:27<3:41:47, 14.94s/batch, batch_loss=14.5, batch

Epoch 6/10:  10%| | 101/991 [25:27<3:45:20, 15.19s/batch, batch_loss=14.5, batch

Epoch 6/10:  10%| | 101/991 [25:42<3:45:20, 15.19s/batch, batch_loss=22.5, batch

Epoch 6/10:  10%| | 102/991 [25:42<3:45:00, 15.19s/batch, batch_loss=22.5, batch

Epoch 6/10:  10%| | 102/991 [25:57<3:45:00, 15.19s/batch, batch_loss=915, batch_

Epoch 6/10:  10%| | 103/991 [25:57<3:43:02, 15.07s/batch, batch_loss=915, batch_

Epoch 6/10:  10%| | 103/991 [26:12<3:43:02, 15.07s/batch, batch_loss=13.6, batch

Epoch 6/10:  10%| | 104/991 [26:12<3:40:58, 14.95s/batch, batch_loss=13.6, batch

Epoch 6/10:  10%| | 104/991 [26:26<3:40:58, 14.95s/batch, batch_loss=10.4, batch

Epoch 6/10:  11%| | 105/991 [26:26<3:38:28, 14.80s/batch, batch_loss=10.4, batch

Epoch 6/10:  11%| | 105/991 [26:41<3:38:28, 14.80s/batch, batch_loss=9.26, batch

Epoch 6/10:  11%| | 106/991 [26:41<3:37:03, 14.72s/batch, batch_loss=9.26, batch

Epoch 6/10:  11%| | 106/991 [26:59<3:37:03, 14.72s/batch, batch_loss=16.4, batch

Epoch 6/10:  11%| | 107/991 [26:59<3:51:46, 15.73s/batch, batch_loss=16.4, batch

Epoch 6/10:  11%| | 107/991 [27:13<3:51:46, 15.73s/batch, batch_loss=25.4, batch

Epoch 6/10:  11%| | 108/991 [27:13<3:44:39, 15.27s/batch, batch_loss=25.4, batch

Epoch 6/10:  11%| | 108/991 [27:28<3:44:39, 15.27s/batch, batch_loss=17.6, batch

Epoch 6/10:  11%| | 109/991 [27:28<3:44:16, 15.26s/batch, batch_loss=17.6, batch

Epoch 6/10:  11%| | 109/991 [27:43<3:44:16, 15.26s/batch, batch_loss=15.5, batch

Epoch 6/10:  11%| | 110/991 [27:43<3:39:28, 14.95s/batch, batch_loss=15.5, batch

Epoch 6/10:  11%| | 110/991 [27:56<3:39:28, 14.95s/batch, batch_loss=16.1, batch

Epoch 6/10:  11%| | 111/991 [27:56<3:34:16, 14.61s/batch, batch_loss=16.1, batch

Epoch 6/10:  11%| | 111/991 [28:10<3:34:16, 14.61s/batch, batch_loss=19.2, batch

Epoch 6/10:  11%| | 112/991 [28:10<3:30:42, 14.38s/batch, batch_loss=19.2, batch

Epoch 6/10:  11%| | 112/991 [28:25<3:30:42, 14.38s/batch, batch_loss=15.1, batch

Epoch 6/10:  11%| | 113/991 [28:25<3:31:37, 14.46s/batch, batch_loss=15.1, batch

Epoch 6/10:  11%| | 113/991 [28:42<3:31:37, 14.46s/batch, batch_loss=15.4, batch

Epoch 6/10:  12%| | 114/991 [28:42<3:44:02, 15.33s/batch, batch_loss=15.4, batch

Epoch 6/10:  12%| | 114/991 [28:57<3:44:02, 15.33s/batch, batch_loss=19.3, batch

Epoch 6/10:  12%| | 115/991 [28:57<3:40:34, 15.11s/batch, batch_loss=19.3, batch

Epoch 6/10:  12%| | 115/991 [29:12<3:40:34, 15.11s/batch, batch_loss=11, batch_i

Epoch 6/10:  12%| | 116/991 [29:12<3:41:37, 15.20s/batch, batch_loss=11, batch_i

Epoch 6/10:  12%| | 116/991 [29:27<3:41:37, 15.20s/batch, batch_loss=16.1, batch

Epoch 6/10:  12%| | 117/991 [29:27<3:41:03, 15.18s/batch, batch_loss=16.1, batch

Epoch 6/10:  12%| | 117/991 [29:42<3:41:03, 15.18s/batch, batch_loss=15.4, batch

Epoch 6/10:  12%| | 118/991 [29:42<3:37:54, 14.98s/batch, batch_loss=15.4, batch

Epoch 6/10:  12%| | 118/991 [29:57<3:37:54, 14.98s/batch, batch_loss=25.3, batch

Epoch 6/10:  12%| | 119/991 [29:57<3:37:37, 14.97s/batch, batch_loss=25.3, batch

Epoch 6/10:  12%| | 119/991 [30:11<3:37:37, 14.97s/batch, batch_loss=16.4, batch

Epoch 6/10:  12%| | 120/991 [30:11<3:34:27, 14.77s/batch, batch_loss=16.4, batch

Epoch 6/10:  12%| | 120/991 [30:29<3:34:27, 14.77s/batch, batch_loss=28.9, batch

Epoch 6/10:  12%| | 121/991 [30:29<3:47:32, 15.69s/batch, batch_loss=28.9, batch

Epoch 6/10:  12%| | 121/991 [30:44<3:47:32, 15.69s/batch, batch_loss=10.2, batch

Epoch 6/10:  12%| | 122/991 [30:44<3:43:27, 15.43s/batch, batch_loss=10.2, batch

Epoch 6/10:  12%| | 122/991 [30:58<3:43:27, 15.43s/batch, batch_loss=17.5, batch

Epoch 6/10:  12%| | 123/991 [30:58<3:40:03, 15.21s/batch, batch_loss=17.5, batch

Epoch 6/10:  12%| | 123/991 [31:13<3:40:03, 15.21s/batch, batch_loss=3.49e+3, ba

Epoch 6/10:  13%|▏| 124/991 [31:13<3:35:24, 14.91s/batch, batch_loss=3.49e+3, ba

Epoch 6/10:  13%|▏| 124/991 [31:28<3:35:24, 14.91s/batch, batch_loss=8.72, batch

Epoch 6/10:  13%|▏| 125/991 [31:28<3:37:49, 15.09s/batch, batch_loss=8.72, batch

Epoch 6/10:  13%|▏| 125/991 [31:43<3:37:49, 15.09s/batch, batch_loss=10.7, batch

Epoch 6/10:  13%|▏| 126/991 [31:43<3:37:21, 15.08s/batch, batch_loss=10.7, batch

Epoch 6/10:  13%|▏| 126/991 [31:58<3:37:21, 15.08s/batch, batch_loss=1.88e+3, ba

Epoch 6/10:  13%|▏| 127/991 [31:58<3:36:56, 15.07s/batch, batch_loss=1.88e+3, ba

Epoch 6/10:  13%|▏| 127/991 [32:13<3:36:56, 15.07s/batch, batch_loss=1.57e+3, ba

Epoch 6/10:  13%|▏| 128/991 [32:13<3:33:22, 14.83s/batch, batch_loss=1.57e+3, ba

Epoch 6/10:  13%|▏| 128/991 [32:27<3:33:22, 14.83s/batch, batch_loss=227, batch_

Epoch 6/10:  13%|▏| 129/991 [32:27<3:31:06, 14.69s/batch, batch_loss=227, batch_

Epoch 6/10:  13%|▏| 129/991 [32:44<3:31:06, 14.69s/batch, batch_loss=995, batch_

Epoch 6/10:  13%|▏| 130/991 [32:44<3:41:06, 15.41s/batch, batch_loss=995, batch_

Epoch 6/10:  13%|▏| 130/991 [32:59<3:41:06, 15.41s/batch, batch_loss=8.34e+3, ba

Epoch 6/10:  13%|▏| 131/991 [32:59<3:37:06, 15.15s/batch, batch_loss=8.34e+3, ba

Epoch 6/10:  13%|▏| 131/991 [33:12<3:37:06, 15.15s/batch, batch_loss=18, batch_i

Epoch 6/10:  13%|▏| 132/991 [33:12<3:31:48, 14.79s/batch, batch_loss=18, batch_i

Epoch 6/10:  13%|▏| 132/991 [33:26<3:31:48, 14.79s/batch, batch_loss=9.81, batch

Epoch 6/10:  13%|▏| 133/991 [33:26<3:27:30, 14.51s/batch, batch_loss=9.81, batch

Epoch 6/10:  13%|▏| 133/991 [33:42<3:27:30, 14.51s/batch, batch_loss=10.9, batch

Epoch 6/10:  14%|▏| 134/991 [33:42<3:31:01, 14.77s/batch, batch_loss=10.9, batch

Epoch 6/10:  14%|▏| 134/991 [33:56<3:31:01, 14.77s/batch, batch_loss=19.7, batch

Epoch 6/10:  14%|▏| 135/991 [33:56<3:30:09, 14.73s/batch, batch_loss=19.7, batch

Epoch 6/10:  14%|▏| 135/991 [34:11<3:30:09, 14.73s/batch, batch_loss=8.14, batch

Epoch 6/10:  14%|▏| 136/991 [34:11<3:29:48, 14.72s/batch, batch_loss=8.14, batch

Epoch 6/10:  14%|▏| 136/991 [34:26<3:29:48, 14.72s/batch, batch_loss=13.1, batch

Epoch 6/10:  14%|▏| 137/991 [34:26<3:29:56, 14.75s/batch, batch_loss=13.1, batch

Epoch 6/10:  14%|▏| 137/991 [34:43<3:29:56, 14.75s/batch, batch_loss=15, batch_i

Epoch 6/10:  14%|▏| 138/991 [34:43<3:40:25, 15.50s/batch, batch_loss=15, batch_i

Epoch 6/10:  14%|▏| 138/991 [34:58<3:40:25, 15.50s/batch, batch_loss=6.89, batch

Epoch 6/10:  14%|▏| 139/991 [34:58<3:38:02, 15.35s/batch, batch_loss=6.89, batch

Epoch 6/10:  14%|▏| 139/991 [35:13<3:38:02, 15.35s/batch, batch_loss=10, batch_i

Epoch 6/10:  14%|▏| 140/991 [35:13<3:35:03, 15.16s/batch, batch_loss=10, batch_i

Epoch 6/10:  14%|▏| 140/991 [35:28<3:35:03, 15.16s/batch, batch_loss=6.57, batch

Epoch 6/10:  14%|▏| 141/991 [35:28<3:34:45, 15.16s/batch, batch_loss=6.57, batch

Epoch 6/10:  14%|▏| 141/991 [35:42<3:34:45, 15.16s/batch, batch_loss=7, batch_in

Epoch 6/10:  14%|▏| 142/991 [35:42<3:31:29, 14.95s/batch, batch_loss=7, batch_in

Epoch 6/10:  14%|▏| 142/991 [35:58<3:31:29, 14.95s/batch, batch_loss=13.3, batch

Epoch 6/10:  14%|▏| 143/991 [35:58<3:33:17, 15.09s/batch, batch_loss=13.3, batch

Epoch 6/10:  14%|▏| 143/991 [36:13<3:33:17, 15.09s/batch, batch_loss=14.3, batch

Epoch 6/10:  15%|▏| 144/991 [36:13<3:31:58, 15.02s/batch, batch_loss=14.3, batch

Epoch 6/10:  15%|▏| 144/991 [36:27<3:31:58, 15.02s/batch, batch_loss=18.1, batch

Epoch 6/10:  15%|▏| 145/991 [36:27<3:29:13, 14.84s/batch, batch_loss=18.1, batch

Epoch 6/10:  15%|▏| 145/991 [36:44<3:29:13, 14.84s/batch, batch_loss=13.4, batch

Epoch 6/10:  15%|▏| 146/991 [36:44<3:38:27, 15.51s/batch, batch_loss=13.4, batch

Epoch 6/10:  15%|▏| 146/991 [36:59<3:38:27, 15.51s/batch, batch_loss=7.99, batch

Epoch 6/10:  15%|▏| 147/991 [36:59<3:35:06, 15.29s/batch, batch_loss=7.99, batch

Epoch 6/10:  15%|▏| 147/991 [37:14<3:35:06, 15.29s/batch, batch_loss=19, batch_i

Epoch 6/10:  15%|▏| 148/991 [37:14<3:34:56, 15.30s/batch, batch_loss=19, batch_i

Epoch 6/10:  15%|▏| 148/991 [37:29<3:34:56, 15.30s/batch, batch_loss=11.2, batch

Epoch 6/10:  15%|▏| 149/991 [37:29<3:30:09, 14.98s/batch, batch_loss=11.2, batch

Epoch 6/10:  15%|▏| 149/991 [37:45<3:30:09, 14.98s/batch, batch_loss=11, batch_i

Epoch 6/10:  15%|▏| 150/991 [37:45<3:38:07, 15.56s/batch, batch_loss=11, batch_i

Epoch 6/10:  15%|▏| 150/991 [38:01<3:38:07, 15.56s/batch, batch_loss=17.8, batch

Epoch 6/10:  15%|▏| 151/991 [38:01<3:37:52, 15.56s/batch, batch_loss=17.8, batch

Epoch 6/10:  15%|▏| 151/991 [38:16<3:37:52, 15.56s/batch, batch_loss=15, batch_i

Epoch 6/10:  15%|▏| 152/991 [38:16<3:35:26, 15.41s/batch, batch_loss=15, batch_i

Epoch 6/10:  15%|▏| 152/991 [38:31<3:35:26, 15.41s/batch, batch_loss=17.7, batch

Epoch 6/10:  15%|▏| 153/991 [38:31<3:33:02, 15.25s/batch, batch_loss=17.7, batch

Epoch 6/10:  15%|▏| 153/991 [38:45<3:33:02, 15.25s/batch, batch_loss=19.4, batch

Epoch 6/10:  16%|▏| 154/991 [38:45<3:29:37, 15.03s/batch, batch_loss=19.4, batch

Epoch 6/10:  16%|▏| 154/991 [39:00<3:29:37, 15.03s/batch, batch_loss=19.8, batch

Epoch 6/10:  16%|▏| 155/991 [39:00<3:28:17, 14.95s/batch, batch_loss=19.8, batch

Epoch 6/10:  16%|▏| 155/991 [39:15<3:28:17, 14.95s/batch, batch_loss=9.24, batch

Epoch 6/10:  16%|▏| 156/991 [39:15<3:28:20, 14.97s/batch, batch_loss=9.24, batch

Epoch 6/10:  16%|▏| 156/991 [39:30<3:28:20, 14.97s/batch, batch_loss=23.9, batch

Epoch 6/10:  16%|▏| 157/991 [39:30<3:28:06, 14.97s/batch, batch_loss=23.9, batch

Epoch 6/10:  16%|▏| 157/991 [39:45<3:28:06, 14.97s/batch, batch_loss=8.85, batch

Epoch 6/10:  16%|▏| 158/991 [39:45<3:26:13, 14.85s/batch, batch_loss=8.85, batch

Epoch 6/10:  16%|▏| 158/991 [39:59<3:26:13, 14.85s/batch, batch_loss=5.96, batch

Epoch 6/10:  16%|▏| 159/991 [39:59<3:24:29, 14.75s/batch, batch_loss=5.96, batch

Epoch 6/10:  16%|▏| 159/991 [40:14<3:24:29, 14.75s/batch, batch_loss=12, batch_i

Epoch 6/10:  16%|▏| 160/991 [40:14<3:25:11, 14.82s/batch, batch_loss=12, batch_i

Epoch 6/10:  16%|▏| 160/991 [40:30<3:25:11, 14.82s/batch, batch_loss=459, batch_

Epoch 6/10:  16%|▏| 161/991 [40:30<3:30:30, 15.22s/batch, batch_loss=459, batch_

Epoch 6/10:  16%|▏| 161/991 [40:46<3:30:30, 15.22s/batch, batch_loss=15.3, batch

Epoch 6/10:  16%|▏| 162/991 [40:46<3:29:51, 15.19s/batch, batch_loss=15.3, batch

Epoch 6/10:  16%|▏| 162/991 [41:00<3:29:51, 15.19s/batch, batch_loss=8.29, batch

Epoch 6/10:  16%|▏| 163/991 [41:00<3:25:40, 14.90s/batch, batch_loss=8.29, batch

Epoch 6/10:  16%|▏| 163/991 [41:14<3:25:40, 14.90s/batch, batch_loss=11.6, batch

Epoch 6/10:  17%|▏| 164/991 [41:14<3:21:47, 14.64s/batch, batch_loss=11.6, batch

Epoch 6/10:  17%|▏| 164/991 [41:28<3:21:47, 14.64s/batch, batch_loss=11.1, batch

Epoch 6/10:  17%|▏| 165/991 [41:28<3:20:38, 14.57s/batch, batch_loss=11.1, batch

Epoch 6/10:  17%|▏| 165/991 [41:43<3:20:38, 14.57s/batch, batch_loss=10.5, batch

Epoch 6/10:  17%|▏| 166/991 [41:43<3:21:11, 14.63s/batch, batch_loss=10.5, batch

Epoch 6/10:  17%|▏| 166/991 [41:58<3:21:11, 14.63s/batch, batch_loss=18.1, batch

Epoch 6/10:  17%|▏| 167/991 [41:58<3:22:38, 14.75s/batch, batch_loss=18.1, batch

Epoch 6/10:  17%|▏| 167/991 [42:14<3:22:38, 14.75s/batch, batch_loss=12, batch_i

Epoch 6/10:  17%|▏| 168/991 [42:14<3:26:53, 15.08s/batch, batch_loss=12, batch_i

Epoch 6/10:  17%|▏| 168/991 [42:31<3:26:53, 15.08s/batch, batch_loss=13.5, batch

Epoch 6/10:  17%|▏| 169/991 [42:31<3:35:18, 15.72s/batch, batch_loss=13.5, batch

Epoch 6/10:  17%|▏| 169/991 [42:47<3:35:18, 15.72s/batch, batch_loss=9.7, batch_

Epoch 6/10:  17%|▏| 170/991 [42:47<3:35:10, 15.73s/batch, batch_loss=9.7, batch_

Epoch 6/10:  17%|▏| 170/991 [43:02<3:35:10, 15.73s/batch, batch_loss=5.66, batch

Epoch 6/10:  17%|▏| 171/991 [43:02<3:31:48, 15.50s/batch, batch_loss=5.66, batch

Epoch 6/10:  17%|▏| 171/991 [43:16<3:31:48, 15.50s/batch, batch_loss=9.43, batch

Epoch 6/10:  17%|▏| 172/991 [43:16<3:27:46, 15.22s/batch, batch_loss=9.43, batch

Epoch 6/10:  17%|▏| 172/991 [43:32<3:27:46, 15.22s/batch, batch_loss=6.98, batch

Epoch 6/10:  17%|▏| 173/991 [43:32<3:28:47, 15.31s/batch, batch_loss=6.98, batch

Epoch 6/10:  17%|▏| 173/991 [43:47<3:28:47, 15.31s/batch, batch_loss=3.02e+4, ba

Epoch 6/10:  18%|▏| 174/991 [43:47<3:26:22, 15.16s/batch, batch_loss=3.02e+4, ba

Epoch 6/10:  18%|▏| 174/991 [44:02<3:26:22, 15.16s/batch, batch_loss=19.9, batch

Epoch 6/10:  18%|▏| 175/991 [44:02<3:28:24, 15.32s/batch, batch_loss=19.9, batch

Epoch 6/10:  18%|▏| 175/991 [44:17<3:28:24, 15.32s/batch, batch_loss=21.4, batch

Epoch 6/10:  18%|▏| 176/991 [44:17<3:26:48, 15.23s/batch, batch_loss=21.4, batch

Epoch 6/10:  18%|▏| 176/991 [44:32<3:26:48, 15.23s/batch, batch_loss=22.5, batch

Epoch 6/10:  18%|▏| 177/991 [44:32<3:24:28, 15.07s/batch, batch_loss=22.5, batch

Epoch 6/10:  18%|▏| 177/991 [44:47<3:24:28, 15.07s/batch, batch_loss=21.8, batch

Epoch 6/10:  18%|▏| 178/991 [44:47<3:22:20, 14.93s/batch, batch_loss=21.8, batch

Epoch 6/10:  18%|▏| 178/991 [45:02<3:22:20, 14.93s/batch, batch_loss=12.8, batch

Epoch 6/10:  18%|▏| 179/991 [45:02<3:21:28, 14.89s/batch, batch_loss=12.8, batch

Epoch 6/10:  18%|▏| 179/991 [45:16<3:21:28, 14.89s/batch, batch_loss=8.25, batch

Epoch 6/10:  18%|▏| 180/991 [45:16<3:19:24, 14.75s/batch, batch_loss=8.25, batch

Epoch 6/10:  18%|▏| 180/991 [45:31<3:19:24, 14.75s/batch, batch_loss=2.51e+4, ba

Epoch 6/10:  18%|▏| 181/991 [45:31<3:18:43, 14.72s/batch, batch_loss=2.51e+4, ba

Epoch 6/10:  18%|▏| 181/991 [45:46<3:18:43, 14.72s/batch, batch_loss=13.7, batch

Epoch 6/10:  18%|▏| 182/991 [45:46<3:19:23, 14.79s/batch, batch_loss=13.7, batch

Epoch 6/10:  18%|▏| 182/991 [46:00<3:19:23, 14.79s/batch, batch_loss=18.9, batch

Epoch 6/10:  18%|▏| 183/991 [46:00<3:17:35, 14.67s/batch, batch_loss=18.9, batch

Epoch 6/10:  18%|▏| 183/991 [46:15<3:17:35, 14.67s/batch, batch_loss=19.1, batch

Epoch 6/10:  19%|▏| 184/991 [46:15<3:17:55, 14.72s/batch, batch_loss=19.1, batch

Epoch 6/10:  19%|▏| 184/991 [46:30<3:17:55, 14.72s/batch, batch_loss=11.6, batch

Epoch 6/10:  19%|▏| 185/991 [46:30<3:18:38, 14.79s/batch, batch_loss=11.6, batch

Epoch 6/10:  19%|▏| 185/991 [46:45<3:18:38, 14.79s/batch, batch_loss=17.9, batch

Epoch 6/10:  19%|▏| 186/991 [46:45<3:18:50, 14.82s/batch, batch_loss=17.9, batch

Epoch 6/10:  19%|▏| 186/991 [46:59<3:18:50, 14.82s/batch, batch_loss=16.3, batch

Epoch 6/10:  19%|▏| 187/991 [46:59<3:18:16, 14.80s/batch, batch_loss=16.3, batch

Epoch 6/10:  19%|▏| 187/991 [47:14<3:18:16, 14.80s/batch, batch_loss=15.9, batch

Epoch 6/10:  19%|▏| 188/991 [47:14<3:18:50, 14.86s/batch, batch_loss=15.9, batch

Epoch 6/10:  19%|▏| 188/991 [47:29<3:18:50, 14.86s/batch, batch_loss=21.2, batch

Epoch 6/10:  19%|▏| 189/991 [47:29<3:17:52, 14.80s/batch, batch_loss=21.2, batch

Epoch 6/10:  19%|▏| 189/991 [47:43<3:17:52, 14.80s/batch, batch_loss=21.4, batch

Epoch 6/10:  19%|▏| 190/991 [47:43<3:15:48, 14.67s/batch, batch_loss=21.4, batch

Epoch 6/10:  19%|▏| 190/991 [47:58<3:15:48, 14.67s/batch, batch_loss=21.9, batch

Epoch 6/10:  19%|▏| 191/991 [47:58<3:15:18, 14.65s/batch, batch_loss=21.9, batch

Epoch 6/10:  19%|▏| 191/991 [48:12<3:15:18, 14.65s/batch, batch_loss=20.5, batch

Epoch 6/10:  19%|▏| 192/991 [48:12<3:13:04, 14.50s/batch, batch_loss=20.5, batch

Epoch 6/10:  19%|▏| 192/991 [48:27<3:13:04, 14.50s/batch, batch_loss=23.9, batch

Epoch 6/10:  19%|▏| 193/991 [48:27<3:14:57, 14.66s/batch, batch_loss=23.9, batch

Epoch 6/10:  19%|▏| 193/991 [48:43<3:14:57, 14.66s/batch, batch_loss=11, batch_i

Epoch 6/10:  20%|▏| 194/991 [48:43<3:17:43, 14.89s/batch, batch_loss=11, batch_i

Epoch 6/10:  20%|▏| 194/991 [49:00<3:17:43, 14.89s/batch, batch_loss=4.59, batch

Epoch 6/10:  20%|▏| 195/991 [49:00<3:28:28, 15.71s/batch, batch_loss=4.59, batch

Epoch 6/10:  20%|▏| 195/991 [49:15<3:28:28, 15.71s/batch, batch_loss=8.68, batch

Epoch 6/10:  20%|▏| 196/991 [49:15<3:23:17, 15.34s/batch, batch_loss=8.68, batch

Epoch 6/10:  20%|▏| 196/991 [49:30<3:23:17, 15.34s/batch, batch_loss=15.7, batch

Epoch 6/10:  20%|▏| 197/991 [49:30<3:21:56, 15.26s/batch, batch_loss=15.7, batch

Epoch 6/10:  20%|▏| 197/991 [49:45<3:21:56, 15.26s/batch, batch_loss=10.6, batch

Epoch 6/10:  20%|▏| 198/991 [49:45<3:22:51, 15.35s/batch, batch_loss=10.6, batch

Epoch 6/10:  20%|▏| 198/991 [50:01<3:22:51, 15.35s/batch, batch_loss=15, batch_i

Epoch 6/10:  20%|▏| 199/991 [50:01<3:23:24, 15.41s/batch, batch_loss=15, batch_i

Epoch 6/10:  20%|▏| 199/991 [50:16<3:23:24, 15.41s/batch, batch_loss=8.19, batch

Epoch 6/10:  20%|▏| 200/991 [50:16<3:22:53, 15.39s/batch, batch_loss=8.19, batch

Epoch 6/10:  20%|▏| 200/991 [50:30<3:22:53, 15.39s/batch, batch_loss=15.9, batch

Epoch 6/10:  20%|▏| 201/991 [50:30<3:17:55, 15.03s/batch, batch_loss=15.9, batch

Epoch 6/10:  20%|▏| 201/991 [50:48<3:17:55, 15.03s/batch, batch_loss=11.8, batch

Epoch 6/10:  20%|▏| 202/991 [50:48<3:26:21, 15.69s/batch, batch_loss=11.8, batch

Epoch 6/10:  20%|▏| 202/991 [51:02<3:26:21, 15.69s/batch, batch_loss=15.1, batch

Epoch 6/10:  20%|▏| 203/991 [51:02<3:20:42, 15.28s/batch, batch_loss=15.1, batch

Epoch 6/10:  20%|▏| 203/991 [51:16<3:20:42, 15.28s/batch, batch_loss=39.4, batch

Epoch 6/10:  21%|▏| 204/991 [51:16<3:16:29, 14.98s/batch, batch_loss=39.4, batch

Epoch 6/10:  21%|▏| 204/991 [51:32<3:16:29, 14.98s/batch, batch_loss=32.8, batch

Epoch 6/10:  21%|▏| 205/991 [51:32<3:17:46, 15.10s/batch, batch_loss=32.8, batch

Epoch 6/10:  21%|▏| 205/991 [51:47<3:17:46, 15.10s/batch, batch_loss=9.29, batch

Epoch 6/10:  21%|▏| 206/991 [51:47<3:17:37, 15.11s/batch, batch_loss=9.29, batch

Epoch 6/10:  21%|▏| 206/991 [52:02<3:17:37, 15.11s/batch, batch_loss=10.4, batch

Epoch 6/10:  21%|▏| 207/991 [52:02<3:16:58, 15.07s/batch, batch_loss=10.4, batch

Epoch 6/10:  21%|▏| 207/991 [52:16<3:16:58, 15.07s/batch, batch_loss=12.4, batch

Epoch 6/10:  21%|▏| 208/991 [52:16<3:15:13, 14.96s/batch, batch_loss=12.4, batch

Epoch 6/10:  21%|▏| 208/991 [52:32<3:15:13, 14.96s/batch, batch_loss=10, batch_i

Epoch 6/10:  21%|▏| 209/991 [52:32<3:17:03, 15.12s/batch, batch_loss=10, batch_i

Epoch 6/10:  21%|▏| 209/991 [52:49<3:17:03, 15.12s/batch, batch_loss=23.8, batch

Epoch 6/10:  21%|▏| 210/991 [52:49<3:26:13, 15.84s/batch, batch_loss=23.8, batch

Epoch 6/10:  21%|▏| 210/991 [53:04<3:26:13, 15.84s/batch, batch_loss=14.1, batch

Epoch 6/10:  21%|▏| 211/991 [53:04<3:22:11, 15.55s/batch, batch_loss=14.1, batch

Epoch 6/10:  21%|▏| 211/991 [53:19<3:22:11, 15.55s/batch, batch_loss=24.7, batch

Epoch 6/10:  21%|▏| 212/991 [53:19<3:17:16, 15.19s/batch, batch_loss=24.7, batch

Epoch 6/10:  21%|▏| 212/991 [53:34<3:17:16, 15.19s/batch, batch_loss=3.45, batch

Epoch 6/10:  21%|▏| 213/991 [53:34<3:18:31, 15.31s/batch, batch_loss=3.45, batch

Epoch 6/10:  21%|▏| 213/991 [53:49<3:18:31, 15.31s/batch, batch_loss=13.5, batch

Epoch 6/10:  22%|▏| 214/991 [53:49<3:16:53, 15.20s/batch, batch_loss=13.5, batch

Epoch 6/10:  22%|▏| 214/991 [54:04<3:16:53, 15.20s/batch, batch_loss=20.2, batch

Epoch 6/10:  22%|▏| 215/991 [54:04<3:16:38, 15.20s/batch, batch_loss=20.2, batch

Epoch 6/10:  22%|▏| 215/991 [54:19<3:16:38, 15.20s/batch, batch_loss=10.1, batch

Epoch 6/10:  22%|▏| 216/991 [54:19<3:14:11, 15.03s/batch, batch_loss=10.1, batch

Epoch 6/10:  22%|▏| 216/991 [54:34<3:14:11, 15.03s/batch, batch_loss=16.7, batch

Epoch 6/10:  22%|▏| 217/991 [54:34<3:14:30, 15.08s/batch, batch_loss=16.7, batch

Epoch 6/10:  22%|▏| 217/991 [54:49<3:14:30, 15.08s/batch, batch_loss=21.4, batch

Epoch 6/10:  22%|▏| 218/991 [54:49<3:13:20, 15.01s/batch, batch_loss=21.4, batch

Epoch 6/10:  22%|▏| 218/991 [55:04<3:13:20, 15.01s/batch, batch_loss=24.1, batch

Epoch 6/10:  22%|▏| 219/991 [55:04<3:12:44, 14.98s/batch, batch_loss=24.1, batch

Epoch 6/10:  22%|▏| 219/991 [55:18<3:12:44, 14.98s/batch, batch_loss=32, batch_i

Epoch 6/10:  22%|▏| 220/991 [55:18<3:09:45, 14.77s/batch, batch_loss=32, batch_i

Epoch 6/10:  22%|▏| 220/991 [55:34<3:09:45, 14.77s/batch, batch_loss=22.2, batch

Epoch 6/10:  22%|▏| 221/991 [55:34<3:11:12, 14.90s/batch, batch_loss=22.2, batch

Epoch 6/10:  22%|▏| 221/991 [55:49<3:11:12, 14.90s/batch, batch_loss=16.6, batch

Epoch 6/10:  22%|▏| 222/991 [55:49<3:13:43, 15.11s/batch, batch_loss=16.6, batch

Epoch 6/10:  22%|▏| 222/991 [56:04<3:13:43, 15.11s/batch, batch_loss=32.9, batch

Epoch 6/10:  23%|▏| 223/991 [56:04<3:12:59, 15.08s/batch, batch_loss=32.9, batch

Epoch 6/10:  23%|▏| 223/991 [56:20<3:12:59, 15.08s/batch, batch_loss=17.6, batch

Epoch 6/10:  23%|▏| 224/991 [56:20<3:15:17, 15.28s/batch, batch_loss=17.6, batch

Epoch 6/10:  23%|▏| 224/991 [56:37<3:15:17, 15.28s/batch, batch_loss=10.7, batch

Epoch 6/10:  23%|▏| 225/991 [56:37<3:23:47, 15.96s/batch, batch_loss=10.7, batch

Epoch 6/10:  23%|▏| 225/991 [56:53<3:23:47, 15.96s/batch, batch_loss=27.6, batch

Epoch 6/10:  23%|▏| 226/991 [56:53<3:22:51, 15.91s/batch, batch_loss=27.6, batch

Epoch 6/10:  23%|▏| 226/991 [57:09<3:22:51, 15.91s/batch, batch_loss=2.41e+3, ba

Epoch 6/10:  23%|▏| 227/991 [57:09<3:20:55, 15.78s/batch, batch_loss=2.41e+3, ba

Epoch 6/10:  23%|▏| 227/991 [57:24<3:20:55, 15.78s/batch, batch_loss=3.6e+3, bat

Epoch 6/10:  23%|▏| 228/991 [57:24<3:18:18, 15.59s/batch, batch_loss=3.6e+3, bat

Epoch 6/10:  23%|▏| 228/991 [57:38<3:18:18, 15.59s/batch, batch_loss=13.1, batch

Epoch 6/10:  23%|▏| 229/991 [57:38<3:10:42, 15.02s/batch, batch_loss=13.1, batch

Epoch 6/10:  23%|▏| 229/991 [57:51<3:10:42, 15.02s/batch, batch_loss=10.7, batch

Epoch 6/10:  23%|▏| 230/991 [57:51<3:03:00, 14.43s/batch, batch_loss=10.7, batch

Epoch 6/10:  23%|▏| 230/991 [58:05<3:03:00, 14.43s/batch, batch_loss=13.2, batch

Epoch 6/10:  23%|▏| 231/991 [58:05<3:01:21, 14.32s/batch, batch_loss=13.2, batch

Epoch 6/10:  23%|▏| 231/991 [58:23<3:01:21, 14.32s/batch, batch_loss=10.9, batch

Epoch 6/10:  23%|▏| 232/991 [58:23<3:16:50, 15.56s/batch, batch_loss=10.9, batch

Epoch 6/10:  23%|▏| 232/991 [58:39<3:16:50, 15.56s/batch, batch_loss=9.05, batch

Epoch 6/10:  24%|▏| 233/991 [58:39<3:18:57, 15.75s/batch, batch_loss=9.05, batch

Epoch 6/10:  24%|▏| 233/991 [58:55<3:18:57, 15.75s/batch, batch_loss=14.2, batch

Epoch 6/10:  24%|▏| 234/991 [58:55<3:17:13, 15.63s/batch, batch_loss=14.2, batch

Epoch 6/10:  24%|▏| 234/991 [59:09<3:17:13, 15.63s/batch, batch_loss=15.9, batch

Epoch 6/10:  24%|▏| 235/991 [59:09<3:12:02, 15.24s/batch, batch_loss=15.9, batch

Epoch 6/10:  24%|▏| 235/991 [59:24<3:12:02, 15.24s/batch, batch_loss=24.8, batch

Epoch 6/10:  24%|▏| 236/991 [59:24<3:09:19, 15.05s/batch, batch_loss=24.8, batch

Epoch 6/10:  24%|▏| 236/991 [59:39<3:09:19, 15.05s/batch, batch_loss=27.4, batch

Epoch 6/10:  24%|▏| 237/991 [59:39<3:09:54, 15.11s/batch, batch_loss=27.4, batch

Epoch 6/10:  24%|▏| 237/991 [59:55<3:09:54, 15.11s/batch, batch_loss=20.4, batch

Epoch 6/10:  24%|▏| 238/991 [59:55<3:14:50, 15.53s/batch, batch_loss=20.4, batch

Epoch 6/10:  24%|▏| 238/991 [1:00:11<3:14:50, 15.53s/batch, batch_loss=6.96, bat

Epoch 6/10:  24%|▏| 239/991 [1:00:11<3:13:23, 15.43s/batch, batch_loss=6.96, bat

Epoch 6/10:  24%|▏| 239/991 [1:00:26<3:13:23, 15.43s/batch, batch_loss=8.14, bat

Epoch 6/10:  24%|▏| 240/991 [1:00:26<3:12:05, 15.35s/batch, batch_loss=8.14, bat

Epoch 6/10:  24%|▏| 240/991 [1:00:42<3:12:05, 15.35s/batch, batch_loss=9.3, batc

Epoch 6/10:  24%|▏| 241/991 [1:00:42<3:15:23, 15.63s/batch, batch_loss=9.3, batc

Epoch 6/10:  24%|▏| 241/991 [1:00:59<3:15:23, 15.63s/batch, batch_loss=21.6, bat

Epoch 6/10:  24%|▏| 242/991 [1:00:59<3:20:49, 16.09s/batch, batch_loss=21.6, bat

Epoch 6/10:  24%|▏| 242/991 [1:01:14<3:20:49, 16.09s/batch, batch_loss=271, batc

Epoch 6/10:  25%|▏| 243/991 [1:01:14<3:17:43, 15.86s/batch, batch_loss=271, batc

Epoch 6/10:  25%|▏| 243/991 [1:01:30<3:17:43, 15.86s/batch, batch_loss=20.4, bat

Epoch 6/10:  25%|▏| 244/991 [1:01:30<3:16:38, 15.79s/batch, batch_loss=20.4, bat

Epoch 6/10:  25%|▏| 244/991 [1:01:46<3:16:38, 15.79s/batch, batch_loss=7.11, bat

Epoch 6/10:  25%|▏| 245/991 [1:01:46<3:15:12, 15.70s/batch, batch_loss=7.11, bat

Epoch 6/10:  25%|▏| 245/991 [1:02:00<3:15:12, 15.70s/batch, batch_loss=5.82, bat

Epoch 6/10:  25%|▏| 246/991 [1:02:00<3:11:49, 15.45s/batch, batch_loss=5.82, bat

Epoch 6/10:  25%|▏| 246/991 [1:02:16<3:11:49, 15.45s/batch, batch_loss=15, batch

Epoch 6/10:  25%|▏| 247/991 [1:02:16<3:11:13, 15.42s/batch, batch_loss=15, batch

Epoch 6/10:  25%|▏| 247/991 [1:02:32<3:11:13, 15.42s/batch, batch_loss=4.74, bat

Epoch 6/10:  25%|▎| 248/991 [1:02:32<3:11:58, 15.50s/batch, batch_loss=4.74, bat

Epoch 6/10:  25%|▎| 248/991 [1:02:49<3:11:58, 15.50s/batch, batch_loss=13.5, bat

Epoch 6/10:  25%|▎| 249/991 [1:02:49<3:18:14, 16.03s/batch, batch_loss=13.5, bat

Epoch 6/10:  25%|▎| 249/991 [1:03:03<3:18:14, 16.03s/batch, batch_loss=10.4, bat

Epoch 6/10:  25%|▎| 250/991 [1:03:03<3:11:22, 15.50s/batch, batch_loss=10.4, bat

Epoch 6/10:  25%|▎| 250/991 [1:03:18<3:11:22, 15.50s/batch, batch_loss=6.92, bat

Epoch 6/10:  25%|▎| 251/991 [1:03:18<3:09:16, 15.35s/batch, batch_loss=6.92, bat

Epoch 6/10:  25%|▎| 251/991 [1:03:33<3:09:16, 15.35s/batch, batch_loss=14.6, bat

Epoch 6/10:  25%|▎| 252/991 [1:03:33<3:07:34, 15.23s/batch, batch_loss=14.6, bat

Epoch 6/10:  25%|▎| 252/991 [1:03:48<3:07:34, 15.23s/batch, batch_loss=8.32, bat

Epoch 6/10:  26%|▎| 253/991 [1:03:48<3:06:56, 15.20s/batch, batch_loss=8.32, bat

Epoch 6/10:  26%|▎| 253/991 [1:04:03<3:06:56, 15.20s/batch, batch_loss=19.7, bat

Epoch 6/10:  26%|▎| 254/991 [1:04:03<3:05:48, 15.13s/batch, batch_loss=19.7, bat

Epoch 6/10:  26%|▎| 254/991 [1:04:18<3:05:48, 15.13s/batch, batch_loss=16, batch

Epoch 6/10:  26%|▎| 255/991 [1:04:18<3:05:44, 15.14s/batch, batch_loss=16, batch

Epoch 6/10:  26%|▎| 255/991 [1:04:32<3:05:44, 15.14s/batch, batch_loss=696, batc

Epoch 6/10:  26%|▎| 256/991 [1:04:32<2:58:43, 14.59s/batch, batch_loss=696, batc

Epoch 6/10:  26%|▎| 256/991 [1:04:48<2:58:43, 14.59s/batch, batch_loss=19.1, bat

Epoch 6/10:  26%|▎| 257/991 [1:04:48<3:06:57, 15.28s/batch, batch_loss=19.1, bat

Epoch 6/10:  26%|▎| 257/991 [1:05:02<3:06:57, 15.28s/batch, batch_loss=206, batc

Epoch 6/10:  26%|▎| 258/991 [1:05:02<3:01:45, 14.88s/batch, batch_loss=206, batc

Epoch 6/10:  26%|▎| 258/991 [1:05:17<3:01:45, 14.88s/batch, batch_loss=15.6, bat

Epoch 6/10:  26%|▎| 259/991 [1:05:17<2:59:43, 14.73s/batch, batch_loss=15.6, bat

Epoch 6/10:  26%|▎| 259/991 [1:05:31<2:59:43, 14.73s/batch, batch_loss=27, batch

Epoch 6/10:  26%|▎| 260/991 [1:05:31<2:56:18, 14.47s/batch, batch_loss=27, batch

Epoch 6/10:  26%|▎| 260/991 [1:05:45<2:56:18, 14.47s/batch, batch_loss=11.9, bat

Epoch 6/10:  26%|▎| 261/991 [1:05:45<2:57:30, 14.59s/batch, batch_loss=11.9, bat

Epoch 6/10:  26%|▎| 261/991 [1:06:00<2:57:30, 14.59s/batch, batch_loss=11.2, bat

Epoch 6/10:  26%|▎| 262/991 [1:06:00<2:58:12, 14.67s/batch, batch_loss=11.2, bat

Epoch 6/10:  26%|▎| 262/991 [1:06:15<2:58:12, 14.67s/batch, batch_loss=15, batch

Epoch 6/10:  27%|▎| 263/991 [1:06:15<2:58:51, 14.74s/batch, batch_loss=15, batch

Epoch 6/10:  27%|▎| 263/991 [1:06:30<2:58:51, 14.74s/batch, batch_loss=14.6, bat

Epoch 6/10:  27%|▎| 264/991 [1:06:30<2:58:46, 14.75s/batch, batch_loss=14.6, bat

Epoch 6/10:  27%|▎| 264/991 [1:06:46<2:58:46, 14.75s/batch, batch_loss=15.7, bat

Epoch 6/10:  27%|▎| 265/991 [1:06:46<3:01:54, 15.03s/batch, batch_loss=15.7, bat

Epoch 6/10:  27%|▎| 265/991 [1:07:01<3:01:54, 15.03s/batch, batch_loss=15.1, bat

Epoch 6/10:  27%|▎| 266/991 [1:07:01<3:01:35, 15.03s/batch, batch_loss=15.1, bat

Epoch 6/10:  27%|▎| 266/991 [1:07:15<3:01:35, 15.03s/batch, batch_loss=14.5, bat

Epoch 6/10:  27%|▎| 267/991 [1:07:15<2:58:23, 14.78s/batch, batch_loss=14.5, bat

Epoch 6/10:  27%|▎| 267/991 [1:07:31<2:58:23, 14.78s/batch, batch_loss=6.73, bat

Epoch 6/10:  27%|▎| 268/991 [1:07:31<3:00:59, 15.02s/batch, batch_loss=6.73, bat

Epoch 6/10:  27%|▎| 268/991 [1:07:46<3:00:59, 15.02s/batch, batch_loss=14.4, bat

Epoch 6/10:  27%|▎| 269/991 [1:07:46<3:02:06, 15.13s/batch, batch_loss=14.4, bat

Epoch 6/10:  27%|▎| 269/991 [1:08:02<3:02:06, 15.13s/batch, batch_loss=1.91, bat

Epoch 6/10:  27%|▎| 270/991 [1:08:02<3:05:30, 15.44s/batch, batch_loss=1.91, bat

Epoch 6/10:  27%|▎| 270/991 [1:08:16<3:05:30, 15.44s/batch, batch_loss=10.8, bat

Epoch 6/10:  27%|▎| 271/991 [1:08:16<3:00:44, 15.06s/batch, batch_loss=10.8, bat

Epoch 6/10:  27%|▎| 271/991 [1:08:31<3:00:44, 15.06s/batch, batch_loss=11.6, bat

Epoch 6/10:  27%|▎| 272/991 [1:08:31<2:57:38, 14.82s/batch, batch_loss=11.6, bat

Epoch 6/10:  27%|▎| 272/991 [1:08:46<2:57:38, 14.82s/batch, batch_loss=21.9, bat

Epoch 6/10:  28%|▎| 273/991 [1:08:46<2:58:15, 14.90s/batch, batch_loss=21.9, bat

Epoch 6/10:  28%|▎| 273/991 [1:09:03<2:58:15, 14.90s/batch, batch_loss=11.1, bat

Epoch 6/10:  28%|▎| 274/991 [1:09:03<3:06:13, 15.58s/batch, batch_loss=11.1, bat

Epoch 6/10:  28%|▎| 274/991 [1:09:18<3:06:13, 15.58s/batch, batch_loss=3.32e+3, 

Epoch 6/10:  28%|▎| 275/991 [1:09:18<3:04:46, 15.48s/batch, batch_loss=3.32e+3, 

Epoch 6/10:  28%|▎| 275/991 [1:09:33<3:04:46, 15.48s/batch, batch_loss=14.5, bat

Epoch 6/10:  28%|▎| 276/991 [1:09:33<3:01:31, 15.23s/batch, batch_loss=14.5, bat

Epoch 6/10:  28%|▎| 276/991 [1:09:47<3:01:31, 15.23s/batch, batch_loss=4.77e+3, 

Epoch 6/10:  28%|▎| 277/991 [1:09:47<2:59:47, 15.11s/batch, batch_loss=4.77e+3, 

Epoch 6/10:  28%|▎| 277/991 [1:10:03<2:59:47, 15.11s/batch, batch_loss=10.5, bat

Epoch 6/10:  28%|▎| 278/991 [1:10:03<2:59:32, 15.11s/batch, batch_loss=10.5, bat

Epoch 6/10:  28%|▎| 278/991 [1:10:17<2:59:32, 15.11s/batch, batch_loss=18, batch

Epoch 6/10:  28%|▎| 279/991 [1:10:17<2:57:19, 14.94s/batch, batch_loss=18, batch

Epoch 6/10:  28%|▎| 279/991 [1:10:33<2:57:19, 14.94s/batch, batch_loss=14.2, bat

Epoch 6/10:  28%|▎| 280/991 [1:10:33<2:58:45, 15.08s/batch, batch_loss=14.2, bat

Epoch 6/10:  28%|▎| 280/991 [1:10:48<2:58:45, 15.08s/batch, batch_loss=10.4, bat

Epoch 6/10:  28%|▎| 281/991 [1:10:48<3:00:17, 15.24s/batch, batch_loss=10.4, bat

Epoch 6/10:  28%|▎| 281/991 [1:11:03<3:00:17, 15.24s/batch, batch_loss=7.09, bat

Epoch 6/10:  28%|▎| 282/991 [1:11:03<2:59:37, 15.20s/batch, batch_loss=7.09, bat

Epoch 6/10:  28%|▎| 282/991 [1:11:18<2:59:37, 15.20s/batch, batch_loss=17, batch

Epoch 6/10:  29%|▎| 283/991 [1:11:18<2:58:57, 15.17s/batch, batch_loss=17, batch

Epoch 6/10:  29%|▎| 283/991 [1:11:34<2:58:57, 15.17s/batch, batch_loss=15.5, bat

Epoch 6/10:  29%|▎| 284/991 [1:11:34<3:00:46, 15.34s/batch, batch_loss=15.5, bat

Epoch 6/10:  29%|▎| 284/991 [1:11:49<3:00:46, 15.34s/batch, batch_loss=13, batch

Epoch 6/10:  29%|▎| 285/991 [1:11:49<2:59:25, 15.25s/batch, batch_loss=13, batch

Epoch 6/10:  29%|▎| 285/991 [1:12:04<2:59:25, 15.25s/batch, batch_loss=7.45, bat

Epoch 6/10:  29%|▎| 286/991 [1:12:04<2:56:30, 15.02s/batch, batch_loss=7.45, bat

Epoch 6/10:  29%|▎| 286/991 [1:12:19<2:56:30, 15.02s/batch, batch_loss=6.87, bat

Epoch 6/10:  29%|▎| 287/991 [1:12:19<2:56:13, 15.02s/batch, batch_loss=6.87, bat

Epoch 6/10:  29%|▎| 287/991 [1:12:33<2:56:13, 15.02s/batch, batch_loss=2.59e+3, 

Epoch 6/10:  29%|▎| 288/991 [1:12:33<2:52:18, 14.71s/batch, batch_loss=2.59e+3, 

Epoch 6/10:  29%|▎| 288/991 [1:12:50<2:52:18, 14.71s/batch, batch_loss=1.26e+3, 

Epoch 6/10:  29%|▎| 289/991 [1:12:50<3:00:43, 15.45s/batch, batch_loss=1.26e+3, 

Epoch 6/10:  29%|▎| 289/991 [1:13:06<3:00:43, 15.45s/batch, batch_loss=10.8, bat

Epoch 6/10:  29%|▎| 290/991 [1:13:06<3:01:22, 15.52s/batch, batch_loss=10.8, bat

Epoch 6/10:  29%|▎| 290/991 [1:13:21<3:01:22, 15.52s/batch, batch_loss=5.66, bat

Epoch 6/10:  29%|▎| 291/991 [1:13:21<2:59:24, 15.38s/batch, batch_loss=5.66, bat

Epoch 6/10:  29%|▎| 291/991 [1:13:36<2:59:24, 15.38s/batch, batch_loss=11.1, bat

Epoch 6/10:  29%|▎| 292/991 [1:13:36<2:57:57, 15.28s/batch, batch_loss=11.1, bat

Epoch 6/10:  29%|▎| 292/991 [1:13:50<2:57:57, 15.28s/batch, batch_loss=13.4, bat

Epoch 6/10:  30%|▎| 293/991 [1:13:50<2:54:13, 14.98s/batch, batch_loss=13.4, bat

Epoch 6/10:  30%|▎| 293/991 [1:14:05<2:54:13, 14.98s/batch, batch_loss=12.6, bat

Epoch 6/10:  30%|▎| 294/991 [1:14:05<2:53:08, 14.90s/batch, batch_loss=12.6, bat

Epoch 6/10:  30%|▎| 294/991 [1:14:19<2:53:08, 14.90s/batch, batch_loss=9.86, bat

Epoch 6/10:  30%|▎| 295/991 [1:14:19<2:52:14, 14.85s/batch, batch_loss=9.86, bat

Epoch 6/10:  30%|▎| 295/991 [1:14:36<2:52:14, 14.85s/batch, batch_loss=16.4, bat

Epoch 6/10:  30%|▎| 296/991 [1:14:36<2:59:17, 15.48s/batch, batch_loss=16.4, bat

Epoch 6/10:  30%|▎| 296/991 [1:14:51<2:59:17, 15.48s/batch, batch_loss=11.5, bat

Epoch 6/10:  30%|▎| 297/991 [1:14:51<2:56:50, 15.29s/batch, batch_loss=11.5, bat

Epoch 6/10:  30%|▎| 297/991 [1:15:06<2:56:50, 15.29s/batch, batch_loss=3.24e+4, 

Epoch 6/10:  30%|▎| 298/991 [1:15:06<2:54:12, 15.08s/batch, batch_loss=3.24e+4, 

Epoch 6/10:  30%|▎| 298/991 [1:15:20<2:54:12, 15.08s/batch, batch_loss=15.6, bat

Epoch 6/10:  30%|▎| 299/991 [1:15:20<2:51:55, 14.91s/batch, batch_loss=15.6, bat

Epoch 6/10:  30%|▎| 299/991 [1:15:35<2:51:55, 14.91s/batch, batch_loss=6.51, bat

Epoch 6/10:  30%|▎| 300/991 [1:15:35<2:49:38, 14.73s/batch, batch_loss=6.51, bat

Epoch 6/10:  30%|▎| 300/991 [1:15:50<2:49:38, 14.73s/batch, batch_loss=7.86, bat

Epoch 6/10:  30%|▎| 301/991 [1:15:50<2:51:46, 14.94s/batch, batch_loss=7.86, bat

Epoch 6/10:  30%|▎| 301/991 [1:16:05<2:51:46, 14.94s/batch, batch_loss=10.8, bat

Epoch 6/10:  30%|▎| 302/991 [1:16:05<2:51:04, 14.90s/batch, batch_loss=10.8, bat

Epoch 6/10:  30%|▎| 302/991 [1:16:20<2:51:04, 14.90s/batch, batch_loss=9.24, bat

Epoch 6/10:  31%|▎| 303/991 [1:16:20<2:52:30, 15.04s/batch, batch_loss=9.24, bat

Epoch 6/10:  31%|▎| 303/991 [1:16:36<2:52:30, 15.04s/batch, batch_loss=3.22, bat

Epoch 6/10:  31%|▎| 304/991 [1:16:36<2:53:53, 15.19s/batch, batch_loss=3.22, bat

Epoch 6/10:  31%|▎| 304/991 [1:16:54<2:53:53, 15.19s/batch, batch_loss=14.9, bat

Epoch 6/10:  31%|▎| 305/991 [1:16:54<3:02:48, 15.99s/batch, batch_loss=14.9, bat

Epoch 6/10:  31%|▎| 305/991 [1:17:08<3:02:48, 15.99s/batch, batch_loss=7.78, bat

Epoch 6/10:  31%|▎| 306/991 [1:17:08<2:58:26, 15.63s/batch, batch_loss=7.78, bat

Epoch 6/10:  31%|▎| 306/991 [1:17:23<2:58:26, 15.63s/batch, batch_loss=6.28e+3, 

Epoch 6/10:  31%|▎| 307/991 [1:17:23<2:55:06, 15.36s/batch, batch_loss=6.28e+3, 

Epoch 6/10:  31%|▎| 307/991 [1:17:38<2:55:06, 15.36s/batch, batch_loss=11.4, bat

Epoch 6/10:  31%|▎| 308/991 [1:17:38<2:54:19, 15.31s/batch, batch_loss=11.4, bat

Epoch 6/10:  31%|▎| 308/991 [1:17:54<2:54:19, 15.31s/batch, batch_loss=20.6, bat

Epoch 6/10:  31%|▎| 309/991 [1:17:54<2:54:45, 15.37s/batch, batch_loss=20.6, bat

Epoch 6/10:  31%|▎| 309/991 [1:18:09<2:54:45, 15.37s/batch, batch_loss=14.2, bat

Epoch 6/10:  31%|▎| 310/991 [1:18:09<2:54:10, 15.35s/batch, batch_loss=14.2, bat

Epoch 6/10:  31%|▎| 310/991 [1:18:24<2:54:10, 15.35s/batch, batch_loss=13.7, bat

Epoch 6/10:  31%|▎| 311/991 [1:18:24<2:52:02, 15.18s/batch, batch_loss=13.7, bat

Epoch 6/10:  31%|▎| 311/991 [1:18:40<2:52:02, 15.18s/batch, batch_loss=11.7, bat

Epoch 6/10:  31%|▎| 312/991 [1:18:40<2:54:00, 15.38s/batch, batch_loss=11.7, bat

Epoch 6/10:  31%|▎| 312/991 [1:18:55<2:54:00, 15.38s/batch, batch_loss=1.06e+4, 

Epoch 6/10:  32%|▎| 313/991 [1:18:55<2:53:27, 15.35s/batch, batch_loss=1.06e+4, 

Epoch 6/10:  32%|▎| 313/991 [1:19:09<2:53:27, 15.35s/batch, batch_loss=8.1, batc

Epoch 6/10:  32%|▎| 314/991 [1:19:09<2:50:03, 15.07s/batch, batch_loss=8.1, batc

Epoch 6/10:  32%|▎| 314/991 [1:19:24<2:50:03, 15.07s/batch, batch_loss=13.4, bat

Epoch 6/10:  32%|▎| 315/991 [1:19:24<2:48:23, 14.95s/batch, batch_loss=13.4, bat

Epoch 6/10:  32%|▎| 315/991 [1:19:40<2:48:23, 14.95s/batch, batch_loss=20.8, bat

Epoch 6/10:  32%|▎| 316/991 [1:19:40<2:51:28, 15.24s/batch, batch_loss=20.8, bat

Epoch 6/10:  32%|▎| 316/991 [1:19:55<2:51:28, 15.24s/batch, batch_loss=22.7, bat

Epoch 6/10:  32%|▎| 317/991 [1:19:55<2:50:53, 15.21s/batch, batch_loss=22.7, bat

Epoch 6/10:  32%|▎| 317/991 [1:20:10<2:50:53, 15.21s/batch, batch_loss=21.1, bat

Epoch 6/10:  32%|▎| 318/991 [1:20:10<2:49:55, 15.15s/batch, batch_loss=21.1, bat

Epoch 6/10:  32%|▎| 318/991 [1:20:25<2:49:55, 15.15s/batch, batch_loss=16.8, bat

Epoch 6/10:  32%|▎| 319/991 [1:20:25<2:48:48, 15.07s/batch, batch_loss=16.8, bat

Epoch 6/10:  32%|▎| 319/991 [1:20:40<2:48:48, 15.07s/batch, batch_loss=15.1, bat

Epoch 6/10:  32%|▎| 320/991 [1:20:40<2:48:37, 15.08s/batch, batch_loss=15.1, bat

Epoch 6/10:  32%|▎| 320/991 [1:20:55<2:48:37, 15.08s/batch, batch_loss=22.2, bat

Epoch 6/10:  32%|▎| 321/991 [1:20:55<2:47:06, 14.97s/batch, batch_loss=22.2, bat

Epoch 6/10:  32%|▎| 321/991 [1:21:10<2:47:06, 14.97s/batch, batch_loss=6.79, bat

Epoch 6/10:  32%|▎| 322/991 [1:21:10<2:48:43, 15.13s/batch, batch_loss=6.79, bat

Epoch 6/10:  32%|▎| 322/991 [1:21:25<2:48:43, 15.13s/batch, batch_loss=9.77, bat

Epoch 6/10:  33%|▎| 323/991 [1:21:25<2:46:22, 14.94s/batch, batch_loss=9.77, bat

Epoch 6/10:  33%|▎| 323/991 [1:21:39<2:46:22, 14.94s/batch, batch_loss=20.2, bat

Epoch 6/10:  33%|▎| 324/991 [1:21:39<2:43:18, 14.69s/batch, batch_loss=20.2, bat

Epoch 6/10:  33%|▎| 324/991 [1:21:54<2:43:18, 14.69s/batch, batch_loss=8.61, bat

Epoch 6/10:  33%|▎| 325/991 [1:21:54<2:44:47, 14.85s/batch, batch_loss=8.61, bat

Epoch 6/10:  33%|▎| 325/991 [1:22:09<2:44:47, 14.85s/batch, batch_loss=23.4, bat

Epoch 6/10:  33%|▎| 326/991 [1:22:09<2:43:48, 14.78s/batch, batch_loss=23.4, bat

Epoch 6/10:  33%|▎| 326/991 [1:22:24<2:43:48, 14.78s/batch, batch_loss=3.04e+3, 

Epoch 6/10:  33%|▎| 327/991 [1:22:24<2:46:41, 15.06s/batch, batch_loss=3.04e+3, 

Epoch 6/10:  33%|▎| 327/991 [1:22:40<2:46:41, 15.06s/batch, batch_loss=8.71, bat

Epoch 6/10:  33%|▎| 328/991 [1:22:40<2:47:14, 15.14s/batch, batch_loss=8.71, bat

Epoch 6/10:  33%|▎| 328/991 [1:22:55<2:47:14, 15.14s/batch, batch_loss=16.6, bat

Epoch 6/10:  33%|▎| 329/991 [1:22:55<2:46:26, 15.08s/batch, batch_loss=16.6, bat

Epoch 6/10:  33%|▎| 329/991 [1:23:10<2:46:26, 15.08s/batch, batch_loss=11.1, bat

Epoch 6/10:  33%|▎| 330/991 [1:23:10<2:46:58, 15.16s/batch, batch_loss=11.1, bat

Epoch 6/10:  33%|▎| 330/991 [1:23:26<2:46:58, 15.16s/batch, batch_loss=12.9, bat

Epoch 6/10:  33%|▎| 331/991 [1:23:26<2:47:53, 15.26s/batch, batch_loss=12.9, bat

Epoch 6/10:  33%|▎| 331/991 [1:23:41<2:47:53, 15.26s/batch, batch_loss=14.3, bat

Epoch 6/10:  34%|▎| 332/991 [1:23:41<2:47:38, 15.26s/batch, batch_loss=14.3, bat

Epoch 6/10:  34%|▎| 332/991 [1:23:56<2:47:38, 15.26s/batch, batch_loss=11.5, bat

Epoch 6/10:  34%|▎| 333/991 [1:23:56<2:45:35, 15.10s/batch, batch_loss=11.5, bat

Epoch 6/10:  34%|▎| 333/991 [1:24:11<2:45:35, 15.10s/batch, batch_loss=13.7, bat

Epoch 6/10:  34%|▎| 334/991 [1:24:11<2:45:37, 15.12s/batch, batch_loss=13.7, bat

Epoch 6/10:  34%|▎| 334/991 [1:24:26<2:45:37, 15.12s/batch, batch_loss=5.02, bat

Epoch 6/10:  34%|▎| 335/991 [1:24:26<2:45:00, 15.09s/batch, batch_loss=5.02, bat

Epoch 6/10:  34%|▎| 335/991 [1:24:41<2:45:00, 15.09s/batch, batch_loss=8.47e+3, 

Epoch 6/10:  34%|▎| 336/991 [1:24:41<2:45:12, 15.13s/batch, batch_loss=8.47e+3, 

Epoch 6/10:  34%|▎| 336/991 [1:24:56<2:45:12, 15.13s/batch, batch_loss=2.35e+3, 

Epoch 6/10:  34%|▎| 337/991 [1:24:56<2:44:48, 15.12s/batch, batch_loss=2.35e+3, 

Epoch 6/10:  34%|▎| 337/991 [1:25:12<2:44:48, 15.12s/batch, batch_loss=9.12, bat

Epoch 6/10:  34%|▎| 338/991 [1:25:12<2:46:01, 15.26s/batch, batch_loss=9.12, bat

Epoch 6/10:  34%|▎| 338/991 [1:25:27<2:46:01, 15.26s/batch, batch_loss=23.5, bat

Epoch 6/10:  34%|▎| 339/991 [1:25:27<2:44:35, 15.15s/batch, batch_loss=23.5, bat

Epoch 6/10:  34%|▎| 339/991 [1:25:42<2:44:35, 15.15s/batch, batch_loss=12.3, bat

Epoch 6/10:  34%|▎| 340/991 [1:25:42<2:45:17, 15.23s/batch, batch_loss=12.3, bat

Epoch 6/10:  34%|▎| 340/991 [1:25:56<2:45:17, 15.23s/batch, batch_loss=11.2, bat

Epoch 6/10:  34%|▎| 341/991 [1:25:56<2:41:32, 14.91s/batch, batch_loss=11.2, bat

Epoch 6/10:  34%|▎| 341/991 [1:26:11<2:41:32, 14.91s/batch, batch_loss=1.15, bat

Epoch 6/10:  35%|▎| 342/991 [1:26:11<2:41:26, 14.92s/batch, batch_loss=1.15, bat

Epoch 6/10:  35%|▎| 342/991 [1:26:26<2:41:26, 14.92s/batch, batch_loss=8.86, bat

Epoch 6/10:  35%|▎| 343/991 [1:26:26<2:40:37, 14.87s/batch, batch_loss=8.86, bat

Epoch 6/10:  35%|▎| 343/991 [1:26:41<2:40:37, 14.87s/batch, batch_loss=17.2, bat

Epoch 6/10:  35%|▎| 344/991 [1:26:41<2:41:17, 14.96s/batch, batch_loss=17.2, bat

Epoch 6/10:  35%|▎| 344/991 [1:26:59<2:41:17, 14.96s/batch, batch_loss=117, batc

Epoch 6/10:  35%|▎| 345/991 [1:26:59<2:51:35, 15.94s/batch, batch_loss=117, batc

Epoch 6/10:  35%|▎| 345/991 [1:27:15<2:51:35, 15.94s/batch, batch_loss=24.3, bat

Epoch 6/10:  35%|▎| 346/991 [1:27:15<2:50:57, 15.90s/batch, batch_loss=24.3, bat

Epoch 6/10:  35%|▎| 346/991 [1:27:30<2:50:57, 15.90s/batch, batch_loss=12.3, bat

Epoch 6/10:  35%|▎| 347/991 [1:27:30<2:47:24, 15.60s/batch, batch_loss=12.3, bat

Epoch 6/10:  35%|▎| 347/991 [1:27:44<2:47:24, 15.60s/batch, batch_loss=17, batch

Epoch 6/10:  35%|▎| 348/991 [1:27:44<2:41:23, 15.06s/batch, batch_loss=17, batch

Epoch 6/10:  35%|▎| 348/991 [1:27:58<2:41:23, 15.06s/batch, batch_loss=9.75, bat

Epoch 6/10:  35%|▎| 349/991 [1:27:58<2:38:53, 14.85s/batch, batch_loss=9.75, bat

Epoch 6/10:  35%|▎| 349/991 [1:28:13<2:38:53, 14.85s/batch, batch_loss=11.5, bat

Epoch 6/10:  35%|▎| 350/991 [1:28:13<2:39:06, 14.89s/batch, batch_loss=11.5, bat

Epoch 6/10:  35%|▎| 350/991 [1:28:32<2:39:06, 14.89s/batch, batch_loss=7.78, bat

Epoch 6/10:  35%|▎| 351/991 [1:28:32<2:50:28, 15.98s/batch, batch_loss=7.78, bat

Epoch 6/10:  35%|▎| 351/991 [1:28:48<2:50:28, 15.98s/batch, batch_loss=14.3, bat

Epoch 6/10:  36%|▎| 352/991 [1:28:48<2:49:55, 15.96s/batch, batch_loss=14.3, bat

Epoch 6/10:  36%|▎| 352/991 [1:29:03<2:49:55, 15.96s/batch, batch_loss=19.6, bat

Epoch 6/10:  36%|▎| 353/991 [1:29:03<2:48:18, 15.83s/batch, batch_loss=19.6, bat

Epoch 6/10:  36%|▎| 353/991 [1:29:18<2:48:18, 15.83s/batch, batch_loss=22.4, bat

Epoch 6/10:  36%|▎| 354/991 [1:29:18<2:45:07, 15.55s/batch, batch_loss=22.4, bat

Epoch 6/10:  36%|▎| 354/991 [1:29:34<2:45:07, 15.55s/batch, batch_loss=9.71, bat

Epoch 6/10:  36%|▎| 355/991 [1:29:34<2:46:45, 15.73s/batch, batch_loss=9.71, bat

Epoch 6/10:  36%|▎| 355/991 [1:29:50<2:46:45, 15.73s/batch, batch_loss=15.9, bat

Epoch 6/10:  36%|▎| 356/991 [1:29:50<2:47:22, 15.81s/batch, batch_loss=15.9, bat

Epoch 6/10:  36%|▎| 356/991 [1:30:06<2:47:22, 15.81s/batch, batch_loss=17.7, bat

Epoch 6/10:  36%|▎| 357/991 [1:30:06<2:46:39, 15.77s/batch, batch_loss=17.7, bat

Epoch 6/10:  36%|▎| 357/991 [1:30:21<2:46:39, 15.77s/batch, batch_loss=14.5, bat

Epoch 6/10:  36%|▎| 358/991 [1:30:21<2:45:18, 15.67s/batch, batch_loss=14.5, bat

Epoch 6/10:  36%|▎| 358/991 [1:30:38<2:45:18, 15.67s/batch, batch_loss=4.89, bat

Epoch 6/10:  36%|▎| 359/991 [1:30:38<2:49:36, 16.10s/batch, batch_loss=4.89, bat

Epoch 6/10:  36%|▎| 359/991 [1:30:54<2:49:36, 16.10s/batch, batch_loss=10.5, bat

Epoch 6/10:  36%|▎| 360/991 [1:30:54<2:46:28, 15.83s/batch, batch_loss=10.5, bat

Epoch 6/10:  36%|▎| 360/991 [1:31:09<2:46:28, 15.83s/batch, batch_loss=24.9, bat

Epoch 6/10:  36%|▎| 361/991 [1:31:09<2:46:07, 15.82s/batch, batch_loss=24.9, bat

Epoch 6/10:  36%|▎| 361/991 [1:31:25<2:46:07, 15.82s/batch, batch_loss=18.7, bat

Epoch 6/10:  37%|▎| 362/991 [1:31:25<2:46:52, 15.92s/batch, batch_loss=18.7, bat

Epoch 6/10:  37%|▎| 362/991 [1:31:41<2:46:52, 15.92s/batch, batch_loss=10.7, bat

Epoch 6/10:  37%|▎| 363/991 [1:31:41<2:46:05, 15.87s/batch, batch_loss=10.7, bat

Epoch 6/10:  37%|▎| 363/991 [1:31:57<2:46:05, 15.87s/batch, batch_loss=15.2, bat

Epoch 6/10:  37%|▎| 364/991 [1:31:57<2:45:45, 15.86s/batch, batch_loss=15.2, bat

Epoch 6/10:  37%|▎| 364/991 [1:32:13<2:45:45, 15.86s/batch, batch_loss=8.82, bat

Epoch 6/10:  37%|▎| 365/991 [1:32:13<2:46:19, 15.94s/batch, batch_loss=8.82, bat

Epoch 6/10:  37%|▎| 365/991 [1:32:28<2:46:19, 15.94s/batch, batch_loss=12.8, bat

Epoch 6/10:  37%|▎| 366/991 [1:32:28<2:43:20, 15.68s/batch, batch_loss=12.8, bat

Epoch 6/10:  37%|▎| 366/991 [1:32:46<2:43:20, 15.68s/batch, batch_loss=13.2, bat

Epoch 6/10:  37%|▎| 367/991 [1:32:46<2:49:01, 16.25s/batch, batch_loss=13.2, bat

Epoch 6/10:  37%|▎| 367/991 [1:33:02<2:49:01, 16.25s/batch, batch_loss=13.6, bat

Epoch 6/10:  37%|▎| 368/991 [1:33:02<2:48:55, 16.27s/batch, batch_loss=13.6, bat

Epoch 6/10:  37%|▎| 368/991 [1:33:18<2:48:55, 16.27s/batch, batch_loss=12.1, bat

Epoch 6/10:  37%|▎| 369/991 [1:33:18<2:45:48, 15.99s/batch, batch_loss=12.1, bat

Epoch 6/10:  37%|▎| 369/991 [1:33:33<2:45:48, 15.99s/batch, batch_loss=1.21e+4, 

Epoch 6/10:  37%|▎| 370/991 [1:33:33<2:43:23, 15.79s/batch, batch_loss=1.21e+4, 

Epoch 6/10:  37%|▎| 370/991 [1:33:48<2:43:23, 15.79s/batch, batch_loss=18.8, bat

Epoch 6/10:  37%|▎| 371/991 [1:33:48<2:42:11, 15.70s/batch, batch_loss=18.8, bat

Epoch 6/10:  37%|▎| 371/991 [1:34:03<2:42:11, 15.70s/batch, batch_loss=14.8, bat

Epoch 6/10:  38%|▍| 372/991 [1:34:03<2:39:17, 15.44s/batch, batch_loss=14.8, bat

Epoch 6/10:  38%|▍| 372/991 [1:34:18<2:39:17, 15.44s/batch, batch_loss=23.9, bat

Epoch 6/10:  38%|▍| 373/991 [1:34:18<2:38:32, 15.39s/batch, batch_loss=23.9, bat

Epoch 6/10:  38%|▍| 373/991 [1:34:33<2:38:32, 15.39s/batch, batch_loss=469, batc

Epoch 6/10:  38%|▍| 374/991 [1:34:33<2:35:52, 15.16s/batch, batch_loss=469, batc

Epoch 6/10:  38%|▍| 374/991 [1:34:48<2:35:52, 15.16s/batch, batch_loss=1.43e+3, 

Epoch 6/10:  38%|▍| 375/991 [1:34:48<2:35:45, 15.17s/batch, batch_loss=1.43e+3, 

Epoch 6/10:  38%|▍| 375/991 [1:35:03<2:35:45, 15.17s/batch, batch_loss=1.22e+3, 

Epoch 6/10:  38%|▍| 376/991 [1:35:03<2:35:39, 15.19s/batch, batch_loss=1.22e+3, 

Epoch 6/10:  38%|▍| 376/991 [1:35:19<2:35:39, 15.19s/batch, batch_loss=20.3, bat

Epoch 6/10:  38%|▍| 377/991 [1:35:19<2:36:56, 15.34s/batch, batch_loss=20.3, bat

Epoch 6/10:  38%|▍| 377/991 [1:35:33<2:36:56, 15.34s/batch, batch_loss=1.18e+3, 

Epoch 6/10:  38%|▍| 378/991 [1:35:33<2:33:22, 15.01s/batch, batch_loss=1.18e+3, 

Epoch 6/10:  38%|▍| 378/991 [1:35:47<2:33:22, 15.01s/batch, batch_loss=9.36, bat

Epoch 6/10:  38%|▍| 379/991 [1:35:47<2:28:32, 14.56s/batch, batch_loss=9.36, bat

Epoch 6/10:  38%|▍| 379/991 [1:36:01<2:28:32, 14.56s/batch, batch_loss=12.8, bat

Epoch 6/10:  38%|▍| 380/991 [1:36:01<2:28:20, 14.57s/batch, batch_loss=12.8, bat

Epoch 6/10:  38%|▍| 380/991 [1:36:16<2:28:20, 14.57s/batch, batch_loss=17.1, bat

Epoch 6/10:  38%|▍| 381/991 [1:36:16<2:27:31, 14.51s/batch, batch_loss=17.1, bat

Epoch 6/10:  38%|▍| 381/991 [1:36:31<2:27:31, 14.51s/batch, batch_loss=10, batch

Epoch 6/10:  39%|▍| 382/991 [1:36:31<2:27:52, 14.57s/batch, batch_loss=10, batch

Epoch 6/10:  39%|▍| 382/991 [1:36:48<2:27:52, 14.57s/batch, batch_loss=9.81, bat

Epoch 6/10:  39%|▍| 383/991 [1:36:48<2:34:51, 15.28s/batch, batch_loss=9.81, bat

Epoch 6/10:  39%|▍| 383/991 [1:37:02<2:34:51, 15.28s/batch, batch_loss=22.9, bat

Epoch 6/10:  39%|▍| 384/991 [1:37:02<2:33:26, 15.17s/batch, batch_loss=22.9, bat

Epoch 6/10:  39%|▍| 384/991 [1:37:17<2:33:26, 15.17s/batch, batch_loss=7.48, bat

Epoch 6/10:  39%|▍| 385/991 [1:37:17<2:30:16, 14.88s/batch, batch_loss=7.48, bat

Epoch 6/10:  39%|▍| 385/991 [1:37:31<2:30:16, 14.88s/batch, batch_loss=16, batch

Epoch 6/10:  39%|▍| 386/991 [1:37:31<2:27:01, 14.58s/batch, batch_loss=16, batch

Epoch 6/10:  39%|▍| 386/991 [1:37:45<2:27:01, 14.58s/batch, batch_loss=24.2, bat

Epoch 6/10:  39%|▍| 387/991 [1:37:45<2:26:33, 14.56s/batch, batch_loss=24.2, bat

Epoch 6/10:  39%|▍| 387/991 [1:37:59<2:26:33, 14.56s/batch, batch_loss=792, batc

Epoch 6/10:  39%|▍| 388/991 [1:37:59<2:23:41, 14.30s/batch, batch_loss=792, batc

Epoch 6/10:  39%|▍| 388/991 [1:38:14<2:23:41, 14.30s/batch, batch_loss=12.4, bat

Epoch 6/10:  39%|▍| 389/991 [1:38:14<2:26:34, 14.61s/batch, batch_loss=12.4, bat

Epoch 6/10:  39%|▍| 389/991 [1:38:29<2:26:34, 14.61s/batch, batch_loss=868, batc

Epoch 6/10:  39%|▍| 390/991 [1:38:29<2:27:54, 14.77s/batch, batch_loss=868, batc

Epoch 6/10:  39%|▍| 390/991 [1:38:44<2:27:54, 14.77s/batch, batch_loss=17.3, bat

Epoch 6/10:  39%|▍| 391/991 [1:38:44<2:27:16, 14.73s/batch, batch_loss=17.3, bat

Epoch 6/10:  39%|▍| 391/991 [1:39:00<2:27:16, 14.73s/batch, batch_loss=12, batch

Epoch 6/10:  40%|▍| 392/991 [1:39:00<2:30:30, 15.08s/batch, batch_loss=12, batch

Epoch 6/10:  40%|▍| 392/991 [1:39:15<2:30:30, 15.08s/batch, batch_loss=14.3, bat

Epoch 6/10:  40%|▍| 393/991 [1:39:15<2:30:22, 15.09s/batch, batch_loss=14.3, bat

Epoch 6/10:  40%|▍| 393/991 [1:39:30<2:30:22, 15.09s/batch, batch_loss=607, batc

Epoch 6/10:  40%|▍| 394/991 [1:39:30<2:30:49, 15.16s/batch, batch_loss=607, batc

Epoch 6/10:  40%|▍| 394/991 [1:39:45<2:30:49, 15.16s/batch, batch_loss=16.6, bat

Epoch 6/10:  40%|▍| 395/991 [1:39:45<2:30:57, 15.20s/batch, batch_loss=16.6, bat

Epoch 6/10:  40%|▍| 395/991 [1:40:00<2:30:57, 15.20s/batch, batch_loss=11, batch

Epoch 6/10:  40%|▍| 396/991 [1:40:00<2:30:08, 15.14s/batch, batch_loss=11, batch

Epoch 6/10:  40%|▍| 396/991 [1:40:16<2:30:08, 15.14s/batch, batch_loss=13.3, bat

Epoch 6/10:  40%|▍| 397/991 [1:40:16<2:31:10, 15.27s/batch, batch_loss=13.3, bat

Epoch 6/10:  40%|▍| 397/991 [1:40:34<2:31:10, 15.27s/batch, batch_loss=13.3, bat

Epoch 6/10:  40%|▍| 398/991 [1:40:34<2:38:32, 16.04s/batch, batch_loss=13.3, bat

Epoch 6/10:  40%|▍| 398/991 [1:40:49<2:38:32, 16.04s/batch, batch_loss=20.6, bat

Epoch 6/10:  40%|▍| 399/991 [1:40:49<2:34:37, 15.67s/batch, batch_loss=20.6, bat

Epoch 6/10:  40%|▍| 399/991 [1:41:03<2:34:37, 15.67s/batch, batch_loss=10.2, bat

Epoch 6/10:  40%|▍| 400/991 [1:41:03<2:31:13, 15.35s/batch, batch_loss=10.2, bat

Epoch 6/10:  40%|▍| 400/991 [1:41:19<2:31:13, 15.35s/batch, batch_loss=9.66, bat

Epoch 6/10:  40%|▍| 401/991 [1:41:19<2:31:05, 15.36s/batch, batch_loss=9.66, bat

Epoch 6/10:  40%|▍| 401/991 [1:41:33<2:31:05, 15.36s/batch, batch_loss=15.4, bat

Epoch 6/10:  41%|▍| 402/991 [1:41:33<2:28:42, 15.15s/batch, batch_loss=15.4, bat

Epoch 6/10:  41%|▍| 402/991 [1:41:47<2:28:42, 15.15s/batch, batch_loss=16.3, bat

Epoch 6/10:  41%|▍| 403/991 [1:41:47<2:25:10, 14.81s/batch, batch_loss=16.3, bat

Epoch 6/10:  41%|▍| 403/991 [1:42:02<2:25:10, 14.81s/batch, batch_loss=10.7, bat

Epoch 6/10:  41%|▍| 404/991 [1:42:02<2:22:59, 14.62s/batch, batch_loss=10.7, bat

Epoch 6/10:  41%|▍| 404/991 [1:42:17<2:22:59, 14.62s/batch, batch_loss=12.8, bat

Epoch 6/10:  41%|▍| 405/991 [1:42:17<2:24:03, 14.75s/batch, batch_loss=12.8, bat

Epoch 6/10:  41%|▍| 405/991 [1:42:32<2:24:03, 14.75s/batch, batch_loss=5.1, batc

Epoch 6/10:  41%|▍| 406/991 [1:42:32<2:26:15, 15.00s/batch, batch_loss=5.1, batc

Epoch 6/10:  41%|▍| 406/991 [1:42:47<2:26:15, 15.00s/batch, batch_loss=21.6, bat

Epoch 6/10:  41%|▍| 407/991 [1:42:47<2:25:09, 14.91s/batch, batch_loss=21.6, bat

Epoch 6/10:  41%|▍| 407/991 [1:43:04<2:25:09, 14.91s/batch, batch_loss=6.8, batc

Epoch 6/10:  41%|▍| 408/991 [1:43:04<2:32:14, 15.67s/batch, batch_loss=6.8, batc

Epoch 6/10:  41%|▍| 408/991 [1:43:19<2:32:14, 15.67s/batch, batch_loss=21.1, bat

Epoch 6/10:  41%|▍| 409/991 [1:43:19<2:29:35, 15.42s/batch, batch_loss=21.1, bat

Epoch 6/10:  41%|▍| 409/991 [1:43:33<2:29:35, 15.42s/batch, batch_loss=19.9, bat

Epoch 6/10:  41%|▍| 410/991 [1:43:33<2:25:05, 14.98s/batch, batch_loss=19.9, bat

Epoch 6/10:  41%|▍| 410/991 [1:43:48<2:25:05, 14.98s/batch, batch_loss=8.94, bat

Epoch 6/10:  41%|▍| 411/991 [1:43:48<2:24:10, 14.92s/batch, batch_loss=8.94, bat

Epoch 6/10:  41%|▍| 411/991 [1:44:02<2:24:10, 14.92s/batch, batch_loss=12.1, bat

Epoch 6/10:  42%|▍| 412/991 [1:44:02<2:21:48, 14.70s/batch, batch_loss=12.1, bat

Epoch 6/10:  42%|▍| 412/991 [1:44:16<2:21:48, 14.70s/batch, batch_loss=12.7, bat

Epoch 6/10:  42%|▍| 413/991 [1:44:16<2:20:05, 14.54s/batch, batch_loss=12.7, bat

Epoch 6/10:  42%|▍| 413/991 [1:44:32<2:20:05, 14.54s/batch, batch_loss=12.3, bat

Epoch 6/10:  42%|▍| 414/991 [1:44:32<2:21:59, 14.76s/batch, batch_loss=12.3, bat

Epoch 6/10:  42%|▍| 414/991 [1:44:47<2:21:59, 14.76s/batch, batch_loss=7.29, bat

Epoch 6/10:  42%|▍| 415/991 [1:44:47<2:22:29, 14.84s/batch, batch_loss=7.29, bat

Epoch 6/10:  42%|▍| 415/991 [1:45:01<2:22:29, 14.84s/batch, batch_loss=10.2, bat

Epoch 6/10:  42%|▍| 416/991 [1:45:01<2:22:18, 14.85s/batch, batch_loss=10.2, bat

Epoch 6/10:  42%|▍| 416/991 [1:45:16<2:22:18, 14.85s/batch, batch_loss=8.24, bat

Epoch 6/10:  42%|▍| 417/991 [1:45:16<2:21:22, 14.78s/batch, batch_loss=8.24, bat

Epoch 6/10:  42%|▍| 417/991 [1:45:30<2:21:22, 14.78s/batch, batch_loss=10.7, bat

Epoch 6/10:  42%|▍| 418/991 [1:45:30<2:19:51, 14.65s/batch, batch_loss=10.7, bat

Epoch 6/10:  42%|▍| 418/991 [1:45:45<2:19:51, 14.65s/batch, batch_loss=1.3e+3, b

Epoch 6/10:  42%|▍| 419/991 [1:45:45<2:20:23, 14.73s/batch, batch_loss=1.3e+3, b

Epoch 6/10:  42%|▍| 419/991 [1:46:01<2:20:23, 14.73s/batch, batch_loss=12.7, bat

Epoch 6/10:  42%|▍| 420/991 [1:46:01<2:24:18, 15.16s/batch, batch_loss=12.7, bat

Epoch 6/10:  42%|▍| 420/991 [1:46:16<2:24:18, 15.16s/batch, batch_loss=12.5, bat

Epoch 6/10:  42%|▍| 421/991 [1:46:16<2:21:45, 14.92s/batch, batch_loss=12.5, bat

Epoch 6/10:  42%|▍| 421/991 [1:46:30<2:21:45, 14.92s/batch, batch_loss=8.16, bat

Epoch 6/10:  43%|▍| 422/991 [1:46:30<2:20:45, 14.84s/batch, batch_loss=8.16, bat

Epoch 6/10:  43%|▍| 422/991 [1:46:45<2:20:45, 14.84s/batch, batch_loss=9.29, bat

Epoch 6/10:  43%|▍| 423/991 [1:46:45<2:20:05, 14.80s/batch, batch_loss=9.29, bat

Epoch 6/10:  43%|▍| 423/991 [1:47:00<2:20:05, 14.80s/batch, batch_loss=9.85, bat

Epoch 6/10:  43%|▍| 424/991 [1:47:00<2:19:25, 14.75s/batch, batch_loss=9.85, bat

Epoch 6/10:  43%|▍| 424/991 [1:47:15<2:19:25, 14.75s/batch, batch_loss=6.01, bat

Epoch 6/10:  43%|▍| 425/991 [1:47:15<2:20:41, 14.91s/batch, batch_loss=6.01, bat

Epoch 6/10:  43%|▍| 425/991 [1:47:31<2:20:41, 14.91s/batch, batch_loss=2.21, bat

Epoch 6/10:  43%|▍| 426/991 [1:47:31<2:21:57, 15.08s/batch, batch_loss=2.21, bat

Epoch 6/10:  43%|▍| 426/991 [1:47:46<2:21:57, 15.08s/batch, batch_loss=9.57, bat

Epoch 6/10:  43%|▍| 427/991 [1:47:46<2:22:46, 15.19s/batch, batch_loss=9.57, bat

Epoch 6/10:  43%|▍| 427/991 [1:48:02<2:22:46, 15.19s/batch, batch_loss=14.6, bat

Epoch 6/10:  43%|▍| 428/991 [1:48:02<2:25:31, 15.51s/batch, batch_loss=14.6, bat

Epoch 6/10:  43%|▍| 428/991 [1:48:18<2:25:31, 15.51s/batch, batch_loss=20.6, bat

Epoch 6/10:  43%|▍| 429/991 [1:48:18<2:25:27, 15.53s/batch, batch_loss=20.6, bat

Epoch 6/10:  43%|▍| 429/991 [1:48:33<2:25:27, 15.53s/batch, batch_loss=9.3e+3, b

Epoch 6/10:  43%|▍| 430/991 [1:48:33<2:24:06, 15.41s/batch, batch_loss=9.3e+3, b

Epoch 6/10:  43%|▍| 430/991 [1:48:50<2:24:06, 15.41s/batch, batch_loss=19.1, bat

Epoch 6/10:  43%|▍| 431/991 [1:48:50<2:29:13, 15.99s/batch, batch_loss=19.1, bat

Epoch 6/10:  43%|▍| 431/991 [1:49:06<2:29:13, 15.99s/batch, batch_loss=22, batch

Epoch 6/10:  44%|▍| 432/991 [1:49:06<2:27:04, 15.79s/batch, batch_loss=22, batch

Epoch 6/10:  44%|▍| 432/991 [1:49:21<2:27:04, 15.79s/batch, batch_loss=9.74, bat

Epoch 6/10:  44%|▍| 433/991 [1:49:21<2:25:05, 15.60s/batch, batch_loss=9.74, bat

Epoch 6/10:  44%|▍| 433/991 [1:49:36<2:25:05, 15.60s/batch, batch_loss=18.6, bat

Epoch 6/10:  44%|▍| 434/991 [1:49:36<2:23:54, 15.50s/batch, batch_loss=18.6, bat

Epoch 6/10:  44%|▍| 434/991 [1:49:52<2:23:54, 15.50s/batch, batch_loss=13.9, bat

Epoch 6/10:  44%|▍| 435/991 [1:49:52<2:24:29, 15.59s/batch, batch_loss=13.9, bat

Epoch 6/10:  44%|▍| 435/991 [1:50:07<2:24:29, 15.59s/batch, batch_loss=13.9, bat

Epoch 6/10:  44%|▍| 436/991 [1:50:07<2:24:12, 15.59s/batch, batch_loss=13.9, bat

Epoch 6/10:  44%|▍| 436/991 [1:50:23<2:24:12, 15.59s/batch, batch_loss=18.1, bat

Epoch 6/10:  44%|▍| 437/991 [1:50:23<2:23:26, 15.53s/batch, batch_loss=18.1, bat

Epoch 6/10:  44%|▍| 437/991 [1:50:38<2:23:26, 15.53s/batch, batch_loss=21, batch

Epoch 6/10:  44%|▍| 438/991 [1:50:38<2:21:32, 15.36s/batch, batch_loss=21, batch

Epoch 6/10:  44%|▍| 438/991 [1:50:55<2:21:32, 15.36s/batch, batch_loss=16.1, bat

Epoch 6/10:  44%|▍| 439/991 [1:50:55<2:26:43, 15.95s/batch, batch_loss=16.1, bat

Epoch 6/10:  44%|▍| 439/991 [1:51:10<2:26:43, 15.95s/batch, batch_loss=24.2, bat

Epoch 6/10:  44%|▍| 440/991 [1:51:10<2:22:48, 15.55s/batch, batch_loss=24.2, bat

Epoch 6/10:  44%|▍| 440/991 [1:51:24<2:22:48, 15.55s/batch, batch_loss=20.2, bat

Epoch 6/10:  45%|▍| 441/991 [1:51:24<2:20:17, 15.31s/batch, batch_loss=20.2, bat

Epoch 6/10:  45%|▍| 441/991 [1:51:40<2:20:17, 15.31s/batch, batch_loss=12.9, bat

Epoch 6/10:  45%|▍| 442/991 [1:51:40<2:20:09, 15.32s/batch, batch_loss=12.9, bat

Epoch 6/10:  45%|▍| 442/991 [1:51:55<2:20:09, 15.32s/batch, batch_loss=18.9, bat

Epoch 6/10:  45%|▍| 443/991 [1:51:55<2:20:01, 15.33s/batch, batch_loss=18.9, bat

Epoch 6/10:  45%|▍| 443/991 [1:52:11<2:20:01, 15.33s/batch, batch_loss=14.3, bat

Epoch 6/10:  45%|▍| 444/991 [1:52:11<2:19:53, 15.35s/batch, batch_loss=14.3, bat

Epoch 6/10:  45%|▍| 444/991 [1:52:26<2:19:53, 15.35s/batch, batch_loss=18, batch

Epoch 6/10:  45%|▍| 445/991 [1:52:26<2:19:45, 15.36s/batch, batch_loss=18, batch

Epoch 6/10:  45%|▍| 445/991 [1:52:41<2:19:45, 15.36s/batch, batch_loss=24.2, bat

Epoch 6/10:  45%|▍| 446/991 [1:52:41<2:17:22, 15.12s/batch, batch_loss=24.2, bat

Epoch 6/10:  45%|▍| 446/991 [1:52:59<2:17:22, 15.12s/batch, batch_loss=10.9, bat

Epoch 6/10:  45%|▍| 447/991 [1:52:59<2:25:11, 16.01s/batch, batch_loss=10.9, bat

Epoch 6/10:  45%|▍| 447/991 [1:53:13<2:25:11, 16.01s/batch, batch_loss=18, batch

Epoch 6/10:  45%|▍| 448/991 [1:53:13<2:21:42, 15.66s/batch, batch_loss=18, batch

Epoch 6/10:  45%|▍| 448/991 [1:53:28<2:21:42, 15.66s/batch, batch_loss=15.8, bat

Epoch 6/10:  45%|▍| 449/991 [1:53:28<2:18:29, 15.33s/batch, batch_loss=15.8, bat

Epoch 6/10:  45%|▍| 449/991 [1:53:43<2:18:29, 15.33s/batch, batch_loss=24.2, bat

Epoch 6/10:  45%|▍| 450/991 [1:53:43<2:18:27, 15.36s/batch, batch_loss=24.2, bat

Epoch 6/10:  45%|▍| 450/991 [1:53:58<2:18:27, 15.36s/batch, batch_loss=18, batch

Epoch 6/10:  46%|▍| 451/991 [1:53:58<2:16:45, 15.20s/batch, batch_loss=18, batch

Epoch 6/10:  46%|▍| 451/991 [1:54:13<2:16:45, 15.20s/batch, batch_loss=14.7, bat

Epoch 6/10:  46%|▍| 452/991 [1:54:13<2:15:05, 15.04s/batch, batch_loss=14.7, bat

Epoch 6/10:  46%|▍| 452/991 [1:54:28<2:15:05, 15.04s/batch, batch_loss=18.8, bat

Epoch 6/10:  46%|▍| 453/991 [1:54:28<2:13:56, 14.94s/batch, batch_loss=18.8, bat

Epoch 6/10:  46%|▍| 453/991 [1:54:43<2:13:56, 14.94s/batch, batch_loss=7.23e+3, 

Epoch 6/10:  46%|▍| 454/991 [1:54:43<2:14:38, 15.04s/batch, batch_loss=7.23e+3, 

Epoch 6/10:  46%|▍| 454/991 [1:54:58<2:14:38, 15.04s/batch, batch_loss=19.6, bat

Epoch 6/10:  46%|▍| 455/991 [1:54:58<2:15:39, 15.19s/batch, batch_loss=19.6, bat

Epoch 6/10:  46%|▍| 455/991 [1:55:13<2:15:39, 15.19s/batch, batch_loss=23.7, bat

Epoch 6/10:  46%|▍| 456/991 [1:55:13<2:14:28, 15.08s/batch, batch_loss=23.7, bat

Epoch 6/10:  46%|▍| 456/991 [1:55:28<2:14:28, 15.08s/batch, batch_loss=14.3, bat

Epoch 6/10:  46%|▍| 457/991 [1:55:28<2:13:29, 15.00s/batch, batch_loss=14.3, bat

Epoch 6/10:  46%|▍| 457/991 [1:55:43<2:13:29, 15.00s/batch, batch_loss=14.8, bat

Epoch 6/10:  46%|▍| 458/991 [1:55:43<2:13:51, 15.07s/batch, batch_loss=14.8, bat

Epoch 6/10:  46%|▍| 458/991 [1:55:58<2:13:51, 15.07s/batch, batch_loss=21, batch

Epoch 6/10:  46%|▍| 459/991 [1:55:58<2:12:30, 14.95s/batch, batch_loss=21, batch

Epoch 6/10:  46%|▍| 459/991 [1:56:13<2:12:30, 14.95s/batch, batch_loss=18.5, bat

Epoch 6/10:  46%|▍| 460/991 [1:56:13<2:13:12, 15.05s/batch, batch_loss=18.5, bat

Epoch 6/10:  46%|▍| 460/991 [1:56:30<2:13:12, 15.05s/batch, batch_loss=52.5, bat

Epoch 6/10:  47%|▍| 461/991 [1:56:30<2:18:32, 15.68s/batch, batch_loss=52.5, bat

Epoch 6/10:  47%|▍| 461/991 [1:56:46<2:18:32, 15.68s/batch, batch_loss=14.1, bat

Epoch 6/10:  47%|▍| 462/991 [1:56:46<2:18:10, 15.67s/batch, batch_loss=14.1, bat

Epoch 6/10:  47%|▍| 462/991 [1:57:02<2:18:10, 15.67s/batch, batch_loss=6.22e+4, 

Epoch 6/10:  47%|▍| 463/991 [1:57:02<2:18:29, 15.74s/batch, batch_loss=6.22e+4, 

Epoch 6/10:  47%|▍| 463/991 [1:57:17<2:18:29, 15.74s/batch, batch_loss=12.4, bat

Epoch 6/10:  47%|▍| 464/991 [1:57:17<2:17:32, 15.66s/batch, batch_loss=12.4, bat

Epoch 6/10:  47%|▍| 464/991 [1:57:31<2:17:32, 15.66s/batch, batch_loss=14.3, bat

Epoch 6/10:  47%|▍| 465/991 [1:57:31<2:13:00, 15.17s/batch, batch_loss=14.3, bat

Epoch 6/10:  47%|▍| 465/991 [1:57:45<2:13:00, 15.17s/batch, batch_loss=16.1, bat

Epoch 6/10:  47%|▍| 466/991 [1:57:45<2:08:44, 14.71s/batch, batch_loss=16.1, bat

Epoch 6/10:  47%|▍| 466/991 [1:58:01<2:08:44, 14.71s/batch, batch_loss=13.2, bat

Epoch 6/10:  47%|▍| 467/991 [1:58:01<2:12:10, 15.13s/batch, batch_loss=13.2, bat

Epoch 6/10:  47%|▍| 467/991 [1:58:16<2:12:10, 15.13s/batch, batch_loss=17.9, bat

Epoch 6/10:  47%|▍| 468/991 [1:58:16<2:11:46, 15.12s/batch, batch_loss=17.9, bat

Epoch 6/10:  47%|▍| 468/991 [1:58:31<2:11:46, 15.12s/batch, batch_loss=10.5, bat

Epoch 6/10:  47%|▍| 469/991 [1:58:31<2:11:37, 15.13s/batch, batch_loss=10.5, bat

Epoch 6/10:  47%|▍| 469/991 [1:58:46<2:11:37, 15.13s/batch, batch_loss=12.4, bat

Epoch 6/10:  47%|▍| 470/991 [1:58:46<2:10:22, 15.02s/batch, batch_loss=12.4, bat

Epoch 6/10:  47%|▍| 470/991 [1:59:01<2:10:22, 15.02s/batch, batch_loss=18.4, bat

Epoch 6/10:  48%|▍| 471/991 [1:59:01<2:10:19, 15.04s/batch, batch_loss=18.4, bat

Epoch 6/10:  48%|▍| 471/991 [1:59:17<2:10:19, 15.04s/batch, batch_loss=16.5, bat

Epoch 6/10:  48%|▍| 472/991 [1:59:17<2:12:01, 15.26s/batch, batch_loss=16.5, bat

Epoch 6/10:  48%|▍| 472/991 [1:59:32<2:12:01, 15.26s/batch, batch_loss=15.5, bat

Epoch 6/10:  48%|▍| 473/991 [1:59:32<2:11:14, 15.20s/batch, batch_loss=15.5, bat

Epoch 6/10:  48%|▍| 473/991 [1:59:47<2:11:14, 15.20s/batch, batch_loss=14.9, bat

Epoch 6/10:  48%|▍| 474/991 [1:59:47<2:10:28, 15.14s/batch, batch_loss=14.9, bat

Epoch 6/10:  48%|▍| 474/991 [2:00:02<2:10:28, 15.14s/batch, batch_loss=2.4e+3, b

Epoch 6/10:  48%|▍| 475/991 [2:00:02<2:10:40, 15.19s/batch, batch_loss=2.4e+3, b

Epoch 6/10:  48%|▍| 475/991 [2:00:18<2:10:40, 15.19s/batch, batch_loss=13.8, bat

Epoch 6/10:  48%|▍| 476/991 [2:00:18<2:11:00, 15.26s/batch, batch_loss=13.8, bat

Epoch 6/10:  48%|▍| 476/991 [2:00:33<2:11:00, 15.26s/batch, batch_loss=15.1, bat

Epoch 6/10:  48%|▍| 477/991 [2:00:33<2:09:59, 15.17s/batch, batch_loss=15.1, bat

Epoch 6/10:  48%|▍| 477/991 [2:00:50<2:09:59, 15.17s/batch, batch_loss=15.7, bat

Epoch 6/10:  48%|▍| 478/991 [2:00:50<2:16:00, 15.91s/batch, batch_loss=15.7, bat

Epoch 6/10:  48%|▍| 478/991 [2:01:06<2:16:00, 15.91s/batch, batch_loss=18.1, bat

Epoch 6/10:  48%|▍| 479/991 [2:01:06<2:15:09, 15.84s/batch, batch_loss=18.1, bat

Epoch 6/10:  48%|▍| 479/991 [2:01:23<2:15:09, 15.84s/batch, batch_loss=17.8, bat

Epoch 6/10:  48%|▍| 480/991 [2:01:23<2:16:11, 15.99s/batch, batch_loss=17.8, bat

Epoch 6/10:  48%|▍| 480/991 [2:01:37<2:16:11, 15.99s/batch, batch_loss=27.7, bat

Epoch 6/10:  49%|▍| 481/991 [2:01:37<2:13:16, 15.68s/batch, batch_loss=27.7, bat

Epoch 6/10:  49%|▍| 481/991 [2:01:53<2:13:16, 15.68s/batch, batch_loss=19.9, bat

Epoch 6/10:  49%|▍| 482/991 [2:01:53<2:12:24, 15.61s/batch, batch_loss=19.9, bat

Epoch 6/10:  49%|▍| 482/991 [2:02:09<2:12:24, 15.61s/batch, batch_loss=14.1, bat

Epoch 6/10:  49%|▍| 483/991 [2:02:09<2:12:57, 15.70s/batch, batch_loss=14.1, bat

Epoch 6/10:  49%|▍| 483/991 [2:02:24<2:12:57, 15.70s/batch, batch_loss=20.6, bat

Epoch 6/10:  49%|▍| 484/991 [2:02:24<2:11:44, 15.59s/batch, batch_loss=20.6, bat

Epoch 6/10:  49%|▍| 484/991 [2:02:39<2:11:44, 15.59s/batch, batch_loss=9.22, bat

Epoch 6/10:  49%|▍| 485/991 [2:02:39<2:10:50, 15.51s/batch, batch_loss=9.22, bat

Epoch 6/10:  49%|▍| 485/991 [2:02:57<2:10:50, 15.51s/batch, batch_loss=22.6, bat

Epoch 6/10:  49%|▍| 486/991 [2:02:57<2:15:56, 16.15s/batch, batch_loss=22.6, bat

Epoch 6/10:  49%|▍| 486/991 [2:03:12<2:15:56, 16.15s/batch, batch_loss=13.3, bat

Epoch 6/10:  49%|▍| 487/991 [2:03:12<2:11:47, 15.69s/batch, batch_loss=13.3, bat

Epoch 6/10:  49%|▍| 487/991 [2:03:27<2:11:47, 15.69s/batch, batch_loss=8.48, bat

Epoch 6/10:  49%|▍| 488/991 [2:03:27<2:10:14, 15.54s/batch, batch_loss=8.48, bat

Epoch 6/10:  49%|▍| 488/991 [2:03:42<2:10:14, 15.54s/batch, batch_loss=8.9, batc

Epoch 6/10:  49%|▍| 489/991 [2:03:42<2:09:56, 15.53s/batch, batch_loss=8.9, batc

Epoch 6/10:  49%|▍| 489/991 [2:03:58<2:09:56, 15.53s/batch, batch_loss=7.57, bat

Epoch 6/10:  49%|▍| 490/991 [2:03:58<2:09:49, 15.55s/batch, batch_loss=7.57, bat

Epoch 6/10:  49%|▍| 490/991 [2:04:13<2:09:49, 15.55s/batch, batch_loss=18.9, bat

Epoch 6/10:  50%|▍| 491/991 [2:04:13<2:08:13, 15.39s/batch, batch_loss=18.9, bat

Epoch 6/10:  50%|▍| 491/991 [2:04:28<2:08:13, 15.39s/batch, batch_loss=15.1, bat

Epoch 6/10:  50%|▍| 492/991 [2:04:28<2:05:48, 15.13s/batch, batch_loss=15.1, bat

Epoch 6/10:  50%|▍| 492/991 [2:04:43<2:05:48, 15.13s/batch, batch_loss=18, batch

Epoch 6/10:  50%|▍| 493/991 [2:04:43<2:05:35, 15.13s/batch, batch_loss=18, batch

Epoch 6/10:  50%|▍| 493/991 [2:04:59<2:05:35, 15.13s/batch, batch_loss=8.62, bat

Epoch 6/10:  50%|▍| 494/991 [2:04:59<2:08:52, 15.56s/batch, batch_loss=8.62, bat

Epoch 6/10:  50%|▍| 494/991 [2:05:14<2:08:52, 15.56s/batch, batch_loss=8.54e+4, 

Epoch 6/10:  50%|▍| 495/991 [2:05:14<2:07:01, 15.37s/batch, batch_loss=8.54e+4, 

Epoch 6/10:  50%|▍| 495/991 [2:05:29<2:07:01, 15.37s/batch, batch_loss=14, batch

Epoch 6/10:  50%|▌| 496/991 [2:05:29<2:06:38, 15.35s/batch, batch_loss=14, batch

Epoch 6/10:  50%|▌| 496/991 [2:05:44<2:06:38, 15.35s/batch, batch_loss=175, batc

Epoch 6/10:  50%|▌| 497/991 [2:05:44<2:05:24, 15.23s/batch, batch_loss=175, batc

Epoch 6/10:  50%|▌| 497/991 [2:05:58<2:05:24, 15.23s/batch, batch_loss=14, batch

Epoch 6/10:  50%|▌| 498/991 [2:05:58<2:02:03, 14.86s/batch, batch_loss=14, batch

Epoch 6/10:  50%|▌| 498/991 [2:06:14<2:02:03, 14.86s/batch, batch_loss=405, batc

Epoch 6/10:  50%|▌| 499/991 [2:06:14<2:03:43, 15.09s/batch, batch_loss=405, batc

Epoch 6/10:  50%|▌| 499/991 [2:06:30<2:03:43, 15.09s/batch, batch_loss=17, batch

Epoch 6/10:  50%|▌| 500/991 [2:06:30<2:04:37, 15.23s/batch, batch_loss=17, batch

Epoch 6/10:  50%|▌| 500/991 [2:06:45<2:04:37, 15.23s/batch, batch_loss=7.56, bat

Epoch 6/10:  51%|▌| 501/991 [2:06:45<2:05:54, 15.42s/batch, batch_loss=7.56, bat

Epoch 6/10:  51%|▌| 501/991 [2:07:01<2:05:54, 15.42s/batch, batch_loss=9.74, bat

Epoch 6/10:  51%|▌| 502/991 [2:07:01<2:05:43, 15.43s/batch, batch_loss=9.74, bat

Epoch 6/10:  51%|▌| 502/991 [2:07:16<2:05:43, 15.43s/batch, batch_loss=24.1, bat

Epoch 6/10:  51%|▌| 503/991 [2:07:16<2:04:33, 15.31s/batch, batch_loss=24.1, bat

Epoch 6/10:  51%|▌| 503/991 [2:07:32<2:04:33, 15.31s/batch, batch_loss=10.7, bat

Epoch 6/10:  51%|▌| 504/991 [2:07:32<2:06:41, 15.61s/batch, batch_loss=10.7, bat

Epoch 6/10:  51%|▌| 504/991 [2:07:48<2:06:41, 15.61s/batch, batch_loss=7.92, bat

Epoch 6/10:  51%|▌| 505/991 [2:07:48<2:05:43, 15.52s/batch, batch_loss=7.92, bat

Epoch 6/10:  51%|▌| 505/991 [2:08:03<2:05:43, 15.52s/batch, batch_loss=15.5, bat

Epoch 6/10:  51%|▌| 506/991 [2:08:03<2:06:08, 15.61s/batch, batch_loss=15.5, bat

Epoch 6/10:  51%|▌| 506/991 [2:08:18<2:06:08, 15.61s/batch, batch_loss=10.4, bat

Epoch 6/10:  51%|▌| 507/991 [2:08:18<2:04:17, 15.41s/batch, batch_loss=10.4, bat

Epoch 6/10:  51%|▌| 507/991 [2:08:32<2:04:17, 15.41s/batch, batch_loss=18.4, bat

Epoch 6/10:  51%|▌| 508/991 [2:08:32<2:00:58, 15.03s/batch, batch_loss=18.4, bat

Epoch 6/10:  51%|▌| 508/991 [2:08:48<2:00:58, 15.03s/batch, batch_loss=18.1, bat

Epoch 6/10:  51%|▌| 509/991 [2:08:48<2:00:48, 15.04s/batch, batch_loss=18.1, bat

Epoch 6/10:  51%|▌| 509/991 [2:09:05<2:00:48, 15.04s/batch, batch_loss=14.9, bat

Epoch 6/10:  51%|▌| 510/991 [2:09:05<2:06:52, 15.83s/batch, batch_loss=14.9, bat

Epoch 6/10:  51%|▌| 510/991 [2:09:21<2:06:52, 15.83s/batch, batch_loss=14.3, bat

Epoch 6/10:  52%|▌| 511/991 [2:09:21<2:05:40, 15.71s/batch, batch_loss=14.3, bat

Epoch 6/10:  52%|▌| 511/991 [2:09:36<2:05:40, 15.71s/batch, batch_loss=9.32, bat

Epoch 6/10:  52%|▌| 512/991 [2:09:36<2:03:59, 15.53s/batch, batch_loss=9.32, bat

Epoch 6/10:  52%|▌| 512/991 [2:09:51<2:03:59, 15.53s/batch, batch_loss=8.88, bat

Epoch 6/10:  52%|▌| 513/991 [2:09:51<2:03:50, 15.54s/batch, batch_loss=8.88, bat

Epoch 6/10:  52%|▌| 513/991 [2:10:07<2:03:50, 15.54s/batch, batch_loss=15.6, bat

Epoch 6/10:  52%|▌| 514/991 [2:10:07<2:04:44, 15.69s/batch, batch_loss=15.6, bat

Epoch 6/10:  52%|▌| 514/991 [2:10:23<2:04:44, 15.69s/batch, batch_loss=16.7, bat

Epoch 6/10:  52%|▌| 515/991 [2:10:23<2:04:56, 15.75s/batch, batch_loss=16.7, bat

Epoch 6/10:  52%|▌| 515/991 [2:10:38<2:04:56, 15.75s/batch, batch_loss=17.4, bat

Epoch 6/10:  52%|▌| 516/991 [2:10:38<2:03:21, 15.58s/batch, batch_loss=17.4, bat

Epoch 6/10:  52%|▌| 516/991 [2:10:53<2:03:21, 15.58s/batch, batch_loss=10.8, bat

Epoch 6/10:  52%|▌| 517/991 [2:10:53<2:00:57, 15.31s/batch, batch_loss=10.8, bat

Epoch 6/10:  52%|▌| 517/991 [2:11:09<2:00:57, 15.31s/batch, batch_loss=18.5, bat

Epoch 6/10:  52%|▌| 518/991 [2:11:09<2:01:34, 15.42s/batch, batch_loss=18.5, bat

Epoch 6/10:  52%|▌| 518/991 [2:11:24<2:01:34, 15.42s/batch, batch_loss=14.2, bat

Epoch 6/10:  52%|▌| 519/991 [2:11:24<2:00:11, 15.28s/batch, batch_loss=14.2, bat

Epoch 6/10:  52%|▌| 519/991 [2:11:39<2:00:11, 15.28s/batch, batch_loss=11.7, bat

Epoch 6/10:  52%|▌| 520/991 [2:11:39<2:00:09, 15.31s/batch, batch_loss=11.7, bat

Epoch 6/10:  52%|▌| 520/991 [2:11:55<2:00:09, 15.31s/batch, batch_loss=8.35, bat

Epoch 6/10:  53%|▌| 521/991 [2:11:55<2:01:40, 15.53s/batch, batch_loss=8.35, bat

Epoch 6/10:  53%|▌| 521/991 [2:12:10<2:01:40, 15.53s/batch, batch_loss=9.61, bat

Epoch 6/10:  53%|▌| 522/991 [2:12:10<1:59:39, 15.31s/batch, batch_loss=9.61, bat

Epoch 6/10:  53%|▌| 522/991 [2:12:25<1:59:39, 15.31s/batch, batch_loss=3.04, bat

Epoch 6/10:  53%|▌| 523/991 [2:12:25<1:58:57, 15.25s/batch, batch_loss=3.04, bat

Epoch 6/10:  53%|▌| 523/991 [2:12:40<1:58:57, 15.25s/batch, batch_loss=7.59, bat

Epoch 6/10:  53%|▌| 524/991 [2:12:40<1:57:21, 15.08s/batch, batch_loss=7.59, bat

Epoch 6/10:  53%|▌| 524/991 [2:12:57<1:57:21, 15.08s/batch, batch_loss=6.5, batc

Epoch 6/10:  53%|▌| 525/991 [2:12:57<2:02:53, 15.82s/batch, batch_loss=6.5, batc

Epoch 6/10:  53%|▌| 525/991 [2:13:12<2:02:53, 15.82s/batch, batch_loss=7.03, bat

Epoch 6/10:  53%|▌| 526/991 [2:13:12<2:01:09, 15.63s/batch, batch_loss=7.03, bat

Epoch 6/10:  53%|▌| 526/991 [2:13:27<2:01:09, 15.63s/batch, batch_loss=13.7, bat

Epoch 6/10:  53%|▌| 527/991 [2:13:27<1:58:12, 15.29s/batch, batch_loss=13.7, bat

Epoch 6/10:  53%|▌| 527/991 [2:13:42<1:58:12, 15.29s/batch, batch_loss=15.3, bat

Epoch 6/10:  53%|▌| 528/991 [2:13:42<1:57:10, 15.18s/batch, batch_loss=15.3, bat

Epoch 6/10:  53%|▌| 528/991 [2:13:57<1:57:10, 15.18s/batch, batch_loss=8.7, batc

Epoch 6/10:  53%|▌| 529/991 [2:13:57<1:55:38, 15.02s/batch, batch_loss=8.7, batc

Epoch 6/10:  53%|▌| 529/991 [2:14:12<1:55:38, 15.02s/batch, batch_loss=15.3, bat

Epoch 6/10:  53%|▌| 530/991 [2:14:12<1:57:26, 15.28s/batch, batch_loss=15.3, bat

Epoch 6/10:  53%|▌| 530/991 [2:14:30<1:57:26, 15.28s/batch, batch_loss=13, batch

Epoch 6/10:  54%|▌| 531/991 [2:14:30<2:01:34, 15.86s/batch, batch_loss=13, batch

Epoch 6/10:  54%|▌| 531/991 [2:14:45<2:01:34, 15.86s/batch, batch_loss=13.7, bat

Epoch 6/10:  54%|▌| 532/991 [2:14:45<2:00:41, 15.78s/batch, batch_loss=13.7, bat

Epoch 6/10:  54%|▌| 532/991 [2:15:01<2:00:41, 15.78s/batch, batch_loss=12.2, bat

Epoch 6/10:  54%|▌| 533/991 [2:15:01<2:00:22, 15.77s/batch, batch_loss=12.2, bat

Epoch 6/10:  54%|▌| 533/991 [2:15:16<2:00:22, 15.77s/batch, batch_loss=11.5, bat

Epoch 6/10:  54%|▌| 534/991 [2:15:16<1:58:04, 15.50s/batch, batch_loss=11.5, bat

Epoch 6/10:  54%|▌| 534/991 [2:15:31<1:58:04, 15.50s/batch, batch_loss=19.5, bat

Epoch 6/10:  54%|▌| 535/991 [2:15:31<1:56:31, 15.33s/batch, batch_loss=19.5, bat

Epoch 6/10:  54%|▌| 535/991 [2:15:48<1:56:31, 15.33s/batch, batch_loss=16.7, bat

Epoch 6/10:  54%|▌| 536/991 [2:15:48<1:59:51, 15.81s/batch, batch_loss=16.7, bat

Epoch 6/10:  54%|▌| 536/991 [2:16:02<1:59:51, 15.81s/batch, batch_loss=8.43, bat

Epoch 6/10:  54%|▌| 537/991 [2:16:02<1:57:15, 15.50s/batch, batch_loss=8.43, bat

Epoch 6/10:  54%|▌| 537/991 [2:16:18<1:57:15, 15.50s/batch, batch_loss=1.78e+3, 

Epoch 6/10:  54%|▌| 538/991 [2:16:18<1:56:36, 15.44s/batch, batch_loss=1.78e+3, 

Epoch 6/10:  54%|▌| 538/991 [2:16:32<1:56:36, 15.44s/batch, batch_loss=24.2, bat

Epoch 6/10:  54%|▌| 539/991 [2:16:33<1:54:38, 15.22s/batch, batch_loss=24.2, bat

Epoch 6/10:  54%|▌| 539/991 [2:16:47<1:54:38, 15.22s/batch, batch_loss=23.2, bat

Epoch 6/10:  54%|▌| 540/991 [2:16:47<1:53:29, 15.10s/batch, batch_loss=23.2, bat

Epoch 6/10:  54%|▌| 540/991 [2:17:02<1:53:29, 15.10s/batch, batch_loss=1.3e+4, b

Epoch 6/10:  55%|▌| 541/991 [2:17:02<1:51:31, 14.87s/batch, batch_loss=1.3e+4, b

Epoch 6/10:  55%|▌| 541/991 [2:17:17<1:51:31, 14.87s/batch, batch_loss=2.85e+3, 

Epoch 6/10:  55%|▌| 542/991 [2:17:17<1:51:40, 14.92s/batch, batch_loss=2.85e+3, 

Epoch 6/10:  55%|▌| 542/991 [2:17:31<1:51:40, 14.92s/batch, batch_loss=34.8, bat

Epoch 6/10:  55%|▌| 543/991 [2:17:31<1:48:56, 14.59s/batch, batch_loss=34.8, bat

Epoch 6/10:  55%|▌| 543/991 [2:17:44<1:48:56, 14.59s/batch, batch_loss=20.7, bat

Epoch 6/10:  55%|▌| 544/991 [2:17:44<1:47:17, 14.40s/batch, batch_loss=20.7, bat

Epoch 6/10:  55%|▌| 544/991 [2:17:59<1:47:17, 14.40s/batch, batch_loss=14.4, bat

Epoch 6/10:  55%|▌| 545/991 [2:17:59<1:46:24, 14.31s/batch, batch_loss=14.4, bat

Epoch 6/10:  55%|▌| 545/991 [2:18:14<1:46:24, 14.31s/batch, batch_loss=300, batc

Epoch 6/10:  55%|▌| 546/991 [2:18:14<1:48:48, 14.67s/batch, batch_loss=300, batc

Epoch 6/10:  55%|▌| 546/991 [2:18:30<1:48:48, 14.67s/batch, batch_loss=15.6, bat

Epoch 6/10:  55%|▌| 547/991 [2:18:30<1:50:38, 14.95s/batch, batch_loss=15.6, bat

Epoch 6/10:  55%|▌| 547/991 [2:18:45<1:50:38, 14.95s/batch, batch_loss=12.1, bat

Epoch 6/10:  55%|▌| 548/991 [2:18:45<1:51:18, 15.08s/batch, batch_loss=12.1, bat

Epoch 6/10:  55%|▌| 548/991 [2:19:01<1:51:18, 15.08s/batch, batch_loss=9.02, bat

Epoch 6/10:  55%|▌| 549/991 [2:19:01<1:52:06, 15.22s/batch, batch_loss=9.02, bat

Epoch 6/10:  55%|▌| 549/991 [2:19:16<1:52:06, 15.22s/batch, batch_loss=19.7, bat

Epoch 6/10:  55%|▌| 550/991 [2:19:16<1:52:59, 15.37s/batch, batch_loss=19.7, bat

Epoch 6/10:  55%|▌| 550/991 [2:19:31<1:52:59, 15.37s/batch, batch_loss=17.2, bat

Epoch 6/10:  56%|▌| 551/991 [2:19:31<1:51:45, 15.24s/batch, batch_loss=17.2, bat

Epoch 6/10:  56%|▌| 551/991 [2:19:47<1:51:45, 15.24s/batch, batch_loss=15.4, bat

Epoch 6/10:  56%|▌| 552/991 [2:19:47<1:52:49, 15.42s/batch, batch_loss=15.4, bat

Epoch 6/10:  56%|▌| 552/991 [2:20:02<1:52:49, 15.42s/batch, batch_loss=15.2, bat

Epoch 6/10:  56%|▌| 553/991 [2:20:02<1:51:06, 15.22s/batch, batch_loss=15.2, bat

Epoch 6/10:  56%|▌| 553/991 [2:20:17<1:51:06, 15.22s/batch, batch_loss=5.74e+3, 

Epoch 6/10:  56%|▌| 554/991 [2:20:17<1:51:11, 15.27s/batch, batch_loss=5.74e+3, 

Epoch 6/10:  56%|▌| 554/991 [2:20:33<1:51:11, 15.27s/batch, batch_loss=2.57e+3, 

Epoch 6/10:  56%|▌| 555/991 [2:20:33<1:50:53, 15.26s/batch, batch_loss=2.57e+3, 

Epoch 6/10:  56%|▌| 555/991 [2:20:47<1:50:53, 15.26s/batch, batch_loss=15.2, bat

Epoch 6/10:  56%|▌| 556/991 [2:20:47<1:49:55, 15.16s/batch, batch_loss=15.2, bat

Epoch 6/10:  56%|▌| 556/991 [2:21:03<1:49:55, 15.16s/batch, batch_loss=1.27e+4, 

Epoch 6/10:  56%|▌| 557/991 [2:21:03<1:50:39, 15.30s/batch, batch_loss=1.27e+4, 

Epoch 6/10:  56%|▌| 557/991 [2:21:18<1:50:39, 15.30s/batch, batch_loss=8.02, bat

Epoch 6/10:  56%|▌| 558/991 [2:21:18<1:50:02, 15.25s/batch, batch_loss=8.02, bat

Epoch 6/10:  56%|▌| 558/991 [2:21:34<1:50:02, 15.25s/batch, batch_loss=16.1, bat

Epoch 6/10:  56%|▌| 559/991 [2:21:34<1:51:50, 15.53s/batch, batch_loss=16.1, bat

Epoch 6/10:  56%|▌| 559/991 [2:21:50<1:51:50, 15.53s/batch, batch_loss=6.91, bat

Epoch 6/10:  57%|▌| 560/991 [2:21:50<1:51:01, 15.46s/batch, batch_loss=6.91, bat

Epoch 6/10:  57%|▌| 560/991 [2:22:04<1:51:01, 15.46s/batch, batch_loss=6.06, bat

Epoch 6/10:  57%|▌| 561/991 [2:22:04<1:48:15, 15.11s/batch, batch_loss=6.06, bat

Epoch 6/10:  57%|▌| 561/991 [2:22:20<1:48:15, 15.11s/batch, batch_loss=16.6, bat

Epoch 6/10:  57%|▌| 562/991 [2:22:20<1:49:01, 15.25s/batch, batch_loss=16.6, bat

Epoch 6/10:  57%|▌| 562/991 [2:22:36<1:49:01, 15.25s/batch, batch_loss=12.6, bat

Epoch 6/10:  57%|▌| 563/991 [2:22:36<1:50:19, 15.47s/batch, batch_loss=12.6, bat

Epoch 6/10:  57%|▌| 563/991 [2:22:51<1:50:19, 15.47s/batch, batch_loss=11.1, bat

Epoch 6/10:  57%|▌| 564/991 [2:22:51<1:49:34, 15.40s/batch, batch_loss=11.1, bat

Epoch 6/10:  57%|▌| 564/991 [2:23:06<1:49:34, 15.40s/batch, batch_loss=503, batc

Epoch 6/10:  57%|▌| 565/991 [2:23:06<1:48:59, 15.35s/batch, batch_loss=503, batc

Epoch 6/10:  57%|▌| 565/991 [2:23:21<1:48:59, 15.35s/batch, batch_loss=11.6, bat

Epoch 6/10:  57%|▌| 566/991 [2:23:21<1:48:03, 15.25s/batch, batch_loss=11.6, bat

Epoch 6/10:  57%|▌| 566/991 [2:23:37<1:48:03, 15.25s/batch, batch_loss=17.8, bat

Epoch 6/10:  57%|▌| 567/991 [2:23:37<1:50:14, 15.60s/batch, batch_loss=17.8, bat

Epoch 6/10:  57%|▌| 567/991 [2:23:53<1:50:14, 15.60s/batch, batch_loss=307, batc

Epoch 6/10:  57%|▌| 568/991 [2:23:53<1:49:42, 15.56s/batch, batch_loss=307, batc

Epoch 6/10:  57%|▌| 568/991 [2:24:08<1:49:42, 15.56s/batch, batch_loss=39.3, bat

Epoch 6/10:  57%|▌| 569/991 [2:24:08<1:47:50, 15.33s/batch, batch_loss=39.3, bat

Epoch 6/10:  57%|▌| 569/991 [2:24:22<1:47:50, 15.33s/batch, batch_loss=8.46e+3, 

Epoch 6/10:  58%|▌| 570/991 [2:24:22<1:45:07, 14.98s/batch, batch_loss=8.46e+3, 

Epoch 6/10:  58%|▌| 570/991 [2:24:37<1:45:07, 14.98s/batch, batch_loss=9.65, bat

Epoch 6/10:  58%|▌| 571/991 [2:24:37<1:44:20, 14.91s/batch, batch_loss=9.65, bat

Epoch 6/10:  58%|▌| 571/991 [2:24:51<1:44:20, 14.91s/batch, batch_loss=10.7, bat

Epoch 6/10:  58%|▌| 572/991 [2:24:51<1:43:53, 14.88s/batch, batch_loss=10.7, bat

Epoch 6/10:  58%|▌| 572/991 [2:25:07<1:43:53, 14.88s/batch, batch_loss=8.44, bat

Epoch 6/10:  58%|▌| 573/991 [2:25:07<1:44:18, 14.97s/batch, batch_loss=8.44, bat

Epoch 6/10:  58%|▌| 573/991 [2:25:22<1:44:18, 14.97s/batch, batch_loss=12.5, bat

Epoch 6/10:  58%|▌| 574/991 [2:25:22<1:44:15, 15.00s/batch, batch_loss=12.5, bat

Epoch 6/10:  58%|▌| 574/991 [2:25:38<1:44:15, 15.00s/batch, batch_loss=19.8, bat

Epoch 6/10:  58%|▌| 575/991 [2:25:38<1:46:12, 15.32s/batch, batch_loss=19.8, bat

Epoch 6/10:  58%|▌| 575/991 [2:25:53<1:46:12, 15.32s/batch, batch_loss=26, batch

Epoch 6/10:  58%|▌| 576/991 [2:25:53<1:46:53, 15.45s/batch, batch_loss=26, batch

Epoch 6/10:  58%|▌| 576/991 [2:26:09<1:46:53, 15.45s/batch, batch_loss=12.2, bat

Epoch 6/10:  58%|▌| 577/991 [2:26:09<1:46:42, 15.46s/batch, batch_loss=12.2, bat

Epoch 6/10:  58%|▌| 577/991 [2:26:27<1:46:42, 15.46s/batch, batch_loss=8.67, bat

Epoch 6/10:  58%|▌| 578/991 [2:26:27<1:52:00, 16.27s/batch, batch_loss=8.67, bat

Epoch 6/10:  58%|▌| 578/991 [2:26:42<1:52:00, 16.27s/batch, batch_loss=11.7, bat

Epoch 6/10:  58%|▌| 579/991 [2:26:42<1:49:49, 15.99s/batch, batch_loss=11.7, bat

Epoch 6/10:  58%|▌| 579/991 [2:26:59<1:49:49, 15.99s/batch, batch_loss=17.6, bat

Epoch 6/10:  59%|▌| 580/991 [2:26:59<1:50:05, 16.07s/batch, batch_loss=17.6, bat

Epoch 6/10:  59%|▌| 580/991 [2:27:14<1:50:05, 16.07s/batch, batch_loss=6.75, bat

Epoch 6/10:  59%|▌| 581/991 [2:27:14<1:47:12, 15.69s/batch, batch_loss=6.75, bat

Epoch 6/10:  59%|▌| 581/991 [2:27:29<1:47:12, 15.69s/batch, batch_loss=0.269, ba

Epoch 6/10:  59%|▌| 582/991 [2:27:29<1:46:00, 15.55s/batch, batch_loss=0.269, ba

Epoch 6/10:  59%|▌| 582/991 [2:27:43<1:46:00, 15.55s/batch, batch_loss=6.61e+3, 

Epoch 6/10:  59%|▌| 583/991 [2:27:43<1:43:38, 15.24s/batch, batch_loss=6.61e+3, 

Epoch 6/10:  59%|▌| 583/991 [2:27:57<1:43:38, 15.24s/batch, batch_loss=11.8, bat

Epoch 6/10:  59%|▌| 584/991 [2:27:57<1:41:07, 14.91s/batch, batch_loss=11.8, bat

Epoch 6/10:  59%|▌| 584/991 [2:28:12<1:41:07, 14.91s/batch, batch_loss=7.7, batc

Epoch 6/10:  59%|▌| 585/991 [2:28:12<1:41:04, 14.94s/batch, batch_loss=7.7, batc

Epoch 6/10:  59%|▌| 585/991 [2:28:30<1:41:04, 14.94s/batch, batch_loss=19.5, bat

Epoch 6/10:  59%|▌| 586/991 [2:28:30<1:46:51, 15.83s/batch, batch_loss=19.5, bat

Epoch 6/10:  59%|▌| 586/991 [2:28:46<1:46:51, 15.83s/batch, batch_loss=21.5, bat

Epoch 6/10:  59%|▌| 587/991 [2:28:46<1:46:54, 15.88s/batch, batch_loss=21.5, bat

Epoch 6/10:  59%|▌| 587/991 [2:29:03<1:46:54, 15.88s/batch, batch_loss=16, batch

Epoch 6/10:  59%|▌| 588/991 [2:29:03<1:47:36, 16.02s/batch, batch_loss=16, batch

Epoch 6/10:  59%|▌| 588/991 [2:29:19<1:47:36, 16.02s/batch, batch_loss=8.48, bat

Epoch 6/10:  59%|▌| 589/991 [2:29:19<1:47:05, 15.98s/batch, batch_loss=8.48, bat

Epoch 6/10:  59%|▌| 589/991 [2:29:34<1:47:05, 15.98s/batch, batch_loss=15.1, bat

Epoch 6/10:  60%|▌| 590/991 [2:29:34<1:46:23, 15.92s/batch, batch_loss=15.1, bat

Epoch 6/10:  60%|▌| 590/991 [2:29:50<1:46:23, 15.92s/batch, batch_loss=14.4, bat

Epoch 6/10:  60%|▌| 591/991 [2:29:50<1:44:39, 15.70s/batch, batch_loss=14.4, bat

Epoch 6/10:  60%|▌| 591/991 [2:30:05<1:44:39, 15.70s/batch, batch_loss=7.16, bat

Epoch 6/10:  60%|▌| 592/991 [2:30:05<1:43:54, 15.62s/batch, batch_loss=7.16, bat

Epoch 6/10:  60%|▌| 592/991 [2:30:21<1:43:54, 15.62s/batch, batch_loss=12.4, bat

Epoch 6/10:  60%|▌| 593/991 [2:30:21<1:44:36, 15.77s/batch, batch_loss=12.4, bat

Epoch 6/10:  60%|▌| 593/991 [2:30:38<1:44:36, 15.77s/batch, batch_loss=11.7, bat

Epoch 6/10:  60%|▌| 594/991 [2:30:38<1:47:06, 16.19s/batch, batch_loss=11.7, bat

Epoch 6/10:  60%|▌| 594/991 [2:30:54<1:47:06, 16.19s/batch, batch_loss=7.32, bat

Epoch 6/10:  60%|▌| 595/991 [2:30:54<1:45:46, 16.03s/batch, batch_loss=7.32, bat

Epoch 6/10:  60%|▌| 595/991 [2:31:09<1:45:46, 16.03s/batch, batch_loss=6.25, bat

Epoch 6/10:  60%|▌| 596/991 [2:31:09<1:44:03, 15.81s/batch, batch_loss=6.25, bat

Epoch 6/10:  60%|▌| 596/991 [2:31:24<1:44:03, 15.81s/batch, batch_loss=20.2, bat

Epoch 6/10:  60%|▌| 597/991 [2:31:24<1:41:33, 15.47s/batch, batch_loss=20.2, bat

Epoch 6/10:  60%|▌| 597/991 [2:31:40<1:41:33, 15.47s/batch, batch_loss=9.1, batc

Epoch 6/10:  60%|▌| 598/991 [2:31:40<1:41:50, 15.55s/batch, batch_loss=9.1, batc

Epoch 6/10:  60%|▌| 598/991 [2:31:56<1:41:50, 15.55s/batch, batch_loss=17, batch

Epoch 6/10:  60%|▌| 599/991 [2:31:56<1:42:17, 15.66s/batch, batch_loss=17, batch

Epoch 6/10:  60%|▌| 599/991 [2:32:11<1:42:17, 15.66s/batch, batch_loss=13.8, bat

Epoch 6/10:  61%|▌| 600/991 [2:32:11<1:41:18, 15.55s/batch, batch_loss=13.8, bat

Epoch 6/10:  61%|▌| 600/991 [2:32:26<1:41:18, 15.55s/batch, batch_loss=14.6, bat

Epoch 6/10:  61%|▌| 601/991 [2:32:26<1:41:16, 15.58s/batch, batch_loss=14.6, bat

Epoch 6/10:  61%|▌| 601/991 [2:32:47<1:41:16, 15.58s/batch, batch_loss=10.7, bat

Epoch 6/10:  61%|▌| 602/991 [2:32:47<1:49:44, 16.93s/batch, batch_loss=10.7, bat

Epoch 6/10:  61%|▌| 602/991 [2:33:02<1:49:44, 16.93s/batch, batch_loss=6.75, bat

Epoch 6/10:  61%|▌| 603/991 [2:33:02<1:46:22, 16.45s/batch, batch_loss=6.75, bat

Epoch 6/10:  61%|▌| 603/991 [2:33:17<1:46:22, 16.45s/batch, batch_loss=1.01e+4, 

Epoch 6/10:  61%|▌| 604/991 [2:33:17<1:43:16, 16.01s/batch, batch_loss=1.01e+4, 

Epoch 6/10:  61%|▌| 604/991 [2:33:32<1:43:16, 16.01s/batch, batch_loss=10.6, bat

Epoch 6/10:  61%|▌| 605/991 [2:33:32<1:42:08, 15.88s/batch, batch_loss=10.6, bat

Epoch 6/10:  61%|▌| 605/991 [2:33:49<1:42:08, 15.88s/batch, batch_loss=9.65, bat

Epoch 6/10:  61%|▌| 606/991 [2:33:49<1:42:18, 15.94s/batch, batch_loss=9.65, bat

Epoch 6/10:  61%|▌| 606/991 [2:34:04<1:42:18, 15.94s/batch, batch_loss=8.14, bat

Epoch 6/10:  61%|▌| 607/991 [2:34:04<1:41:42, 15.89s/batch, batch_loss=8.14, bat

Epoch 6/10:  61%|▌| 607/991 [2:34:20<1:41:42, 15.89s/batch, batch_loss=15.3, bat

Epoch 6/10:  61%|▌| 608/991 [2:34:20<1:40:36, 15.76s/batch, batch_loss=15.3, bat

Epoch 6/10:  61%|▌| 608/991 [2:34:36<1:40:36, 15.76s/batch, batch_loss=15.8, bat

Epoch 6/10:  61%|▌| 609/991 [2:34:36<1:40:26, 15.78s/batch, batch_loss=15.8, bat

Epoch 6/10:  61%|▌| 609/991 [2:34:53<1:40:26, 15.78s/batch, batch_loss=14.9, bat

Epoch 6/10:  62%|▌| 610/991 [2:34:53<1:43:48, 16.35s/batch, batch_loss=14.9, bat

Epoch 6/10:  62%|▌| 610/991 [2:35:08<1:43:48, 16.35s/batch, batch_loss=26.1, bat

Epoch 6/10:  62%|▌| 611/991 [2:35:08<1:39:57, 15.78s/batch, batch_loss=26.1, bat

Epoch 6/10:  62%|▌| 611/991 [2:35:23<1:39:57, 15.78s/batch, batch_loss=7.55, bat

Epoch 6/10:  62%|▌| 612/991 [2:35:23<1:39:42, 15.78s/batch, batch_loss=7.55, bat

Epoch 6/10:  62%|▌| 612/991 [2:35:39<1:39:42, 15.78s/batch, batch_loss=11.4, bat

Epoch 6/10:  62%|▌| 613/991 [2:35:39<1:38:46, 15.68s/batch, batch_loss=11.4, bat

Epoch 6/10:  62%|▌| 613/991 [2:35:54<1:38:46, 15.68s/batch, batch_loss=1.73e+4, 

Epoch 6/10:  62%|▌| 614/991 [2:35:54<1:37:31, 15.52s/batch, batch_loss=1.73e+4, 

Epoch 6/10:  62%|▌| 614/991 [2:36:10<1:37:31, 15.52s/batch, batch_loss=987, batc

Epoch 6/10:  62%|▌| 615/991 [2:36:10<1:37:30, 15.56s/batch, batch_loss=987, batc

Epoch 6/10:  62%|▌| 615/991 [2:36:24<1:37:30, 15.56s/batch, batch_loss=7.93, bat

Epoch 6/10:  62%|▌| 616/991 [2:36:24<1:35:21, 15.26s/batch, batch_loss=7.93, bat

Epoch 6/10:  62%|▌| 616/991 [2:36:40<1:35:21, 15.26s/batch, batch_loss=17.7, bat

Epoch 6/10:  62%|▌| 617/991 [2:36:40<1:35:10, 15.27s/batch, batch_loss=17.7, bat

Epoch 6/10:  62%|▌| 617/991 [2:36:57<1:35:10, 15.27s/batch, batch_loss=12.4, bat

Epoch 6/10:  62%|▌| 618/991 [2:36:57<1:38:12, 15.80s/batch, batch_loss=12.4, bat

Epoch 6/10:  62%|▌| 618/991 [2:37:12<1:38:12, 15.80s/batch, batch_loss=22.1, bat

Epoch 6/10:  62%|▌| 619/991 [2:37:12<1:36:57, 15.64s/batch, batch_loss=22.1, bat

Epoch 6/10:  62%|▌| 619/991 [2:37:26<1:36:57, 15.64s/batch, batch_loss=11.1, bat

Epoch 6/10:  63%|▋| 620/991 [2:37:26<1:34:21, 15.26s/batch, batch_loss=11.1, bat

Epoch 6/10:  63%|▋| 620/991 [2:37:41<1:34:21, 15.26s/batch, batch_loss=8.63, bat

Epoch 6/10:  63%|▋| 621/991 [2:37:41<1:33:27, 15.16s/batch, batch_loss=8.63, bat

Epoch 6/10:  63%|▋| 621/991 [2:37:56<1:33:27, 15.16s/batch, batch_loss=5.47e+3, 

Epoch 6/10:  63%|▋| 622/991 [2:37:56<1:32:15, 15.00s/batch, batch_loss=5.47e+3, 

Epoch 6/10:  63%|▋| 622/991 [2:38:10<1:32:15, 15.00s/batch, batch_loss=26.9, bat

Epoch 6/10:  63%|▋| 623/991 [2:38:10<1:30:26, 14.75s/batch, batch_loss=26.9, bat

Epoch 6/10:  63%|▋| 623/991 [2:38:24<1:30:26, 14.75s/batch, batch_loss=1.6e+4, b

Epoch 6/10:  63%|▋| 624/991 [2:38:24<1:29:16, 14.60s/batch, batch_loss=1.6e+4, b

Epoch 6/10:  63%|▋| 624/991 [2:38:41<1:29:16, 14.60s/batch, batch_loss=11.1, bat

Epoch 6/10:  63%|▋| 625/991 [2:38:41<1:32:32, 15.17s/batch, batch_loss=11.1, bat

Epoch 6/10:  63%|▋| 625/991 [2:38:57<1:32:32, 15.17s/batch, batch_loss=6.38, bat

Epoch 6/10:  63%|▋| 626/991 [2:38:57<1:33:30, 15.37s/batch, batch_loss=6.38, bat

Epoch 6/10:  63%|▋| 626/991 [2:39:11<1:33:30, 15.37s/batch, batch_loss=4.31e+3, 

Epoch 6/10:  63%|▋| 627/991 [2:39:11<1:31:17, 15.05s/batch, batch_loss=4.31e+3, 

Epoch 6/10:  63%|▋| 627/991 [2:39:25<1:31:17, 15.05s/batch, batch_loss=1.05e+3, 

Epoch 6/10:  63%|▋| 628/991 [2:39:25<1:28:53, 14.69s/batch, batch_loss=1.05e+3, 

Epoch 6/10:  63%|▋| 628/991 [2:39:39<1:28:53, 14.69s/batch, batch_loss=13.5, bat

Epoch 6/10:  63%|▋| 629/991 [2:39:39<1:28:22, 14.65s/batch, batch_loss=13.5, bat

Epoch 6/10:  63%|▋| 629/991 [2:39:54<1:28:22, 14.65s/batch, batch_loss=20.9, bat

Epoch 6/10:  64%|▋| 630/991 [2:39:54<1:29:08, 14.82s/batch, batch_loss=20.9, bat

Epoch 6/10:  64%|▋| 630/991 [2:40:10<1:29:08, 14.82s/batch, batch_loss=17, batch

Epoch 6/10:  64%|▋| 631/991 [2:40:10<1:30:21, 15.06s/batch, batch_loss=17, batch

Epoch 6/10:  64%|▋| 631/991 [2:40:25<1:30:21, 15.06s/batch, batch_loss=4.12, bat

Epoch 6/10:  64%|▋| 632/991 [2:40:25<1:29:08, 14.90s/batch, batch_loss=4.12, bat

Epoch 6/10:  64%|▋| 632/991 [2:40:39<1:29:08, 14.90s/batch, batch_loss=26, batch

Epoch 6/10:  64%|▋| 633/991 [2:40:39<1:28:22, 14.81s/batch, batch_loss=26, batch

Epoch 6/10:  64%|▋| 633/991 [2:40:55<1:28:22, 14.81s/batch, batch_loss=28.1, bat

Epoch 6/10:  64%|▋| 634/991 [2:40:55<1:30:15, 15.17s/batch, batch_loss=28.1, bat

Epoch 6/10:  64%|▋| 634/991 [2:41:10<1:30:15, 15.17s/batch, batch_loss=27, batch

Epoch 6/10:  64%|▋| 635/991 [2:41:10<1:28:57, 14.99s/batch, batch_loss=27, batch

Epoch 6/10:  64%|▋| 635/991 [2:41:25<1:28:57, 14.99s/batch, batch_loss=18.4, bat

Epoch 6/10:  64%|▋| 636/991 [2:41:25<1:28:14, 14.91s/batch, batch_loss=18.4, bat

Epoch 6/10:  64%|▋| 636/991 [2:41:39<1:28:14, 14.91s/batch, batch_loss=19.5, bat

Epoch 6/10:  64%|▋| 637/991 [2:41:39<1:27:43, 14.87s/batch, batch_loss=19.5, bat

Epoch 6/10:  64%|▋| 637/991 [2:41:54<1:27:43, 14.87s/batch, batch_loss=17.4, bat

Epoch 6/10:  64%|▋| 638/991 [2:41:54<1:26:49, 14.76s/batch, batch_loss=17.4, bat

Epoch 6/10:  64%|▋| 638/991 [2:42:10<1:26:49, 14.76s/batch, batch_loss=9.97, bat

Epoch 6/10:  64%|▋| 639/991 [2:42:10<1:29:05, 15.19s/batch, batch_loss=9.97, bat

Epoch 6/10:  64%|▋| 639/991 [2:42:24<1:29:05, 15.19s/batch, batch_loss=673, batc

Epoch 6/10:  65%|▋| 640/991 [2:42:24<1:27:35, 14.97s/batch, batch_loss=673, batc

Epoch 6/10:  65%|▋| 640/991 [2:42:41<1:27:35, 14.97s/batch, batch_loss=14.7, bat

Epoch 6/10:  65%|▋| 641/991 [2:42:41<1:30:03, 15.44s/batch, batch_loss=14.7, bat

Epoch 6/10:  65%|▋| 641/991 [2:42:57<1:30:03, 15.44s/batch, batch_loss=8.48, bat

Epoch 6/10:  65%|▋| 642/991 [2:42:57<1:30:21, 15.53s/batch, batch_loss=8.48, bat

Epoch 6/10:  65%|▋| 642/991 [2:43:12<1:30:21, 15.53s/batch, batch_loss=2.13e+4, 

Epoch 6/10:  65%|▋| 643/991 [2:43:12<1:29:04, 15.36s/batch, batch_loss=2.13e+4, 

Epoch 6/10:  65%|▋| 643/991 [2:43:29<1:29:04, 15.36s/batch, batch_loss=1.76e+4, 

Epoch 6/10:  65%|▋| 644/991 [2:43:29<1:32:25, 15.98s/batch, batch_loss=1.76e+4, 

Epoch 6/10:  65%|▋| 644/991 [2:43:44<1:32:25, 15.98s/batch, batch_loss=2.19e+3, 

Epoch 6/10:  65%|▋| 645/991 [2:43:44<1:30:19, 15.66s/batch, batch_loss=2.19e+3, 

Epoch 6/10:  65%|▋| 645/991 [2:43:59<1:30:19, 15.66s/batch, batch_loss=10.7, bat

Epoch 6/10:  65%|▋| 646/991 [2:43:59<1:28:16, 15.35s/batch, batch_loss=10.7, bat

Epoch 6/10:  65%|▋| 646/991 [2:44:14<1:28:16, 15.35s/batch, batch_loss=13.8, bat

Epoch 6/10:  65%|▋| 647/991 [2:44:14<1:28:32, 15.44s/batch, batch_loss=13.8, bat

Epoch 6/10:  65%|▋| 647/991 [2:44:29<1:28:32, 15.44s/batch, batch_loss=13.8, bat

Epoch 6/10:  65%|▋| 648/991 [2:44:29<1:26:29, 15.13s/batch, batch_loss=13.8, bat

Epoch 6/10:  65%|▋| 648/991 [2:44:44<1:26:29, 15.13s/batch, batch_loss=14, batch

Epoch 6/10:  65%|▋| 649/991 [2:44:44<1:26:48, 15.23s/batch, batch_loss=14, batch

Epoch 6/10:  65%|▋| 649/991 [2:45:00<1:26:48, 15.23s/batch, batch_loss=1.34e+4, 

Epoch 6/10:  66%|▋| 650/991 [2:45:00<1:27:00, 15.31s/batch, batch_loss=1.34e+4, 

Epoch 6/10:  66%|▋| 650/991 [2:45:15<1:27:00, 15.31s/batch, batch_loss=9.01, bat

Epoch 6/10:  66%|▋| 651/991 [2:45:15<1:26:03, 15.19s/batch, batch_loss=9.01, bat

Epoch 6/10:  66%|▋| 651/991 [2:45:29<1:26:03, 15.19s/batch, batch_loss=12.8, bat

Epoch 6/10:  66%|▋| 652/991 [2:45:29<1:24:51, 15.02s/batch, batch_loss=12.8, bat

Epoch 6/10:  66%|▋| 652/991 [2:45:45<1:24:51, 15.02s/batch, batch_loss=18.1, bat

Epoch 6/10:  66%|▋| 653/991 [2:45:45<1:25:08, 15.12s/batch, batch_loss=18.1, bat

Epoch 6/10:  66%|▋| 653/991 [2:46:00<1:25:08, 15.12s/batch, batch_loss=21.6, bat

Epoch 6/10:  66%|▋| 654/991 [2:46:00<1:25:30, 15.22s/batch, batch_loss=21.6, bat

Epoch 6/10:  66%|▋| 654/991 [2:46:15<1:25:30, 15.22s/batch, batch_loss=3.84e+3, 

Epoch 6/10:  66%|▋| 655/991 [2:46:15<1:25:36, 15.29s/batch, batch_loss=3.84e+3, 

Epoch 6/10:  66%|▋| 655/991 [2:46:31<1:25:36, 15.29s/batch, batch_loss=5.19e+3, 

Epoch 6/10:  66%|▋| 656/991 [2:46:31<1:26:16, 15.45s/batch, batch_loss=5.19e+3, 

Epoch 6/10:  66%|▋| 656/991 [2:46:45<1:26:16, 15.45s/batch, batch_loss=4.21e+3, 

Epoch 6/10:  66%|▋| 657/991 [2:46:45<1:23:54, 15.07s/batch, batch_loss=4.21e+3, 

Epoch 6/10:  66%|▋| 657/991 [2:47:01<1:23:54, 15.07s/batch, batch_loss=2.2e+4, b

Epoch 6/10:  66%|▋| 658/991 [2:47:01<1:24:05, 15.15s/batch, batch_loss=2.2e+4, b

Epoch 6/10:  66%|▋| 658/991 [2:47:16<1:24:05, 15.15s/batch, batch_loss=4.05, bat

Epoch 6/10:  66%|▋| 659/991 [2:47:16<1:24:12, 15.22s/batch, batch_loss=4.05, bat

Epoch 6/10:  66%|▋| 659/991 [2:47:32<1:24:12, 15.22s/batch, batch_loss=5.65, bat

Epoch 6/10:  67%|▋| 660/991 [2:47:32<1:25:04, 15.42s/batch, batch_loss=5.65, bat

Epoch 6/10:  67%|▋| 660/991 [2:47:47<1:25:04, 15.42s/batch, batch_loss=11.5, bat

Epoch 6/10:  67%|▋| 661/991 [2:47:47<1:23:59, 15.27s/batch, batch_loss=11.5, bat

Epoch 6/10:  67%|▋| 661/991 [2:48:03<1:23:59, 15.27s/batch, batch_loss=16.2, bat

Epoch 6/10:  67%|▋| 662/991 [2:48:03<1:24:16, 15.37s/batch, batch_loss=16.2, bat

Epoch 6/10:  67%|▋| 662/991 [2:48:18<1:24:16, 15.37s/batch, batch_loss=18.5, bat

Epoch 6/10:  67%|▋| 663/991 [2:48:18<1:24:49, 15.52s/batch, batch_loss=18.5, bat

Epoch 6/10:  67%|▋| 663/991 [2:48:34<1:24:49, 15.52s/batch, batch_loss=3.03e+3, 

Epoch 6/10:  67%|▋| 664/991 [2:48:34<1:25:02, 15.60s/batch, batch_loss=3.03e+3, 

Epoch 6/10:  67%|▋| 664/991 [2:48:51<1:25:02, 15.60s/batch, batch_loss=13.9, bat

Epoch 6/10:  67%|▋| 665/991 [2:48:51<1:25:56, 15.82s/batch, batch_loss=13.9, bat

Epoch 6/10:  67%|▋| 665/991 [2:49:08<1:25:56, 15.82s/batch, batch_loss=3.05e+3, 

Epoch 6/10:  67%|▋| 666/991 [2:49:08<1:28:55, 16.42s/batch, batch_loss=3.05e+3, 

Epoch 6/10:  67%|▋| 666/991 [2:49:24<1:28:55, 16.42s/batch, batch_loss=19.8, bat

Epoch 6/10:  67%|▋| 667/991 [2:49:24<1:27:14, 16.16s/batch, batch_loss=19.8, bat

Epoch 6/10:  67%|▋| 667/991 [2:49:39<1:27:14, 16.16s/batch, batch_loss=370, batc

Epoch 6/10:  67%|▋| 668/991 [2:49:39<1:25:51, 15.95s/batch, batch_loss=370, batc

Epoch 6/10:  67%|▋| 668/991 [2:49:55<1:25:51, 15.95s/batch, batch_loss=2.97e+3, 

Epoch 6/10:  68%|▋| 669/991 [2:49:55<1:24:20, 15.72s/batch, batch_loss=2.97e+3, 

Epoch 6/10:  68%|▋| 669/991 [2:50:10<1:24:20, 15.72s/batch, batch_loss=1.01e+3, 

Epoch 6/10:  68%|▋| 670/991 [2:50:10<1:23:32, 15.61s/batch, batch_loss=1.01e+3, 

Epoch 6/10:  68%|▋| 670/991 [2:50:25<1:23:32, 15.61s/batch, batch_loss=12.3, bat

Epoch 6/10:  68%|▋| 671/991 [2:50:25<1:21:45, 15.33s/batch, batch_loss=12.3, bat

Epoch 6/10:  68%|▋| 671/991 [2:50:40<1:21:45, 15.33s/batch, batch_loss=15.8, bat

Epoch 6/10:  68%|▋| 672/991 [2:50:40<1:20:47, 15.19s/batch, batch_loss=15.8, bat

Epoch 6/10:  68%|▋| 672/991 [2:50:57<1:20:47, 15.19s/batch, batch_loss=18.9, bat

Epoch 6/10:  68%|▋| 673/991 [2:50:57<1:24:52, 16.02s/batch, batch_loss=18.9, bat

Epoch 6/10:  68%|▋| 673/991 [2:51:13<1:24:52, 16.02s/batch, batch_loss=22.4, bat

Epoch 6/10:  68%|▋| 674/991 [2:51:13<1:24:19, 15.96s/batch, batch_loss=22.4, bat

Epoch 6/10:  68%|▋| 674/991 [2:51:27<1:24:19, 15.96s/batch, batch_loss=4.13, bat

Epoch 6/10:  68%|▋| 675/991 [2:51:27<1:21:08, 15.41s/batch, batch_loss=4.13, bat

Epoch 6/10:  68%|▋| 675/991 [2:51:43<1:21:08, 15.41s/batch, batch_loss=11.8, bat

Epoch 6/10:  68%|▋| 676/991 [2:51:43<1:20:30, 15.33s/batch, batch_loss=11.8, bat

Epoch 6/10:  68%|▋| 676/991 [2:51:58<1:20:30, 15.33s/batch, batch_loss=21.9, bat

Epoch 6/10:  68%|▋| 677/991 [2:51:58<1:19:51, 15.26s/batch, batch_loss=21.9, bat

Epoch 6/10:  68%|▋| 677/991 [2:52:12<1:19:51, 15.26s/batch, batch_loss=6.57, bat

Epoch 6/10:  68%|▋| 678/991 [2:52:12<1:18:09, 14.98s/batch, batch_loss=6.57, bat

Epoch 6/10:  68%|▋| 678/991 [2:52:27<1:18:09, 14.98s/batch, batch_loss=3.82e+3, 

Epoch 6/10:  69%|▋| 679/991 [2:52:27<1:18:26, 15.09s/batch, batch_loss=3.82e+3, 

Epoch 6/10:  69%|▋| 679/991 [2:52:43<1:18:26, 15.09s/batch, batch_loss=6.12e+3, 

Epoch 6/10:  69%|▋| 680/991 [2:52:43<1:18:26, 15.13s/batch, batch_loss=6.12e+3, 

Epoch 6/10:  69%|▋| 680/991 [2:53:00<1:18:26, 15.13s/batch, batch_loss=7.23e+4, 

Epoch 6/10:  69%|▋| 681/991 [2:53:00<1:21:06, 15.70s/batch, batch_loss=7.23e+4, 

Epoch 6/10:  69%|▋| 681/991 [2:53:15<1:21:06, 15.70s/batch, batch_loss=14.6, bat

Epoch 6/10:  69%|▋| 682/991 [2:53:15<1:19:56, 15.52s/batch, batch_loss=14.6, bat

Epoch 6/10:  69%|▋| 682/991 [2:53:30<1:19:56, 15.52s/batch, batch_loss=376, batc

Epoch 6/10:  69%|▋| 683/991 [2:53:30<1:19:00, 15.39s/batch, batch_loss=376, batc

Epoch 6/10:  69%|▋| 683/991 [2:53:45<1:19:00, 15.39s/batch, batch_loss=5.2, batc

Epoch 6/10:  69%|▋| 684/991 [2:53:45<1:18:02, 15.25s/batch, batch_loss=5.2, batc

Epoch 6/10:  69%|▋| 684/991 [2:53:59<1:18:02, 15.25s/batch, batch_loss=14.4, bat

Epoch 6/10:  69%|▋| 685/991 [2:53:59<1:15:58, 14.90s/batch, batch_loss=14.4, bat

Epoch 6/10:  69%|▋| 685/991 [2:54:14<1:15:58, 14.90s/batch, batch_loss=13.6, bat

Epoch 6/10:  69%|▋| 686/991 [2:54:14<1:16:08, 14.98s/batch, batch_loss=13.6, bat

Epoch 6/10:  69%|▋| 686/991 [2:54:29<1:16:08, 14.98s/batch, batch_loss=537, batc

Epoch 6/10:  69%|▋| 687/991 [2:54:29<1:16:22, 15.07s/batch, batch_loss=537, batc

Epoch 6/10:  69%|▋| 687/991 [2:54:44<1:16:22, 15.07s/batch, batch_loss=5.44, bat

Epoch 6/10:  69%|▋| 688/991 [2:54:44<1:15:49, 15.02s/batch, batch_loss=5.44, bat

Epoch 6/10:  69%|▋| 688/991 [2:55:00<1:15:49, 15.02s/batch, batch_loss=6.01, bat

Epoch 6/10:  70%|▋| 689/991 [2:55:00<1:16:33, 15.21s/batch, batch_loss=6.01, bat

Epoch 6/10:  70%|▋| 689/991 [2:55:15<1:16:33, 15.21s/batch, batch_loss=12.1, bat

Epoch 6/10:  70%|▋| 690/991 [2:55:15<1:16:49, 15.31s/batch, batch_loss=12.1, bat

Epoch 6/10:  70%|▋| 690/991 [2:55:30<1:16:49, 15.31s/batch, batch_loss=18.8, bat

Epoch 6/10:  70%|▋| 691/991 [2:55:30<1:16:00, 15.20s/batch, batch_loss=18.8, bat

Epoch 6/10:  70%|▋| 691/991 [2:55:46<1:16:00, 15.20s/batch, batch_loss=6.59, bat

Epoch 6/10:  70%|▋| 692/991 [2:55:46<1:17:07, 15.48s/batch, batch_loss=6.59, bat

Epoch 6/10:  70%|▋| 692/991 [2:56:01<1:17:07, 15.48s/batch, batch_loss=4.66e+3, 

Epoch 6/10:  70%|▋| 693/991 [2:56:01<1:16:16, 15.36s/batch, batch_loss=4.66e+3, 

Epoch 6/10:  70%|▋| 693/991 [2:56:16<1:16:16, 15.36s/batch, batch_loss=454, batc

Epoch 6/10:  70%|▋| 694/991 [2:56:16<1:15:18, 15.21s/batch, batch_loss=454, batc

Epoch 6/10:  70%|▋| 694/991 [2:56:33<1:15:18, 15.21s/batch, batch_loss=778, batc

Epoch 6/10:  70%|▋| 695/991 [2:56:33<1:17:03, 15.62s/batch, batch_loss=778, batc

Epoch 6/10:  70%|▋| 695/991 [2:56:48<1:17:03, 15.62s/batch, batch_loss=9.81, bat

Epoch 6/10:  70%|▋| 696/991 [2:56:48<1:16:03, 15.47s/batch, batch_loss=9.81, bat

Epoch 6/10:  70%|▋| 696/991 [2:57:03<1:16:03, 15.47s/batch, batch_loss=6.79e+3, 

Epoch 6/10:  70%|▋| 697/991 [2:57:03<1:14:41, 15.24s/batch, batch_loss=6.79e+3, 

Epoch 6/10:  70%|▋| 697/991 [2:57:18<1:14:41, 15.24s/batch, batch_loss=11, batch

Epoch 6/10:  70%|▋| 698/991 [2:57:18<1:14:15, 15.20s/batch, batch_loss=11, batch

Epoch 6/10:  70%|▋| 698/991 [2:57:32<1:14:15, 15.20s/batch, batch_loss=7.38, bat

Epoch 6/10:  71%|▋| 699/991 [2:57:32<1:12:13, 14.84s/batch, batch_loss=7.38, bat

Epoch 6/10:  71%|▋| 699/991 [2:57:46<1:12:13, 14.84s/batch, batch_loss=9.45, bat

Epoch 6/10:  71%|▋| 700/991 [2:57:46<1:11:11, 14.68s/batch, batch_loss=9.45, bat

Epoch 6/10:  71%|▋| 700/991 [2:58:00<1:11:11, 14.68s/batch, batch_loss=212, batc

Epoch 6/10:  71%|▋| 701/991 [2:58:00<1:09:22, 14.35s/batch, batch_loss=212, batc

Epoch 6/10:  71%|▋| 701/991 [2:58:15<1:09:22, 14.35s/batch, batch_loss=16.9, bat

Epoch 6/10:  71%|▋| 702/991 [2:58:15<1:10:08, 14.56s/batch, batch_loss=16.9, bat

Epoch 6/10:  71%|▋| 702/991 [2:58:29<1:10:08, 14.56s/batch, batch_loss=274, batc

Epoch 6/10:  71%|▋| 703/991 [2:58:29<1:09:28, 14.47s/batch, batch_loss=274, batc

Epoch 6/10:  71%|▋| 703/991 [2:58:44<1:09:28, 14.47s/batch, batch_loss=7.99, bat

Epoch 6/10:  71%|▋| 704/991 [2:58:44<1:09:49, 14.60s/batch, batch_loss=7.99, bat

Epoch 6/10:  71%|▋| 704/991 [2:59:00<1:09:49, 14.60s/batch, batch_loss=9.6, batc

Epoch 6/10:  71%|▋| 705/991 [2:59:00<1:11:56, 15.09s/batch, batch_loss=9.6, batc

Epoch 6/10:  71%|▋| 705/991 [2:59:16<1:11:56, 15.09s/batch, batch_loss=16, batch

Epoch 6/10:  71%|▋| 706/991 [2:59:16<1:12:08, 15.19s/batch, batch_loss=16, batch

Epoch 6/10:  71%|▋| 706/991 [2:59:31<1:12:08, 15.19s/batch, batch_loss=14.9, bat

Epoch 6/10:  71%|▋| 707/991 [2:59:31<1:12:32, 15.33s/batch, batch_loss=14.9, bat

Epoch 6/10:  71%|▋| 707/991 [2:59:47<1:12:32, 15.33s/batch, batch_loss=8.98, bat

Epoch 6/10:  71%|▋| 708/991 [2:59:47<1:12:22, 15.34s/batch, batch_loss=8.98, bat

Epoch 6/10:  71%|▋| 708/991 [3:00:01<1:12:22, 15.34s/batch, batch_loss=7.25, bat

Epoch 6/10:  72%|▋| 709/991 [3:00:01<1:10:34, 15.02s/batch, batch_loss=7.25, bat

Epoch 6/10:  72%|▋| 709/991 [3:00:16<1:10:34, 15.02s/batch, batch_loss=26.5, bat

Epoch 6/10:  72%|▋| 710/991 [3:00:16<1:10:23, 15.03s/batch, batch_loss=26.5, bat

Epoch 6/10:  72%|▋| 710/991 [3:00:31<1:10:23, 15.03s/batch, batch_loss=95.6, bat

Epoch 6/10:  72%|▋| 711/991 [3:00:31<1:09:55, 14.98s/batch, batch_loss=95.6, bat

Epoch 6/10:  72%|▋| 711/991 [3:00:46<1:09:55, 14.98s/batch, batch_loss=11.5, bat

Epoch 6/10:  72%|▋| 712/991 [3:00:46<1:10:25, 15.14s/batch, batch_loss=11.5, bat

Epoch 6/10:  72%|▋| 712/991 [3:01:04<1:10:25, 15.14s/batch, batch_loss=78.4, bat

Epoch 6/10:  72%|▋| 713/991 [3:01:04<1:13:57, 15.96s/batch, batch_loss=78.4, bat

Epoch 6/10:  72%|▋| 713/991 [3:01:20<1:13:57, 15.96s/batch, batch_loss=24, batch

Epoch 6/10:  72%|▋| 714/991 [3:01:20<1:12:58, 15.81s/batch, batch_loss=24, batch

Epoch 6/10:  72%|▋| 714/991 [3:01:36<1:12:58, 15.81s/batch, batch_loss=18.4, bat

Epoch 6/10:  72%|▋| 715/991 [3:01:36<1:12:45, 15.82s/batch, batch_loss=18.4, bat

Epoch 6/10:  72%|▋| 715/991 [3:01:50<1:12:45, 15.82s/batch, batch_loss=15.1, bat

Epoch 6/10:  72%|▋| 716/991 [3:01:50<1:11:01, 15.50s/batch, batch_loss=15.1, bat

Epoch 6/10:  72%|▋| 716/991 [3:02:06<1:11:01, 15.50s/batch, batch_loss=19.8, bat

Epoch 6/10:  72%|▋| 717/991 [3:02:06<1:11:11, 15.59s/batch, batch_loss=19.8, bat

Epoch 6/10:  72%|▋| 717/991 [3:02:22<1:11:11, 15.59s/batch, batch_loss=20.7, bat

Epoch 6/10:  72%|▋| 718/991 [3:02:22<1:11:10, 15.64s/batch, batch_loss=20.7, bat

Epoch 6/10:  72%|▋| 718/991 [3:02:38<1:11:10, 15.64s/batch, batch_loss=10.8, bat

Epoch 6/10:  73%|▋| 719/991 [3:02:38<1:11:44, 15.83s/batch, batch_loss=10.8, bat

Epoch 6/10:  73%|▋| 719/991 [3:02:56<1:11:44, 15.83s/batch, batch_loss=11.8, bat

Epoch 6/10:  73%|▋| 720/991 [3:02:56<1:14:39, 16.53s/batch, batch_loss=11.8, bat

Epoch 6/10:  73%|▋| 720/991 [3:03:12<1:14:39, 16.53s/batch, batch_loss=15.8, bat

Epoch 6/10:  73%|▋| 721/991 [3:03:12<1:13:08, 16.25s/batch, batch_loss=15.8, bat

Epoch 6/10:  73%|▋| 721/991 [3:03:28<1:13:08, 16.25s/batch, batch_loss=20.9, bat

Epoch 6/10:  73%|▋| 722/991 [3:03:28<1:12:06, 16.08s/batch, batch_loss=20.9, bat

Epoch 6/10:  73%|▋| 722/991 [3:03:43<1:12:06, 16.08s/batch, batch_loss=7.24e+3, 

Epoch 6/10:  73%|▋| 723/991 [3:03:43<1:11:30, 16.01s/batch, batch_loss=7.24e+3, 

Epoch 6/10:  73%|▋| 723/991 [3:03:59<1:11:30, 16.01s/batch, batch_loss=3.96, bat

Epoch 6/10:  73%|▋| 724/991 [3:03:59<1:10:27, 15.83s/batch, batch_loss=3.96, bat

Epoch 6/10:  73%|▋| 724/991 [3:04:14<1:10:27, 15.83s/batch, batch_loss=13.7, bat

Epoch 6/10:  73%|▋| 725/991 [3:04:14<1:09:33, 15.69s/batch, batch_loss=13.7, bat

Epoch 6/10:  73%|▋| 725/991 [3:04:30<1:09:33, 15.69s/batch, batch_loss=11, batch

Epoch 6/10:  73%|▋| 726/991 [3:04:30<1:09:12, 15.67s/batch, batch_loss=11, batch

Epoch 6/10:  73%|▋| 726/991 [3:04:45<1:09:12, 15.67s/batch, batch_loss=1.29e+4, 

Epoch 6/10:  73%|▋| 727/991 [3:04:45<1:08:32, 15.58s/batch, batch_loss=1.29e+4, 

Epoch 6/10:  73%|▋| 727/991 [3:05:02<1:08:32, 15.58s/batch, batch_loss=9.21, bat

Epoch 6/10:  73%|▋| 728/991 [3:05:02<1:09:37, 15.88s/batch, batch_loss=9.21, bat

Epoch 6/10:  73%|▋| 728/991 [3:05:16<1:09:37, 15.88s/batch, batch_loss=128, batc

Epoch 6/10:  74%|▋| 729/991 [3:05:16<1:07:14, 15.40s/batch, batch_loss=128, batc

Epoch 6/10:  74%|▋| 729/991 [3:05:31<1:07:14, 15.40s/batch, batch_loss=9.41, bat

Epoch 6/10:  74%|▋| 730/991 [3:05:31<1:06:44, 15.34s/batch, batch_loss=9.41, bat

Epoch 6/10:  74%|▋| 730/991 [3:05:46<1:06:44, 15.34s/batch, batch_loss=107, batc

Epoch 6/10:  74%|▋| 731/991 [3:05:46<1:06:04, 15.25s/batch, batch_loss=107, batc

Epoch 6/10:  74%|▋| 731/991 [3:06:01<1:06:04, 15.25s/batch, batch_loss=1.39e+4, 

Epoch 6/10:  74%|▋| 732/991 [3:06:01<1:05:39, 15.21s/batch, batch_loss=1.39e+4, 

Epoch 6/10:  74%|▋| 732/991 [3:06:16<1:05:39, 15.21s/batch, batch_loss=13.5, bat

Epoch 6/10:  74%|▋| 733/991 [3:06:16<1:04:41, 15.05s/batch, batch_loss=13.5, bat

Epoch 6/10:  74%|▋| 733/991 [3:06:31<1:04:41, 15.05s/batch, batch_loss=6.8e+3, b

Epoch 6/10:  74%|▋| 734/991 [3:06:31<1:04:46, 15.12s/batch, batch_loss=6.8e+3, b

Epoch 6/10:  74%|▋| 734/991 [3:06:46<1:04:46, 15.12s/batch, batch_loss=11.3, bat

Epoch 6/10:  74%|▋| 735/991 [3:06:46<1:04:22, 15.09s/batch, batch_loss=11.3, bat

Epoch 6/10:  74%|▋| 735/991 [3:07:02<1:04:22, 15.09s/batch, batch_loss=16, batch

Epoch 6/10:  74%|▋| 736/991 [3:07:02<1:04:47, 15.24s/batch, batch_loss=16, batch

Epoch 6/10:  74%|▋| 736/991 [3:07:17<1:04:47, 15.24s/batch, batch_loss=8.93, bat

Epoch 6/10:  74%|▋| 737/991 [3:07:17<1:04:20, 15.20s/batch, batch_loss=8.93, bat

Epoch 6/10:  74%|▋| 737/991 [3:07:32<1:04:20, 15.20s/batch, batch_loss=1.48e+3, 

Epoch 6/10:  74%|▋| 738/991 [3:07:32<1:03:55, 15.16s/batch, batch_loss=1.48e+3, 

Epoch 6/10:  74%|▋| 738/991 [3:07:48<1:03:55, 15.16s/batch, batch_loss=28.6, bat

Epoch 6/10:  75%|▋| 739/991 [3:07:48<1:04:32, 15.37s/batch, batch_loss=28.6, bat

Epoch 6/10:  75%|▋| 739/991 [3:08:03<1:04:32, 15.37s/batch, batch_loss=8.72, bat

Epoch 6/10:  75%|▋| 740/991 [3:08:03<1:03:45, 15.24s/batch, batch_loss=8.72, bat

Epoch 6/10:  75%|▋| 740/991 [3:08:18<1:03:45, 15.24s/batch, batch_loss=1.8e+4, b

Epoch 6/10:  75%|▋| 741/991 [3:08:18<1:03:28, 15.24s/batch, batch_loss=1.8e+4, b

Epoch 6/10:  75%|▋| 741/991 [3:08:33<1:03:28, 15.24s/batch, batch_loss=2.28e+3, 

Epoch 6/10:  75%|▋| 742/991 [3:08:33<1:03:00, 15.18s/batch, batch_loss=2.28e+3, 

Epoch 6/10:  75%|▋| 742/991 [3:08:51<1:03:00, 15.18s/batch, batch_loss=10.6, bat

Epoch 6/10:  75%|▋| 743/991 [3:08:51<1:05:36, 15.87s/batch, batch_loss=10.6, bat

Epoch 6/10:  75%|▋| 743/991 [3:09:05<1:05:36, 15.87s/batch, batch_loss=13.3, bat

Epoch 6/10:  75%|▊| 744/991 [3:09:05<1:03:47, 15.50s/batch, batch_loss=13.3, bat

Epoch 6/10:  75%|▊| 744/991 [3:09:20<1:03:47, 15.50s/batch, batch_loss=13.5, bat

Epoch 6/10:  75%|▊| 745/991 [3:09:20<1:02:12, 15.17s/batch, batch_loss=13.5, bat

Epoch 6/10:  75%|▊| 745/991 [3:09:35<1:02:12, 15.17s/batch, batch_loss=1.15e+3, 

Epoch 6/10:  75%|▊| 746/991 [3:09:35<1:02:18, 15.26s/batch, batch_loss=1.15e+3, 

Epoch 6/10:  75%|▊| 746/991 [3:09:50<1:02:18, 15.26s/batch, batch_loss=3.88e+3, 

Epoch 6/10:  75%|▊| 747/991 [3:09:50<1:01:59, 15.24s/batch, batch_loss=3.88e+3, 

Epoch 6/10:  75%|▊| 747/991 [3:10:06<1:01:59, 15.24s/batch, batch_loss=12.7, bat

Epoch 6/10:  75%|▊| 748/991 [3:10:06<1:02:42, 15.48s/batch, batch_loss=12.7, bat

Epoch 6/10:  75%|▊| 748/991 [3:10:22<1:02:42, 15.48s/batch, batch_loss=12.9, bat

Epoch 6/10:  76%|▊| 749/991 [3:10:22<1:02:28, 15.49s/batch, batch_loss=12.9, bat

Epoch 6/10:  76%|▊| 749/991 [3:10:36<1:02:28, 15.49s/batch, batch_loss=10.6, bat

Epoch 6/10:  76%|▊| 750/991 [3:10:36<1:00:54, 15.16s/batch, batch_loss=10.6, bat

Epoch 6/10:  76%|▊| 750/991 [3:10:52<1:00:54, 15.16s/batch, batch_loss=13.2, bat

Epoch 6/10:  76%|▊| 751/991 [3:10:52<1:01:14, 15.31s/batch, batch_loss=13.2, bat

Epoch 6/10:  76%|▊| 751/991 [3:11:08<1:01:14, 15.31s/batch, batch_loss=7.84, bat

Epoch 6/10:  76%|▊| 752/991 [3:11:08<1:01:45, 15.50s/batch, batch_loss=7.84, bat

Epoch 6/10:  76%|▊| 752/991 [3:11:24<1:01:45, 15.50s/batch, batch_loss=7.95, bat

Epoch 6/10:  76%|▊| 753/991 [3:11:24<1:01:46, 15.57s/batch, batch_loss=7.95, bat

Epoch 6/10:  76%|▊| 753/991 [3:11:40<1:01:46, 15.57s/batch, batch_loss=6.31, bat

Epoch 6/10:  76%|▊| 754/991 [3:11:40<1:02:04, 15.72s/batch, batch_loss=6.31, bat

Epoch 6/10:  76%|▊| 754/991 [3:11:56<1:02:04, 15.72s/batch, batch_loss=17.4, bat

Epoch 6/10:  76%|▊| 755/991 [3:11:56<1:02:56, 16.00s/batch, batch_loss=17.4, bat

Epoch 6/10:  76%|▊| 755/991 [3:12:11<1:02:56, 16.00s/batch, batch_loss=16.4, bat

Epoch 6/10:  76%|▊| 756/991 [3:12:11<1:00:45, 15.51s/batch, batch_loss=16.4, bat

Epoch 6/10:  76%|▊| 756/991 [3:12:26<1:00:45, 15.51s/batch, batch_loss=4.63, bat

Epoch 6/10:  76%|▊| 757/991 [3:12:26<59:56, 15.37s/batch, batch_loss=4.63, batch

Epoch 6/10:  76%|▊| 757/991 [3:12:41<59:56, 15.37s/batch, batch_loss=15.5, batch

Epoch 6/10:  76%|▊| 758/991 [3:12:41<59:09, 15.23s/batch, batch_loss=15.5, batch

Epoch 6/10:  76%|▊| 758/991 [3:12:58<59:09, 15.23s/batch, batch_loss=13.8, batch

Epoch 6/10:  77%|▊| 759/991 [3:12:58<1:01:48, 15.99s/batch, batch_loss=13.8, bat

Epoch 6/10:  77%|▊| 759/991 [3:13:14<1:01:48, 15.99s/batch, batch_loss=16.1, bat

Epoch 6/10:  77%|▊| 760/991 [3:13:14<1:00:28, 15.71s/batch, batch_loss=16.1, bat

Epoch 6/10:  77%|▊| 760/991 [3:13:29<1:00:28, 15.71s/batch, batch_loss=16.6, bat

Epoch 6/10:  77%|▊| 761/991 [3:13:29<1:00:17, 15.73s/batch, batch_loss=16.6, bat

Epoch 6/10:  77%|▊| 761/991 [3:13:45<1:00:17, 15.73s/batch, batch_loss=26.2, bat

Epoch 6/10:  77%|▊| 762/991 [3:13:45<59:55, 15.70s/batch, batch_loss=26.2, batch

Epoch 6/10:  77%|▊| 762/991 [3:14:01<59:55, 15.70s/batch, batch_loss=510, batch_

Epoch 6/10:  77%|▊| 763/991 [3:14:01<1:00:20, 15.88s/batch, batch_loss=510, batc

Epoch 6/10:  77%|▊| 763/991 [3:14:17<1:00:20, 15.88s/batch, batch_loss=12.1, bat

Epoch 6/10:  77%|▊| 764/991 [3:14:17<59:51, 15.82s/batch, batch_loss=12.1, batch

Epoch 6/10:  77%|▊| 764/991 [3:14:34<59:51, 15.82s/batch, batch_loss=3.06, batch

Epoch 6/10:  77%|▊| 765/991 [3:14:34<1:01:03, 16.21s/batch, batch_loss=3.06, bat

Epoch 6/10:  77%|▊| 765/991 [3:14:49<1:01:03, 16.21s/batch, batch_loss=12, batch

Epoch 6/10:  77%|▊| 766/991 [3:14:49<59:53, 15.97s/batch, batch_loss=12, batch_i

Epoch 6/10:  77%|▊| 766/991 [3:15:05<59:53, 15.97s/batch, batch_loss=13.1, batch

Epoch 6/10:  77%|▊| 767/991 [3:15:05<59:38, 15.97s/batch, batch_loss=13.1, batch

Epoch 6/10:  77%|▊| 767/991 [3:15:21<59:38, 15.97s/batch, batch_loss=3.25, batch

Epoch 6/10:  77%|▊| 768/991 [3:15:21<58:34, 15.76s/batch, batch_loss=3.25, batch

Epoch 6/10:  77%|▊| 768/991 [3:15:35<58:34, 15.76s/batch, batch_loss=2.05, batch

Epoch 6/10:  78%|▊| 769/991 [3:15:35<56:41, 15.32s/batch, batch_loss=2.05, batch

Epoch 6/10:  78%|▊| 769/991 [3:15:50<56:41, 15.32s/batch, batch_loss=10.3, batch

Epoch 6/10:  78%|▊| 770/991 [3:15:50<55:50, 15.16s/batch, batch_loss=10.3, batch

Epoch 6/10:  78%|▊| 770/991 [3:16:06<55:50, 15.16s/batch, batch_loss=2.75e+3, ba

Epoch 6/10:  78%|▊| 771/991 [3:16:06<56:27, 15.40s/batch, batch_loss=2.75e+3, ba

Epoch 6/10:  78%|▊| 771/991 [3:16:24<56:27, 15.40s/batch, batch_loss=4.16, batch

Epoch 6/10:  78%|▊| 772/991 [3:16:24<59:20, 16.26s/batch, batch_loss=4.16, batch

Epoch 6/10:  78%|▊| 772/991 [3:16:39<59:20, 16.26s/batch, batch_loss=1.41, batch

Epoch 6/10:  78%|▊| 773/991 [3:16:39<57:54, 15.94s/batch, batch_loss=1.41, batch

Epoch 6/10:  78%|▊| 773/991 [3:16:54<57:54, 15.94s/batch, batch_loss=5.56, batch

Epoch 6/10:  78%|▊| 774/991 [3:16:54<56:46, 15.70s/batch, batch_loss=5.56, batch

Epoch 6/10:  78%|▊| 774/991 [3:17:10<56:46, 15.70s/batch, batch_loss=8.24, batch

Epoch 6/10:  78%|▊| 775/991 [3:17:10<56:39, 15.74s/batch, batch_loss=8.24, batch

Epoch 6/10:  78%|▊| 775/991 [3:17:26<56:39, 15.74s/batch, batch_loss=256, batch_

Epoch 6/10:  78%|▊| 776/991 [3:17:26<56:16, 15.70s/batch, batch_loss=256, batch_

Epoch 6/10:  78%|▊| 776/991 [3:17:40<56:16, 15.70s/batch, batch_loss=0.513, batc

Epoch 6/10:  78%|▊| 777/991 [3:17:40<54:51, 15.38s/batch, batch_loss=0.513, batc

Epoch 6/10:  78%|▊| 777/991 [3:17:55<54:51, 15.38s/batch, batch_loss=1.96, batch

Epoch 6/10:  79%|▊| 778/991 [3:17:55<54:14, 15.28s/batch, batch_loss=1.96, batch

Epoch 6/10:  79%|▊| 778/991 [3:18:11<54:14, 15.28s/batch, batch_loss=3.23, batch

Epoch 6/10:  79%|▊| 779/991 [3:18:11<54:34, 15.44s/batch, batch_loss=3.23, batch

Epoch 6/10:  79%|▊| 779/991 [3:18:27<54:34, 15.44s/batch, batch_loss=2.51, batch

Epoch 6/10:  79%|▊| 780/991 [3:18:27<54:10, 15.41s/batch, batch_loss=2.51, batch

Epoch 6/10:  79%|▊| 780/991 [3:18:41<54:10, 15.41s/batch, batch_loss=3.23, batch

Epoch 6/10:  79%|▊| 781/991 [3:18:41<53:18, 15.23s/batch, batch_loss=3.23, batch

Epoch 6/10:  79%|▊| 781/991 [3:18:57<53:18, 15.23s/batch, batch_loss=2.51e+4, ba

Epoch 6/10:  79%|▊| 782/991 [3:18:57<53:16, 15.30s/batch, batch_loss=2.51e+4, ba

Epoch 6/10:  79%|▊| 782/991 [3:19:10<53:16, 15.30s/batch, batch_loss=18.6, batch

Epoch 6/10:  79%|▊| 783/991 [3:19:10<51:03, 14.73s/batch, batch_loss=18.6, batch

Epoch 6/10:  79%|▊| 783/991 [3:19:26<51:03, 14.73s/batch, batch_loss=14.2, batch

Epoch 6/10:  79%|▊| 784/991 [3:19:26<51:39, 14.97s/batch, batch_loss=14.2, batch

Epoch 6/10:  79%|▊| 784/991 [3:19:41<51:39, 14.97s/batch, batch_loss=11.9, batch

Epoch 6/10:  79%|▊| 785/991 [3:19:41<51:08, 14.89s/batch, batch_loss=11.9, batch

Epoch 6/10:  79%|▊| 785/991 [3:19:56<51:08, 14.89s/batch, batch_loss=7.68, batch

Epoch 6/10:  79%|▊| 786/991 [3:19:56<51:58, 15.21s/batch, batch_loss=7.68, batch

Epoch 6/10:  79%|▊| 786/991 [3:20:12<51:58, 15.21s/batch, batch_loss=2.48e+4, ba

Epoch 6/10:  79%|▊| 787/991 [3:20:12<51:45, 15.23s/batch, batch_loss=2.48e+4, ba

Epoch 6/10:  79%|▊| 787/991 [3:20:26<51:45, 15.23s/batch, batch_loss=680, batch_

Epoch 6/10:  80%|▊| 788/991 [3:20:26<50:38, 14.97s/batch, batch_loss=680, batch_

Epoch 6/10:  80%|▊| 788/991 [3:20:42<50:38, 14.97s/batch, batch_loss=21.1, batch

Epoch 6/10:  80%|▊| 789/991 [3:20:42<50:54, 15.12s/batch, batch_loss=21.1, batch

Epoch 6/10:  80%|▊| 789/991 [3:20:57<50:54, 15.12s/batch, batch_loss=23.1, batch

Epoch 6/10:  80%|▊| 790/991 [3:20:57<51:08, 15.26s/batch, batch_loss=23.1, batch

Epoch 6/10:  80%|▊| 790/991 [3:21:13<51:08, 15.26s/batch, batch_loss=14.2, batch

Epoch 6/10:  80%|▊| 791/991 [3:21:13<51:14, 15.37s/batch, batch_loss=14.2, batch

Epoch 6/10:  80%|▊| 791/991 [3:21:28<51:14, 15.37s/batch, batch_loss=1.04e+4, ba

Epoch 6/10:  80%|▊| 792/991 [3:21:28<50:22, 15.19s/batch, batch_loss=1.04e+4, ba

Epoch 6/10:  80%|▊| 792/991 [3:21:42<50:22, 15.19s/batch, batch_loss=8.41, batch

Epoch 6/10:  80%|▊| 793/991 [3:21:42<49:35, 15.03s/batch, batch_loss=8.41, batch

Epoch 6/10:  80%|▊| 793/991 [3:21:58<49:35, 15.03s/batch, batch_loss=1.94, batch

Epoch 6/10:  80%|▊| 794/991 [3:21:58<50:00, 15.23s/batch, batch_loss=1.94, batch

Epoch 6/10:  80%|▊| 794/991 [3:22:14<50:00, 15.23s/batch, batch_loss=6.55, batch

Epoch 6/10:  80%|▊| 795/991 [3:22:14<50:22, 15.42s/batch, batch_loss=6.55, batch

Epoch 6/10:  80%|▊| 795/991 [3:22:29<50:22, 15.42s/batch, batch_loss=19.6, batch

Epoch 6/10:  80%|▊| 796/991 [3:22:29<50:14, 15.46s/batch, batch_loss=19.6, batch

Epoch 6/10:  80%|▊| 796/991 [3:22:45<50:14, 15.46s/batch, batch_loss=35.7, batch

Epoch 6/10:  80%|▊| 797/991 [3:22:45<50:17, 15.55s/batch, batch_loss=35.7, batch

Epoch 6/10:  80%|▊| 797/991 [3:23:00<50:17, 15.55s/batch, batch_loss=349, batch_

Epoch 6/10:  81%|▊| 798/991 [3:23:00<49:40, 15.45s/batch, batch_loss=349, batch_

Epoch 6/10:  81%|▊| 798/991 [3:23:15<49:40, 15.45s/batch, batch_loss=9.57, batch

Epoch 6/10:  81%|▊| 799/991 [3:23:15<48:19, 15.10s/batch, batch_loss=9.57, batch

Epoch 6/10:  81%|▊| 799/991 [3:23:30<48:19, 15.10s/batch, batch_loss=21.2, batch

Epoch 6/10:  81%|▊| 800/991 [3:23:30<48:24, 15.21s/batch, batch_loss=21.2, batch

Epoch 6/10:  81%|▊| 800/991 [3:23:46<48:24, 15.21s/batch, batch_loss=12.5, batch

Epoch 6/10:  81%|▊| 801/991 [3:23:46<48:23, 15.28s/batch, batch_loss=12.5, batch

Epoch 6/10:  81%|▊| 801/991 [3:24:01<48:23, 15.28s/batch, batch_loss=17.1, batch

Epoch 6/10:  81%|▊| 802/991 [3:24:01<48:41, 15.46s/batch, batch_loss=17.1, batch

Epoch 6/10:  81%|▊| 802/991 [3:24:16<48:41, 15.46s/batch, batch_loss=6.84, batch

Epoch 6/10:  81%|▊| 803/991 [3:24:16<47:53, 15.28s/batch, batch_loss=6.84, batch

Epoch 6/10:  81%|▊| 803/991 [3:24:31<47:53, 15.28s/batch, batch_loss=19.9, batch

Epoch 6/10:  81%|▊| 804/991 [3:24:31<47:20, 15.19s/batch, batch_loss=19.9, batch

Epoch 6/10:  81%|▊| 804/991 [3:24:47<47:20, 15.19s/batch, batch_loss=6.37, batch

Epoch 6/10:  81%|▊| 805/991 [3:24:47<47:26, 15.30s/batch, batch_loss=6.37, batch

Epoch 6/10:  81%|▊| 805/991 [3:25:03<47:26, 15.30s/batch, batch_loss=12.5, batch

Epoch 6/10:  81%|▊| 806/991 [3:25:03<47:52, 15.53s/batch, batch_loss=12.5, batch

Epoch 6/10:  81%|▊| 806/991 [3:25:19<47:52, 15.53s/batch, batch_loss=11.4, batch

Epoch 6/10:  81%|▊| 807/991 [3:25:19<47:50, 15.60s/batch, batch_loss=11.4, batch

Epoch 6/10:  81%|▊| 807/991 [3:25:35<47:50, 15.60s/batch, batch_loss=19.1, batch

Epoch 6/10:  82%|▊| 808/991 [3:25:35<48:21, 15.85s/batch, batch_loss=19.1, batch

Epoch 6/10:  82%|▊| 808/991 [3:25:51<48:21, 15.85s/batch, batch_loss=1.21e+4, ba

Epoch 6/10:  82%|▊| 809/991 [3:25:51<47:52, 15.78s/batch, batch_loss=1.21e+4, ba

Epoch 6/10:  82%|▊| 809/991 [3:26:07<47:52, 15.78s/batch, batch_loss=14.8, batch

Epoch 6/10:  82%|▊| 810/991 [3:26:07<47:50, 15.86s/batch, batch_loss=14.8, batch

Epoch 6/10:  82%|▊| 810/991 [3:26:22<47:50, 15.86s/batch, batch_loss=7.42, batch

Epoch 6/10:  82%|▊| 811/991 [3:26:22<46:47, 15.60s/batch, batch_loss=7.42, batch

Epoch 6/10:  82%|▊| 811/991 [3:26:39<46:47, 15.60s/batch, batch_loss=7.06, batch

Epoch 6/10:  82%|▊| 812/991 [3:26:39<48:07, 16.13s/batch, batch_loss=7.06, batch

Epoch 6/10:  82%|▊| 812/991 [3:26:55<48:07, 16.13s/batch, batch_loss=6.93, batch

Epoch 6/10:  82%|▊| 813/991 [3:26:55<47:37, 16.06s/batch, batch_loss=6.93, batch

Epoch 6/10:  82%|▊| 813/991 [3:27:11<47:37, 16.06s/batch, batch_loss=13.9, batch

Epoch 6/10:  82%|▊| 814/991 [3:27:11<47:08, 15.98s/batch, batch_loss=13.9, batch

Epoch 6/10:  82%|▊| 814/991 [3:27:26<47:08, 15.98s/batch, batch_loss=6.2, batch_

Epoch 6/10:  82%|▊| 815/991 [3:27:26<46:19, 15.79s/batch, batch_loss=6.2, batch_

Epoch 6/10:  82%|▊| 815/991 [3:27:41<46:19, 15.79s/batch, batch_loss=92.7, batch

Epoch 6/10:  82%|▊| 816/991 [3:27:41<44:56, 15.41s/batch, batch_loss=92.7, batch

Epoch 6/10:  82%|▊| 816/991 [3:27:54<44:56, 15.41s/batch, batch_loss=362, batch_

Epoch 6/10:  82%|▊| 817/991 [3:27:54<42:46, 14.75s/batch, batch_loss=362, batch_

Epoch 6/10:  82%|▊| 817/991 [3:28:08<42:46, 14.75s/batch, batch_loss=363, batch_

Epoch 6/10:  83%|▊| 818/991 [3:28:08<42:03, 14.59s/batch, batch_loss=363, batch_

Epoch 6/10:  83%|▊| 818/991 [3:28:24<42:03, 14.59s/batch, batch_loss=13.5, batch

Epoch 6/10:  83%|▊| 819/991 [3:28:24<43:09, 15.05s/batch, batch_loss=13.5, batch

Epoch 6/10:  83%|▊| 819/991 [3:28:42<43:09, 15.05s/batch, batch_loss=8.1, batch_

Epoch 6/10:  83%|▊| 820/991 [3:28:42<45:00, 15.79s/batch, batch_loss=8.1, batch_

Epoch 6/10:  83%|▊| 820/991 [3:28:58<45:00, 15.79s/batch, batch_loss=7.17, batch

Epoch 6/10:  83%|▊| 821/991 [3:28:58<44:52, 15.84s/batch, batch_loss=7.17, batch

Epoch 6/10:  83%|▊| 821/991 [3:29:13<44:52, 15.84s/batch, batch_loss=10.4, batch

Epoch 6/10:  83%|▊| 822/991 [3:29:13<44:06, 15.66s/batch, batch_loss=10.4, batch

Epoch 6/10:  83%|▊| 822/991 [3:29:29<44:06, 15.66s/batch, batch_loss=156, batch_

Epoch 6/10:  83%|▊| 823/991 [3:29:29<44:05, 15.75s/batch, batch_loss=156, batch_

Epoch 6/10:  83%|▊| 823/991 [3:29:44<44:05, 15.75s/batch, batch_loss=6.52, batch

Epoch 6/10:  83%|▊| 824/991 [3:29:44<43:35, 15.66s/batch, batch_loss=6.52, batch

Epoch 6/10:  83%|▊| 824/991 [3:30:00<43:35, 15.66s/batch, batch_loss=15.6, batch

Epoch 6/10:  83%|▊| 825/991 [3:30:00<43:45, 15.81s/batch, batch_loss=15.6, batch

Epoch 6/10:  83%|▊| 825/991 [3:30:16<43:45, 15.81s/batch, batch_loss=2.6e+3, bat

Epoch 6/10:  83%|▊| 826/991 [3:30:16<43:15, 15.73s/batch, batch_loss=2.6e+3, bat

Epoch 6/10:  83%|▊| 826/991 [3:30:34<43:15, 15.73s/batch, batch_loss=26, batch_i

Epoch 6/10:  83%|▊| 827/991 [3:30:34<44:28, 16.27s/batch, batch_loss=26, batch_i

Epoch 6/10:  83%|▊| 827/991 [3:30:50<44:28, 16.27s/batch, batch_loss=20, batch_i

Epoch 6/10:  84%|▊| 828/991 [3:30:50<44:00, 16.20s/batch, batch_loss=20, batch_i

Epoch 6/10:  84%|▊| 828/991 [3:31:06<44:00, 16.20s/batch, batch_loss=10, batch_i

Epoch 6/10:  84%|▊| 829/991 [3:31:06<43:39, 16.17s/batch, batch_loss=10, batch_i

Epoch 6/10:  84%|▊| 829/991 [3:31:21<43:39, 16.17s/batch, batch_loss=14.2, batch

Epoch 6/10:  84%|▊| 830/991 [3:31:21<42:30, 15.84s/batch, batch_loss=14.2, batch

Epoch 6/10:  84%|▊| 830/991 [3:31:36<42:30, 15.84s/batch, batch_loss=9.88, batch

Epoch 6/10:  84%|▊| 831/991 [3:31:36<41:46, 15.67s/batch, batch_loss=9.88, batch

Epoch 6/10:  84%|▊| 831/991 [3:31:52<41:46, 15.67s/batch, batch_loss=23.6, batch

Epoch 6/10:  84%|▊| 832/991 [3:31:52<41:35, 15.70s/batch, batch_loss=23.6, batch

Epoch 6/10:  84%|▊| 832/991 [3:32:07<41:35, 15.70s/batch, batch_loss=227, batch_

Epoch 6/10:  84%|▊| 833/991 [3:32:07<40:58, 15.56s/batch, batch_loss=227, batch_

Epoch 6/10:  84%|▊| 833/991 [3:32:23<40:58, 15.56s/batch, batch_loss=22.9, batch

Epoch 6/10:  84%|▊| 834/991 [3:32:23<40:54, 15.64s/batch, batch_loss=22.9, batch

Epoch 6/10:  84%|▊| 834/991 [3:32:38<40:54, 15.64s/batch, batch_loss=14.7, batch

Epoch 6/10:  84%|▊| 835/991 [3:32:38<40:34, 15.61s/batch, batch_loss=14.7, batch

Epoch 6/10:  84%|▊| 835/991 [3:32:54<40:34, 15.61s/batch, batch_loss=3.27e+3, ba

Epoch 6/10:  84%|▊| 836/991 [3:32:54<39:57, 15.47s/batch, batch_loss=3.27e+3, ba

Epoch 6/10:  84%|▊| 836/991 [3:33:09<39:57, 15.47s/batch, batch_loss=4.89e+3, ba

Epoch 6/10:  84%|▊| 837/991 [3:33:09<39:49, 15.52s/batch, batch_loss=4.89e+3, ba

Epoch 6/10:  84%|▊| 837/991 [3:33:26<39:49, 15.52s/batch, batch_loss=18.5, batch

Epoch 6/10:  85%|▊| 838/991 [3:33:26<40:51, 16.02s/batch, batch_loss=18.5, batch

Epoch 6/10:  85%|▊| 838/991 [3:33:42<40:51, 16.02s/batch, batch_loss=4.3, batch_

Epoch 6/10:  85%|▊| 839/991 [3:33:42<40:25, 15.96s/batch, batch_loss=4.3, batch_

Epoch 6/10:  85%|▊| 839/991 [3:33:57<40:25, 15.96s/batch, batch_loss=4.26, batch

Epoch 6/10:  85%|▊| 840/991 [3:33:57<39:20, 15.64s/batch, batch_loss=4.26, batch

Epoch 6/10:  85%|▊| 840/991 [3:34:12<39:20, 15.64s/batch, batch_loss=24.8, batch

Epoch 6/10:  85%|▊| 841/991 [3:34:12<38:50, 15.54s/batch, batch_loss=24.8, batch

Epoch 6/10:  85%|▊| 841/991 [3:34:28<38:50, 15.54s/batch, batch_loss=17.3, batch

Epoch 6/10:  85%|▊| 842/991 [3:34:28<38:28, 15.49s/batch, batch_loss=17.3, batch

Epoch 6/10:  85%|▊| 842/991 [3:34:43<38:28, 15.49s/batch, batch_loss=10.1, batch

Epoch 6/10:  85%|▊| 843/991 [3:34:43<38:01, 15.42s/batch, batch_loss=10.1, batch

Epoch 6/10:  85%|▊| 843/991 [3:34:59<38:01, 15.42s/batch, batch_loss=1.69e+3, ba

Epoch 6/10:  85%|▊| 844/991 [3:34:59<38:28, 15.70s/batch, batch_loss=1.69e+3, ba

Epoch 6/10:  85%|▊| 844/991 [3:35:15<38:28, 15.70s/batch, batch_loss=15.2, batch

Epoch 6/10:  85%|▊| 845/991 [3:35:15<38:00, 15.62s/batch, batch_loss=15.2, batch

Epoch 6/10:  85%|▊| 845/991 [3:35:31<38:00, 15.62s/batch, batch_loss=1.18e+4, ba

Epoch 6/10:  85%|▊| 846/991 [3:35:31<38:09, 15.79s/batch, batch_loss=1.18e+4, ba

Epoch 6/10:  85%|▊| 846/991 [3:35:46<38:09, 15.79s/batch, batch_loss=19.1, batch

Epoch 6/10:  85%|▊| 847/991 [3:35:46<37:34, 15.66s/batch, batch_loss=19.1, batch

Epoch 6/10:  85%|▊| 847/991 [3:36:02<37:34, 15.66s/batch, batch_loss=46.9, batch

Epoch 6/10:  86%|▊| 848/991 [3:36:02<37:07, 15.58s/batch, batch_loss=46.9, batch

Epoch 6/10:  86%|▊| 848/991 [3:36:17<37:07, 15.58s/batch, batch_loss=1.03e+3, ba

Epoch 6/10:  86%|▊| 849/991 [3:36:17<36:43, 15.51s/batch, batch_loss=1.03e+3, ba

Epoch 6/10:  86%|▊| 849/991 [3:36:33<36:43, 15.51s/batch, batch_loss=8.11, batch

Epoch 6/10:  86%|▊| 850/991 [3:36:33<36:43, 15.63s/batch, batch_loss=8.11, batch

Epoch 6/10:  86%|▊| 850/991 [3:36:51<36:43, 15.63s/batch, batch_loss=13.6, batch

Epoch 6/10:  86%|▊| 851/991 [3:36:51<37:53, 16.24s/batch, batch_loss=13.6, batch

Epoch 6/10:  86%|▊| 851/991 [3:37:06<37:53, 16.24s/batch, batch_loss=16, batch_i

Epoch 6/10:  86%|▊| 852/991 [3:37:06<37:04, 16.00s/batch, batch_loss=16, batch_i

Epoch 6/10:  86%|▊| 852/991 [3:37:22<37:04, 16.00s/batch, batch_loss=7.66e+3, ba

Epoch 6/10:  86%|▊| 853/991 [3:37:22<36:38, 15.93s/batch, batch_loss=7.66e+3, ba

Epoch 6/10:  86%|▊| 853/991 [3:37:38<36:38, 15.93s/batch, batch_loss=18.8, batch

Epoch 6/10:  86%|▊| 854/991 [3:37:38<36:44, 16.09s/batch, batch_loss=18.8, batch

Epoch 6/10:  86%|▊| 854/991 [3:37:54<36:44, 16.09s/batch, batch_loss=8.09, batch

Epoch 6/10:  86%|▊| 855/991 [3:37:54<36:07, 15.93s/batch, batch_loss=8.09, batch

Epoch 6/10:  86%|▊| 855/991 [3:38:09<36:07, 15.93s/batch, batch_loss=8.71, batch

Epoch 6/10:  86%|▊| 856/991 [3:38:09<35:09, 15.63s/batch, batch_loss=8.71, batch

Epoch 6/10:  86%|▊| 856/991 [3:38:24<35:09, 15.63s/batch, batch_loss=8.73, batch

Epoch 6/10:  86%|▊| 857/991 [3:38:24<34:36, 15.50s/batch, batch_loss=8.73, batch

Epoch 6/10:  86%|▊| 857/991 [3:38:39<34:36, 15.50s/batch, batch_loss=17.1, batch

Epoch 6/10:  87%|▊| 858/991 [3:38:39<34:20, 15.50s/batch, batch_loss=17.1, batch

Epoch 6/10:  87%|▊| 858/991 [3:38:55<34:20, 15.50s/batch, batch_loss=15.2, batch

Epoch 6/10:  87%|▊| 859/991 [3:38:55<33:54, 15.41s/batch, batch_loss=15.2, batch

Epoch 6/10:  87%|▊| 859/991 [3:39:10<33:54, 15.41s/batch, batch_loss=19.1, batch

Epoch 6/10:  87%|▊| 860/991 [3:39:10<33:19, 15.27s/batch, batch_loss=19.1, batch

Epoch 6/10:  87%|▊| 860/991 [3:39:25<33:19, 15.27s/batch, batch_loss=8.27, batch

Epoch 6/10:  87%|▊| 861/991 [3:39:25<33:27, 15.44s/batch, batch_loss=8.27, batch

Epoch 6/10:  87%|▊| 861/991 [3:39:40<33:27, 15.44s/batch, batch_loss=18.4, batch

Epoch 6/10:  87%|▊| 862/991 [3:39:40<32:50, 15.27s/batch, batch_loss=18.4, batch

Epoch 6/10:  87%|▊| 862/991 [3:39:55<32:50, 15.27s/batch, batch_loss=26.7, batch

Epoch 6/10:  87%|▊| 863/991 [3:39:55<32:22, 15.17s/batch, batch_loss=26.7, batch

Epoch 6/10:  87%|▊| 863/991 [3:40:10<32:22, 15.17s/batch, batch_loss=9.14, batch

Epoch 6/10:  87%|▊| 864/991 [3:40:10<31:57, 15.10s/batch, batch_loss=9.14, batch

Epoch 6/10:  87%|▊| 864/991 [3:40:25<31:57, 15.10s/batch, batch_loss=16.8, batch

Epoch 6/10:  87%|▊| 865/991 [3:40:25<31:28, 14.99s/batch, batch_loss=16.8, batch

Epoch 6/10:  87%|▊| 865/991 [3:40:40<31:28, 14.99s/batch, batch_loss=21.8, batch

Epoch 6/10:  87%|▊| 866/991 [3:40:40<30:59, 14.88s/batch, batch_loss=21.8, batch

Epoch 6/10:  87%|▊| 866/991 [3:40:54<30:59, 14.88s/batch, batch_loss=20.6, batch

Epoch 6/10:  87%|▊| 867/991 [3:40:54<30:47, 14.90s/batch, batch_loss=20.6, batch

Epoch 6/10:  87%|▊| 867/991 [3:41:10<30:47, 14.90s/batch, batch_loss=20, batch_i

Epoch 6/10:  88%|▉| 868/991 [3:41:10<30:40, 14.97s/batch, batch_loss=20, batch_i

Epoch 6/10:  88%|▉| 868/991 [3:41:25<30:40, 14.97s/batch, batch_loss=11.1, batch

Epoch 6/10:  88%|▉| 869/991 [3:41:25<30:32, 15.02s/batch, batch_loss=11.1, batch

Epoch 6/10:  88%|▉| 869/991 [3:41:40<30:32, 15.02s/batch, batch_loss=12.8, batch

Epoch 6/10:  88%|▉| 870/991 [3:41:40<30:22, 15.06s/batch, batch_loss=12.8, batch

Epoch 6/10:  88%|▉| 870/991 [3:41:54<30:22, 15.06s/batch, batch_loss=8.48, batch

Epoch 6/10:  88%|▉| 871/991 [3:41:54<29:42, 14.86s/batch, batch_loss=8.48, batch

Epoch 6/10:  88%|▉| 871/991 [3:42:10<29:42, 14.86s/batch, batch_loss=18.6, batch

Epoch 6/10:  88%|▉| 872/991 [3:42:10<29:57, 15.10s/batch, batch_loss=18.6, batch

Epoch 6/10:  88%|▉| 872/991 [3:42:25<29:57, 15.10s/batch, batch_loss=13.3, batch

Epoch 6/10:  88%|▉| 873/991 [3:42:25<29:45, 15.14s/batch, batch_loss=13.3, batch

Epoch 6/10:  88%|▉| 873/991 [3:42:41<29:45, 15.14s/batch, batch_loss=8.42, batch

Epoch 6/10:  88%|▉| 874/991 [3:42:41<29:39, 15.21s/batch, batch_loss=8.42, batch

Epoch 6/10:  88%|▉| 874/991 [3:42:56<29:39, 15.21s/batch, batch_loss=13.3, batch

Epoch 6/10:  88%|▉| 875/991 [3:42:56<29:29, 15.26s/batch, batch_loss=13.3, batch

Epoch 6/10:  88%|▉| 875/991 [3:43:15<29:29, 15.26s/batch, batch_loss=18.8, batch

Epoch 6/10:  88%|▉| 876/991 [3:43:15<31:08, 16.25s/batch, batch_loss=18.8, batch

Epoch 6/10:  88%|▉| 876/991 [3:43:30<31:08, 16.25s/batch, batch_loss=16.2, batch

Epoch 6/10:  88%|▉| 877/991 [3:43:30<30:22, 15.99s/batch, batch_loss=16.2, batch

Epoch 6/10:  88%|▉| 877/991 [3:43:45<30:22, 15.99s/batch, batch_loss=23.3, batch

Epoch 6/10:  89%|▉| 878/991 [3:43:45<29:44, 15.79s/batch, batch_loss=23.3, batch

Epoch 6/10:  89%|▉| 878/991 [3:44:01<29:44, 15.79s/batch, batch_loss=18.1, batch

Epoch 6/10:  89%|▉| 879/991 [3:44:01<29:19, 15.71s/batch, batch_loss=18.1, batch

Epoch 6/10:  89%|▉| 879/991 [3:44:16<29:19, 15.71s/batch, batch_loss=12.6, batch

Epoch 6/10:  89%|▉| 880/991 [3:44:16<28:48, 15.58s/batch, batch_loss=12.6, batch

Epoch 6/10:  89%|▉| 880/991 [3:44:31<28:48, 15.58s/batch, batch_loss=5.14e+3, ba

Epoch 6/10:  89%|▉| 881/991 [3:44:31<27:59, 15.27s/batch, batch_loss=5.14e+3, ba

Epoch 6/10:  89%|▉| 881/991 [3:44:48<27:59, 15.27s/batch, batch_loss=15.9, batch

Epoch 6/10:  89%|▉| 882/991 [3:44:48<28:42, 15.80s/batch, batch_loss=15.9, batch

Epoch 6/10:  89%|▉| 882/991 [3:45:03<28:42, 15.80s/batch, batch_loss=20.4, batch

Epoch 6/10:  89%|▉| 883/991 [3:45:03<28:00, 15.56s/batch, batch_loss=20.4, batch

Epoch 6/10:  89%|▉| 883/991 [3:45:18<28:00, 15.56s/batch, batch_loss=12.1, batch

Epoch 6/10:  89%|▉| 884/991 [3:45:18<27:33, 15.45s/batch, batch_loss=12.1, batch

Epoch 6/10:  89%|▉| 884/991 [3:45:32<27:33, 15.45s/batch, batch_loss=14.8, batch

Epoch 6/10:  89%|▉| 885/991 [3:45:32<26:50, 15.19s/batch, batch_loss=14.8, batch

Epoch 6/10:  89%|▉| 885/991 [3:45:47<26:50, 15.19s/batch, batch_loss=16.5, batch

Epoch 6/10:  89%|▉| 886/991 [3:45:47<26:18, 15.04s/batch, batch_loss=16.5, batch

Epoch 6/10:  89%|▉| 886/991 [3:46:01<26:18, 15.04s/batch, batch_loss=1.93e+4, ba

Epoch 6/10:  90%|▉| 887/991 [3:46:01<25:25, 14.67s/batch, batch_loss=1.93e+4, ba

Epoch 6/10:  90%|▉| 887/991 [3:46:16<25:25, 14.67s/batch, batch_loss=18.4, batch

Epoch 6/10:  90%|▉| 888/991 [3:46:16<25:30, 14.86s/batch, batch_loss=18.4, batch

Epoch 6/10:  90%|▉| 888/991 [3:46:32<25:30, 14.86s/batch, batch_loss=18.4, batch

Epoch 6/10:  90%|▉| 889/991 [3:46:32<25:39, 15.09s/batch, batch_loss=18.4, batch

Epoch 6/10:  90%|▉| 889/991 [3:46:46<25:39, 15.09s/batch, batch_loss=12.4, batch

Epoch 6/10:  90%|▉| 890/991 [3:46:46<24:57, 14.83s/batch, batch_loss=12.4, batch

Epoch 6/10:  90%|▉| 890/991 [3:47:00<24:57, 14.83s/batch, batch_loss=15.1, batch

Epoch 6/10:  90%|▉| 891/991 [3:47:00<24:13, 14.53s/batch, batch_loss=15.1, batch

Epoch 6/10:  90%|▉| 891/991 [3:47:15<24:13, 14.53s/batch, batch_loss=15.9, batch

Epoch 6/10:  90%|▉| 892/991 [3:47:15<24:14, 14.69s/batch, batch_loss=15.9, batch

Epoch 6/10:  90%|▉| 892/991 [3:47:30<24:14, 14.69s/batch, batch_loss=3.7e+3, bat

Epoch 6/10:  90%|▉| 893/991 [3:47:30<24:04, 14.74s/batch, batch_loss=3.7e+3, bat

Epoch 6/10:  90%|▉| 893/991 [3:47:45<24:04, 14.74s/batch, batch_loss=8.92, batch

Epoch 6/10:  90%|▉| 894/991 [3:47:45<24:11, 14.96s/batch, batch_loss=8.92, batch

Epoch 6/10:  90%|▉| 894/991 [3:48:00<24:11, 14.96s/batch, batch_loss=14.3, batch

Epoch 6/10:  90%|▉| 895/991 [3:48:00<24:01, 15.01s/batch, batch_loss=14.3, batch

Epoch 6/10:  90%|▉| 895/991 [3:48:15<24:01, 15.01s/batch, batch_loss=11, batch_i

Epoch 6/10:  90%|▉| 896/991 [3:48:15<23:25, 14.80s/batch, batch_loss=11, batch_i

Epoch 6/10:  90%|▉| 896/991 [3:48:29<23:25, 14.80s/batch, batch_loss=16.2, batch

Epoch 6/10:  91%|▉| 897/991 [3:48:29<22:59, 14.67s/batch, batch_loss=16.2, batch

Epoch 6/10:  91%|▉| 897/991 [3:48:45<22:59, 14.67s/batch, batch_loss=22.5, batch

Epoch 6/10:  91%|▉| 898/991 [3:48:45<23:12, 14.97s/batch, batch_loss=22.5, batch

Epoch 6/10:  91%|▉| 898/991 [3:49:00<23:12, 14.97s/batch, batch_loss=18.1, batch

Epoch 6/10:  91%|▉| 899/991 [3:49:00<22:55, 14.95s/batch, batch_loss=18.1, batch

Epoch 6/10:  91%|▉| 899/991 [3:49:15<22:55, 14.95s/batch, batch_loss=17, batch_i

Epoch 6/10:  91%|▉| 900/991 [3:49:15<22:58, 15.14s/batch, batch_loss=17, batch_i

Epoch 6/10:  91%|▉| 900/991 [3:49:32<22:58, 15.14s/batch, batch_loss=13.6, batch

Epoch 6/10:  91%|▉| 901/991 [3:49:32<23:13, 15.49s/batch, batch_loss=13.6, batch

Epoch 6/10:  91%|▉| 901/991 [3:49:48<23:13, 15.49s/batch, batch_loss=13.9, batch

Epoch 6/10:  91%|▉| 902/991 [3:49:48<23:11, 15.63s/batch, batch_loss=13.9, batch

Epoch 6/10:  91%|▉| 902/991 [3:50:03<23:11, 15.63s/batch, batch_loss=8.1, batch_

Epoch 6/10:  91%|▉| 903/991 [3:50:03<22:39, 15.45s/batch, batch_loss=8.1, batch_

Epoch 6/10:  91%|▉| 903/991 [3:50:18<22:39, 15.45s/batch, batch_loss=9.59, batch

Epoch 6/10:  91%|▉| 904/991 [3:50:18<22:17, 15.37s/batch, batch_loss=9.59, batch

Epoch 6/10:  91%|▉| 904/991 [3:50:33<22:17, 15.37s/batch, batch_loss=21.5, batch

Epoch 6/10:  91%|▉| 905/991 [3:50:33<21:54, 15.29s/batch, batch_loss=21.5, batch

Epoch 6/10:  91%|▉| 905/991 [3:50:50<21:54, 15.29s/batch, batch_loss=17.2, batch

Epoch 6/10:  91%|▉| 906/991 [3:50:50<22:34, 15.94s/batch, batch_loss=17.2, batch

Epoch 6/10:  91%|▉| 906/991 [3:51:07<22:34, 15.94s/batch, batch_loss=20.7, batch

Epoch 6/10:  92%|▉| 907/991 [3:51:07<22:31, 16.09s/batch, batch_loss=20.7, batch

Epoch 6/10:  92%|▉| 907/991 [3:51:23<22:31, 16.09s/batch, batch_loss=11.2, batch

Epoch 6/10:  92%|▉| 908/991 [3:51:23<22:17, 16.12s/batch, batch_loss=11.2, batch

Epoch 6/10:  92%|▉| 908/991 [3:51:39<22:17, 16.12s/batch, batch_loss=6.26, batch

Epoch 6/10:  92%|▉| 909/991 [3:51:39<21:49, 15.97s/batch, batch_loss=6.26, batch

Epoch 6/10:  92%|▉| 909/991 [3:51:54<21:49, 15.97s/batch, batch_loss=685, batch_

Epoch 6/10:  92%|▉| 910/991 [3:51:54<21:11, 15.69s/batch, batch_loss=685, batch_

Epoch 6/10:  92%|▉| 910/991 [3:52:09<21:11, 15.69s/batch, batch_loss=1.02e+3, ba

Epoch 6/10:  92%|▉| 911/991 [3:52:09<20:46, 15.58s/batch, batch_loss=1.02e+3, ba

Epoch 6/10:  92%|▉| 911/991 [3:52:24<20:46, 15.58s/batch, batch_loss=24.5, batch

Epoch 6/10:  92%|▉| 912/991 [3:52:24<20:17, 15.42s/batch, batch_loss=24.5, batch

Epoch 6/10:  92%|▉| 912/991 [3:52:40<20:17, 15.42s/batch, batch_loss=23.9, batch

Epoch 6/10:  92%|▉| 913/991 [3:52:40<20:07, 15.49s/batch, batch_loss=23.9, batch

Epoch 6/10:  92%|▉| 913/991 [3:52:58<20:07, 15.49s/batch, batch_loss=20.9, batch

Epoch 6/10:  92%|▉| 914/991 [3:52:58<21:07, 16.46s/batch, batch_loss=20.9, batch

Epoch 6/10:  92%|▉| 914/991 [3:53:14<21:07, 16.46s/batch, batch_loss=17.4, batch

Epoch 6/10:  92%|▉| 915/991 [3:53:14<20:29, 16.18s/batch, batch_loss=17.4, batch

Epoch 6/10:  92%|▉| 915/991 [3:53:28<20:29, 16.18s/batch, batch_loss=16.1, batch

Epoch 6/10:  92%|▉| 916/991 [3:53:28<19:36, 15.68s/batch, batch_loss=16.1, batch

Epoch 6/10:  92%|▉| 916/991 [3:53:44<19:36, 15.68s/batch, batch_loss=8.01, batch

Epoch 6/10:  93%|▉| 917/991 [3:53:44<19:26, 15.77s/batch, batch_loss=8.01, batch

Epoch 6/10:  93%|▉| 917/991 [3:54:01<19:26, 15.77s/batch, batch_loss=13.8, batch

Epoch 6/10:  93%|▉| 918/991 [3:54:01<19:23, 15.93s/batch, batch_loss=13.8, batch

Epoch 6/10:  93%|▉| 918/991 [3:54:16<19:23, 15.93s/batch, batch_loss=12.1, batch

Epoch 6/10:  93%|▉| 919/991 [3:54:16<18:57, 15.80s/batch, batch_loss=12.1, batch

Epoch 6/10:  93%|▉| 919/991 [3:54:32<18:57, 15.80s/batch, batch_loss=14.1, batch

Epoch 6/10:  93%|▉| 920/991 [3:54:32<18:51, 15.94s/batch, batch_loss=14.1, batch

Epoch 6/10:  93%|▉| 920/991 [3:54:47<18:51, 15.94s/batch, batch_loss=18.2, batch

Epoch 6/10:  93%|▉| 921/991 [3:54:47<18:06, 15.53s/batch, batch_loss=18.2, batch

Epoch 6/10:  93%|▉| 921/991 [3:55:02<18:06, 15.53s/batch, batch_loss=21.3, batch

Epoch 6/10:  93%|▉| 922/991 [3:55:02<17:35, 15.29s/batch, batch_loss=21.3, batch

Epoch 6/10:  93%|▉| 922/991 [3:55:18<17:35, 15.29s/batch, batch_loss=6.73, batch

Epoch 6/10:  93%|▉| 923/991 [3:55:18<17:44, 15.65s/batch, batch_loss=6.73, batch

Epoch 6/10:  93%|▉| 923/991 [3:55:34<17:44, 15.65s/batch, batch_loss=11.7, batch

Epoch 6/10:  93%|▉| 924/991 [3:55:34<17:34, 15.73s/batch, batch_loss=11.7, batch

Epoch 6/10:  93%|▉| 924/991 [3:55:50<17:34, 15.73s/batch, batch_loss=10.4, batch

Epoch 6/10:  93%|▉| 925/991 [3:55:50<17:20, 15.76s/batch, batch_loss=10.4, batch

Epoch 6/10:  93%|▉| 925/991 [3:56:06<17:20, 15.76s/batch, batch_loss=3e+4, batch

Epoch 6/10:  93%|▉| 926/991 [3:56:06<17:16, 15.95s/batch, batch_loss=3e+4, batch

Epoch 6/10:  93%|▉| 926/991 [3:56:22<17:16, 15.95s/batch, batch_loss=6.63, batch

Epoch 6/10:  94%|▉| 927/991 [3:56:22<16:52, 15.82s/batch, batch_loss=6.63, batch

Epoch 6/10:  94%|▉| 927/991 [3:56:39<16:52, 15.82s/batch, batch_loss=843, batch_

Epoch 6/10:  94%|▉| 928/991 [3:56:39<17:07, 16.31s/batch, batch_loss=843, batch_

Epoch 6/10:  94%|▉| 928/991 [3:56:55<17:07, 16.31s/batch, batch_loss=10.2, batch

Epoch 6/10:  94%|▉| 929/991 [3:56:55<16:44, 16.20s/batch, batch_loss=10.2, batch

Epoch 6/10:  94%|▉| 929/991 [3:57:11<16:44, 16.20s/batch, batch_loss=9.57, batch

Epoch 6/10:  94%|▉| 930/991 [3:57:11<16:15, 15.99s/batch, batch_loss=9.57, batch

Epoch 6/10:  94%|▉| 930/991 [3:57:25<16:15, 15.99s/batch, batch_loss=11.7, batch

Epoch 6/10:  94%|▉| 931/991 [3:57:25<15:31, 15.52s/batch, batch_loss=11.7, batch

Epoch 6/10:  94%|▉| 931/991 [3:57:39<15:31, 15.52s/batch, batch_loss=10.3, batch

Epoch 6/10:  94%|▉| 932/991 [3:57:39<14:41, 14.95s/batch, batch_loss=10.3, batch

Epoch 6/10:  94%|▉| 932/991 [3:57:53<14:41, 14.95s/batch, batch_loss=10.2, batch

Epoch 6/10:  94%|▉| 933/991 [3:57:53<14:10, 14.66s/batch, batch_loss=10.2, batch

Epoch 6/10:  94%|▉| 933/991 [3:58:07<14:10, 14.66s/batch, batch_loss=1.63, batch

Epoch 6/10:  94%|▉| 934/991 [3:58:07<13:41, 14.42s/batch, batch_loss=1.63, batch

Epoch 6/10:  94%|▉| 934/991 [3:58:21<13:41, 14.42s/batch, batch_loss=1.58, batch

Epoch 6/10:  94%|▉| 935/991 [3:58:21<13:26, 14.40s/batch, batch_loss=1.58, batch

Epoch 6/10:  94%|▉| 935/991 [3:58:36<13:26, 14.40s/batch, batch_loss=165, batch_

Epoch 6/10:  94%|▉| 936/991 [3:58:36<13:19, 14.53s/batch, batch_loss=165, batch_

Epoch 6/10:  94%|▉| 936/991 [3:58:51<13:19, 14.53s/batch, batch_loss=37, batch_i

Epoch 6/10:  95%|▉| 937/991 [3:58:51<13:14, 14.71s/batch, batch_loss=37, batch_i

Epoch 6/10:  95%|▉| 937/991 [3:59:07<13:14, 14.71s/batch, batch_loss=8.18, batch

Epoch 6/10:  95%|▉| 938/991 [3:59:07<13:21, 15.12s/batch, batch_loss=8.18, batch

Epoch 6/10:  95%|▉| 938/991 [3:59:23<13:21, 15.12s/batch, batch_loss=8.08, batch

Epoch 6/10:  95%|▉| 939/991 [3:59:23<13:14, 15.28s/batch, batch_loss=8.08, batch

Epoch 6/10:  95%|▉| 939/991 [3:59:37<13:14, 15.28s/batch, batch_loss=419, batch_

Epoch 6/10:  95%|▉| 940/991 [3:59:37<12:50, 15.11s/batch, batch_loss=419, batch_

Epoch 6/10:  95%|▉| 940/991 [3:59:52<12:50, 15.11s/batch, batch_loss=17.8, batch

Epoch 6/10:  95%|▉| 941/991 [3:59:52<12:27, 14.95s/batch, batch_loss=17.8, batch

Epoch 6/10:  95%|▉| 941/991 [4:00:07<12:27, 14.95s/batch, batch_loss=13.2, batch

Epoch 6/10:  95%|▉| 942/991 [4:00:07<12:17, 15.04s/batch, batch_loss=13.2, batch

Epoch 6/10:  95%|▉| 942/991 [4:00:22<12:17, 15.04s/batch, batch_loss=10.7, batch

Epoch 6/10:  95%|▉| 943/991 [4:00:22<11:58, 14.96s/batch, batch_loss=10.7, batch

Epoch 6/10:  95%|▉| 943/991 [4:00:37<11:58, 14.96s/batch, batch_loss=12.9, batch

Epoch 6/10:  95%|▉| 944/991 [4:00:37<11:40, 14.90s/batch, batch_loss=12.9, batch

Epoch 6/10:  95%|▉| 944/991 [4:00:54<11:40, 14.90s/batch, batch_loss=1.69, batch

Epoch 6/10:  95%|▉| 945/991 [4:00:54<11:54, 15.53s/batch, batch_loss=1.69, batch

Epoch 6/10:  95%|▉| 945/991 [4:01:09<11:54, 15.53s/batch, batch_loss=11.5, batch

Epoch 6/10:  95%|▉| 946/991 [4:01:09<11:41, 15.60s/batch, batch_loss=11.5, batch

Epoch 6/10:  95%|▉| 946/991 [4:01:26<11:41, 15.60s/batch, batch_loss=14.4, batch

Epoch 6/10:  96%|▉| 947/991 [4:01:26<11:32, 15.73s/batch, batch_loss=14.4, batch

Epoch 6/10:  96%|▉| 947/991 [4:01:41<11:32, 15.73s/batch, batch_loss=11.1, batch

Epoch 6/10:  96%|▉| 948/991 [4:01:41<11:11, 15.62s/batch, batch_loss=11.1, batch

Epoch 6/10:  96%|▉| 948/991 [4:01:56<11:11, 15.62s/batch, batch_loss=5.8, batch_

Epoch 6/10:  96%|▉| 949/991 [4:01:56<10:51, 15.51s/batch, batch_loss=5.8, batch_

Epoch 6/10:  96%|▉| 949/991 [4:02:11<10:51, 15.51s/batch, batch_loss=8.45, batch

Epoch 6/10:  96%|▉| 950/991 [4:02:11<10:24, 15.22s/batch, batch_loss=8.45, batch

Epoch 6/10:  96%|▉| 950/991 [4:02:26<10:24, 15.22s/batch, batch_loss=15.5, batch

Epoch 6/10:  96%|▉| 951/991 [4:02:26<10:09, 15.25s/batch, batch_loss=15.5, batch

Epoch 6/10:  96%|▉| 951/991 [4:02:41<10:09, 15.25s/batch, batch_loss=16.7, batch

Epoch 6/10:  96%|▉| 952/991 [4:02:41<09:52, 15.19s/batch, batch_loss=16.7, batch

Epoch 6/10:  96%|▉| 952/991 [4:02:58<09:52, 15.19s/batch, batch_loss=6.42, batch

Epoch 6/10:  96%|▉| 953/991 [4:02:58<09:59, 15.78s/batch, batch_loss=6.42, batch

Epoch 6/10:  96%|▉| 953/991 [4:03:13<09:59, 15.78s/batch, batch_loss=332, batch_

Epoch 6/10:  96%|▉| 954/991 [4:03:13<09:33, 15.51s/batch, batch_loss=332, batch_

Epoch 6/10:  96%|▉| 954/991 [4:03:28<09:33, 15.51s/batch, batch_loss=13.3, batch

Epoch 6/10:  96%|▉| 955/991 [4:03:28<09:12, 15.33s/batch, batch_loss=13.3, batch

Epoch 6/10:  96%|▉| 955/991 [4:03:44<09:12, 15.33s/batch, batch_loss=13.2, batch

Epoch 6/10:  96%|▉| 956/991 [4:03:44<09:01, 15.46s/batch, batch_loss=13.2, batch

Epoch 6/10:  96%|▉| 956/991 [4:03:58<09:01, 15.46s/batch, batch_loss=16.5, batch

Epoch 6/10:  97%|▉| 957/991 [4:03:58<08:34, 15.14s/batch, batch_loss=16.5, batch

Epoch 6/10:  97%|▉| 957/991 [4:04:14<08:34, 15.14s/batch, batch_loss=13.7, batch

Epoch 6/10:  97%|▉| 958/991 [4:04:14<08:22, 15.21s/batch, batch_loss=13.7, batch

Epoch 6/10:  97%|▉| 958/991 [4:04:29<08:22, 15.21s/batch, batch_loss=7.53, batch

Epoch 6/10:  97%|▉| 959/991 [4:04:29<08:06, 15.22s/batch, batch_loss=7.53, batch

Epoch 6/10:  97%|▉| 959/991 [4:04:44<08:06, 15.22s/batch, batch_loss=11.4, batch

Epoch 6/10:  97%|▉| 960/991 [4:04:44<07:51, 15.20s/batch, batch_loss=11.4, batch

Epoch 6/10:  97%|▉| 960/991 [4:05:01<07:51, 15.20s/batch, batch_loss=13.4, batch

Epoch 6/10:  97%|▉| 961/991 [4:05:01<07:50, 15.68s/batch, batch_loss=13.4, batch

Epoch 6/10:  97%|▉| 961/991 [4:05:16<07:50, 15.68s/batch, batch_loss=5.02, batch

Epoch 6/10:  97%|▉| 962/991 [4:05:16<07:33, 15.65s/batch, batch_loss=5.02, batch

Epoch 6/10:  97%|▉| 962/991 [4:05:32<07:33, 15.65s/batch, batch_loss=6.39, batch

Epoch 6/10:  97%|▉| 963/991 [4:05:32<07:14, 15.53s/batch, batch_loss=6.39, batch

Epoch 6/10:  97%|▉| 963/991 [4:05:47<07:14, 15.53s/batch, batch_loss=9.43e+3, ba

Epoch 6/10:  97%|▉| 964/991 [4:05:47<06:56, 15.42s/batch, batch_loss=9.43e+3, ba

Epoch 6/10:  97%|▉| 964/991 [4:06:02<06:56, 15.42s/batch, batch_loss=18.4, batch

Epoch 6/10:  97%|▉| 965/991 [4:06:02<06:42, 15.49s/batch, batch_loss=18.4, batch

Epoch 6/10:  97%|▉| 965/991 [4:06:18<06:42, 15.49s/batch, batch_loss=16.4, batch

Epoch 6/10:  97%|▉| 966/991 [4:06:18<06:27, 15.49s/batch, batch_loss=16.4, batch

Epoch 6/10:  97%|▉| 966/991 [4:06:33<06:27, 15.49s/batch, batch_loss=2.4e+4, bat

Epoch 6/10:  98%|▉| 967/991 [4:06:33<06:12, 15.52s/batch, batch_loss=2.4e+4, bat

Epoch 6/10:  98%|▉| 967/991 [4:06:49<06:12, 15.52s/batch, batch_loss=408, batch_

Epoch 6/10:  98%|▉| 968/991 [4:06:49<05:56, 15.49s/batch, batch_loss=408, batch_

Epoch 6/10:  98%|▉| 968/991 [4:07:05<05:56, 15.49s/batch, batch_loss=17.8, batch

Epoch 6/10:  98%|▉| 969/991 [4:07:05<05:44, 15.65s/batch, batch_loss=17.8, batch

Epoch 6/10:  98%|▉| 969/991 [4:07:21<05:44, 15.65s/batch, batch_loss=0.978, batc

Epoch 6/10:  98%|▉| 970/991 [4:07:21<05:29, 15.71s/batch, batch_loss=0.978, batc

Epoch 6/10:  98%|▉| 970/991 [4:07:37<05:29, 15.71s/batch, batch_loss=8.62, batch

Epoch 6/10:  98%|▉| 971/991 [4:07:37<05:17, 15.89s/batch, batch_loss=8.62, batch

Epoch 6/10:  98%|▉| 971/991 [4:07:52<05:17, 15.89s/batch, batch_loss=24.3, batch

Epoch 6/10:  98%|▉| 972/991 [4:07:52<04:56, 15.61s/batch, batch_loss=24.3, batch

Epoch 6/10:  98%|▉| 972/991 [4:08:07<04:56, 15.61s/batch, batch_loss=18.3, batch

Epoch 6/10:  98%|▉| 973/991 [4:08:07<04:40, 15.57s/batch, batch_loss=18.3, batch

Epoch 6/10:  98%|▉| 973/991 [4:08:23<04:40, 15.57s/batch, batch_loss=17, batch_i

Epoch 6/10:  98%|▉| 974/991 [4:08:23<04:24, 15.53s/batch, batch_loss=17, batch_i

Epoch 6/10:  98%|▉| 974/991 [4:08:38<04:24, 15.53s/batch, batch_loss=10.1, batch

Epoch 6/10:  98%|▉| 975/991 [4:08:38<04:05, 15.32s/batch, batch_loss=10.1, batch

Epoch 6/10:  98%|▉| 975/991 [4:08:54<04:05, 15.32s/batch, batch_loss=28.7, batch

Epoch 6/10:  98%|▉| 976/991 [4:08:54<03:55, 15.69s/batch, batch_loss=28.7, batch

Epoch 6/10:  98%|▉| 976/991 [4:09:08<03:55, 15.69s/batch, batch_loss=1.23, batch

Epoch 6/10:  99%|▉| 977/991 [4:09:08<03:30, 15.01s/batch, batch_loss=1.23, batch

Epoch 6/10:  99%|▉| 977/991 [4:09:22<03:30, 15.01s/batch, batch_loss=1.05, batch

Epoch 6/10:  99%|▉| 978/991 [4:09:22<03:10, 14.67s/batch, batch_loss=1.05, batch

Epoch 6/10:  99%|▉| 978/991 [4:09:35<03:10, 14.67s/batch, batch_loss=0.805, batc

Epoch 6/10:  99%|▉| 979/991 [4:09:35<02:51, 14.30s/batch, batch_loss=0.805, batc

Epoch 6/10:  99%|▉| 979/991 [4:09:48<02:51, 14.30s/batch, batch_loss=0.567, batc

Epoch 6/10:  99%|▉| 980/991 [4:09:48<02:34, 14.01s/batch, batch_loss=0.567, batc

Epoch 6/10:  99%|▉| 980/991 [4:10:02<02:34, 14.01s/batch, batch_loss=0.391, batc

Epoch 6/10:  99%|▉| 981/991 [4:10:02<02:18, 13.81s/batch, batch_loss=0.391, batc

Epoch 6/10:  99%|▉| 981/991 [4:10:15<02:18, 13.81s/batch, batch_loss=0.292, batc

Epoch 6/10:  99%|▉| 982/991 [4:10:15<02:03, 13.73s/batch, batch_loss=0.292, batc

Epoch 6/10:  99%|▉| 982/991 [4:10:29<02:03, 13.73s/batch, batch_loss=0.22, batch

Epoch 6/10:  99%|▉| 983/991 [4:10:29<01:50, 13.87s/batch, batch_loss=0.22, batch

Epoch 6/10:  99%|▉| 983/991 [4:10:44<01:50, 13.87s/batch, batch_loss=0.174, batc

Epoch 6/10:  99%|▉| 984/991 [4:10:44<01:38, 14.02s/batch, batch_loss=0.174, batc

Epoch 6/10:  99%|▉| 984/991 [4:10:58<01:38, 14.02s/batch, batch_loss=0.147, batc

Epoch 6/10:  99%|▉| 985/991 [4:10:58<01:24, 14.03s/batch, batch_loss=0.147, batc

Epoch 6/10:  99%|▉| 985/991 [4:11:12<01:24, 14.03s/batch, batch_loss=0.134, batc

Epoch 6/10:  99%|▉| 986/991 [4:11:12<01:09, 13.96s/batch, batch_loss=0.134, batc

Epoch 6/10:  99%|▉| 986/991 [4:11:25<01:09, 13.96s/batch, batch_loss=0.129, batc

Epoch 6/10: 100%|▉| 987/991 [4:11:25<00:55, 13.81s/batch, batch_loss=0.129, batc

Epoch 6/10: 100%|▉| 987/991 [4:11:39<00:55, 13.81s/batch, batch_loss=0.129, batc

Epoch 6/10: 100%|▉| 988/991 [4:11:39<00:41, 13.82s/batch, batch_loss=0.129, batc

Epoch 6/10: 100%|▉| 988/991 [4:11:53<00:41, 13.82s/batch, batch_loss=0.132, batc

Epoch 6/10: 100%|▉| 989/991 [4:11:53<00:27, 13.89s/batch, batch_loss=0.132, batc

Epoch 6/10: 100%|▉| 989/991 [4:12:07<00:27, 13.89s/batch, batch_loss=0.134, batc

Epoch 6/10: 100%|▉| 990/991 [4:12:07<00:13, 13.82s/batch, batch_loss=0.134, batc

Epoch 6/10: 100%|▉| 990/991 [4:12:18<00:13, 13.82s/batch, batch_loss=0.135, batc

Epoch 6/10: 100%|█| 991/991 [4:12:18<00:00, 13.21s/batch, batch_loss=0.135, batc

Epoch 6/10: 100%|█| 991/991 [4:12:18<00:00, 15.28s/batch, batch_loss=0.135, batc




Epoch 6, Loss: 987.0505


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:15<?, ?batch/s, batch_loss=18.1, batch_index=1, ba

Validation:   0%| | 1/743 [00:15<3:12:35, 15.57s/batch, batch_loss=18.1, batch_i

Validation:   0%| | 1/743 [00:33<3:12:35, 15.57s/batch, batch_loss=18.7, batch_i

Validation:   0%| | 2/743 [00:33<3:27:47, 16.82s/batch, batch_loss=18.7, batch_i

Validation:   0%| | 2/743 [00:48<3:27:47, 16.82s/batch, batch_loss=14, batch_ind

Validation:   0%| | 3/743 [00:48<3:18:36, 16.10s/batch, batch_loss=14, batch_ind

Validation:   0%| | 3/743 [01:03<3:18:36, 16.10s/batch, batch_loss=10.4, batch_i

Validation:   1%| | 4/743 [01:03<3:11:33, 15.55s/batch, batch_loss=10.4, batch_i

Validation:   1%| | 4/743 [01:18<3:11:33, 15.55s/batch, batch_loss=20.5, batch_i

Validation:   1%| | 5/743 [01:18<3:10:57, 15.53s/batch, batch_loss=20.5, batch_i

Validation:   1%| | 5/743 [01:34<3:10:57, 15.53s/batch, batch_loss=18.5, batch_i

Validation:   1%| | 6/743 [01:34<3:10:06, 15.48s/batch, batch_loss=18.5, batch_i

Validation:   1%| | 6/743 [01:49<3:10:06, 15.48s/batch, batch_loss=557, batch_in

Validation:   1%| | 7/743 [01:49<3:08:14, 15.35s/batch, batch_loss=557, batch_in

Validation:   1%| | 7/743 [02:03<3:08:14, 15.35s/batch, batch_loss=16.7, batch_i

Validation:   1%| | 8/743 [02:03<3:05:16, 15.12s/batch, batch_loss=16.7, batch_i

Validation:   1%| | 8/743 [02:21<3:05:16, 15.12s/batch, batch_loss=14.9, batch_i

Validation:   1%| | 9/743 [02:21<3:14:31, 15.90s/batch, batch_loss=14.9, batch_i

Validation:   1%| | 9/743 [02:36<3:14:31, 15.90s/batch, batch_loss=12.8, batch_i

Validation:   1%| | 10/743 [02:36<3:12:51, 15.79s/batch, batch_loss=12.8, batch_

Validation:   1%| | 10/743 [02:51<3:12:51, 15.79s/batch, batch_loss=10.1, batch_

Validation:   1%| | 11/743 [02:51<3:08:04, 15.42s/batch, batch_loss=10.1, batch_

Validation:   1%| | 11/743 [03:06<3:08:04, 15.42s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:06<3:05:58, 15.26s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:21<3:05:58, 15.26s/batch, batch_loss=16.1, batch_

Validation:   2%| | 13/743 [03:21<3:06:23, 15.32s/batch, batch_loss=16.1, batch_

Validation:   2%| | 13/743 [03:36<3:06:23, 15.32s/batch, batch_loss=10.3, batch_

Validation:   2%| | 14/743 [03:36<3:03:32, 15.11s/batch, batch_loss=10.3, batch_

Validation:   2%| | 14/743 [03:51<3:03:32, 15.11s/batch, batch_loss=18.2, batch_

Validation:   2%| | 15/743 [03:51<3:03:55, 15.16s/batch, batch_loss=18.2, batch_

Validation:   2%| | 15/743 [04:09<3:03:55, 15.16s/batch, batch_loss=13.9, batch_

Validation:   2%| | 16/743 [04:09<3:11:55, 15.84s/batch, batch_loss=13.9, batch_

Validation:   2%| | 16/743 [04:23<3:11:55, 15.84s/batch, batch_loss=11, batch_in

Validation:   2%| | 17/743 [04:23<3:06:07, 15.38s/batch, batch_loss=11, batch_in

Validation:   2%| | 17/743 [04:39<3:06:07, 15.38s/batch, batch_loss=4.53e+3, bat

Validation:   2%| | 18/743 [04:39<3:06:18, 15.42s/batch, batch_loss=4.53e+3, bat

Validation:   2%| | 18/743 [04:54<3:06:18, 15.42s/batch, batch_loss=9.78, batch_

Validation:   3%| | 19/743 [04:54<3:04:29, 15.29s/batch, batch_loss=9.78, batch_

Validation:   3%| | 19/743 [05:08<3:04:29, 15.29s/batch, batch_loss=15.5, batch_

Validation:   3%| | 20/743 [05:08<3:00:05, 14.95s/batch, batch_loss=15.5, batch_

Validation:   3%| | 20/743 [05:22<3:00:05, 14.95s/batch, batch_loss=963, batch_i

Validation:   3%| | 21/743 [05:22<2:56:27, 14.66s/batch, batch_loss=963, batch_i

Validation:   3%| | 21/743 [05:37<2:56:27, 14.66s/batch, batch_loss=14.8, batch_

Validation:   3%| | 22/743 [05:37<2:57:20, 14.76s/batch, batch_loss=14.8, batch_

Validation:   3%| | 22/743 [05:51<2:57:20, 14.76s/batch, batch_loss=6.59, batch_

Validation:   3%| | 23/743 [05:51<2:56:06, 14.68s/batch, batch_loss=6.59, batch_

Validation:   3%| | 23/743 [06:06<2:56:06, 14.68s/batch, batch_loss=15.6, batch_

Validation:   3%| | 24/743 [06:06<2:57:50, 14.84s/batch, batch_loss=15.6, batch_

Validation:   3%| | 24/743 [06:21<2:57:50, 14.84s/batch, batch_loss=13.7, batch_

Validation:   3%| | 25/743 [06:21<2:58:04, 14.88s/batch, batch_loss=13.7, batch_

Validation:   3%| | 25/743 [06:36<2:58:04, 14.88s/batch, batch_loss=20.7, batch_

Validation:   3%| | 26/743 [06:36<2:58:00, 14.90s/batch, batch_loss=20.7, batch_

Validation:   3%| | 26/743 [06:51<2:58:00, 14.90s/batch, batch_loss=1.63e+3, bat

Validation:   4%| | 27/743 [06:51<2:55:49, 14.73s/batch, batch_loss=1.63e+3, bat

Validation:   4%| | 27/743 [07:06<2:55:49, 14.73s/batch, batch_loss=14.6, batch_

Validation:   4%| | 28/743 [07:06<2:56:35, 14.82s/batch, batch_loss=14.6, batch_

Validation:   4%| | 28/743 [07:20<2:56:35, 14.82s/batch, batch_loss=14.4, batch_

Validation:   4%| | 29/743 [07:20<2:54:58, 14.70s/batch, batch_loss=14.4, batch_

Validation:   4%| | 29/743 [07:35<2:54:58, 14.70s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:35<2:56:14, 14.83s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:51<2:56:14, 14.83s/batch, batch_loss=19.3, batch_

Validation:   4%| | 31/743 [07:51<2:57:45, 14.98s/batch, batch_loss=19.3, batch_

Validation:   4%| | 31/743 [08:05<2:57:45, 14.98s/batch, batch_loss=15.7, batch_

Validation:   4%| | 32/743 [08:05<2:56:58, 14.93s/batch, batch_loss=15.7, batch_

Validation:   4%| | 32/743 [08:20<2:56:58, 14.93s/batch, batch_loss=16.7, batch_

Validation:   4%| | 33/743 [08:20<2:55:48, 14.86s/batch, batch_loss=16.7, batch_

Validation:   4%| | 33/743 [08:35<2:55:48, 14.86s/batch, batch_loss=16.3, batch_

Validation:   5%| | 34/743 [08:35<2:56:33, 14.94s/batch, batch_loss=16.3, batch_

Validation:   5%| | 34/743 [08:50<2:56:33, 14.94s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [08:50<2:55:20, 14.86s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [09:04<2:55:20, 14.86s/batch, batch_loss=12.8, batch_

Validation:   5%| | 36/743 [09:04<2:52:11, 14.61s/batch, batch_loss=12.8, batch_

Validation:   5%| | 36/743 [09:19<2:52:11, 14.61s/batch, batch_loss=164, batch_i

Validation:   5%| | 37/743 [09:19<2:52:57, 14.70s/batch, batch_loss=164, batch_i

Validation:   5%| | 37/743 [09:34<2:52:57, 14.70s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:34<2:55:20, 14.92s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:49<2:55:20, 14.92s/batch, batch_loss=12.6, batch_

Validation:   5%| | 39/743 [09:49<2:53:16, 14.77s/batch, batch_loss=12.6, batch_

Validation:   5%| | 39/743 [10:04<2:53:16, 14.77s/batch, batch_loss=20, batch_in

Validation:   5%| | 40/743 [10:04<2:53:44, 14.83s/batch, batch_loss=20, batch_in

Validation:   5%| | 40/743 [10:19<2:53:44, 14.83s/batch, batch_loss=14.1, batch_

Validation:   6%| | 41/743 [10:19<2:55:08, 14.97s/batch, batch_loss=14.1, batch_

Validation:   6%| | 41/743 [10:34<2:55:08, 14.97s/batch, batch_loss=15, batch_in

Validation:   6%| | 42/743 [10:34<2:54:14, 14.91s/batch, batch_loss=15, batch_in

Validation:   6%| | 42/743 [10:49<2:54:14, 14.91s/batch, batch_loss=10.2, batch_

Validation:   6%| | 43/743 [10:49<2:55:05, 15.01s/batch, batch_loss=10.2, batch_

Validation:   6%| | 43/743 [11:03<2:55:05, 15.01s/batch, batch_loss=14.6, batch_

Validation:   6%| | 44/743 [11:03<2:51:34, 14.73s/batch, batch_loss=14.6, batch_

Validation:   6%| | 44/743 [11:17<2:51:34, 14.73s/batch, batch_loss=16.9, batch_

Validation:   6%| | 45/743 [11:17<2:50:29, 14.66s/batch, batch_loss=16.9, batch_

Validation:   6%| | 45/743 [11:32<2:50:29, 14.66s/batch, batch_loss=9.3, batch_i

Validation:   6%| | 46/743 [11:32<2:50:21, 14.66s/batch, batch_loss=9.3, batch_i

Validation:   6%| | 46/743 [11:46<2:50:21, 14.66s/batch, batch_loss=19.2, batch_

Validation:   6%| | 47/743 [11:46<2:48:07, 14.49s/batch, batch_loss=19.2, batch_

Validation:   6%| | 47/743 [12:01<2:48:07, 14.49s/batch, batch_loss=17.6, batch_

Validation:   6%| | 48/743 [12:01<2:48:55, 14.58s/batch, batch_loss=17.6, batch_

Validation:   6%| | 48/743 [12:16<2:48:55, 14.58s/batch, batch_loss=18.9, batch_

Validation:   7%| | 49/743 [12:16<2:51:21, 14.81s/batch, batch_loss=18.9, batch_

Validation:   7%| | 49/743 [12:32<2:51:21, 14.81s/batch, batch_loss=13.6, batch_

Validation:   7%| | 50/743 [12:32<2:52:34, 14.94s/batch, batch_loss=13.6, batch_

Validation:   7%| | 50/743 [12:46<2:52:34, 14.94s/batch, batch_loss=14.6, batch_

Validation:   7%| | 51/743 [12:46<2:51:55, 14.91s/batch, batch_loss=14.6, batch_

Validation:   7%| | 51/743 [13:01<2:51:55, 14.91s/batch, batch_loss=15.8, batch_

Validation:   7%| | 52/743 [13:01<2:51:43, 14.91s/batch, batch_loss=15.8, batch_

Validation:   7%| | 52/743 [13:17<2:51:43, 14.91s/batch, batch_loss=22, batch_in

Validation:   7%| | 53/743 [13:17<2:54:20, 15.16s/batch, batch_loss=22, batch_in

Validation:   7%| | 53/743 [13:32<2:54:20, 15.16s/batch, batch_loss=13.2, batch_

Validation:   7%| | 54/743 [13:32<2:52:46, 15.05s/batch, batch_loss=13.2, batch_

Validation:   7%| | 54/743 [13:48<2:52:46, 15.05s/batch, batch_loss=17.9, batch_

Validation:   7%| | 55/743 [13:48<2:55:10, 15.28s/batch, batch_loss=17.9, batch_

Validation:   7%| | 55/743 [14:03<2:55:10, 15.28s/batch, batch_loss=18.3, batch_

Validation:   8%| | 56/743 [14:03<2:54:23, 15.23s/batch, batch_loss=18.3, batch_

Validation:   8%| | 56/743 [14:21<2:54:23, 15.23s/batch, batch_loss=12, batch_in

Validation:   8%| | 57/743 [14:21<3:04:06, 16.10s/batch, batch_loss=12, batch_in

Validation:   8%| | 57/743 [14:36<3:04:06, 16.10s/batch, batch_loss=17.8, batch_

Validation:   8%| | 58/743 [14:36<2:59:33, 15.73s/batch, batch_loss=17.8, batch_

Validation:   8%| | 58/743 [14:51<2:59:33, 15.73s/batch, batch_loss=107, batch_i

Validation:   8%| | 59/743 [14:51<2:56:18, 15.47s/batch, batch_loss=107, batch_i

Validation:   8%| | 59/743 [15:07<2:56:18, 15.47s/batch, batch_loss=6.15e+3, bat

Validation:   8%| | 60/743 [15:07<2:57:17, 15.57s/batch, batch_loss=6.15e+3, bat

Validation:   8%| | 60/743 [15:20<2:57:17, 15.57s/batch, batch_loss=7.63, batch_

Validation:   8%| | 61/743 [15:20<2:49:05, 14.88s/batch, batch_loss=7.63, batch_

Validation:   8%| | 61/743 [15:33<2:49:05, 14.88s/batch, batch_loss=9.32, batch_

Validation:   8%| | 62/743 [15:33<2:43:03, 14.37s/batch, batch_loss=9.32, batch_

Validation:   8%| | 62/743 [15:46<2:43:03, 14.37s/batch, batch_loss=23.7, batch_

Validation:   8%| | 63/743 [15:46<2:40:00, 14.12s/batch, batch_loss=23.7, batch_

Validation:   8%| | 63/743 [16:01<2:40:00, 14.12s/batch, batch_loss=11.2, batch_

Validation:   9%| | 64/743 [16:01<2:41:30, 14.27s/batch, batch_loss=11.2, batch_

Validation:   9%| | 64/743 [16:19<2:41:30, 14.27s/batch, batch_loss=17.1, batch_

Validation:   9%| | 65/743 [16:19<2:53:30, 15.36s/batch, batch_loss=17.1, batch_

Validation:   9%| | 65/743 [16:34<2:53:30, 15.36s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:34<2:52:47, 15.31s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:50<2:52:47, 15.31s/batch, batch_loss=12.8, batch_

Validation:   9%| | 67/743 [16:50<2:53:29, 15.40s/batch, batch_loss=12.8, batch_

Validation:   9%| | 67/743 [17:05<2:53:29, 15.40s/batch, batch_loss=13.2, batch_

Validation:   9%| | 68/743 [17:05<2:51:51, 15.28s/batch, batch_loss=13.2, batch_

Validation:   9%| | 68/743 [17:20<2:51:51, 15.28s/batch, batch_loss=8.06, batch_

Validation:   9%| | 69/743 [17:20<2:52:01, 15.31s/batch, batch_loss=8.06, batch_

Validation:   9%| | 69/743 [17:35<2:52:01, 15.31s/batch, batch_loss=12.7, batch_

Validation:   9%| | 70/743 [17:35<2:51:11, 15.26s/batch, batch_loss=12.7, batch_

Validation:   9%| | 70/743 [17:50<2:51:11, 15.26s/batch, batch_loss=7.59, batch_

Validation:  10%| | 71/743 [17:50<2:50:24, 15.21s/batch, batch_loss=7.59, batch_

Validation:  10%| | 71/743 [18:06<2:50:24, 15.21s/batch, batch_loss=13.6, batch_

Validation:  10%| | 72/743 [18:06<2:49:58, 15.20s/batch, batch_loss=13.6, batch_

Validation:  10%| | 72/743 [18:24<2:49:58, 15.20s/batch, batch_loss=15, batch_in

Validation:  10%| | 73/743 [18:24<2:58:51, 16.02s/batch, batch_loss=15, batch_in

Validation:  10%| | 73/743 [18:39<2:58:51, 16.02s/batch, batch_loss=17.4, batch_

Validation:  10%| | 74/743 [18:39<2:55:10, 15.71s/batch, batch_loss=17.4, batch_

Validation:  10%| | 74/743 [18:53<2:55:10, 15.71s/batch, batch_loss=11.2, batch_

Validation:  10%| | 75/743 [18:54<2:52:27, 15.49s/batch, batch_loss=11.2, batch_

Validation:  10%| | 75/743 [19:08<2:52:27, 15.49s/batch, batch_loss=14, batch_in

Validation:  10%| | 76/743 [19:08<2:49:24, 15.24s/batch, batch_loss=14, batch_in

Validation:  10%| | 76/743 [19:23<2:49:24, 15.24s/batch, batch_loss=12, batch_in

Validation:  10%| | 77/743 [19:23<2:46:50, 15.03s/batch, batch_loss=12, batch_in

Validation:  10%| | 77/743 [19:38<2:46:50, 15.03s/batch, batch_loss=13.3, batch_

Validation:  10%| | 78/743 [19:38<2:48:01, 15.16s/batch, batch_loss=13.3, batch_

Validation:  10%| | 78/743 [19:54<2:48:01, 15.16s/batch, batch_loss=8.8, batch_i

Validation:  11%| | 79/743 [19:54<2:50:13, 15.38s/batch, batch_loss=8.8, batch_i

Validation:  11%| | 79/743 [20:10<2:50:13, 15.38s/batch, batch_loss=6.89, batch_

Validation:  11%| | 80/743 [20:10<2:50:50, 15.46s/batch, batch_loss=6.89, batch_

Validation:  11%| | 80/743 [20:28<2:50:50, 15.46s/batch, batch_loss=149, batch_i

Validation:  11%| | 81/743 [20:28<2:58:57, 16.22s/batch, batch_loss=149, batch_i

Validation:  11%| | 81/743 [20:43<2:58:57, 16.22s/batch, batch_loss=1.5e+3, batc

Validation:  11%| | 82/743 [20:43<2:55:07, 15.90s/batch, batch_loss=1.5e+3, batc

Validation:  11%| | 82/743 [20:58<2:55:07, 15.90s/batch, batch_loss=30.2, batch_

Validation:  11%| | 83/743 [20:58<2:52:55, 15.72s/batch, batch_loss=30.2, batch_

Validation:  11%| | 83/743 [21:13<2:52:55, 15.72s/batch, batch_loss=16.3, batch_

Validation:  11%| | 84/743 [21:13<2:49:28, 15.43s/batch, batch_loss=16.3, batch_

Validation:  11%| | 84/743 [21:28<2:49:28, 15.43s/batch, batch_loss=19.5, batch_

Validation:  11%| | 85/743 [21:28<2:48:01, 15.32s/batch, batch_loss=19.5, batch_

Validation:  11%| | 85/743 [21:44<2:48:01, 15.32s/batch, batch_loss=23.2, batch_

Validation:  12%| | 86/743 [21:44<2:49:19, 15.46s/batch, batch_loss=23.2, batch_

Validation:  12%| | 86/743 [21:59<2:49:19, 15.46s/batch, batch_loss=29.2, batch_

Validation:  12%| | 87/743 [21:59<2:47:52, 15.35s/batch, batch_loss=29.2, batch_

Validation:  12%| | 87/743 [22:14<2:47:52, 15.35s/batch, batch_loss=22.1, batch_

Validation:  12%| | 88/743 [22:14<2:47:43, 15.36s/batch, batch_loss=22.1, batch_

Validation:  12%| | 88/743 [22:30<2:47:43, 15.36s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [22:30<2:49:50, 15.58s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [22:49<2:49:50, 15.58s/batch, batch_loss=4.86, batch_

Validation:  12%| | 90/743 [22:49<2:58:02, 16.36s/batch, batch_loss=4.86, batch_

Validation:  12%| | 90/743 [23:05<2:58:02, 16.36s/batch, batch_loss=28.8, batch_

Validation:  12%| | 91/743 [23:05<2:56:41, 16.26s/batch, batch_loss=28.8, batch_

Validation:  12%| | 91/743 [23:20<2:56:41, 16.26s/batch, batch_loss=32.8, batch_

Validation:  12%| | 92/743 [23:20<2:53:21, 15.98s/batch, batch_loss=32.8, batch_

Validation:  12%| | 92/743 [23:35<2:53:21, 15.98s/batch, batch_loss=22.6, batch_

Validation:  13%|▏| 93/743 [23:35<2:51:27, 15.83s/batch, batch_loss=22.6, batch_

Validation:  13%|▏| 93/743 [23:50<2:51:27, 15.83s/batch, batch_loss=33.7, batch_

Validation:  13%|▏| 94/743 [23:50<2:48:58, 15.62s/batch, batch_loss=33.7, batch_

Validation:  13%|▏| 94/743 [24:05<2:48:58, 15.62s/batch, batch_loss=10.7, batch_

Validation:  13%|▏| 95/743 [24:05<2:45:52, 15.36s/batch, batch_loss=10.7, batch_

Validation:  13%|▏| 95/743 [24:20<2:45:52, 15.36s/batch, batch_loss=17.2, batch_

Validation:  13%|▏| 96/743 [24:20<2:44:50, 15.29s/batch, batch_loss=17.2, batch_

Validation:  13%|▏| 96/743 [24:35<2:44:50, 15.29s/batch, batch_loss=26.4, batch_

Validation:  13%|▏| 97/743 [24:35<2:44:07, 15.24s/batch, batch_loss=26.4, batch_

Validation:  13%|▏| 97/743 [24:51<2:44:07, 15.24s/batch, batch_loss=17, batch_in

Validation:  13%|▏| 98/743 [24:51<2:44:08, 15.27s/batch, batch_loss=17, batch_in

Validation:  13%|▏| 98/743 [25:08<2:44:08, 15.27s/batch, batch_loss=23.7, batch_

Validation:  13%|▏| 99/743 [25:08<2:51:11, 15.95s/batch, batch_loss=23.7, batch_

Validation:  13%|▏| 99/743 [25:24<2:51:11, 15.95s/batch, batch_loss=12.1, batch_

Validation:  13%|▏| 100/743 [25:24<2:49:42, 15.84s/batch, batch_loss=12.1, batch

Validation:  13%|▏| 100/743 [25:39<2:49:42, 15.84s/batch, batch_loss=15.6, batch

Validation:  14%|▏| 101/743 [25:39<2:45:28, 15.47s/batch, batch_loss=15.6, batch

Validation:  14%|▏| 101/743 [25:53<2:45:28, 15.47s/batch, batch_loss=11.2, batch

Validation:  14%|▏| 102/743 [25:53<2:42:59, 15.26s/batch, batch_loss=11.2, batch

Validation:  14%|▏| 102/743 [26:08<2:42:59, 15.26s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [26:08<2:40:17, 15.03s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [26:24<2:40:17, 15.03s/batch, batch_loss=14.2, batch

Validation:  14%|▏| 104/743 [26:24<2:42:17, 15.24s/batch, batch_loss=14.2, batch

Validation:  14%|▏| 104/743 [26:38<2:42:17, 15.24s/batch, batch_loss=7.49, batch

Validation:  14%|▏| 105/743 [26:38<2:40:12, 15.07s/batch, batch_loss=7.49, batch

Validation:  14%|▏| 105/743 [26:53<2:40:12, 15.07s/batch, batch_loss=15.3, batch

Validation:  14%|▏| 106/743 [26:53<2:40:05, 15.08s/batch, batch_loss=15.3, batch

Validation:  14%|▏| 106/743 [27:09<2:40:05, 15.08s/batch, batch_loss=717, batch_

Validation:  14%|▏| 107/743 [27:09<2:40:45, 15.17s/batch, batch_loss=717, batch_

Validation:  14%|▏| 107/743 [27:23<2:40:45, 15.17s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [27:23<2:38:53, 15.01s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [27:39<2:38:53, 15.01s/batch, batch_loss=198, batch_

Validation:  15%|▏| 109/743 [27:39<2:40:15, 15.17s/batch, batch_loss=198, batch_

Validation:  15%|▏| 109/743 [27:54<2:40:15, 15.17s/batch, batch_loss=22.7, batch

Validation:  15%|▏| 110/743 [27:54<2:39:13, 15.09s/batch, batch_loss=22.7, batch

Validation:  15%|▏| 110/743 [28:09<2:39:13, 15.09s/batch, batch_loss=13.1, batch

Validation:  15%|▏| 111/743 [28:09<2:38:29, 15.05s/batch, batch_loss=13.1, batch

Validation:  15%|▏| 111/743 [28:24<2:38:29, 15.05s/batch, batch_loss=21.4, batch

Validation:  15%|▏| 112/743 [28:24<2:38:43, 15.09s/batch, batch_loss=21.4, batch

Validation:  15%|▏| 112/743 [28:38<2:38:43, 15.09s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [28:38<2:36:49, 14.94s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [28:55<2:36:49, 14.94s/batch, batch_loss=18.2, batch

Validation:  15%|▏| 114/743 [28:55<2:41:39, 15.42s/batch, batch_loss=18.2, batch

Validation:  15%|▏| 114/743 [29:10<2:41:39, 15.42s/batch, batch_loss=19.3, batch

Validation:  15%|▏| 115/743 [29:10<2:41:18, 15.41s/batch, batch_loss=19.3, batch

Validation:  15%|▏| 115/743 [29:25<2:41:18, 15.41s/batch, batch_loss=13.6, batch

Validation:  16%|▏| 116/743 [29:25<2:39:03, 15.22s/batch, batch_loss=13.6, batch

Validation:  16%|▏| 116/743 [29:40<2:39:03, 15.22s/batch, batch_loss=23.1, batch

Validation:  16%|▏| 117/743 [29:40<2:38:36, 15.20s/batch, batch_loss=23.1, batch

Validation:  16%|▏| 117/743 [29:55<2:38:36, 15.20s/batch, batch_loss=24.4, batch

Validation:  16%|▏| 118/743 [29:55<2:37:55, 15.16s/batch, batch_loss=24.4, batch

Validation:  16%|▏| 118/743 [30:10<2:37:55, 15.16s/batch, batch_loss=14.6, batch

Validation:  16%|▏| 119/743 [30:10<2:34:50, 14.89s/batch, batch_loss=14.6, batch

Validation:  16%|▏| 119/743 [30:24<2:34:50, 14.89s/batch, batch_loss=20.2, batch

Validation:  16%|▏| 120/743 [30:24<2:34:23, 14.87s/batch, batch_loss=20.2, batch

Validation:  16%|▏| 120/743 [30:40<2:34:23, 14.87s/batch, batch_loss=11.7, batch

Validation:  16%|▏| 121/743 [30:40<2:35:28, 15.00s/batch, batch_loss=11.7, batch

Validation:  16%|▏| 121/743 [30:58<2:35:28, 15.00s/batch, batch_loss=4.1, batch_

Validation:  16%|▏| 122/743 [30:58<2:45:08, 15.96s/batch, batch_loss=4.1, batch_

Validation:  16%|▏| 122/743 [31:13<2:45:08, 15.96s/batch, batch_loss=6.96, batch

Validation:  17%|▏| 123/743 [31:13<2:41:40, 15.65s/batch, batch_loss=6.96, batch

Validation:  17%|▏| 123/743 [31:27<2:41:40, 15.65s/batch, batch_loss=12, batch_i

Validation:  17%|▏| 124/743 [31:27<2:37:29, 15.27s/batch, batch_loss=12, batch_i

Validation:  17%|▏| 124/743 [31:42<2:37:29, 15.27s/batch, batch_loss=23.6, batch

Validation:  17%|▏| 125/743 [31:42<2:34:12, 14.97s/batch, batch_loss=23.6, batch

Validation:  17%|▏| 125/743 [31:56<2:34:12, 14.97s/batch, batch_loss=12.2, batch

Validation:  17%|▏| 126/743 [31:56<2:32:57, 14.87s/batch, batch_loss=12.2, batch

Validation:  17%|▏| 126/743 [32:11<2:32:57, 14.87s/batch, batch_loss=11.4, batch

Validation:  17%|▏| 127/743 [32:11<2:32:16, 14.83s/batch, batch_loss=11.4, batch

Validation:  17%|▏| 127/743 [32:26<2:32:16, 14.83s/batch, batch_loss=24.5, batch

Validation:  17%|▏| 128/743 [32:26<2:32:05, 14.84s/batch, batch_loss=24.5, batch

Validation:  17%|▏| 128/743 [32:41<2:32:05, 14.84s/batch, batch_loss=13.4, batch

Validation:  17%|▏| 129/743 [32:41<2:32:27, 14.90s/batch, batch_loss=13.4, batch

Validation:  17%|▏| 129/743 [32:56<2:32:27, 14.90s/batch, batch_loss=18.2, batch

Validation:  17%|▏| 130/743 [32:56<2:33:01, 14.98s/batch, batch_loss=18.2, batch

Validation:  17%|▏| 130/743 [33:09<2:33:01, 14.98s/batch, batch_loss=21.4, batch

Validation:  18%|▏| 131/743 [33:09<2:27:40, 14.48s/batch, batch_loss=21.4, batch

Validation:  18%|▏| 131/743 [33:24<2:27:40, 14.48s/batch, batch_loss=24.1, batch

Validation:  18%|▏| 132/743 [33:24<2:29:08, 14.65s/batch, batch_loss=24.1, batch

Validation:  18%|▏| 132/743 [33:40<2:29:08, 14.65s/batch, batch_loss=35.1, batch

Validation:  18%|▏| 133/743 [33:40<2:30:42, 14.82s/batch, batch_loss=35.1, batch

Validation:  18%|▏| 133/743 [33:55<2:30:42, 14.82s/batch, batch_loss=16, batch_i

Validation:  18%|▏| 134/743 [33:55<2:31:25, 14.92s/batch, batch_loss=16, batch_i

Validation:  18%|▏| 134/743 [34:09<2:31:25, 14.92s/batch, batch_loss=31, batch_i

Validation:  18%|▏| 135/743 [34:09<2:28:27, 14.65s/batch, batch_loss=31, batch_i

Validation:  18%|▏| 135/743 [34:24<2:28:27, 14.65s/batch, batch_loss=16.8, batch

Validation:  18%|▏| 136/743 [34:24<2:30:32, 14.88s/batch, batch_loss=16.8, batch

Validation:  18%|▏| 136/743 [34:38<2:30:32, 14.88s/batch, batch_loss=24.3, batch

Validation:  18%|▏| 137/743 [34:38<2:28:04, 14.66s/batch, batch_loss=24.3, batch

Validation:  18%|▏| 137/743 [34:53<2:28:04, 14.66s/batch, batch_loss=7.26, batch

Validation:  19%|▏| 138/743 [34:53<2:27:52, 14.67s/batch, batch_loss=7.26, batch

Validation:  19%|▏| 138/743 [35:07<2:27:52, 14.67s/batch, batch_loss=251, batch_

Validation:  19%|▏| 139/743 [35:07<2:26:30, 14.55s/batch, batch_loss=251, batch_

Validation:  19%|▏| 139/743 [35:21<2:26:30, 14.55s/batch, batch_loss=15.4, batch

Validation:  19%|▏| 140/743 [35:21<2:22:53, 14.22s/batch, batch_loss=15.4, batch

Validation:  19%|▏| 140/743 [35:36<2:22:53, 14.22s/batch, batch_loss=11.8, batch

Validation:  19%|▏| 141/743 [35:36<2:24:47, 14.43s/batch, batch_loss=11.8, batch

Validation:  19%|▏| 141/743 [35:51<2:24:47, 14.43s/batch, batch_loss=13.5, batch

Validation:  19%|▏| 142/743 [35:51<2:26:54, 14.67s/batch, batch_loss=13.5, batch

Validation:  19%|▏| 142/743 [36:05<2:26:54, 14.67s/batch, batch_loss=12.1, batch

Validation:  19%|▏| 143/743 [36:05<2:24:52, 14.49s/batch, batch_loss=12.1, batch

Validation:  19%|▏| 143/743 [36:22<2:24:52, 14.49s/batch, batch_loss=15.7, batch

Validation:  19%|▏| 144/743 [36:22<2:32:25, 15.27s/batch, batch_loss=15.7, batch

Validation:  19%|▏| 144/743 [36:38<2:32:25, 15.27s/batch, batch_loss=14.3, batch

Validation:  20%|▏| 145/743 [36:38<2:34:17, 15.48s/batch, batch_loss=14.3, batch

Validation:  20%|▏| 145/743 [36:53<2:34:17, 15.48s/batch, batch_loss=14.5, batch

Validation:  20%|▏| 146/743 [36:53<2:31:57, 15.27s/batch, batch_loss=14.5, batch

Validation:  20%|▏| 146/743 [37:07<2:31:57, 15.27s/batch, batch_loss=17.1, batch

Validation:  20%|▏| 147/743 [37:07<2:28:47, 14.98s/batch, batch_loss=17.1, batch

Validation:  20%|▏| 147/743 [37:23<2:28:47, 14.98s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [37:23<2:30:07, 15.14s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [37:37<2:30:07, 15.14s/batch, batch_loss=22.5, batch

Validation:  20%|▏| 149/743 [37:37<2:28:33, 15.01s/batch, batch_loss=22.5, batch

Validation:  20%|▏| 149/743 [37:52<2:28:33, 15.01s/batch, batch_loss=24.4, batch

Validation:  20%|▏| 150/743 [37:52<2:28:20, 15.01s/batch, batch_loss=24.4, batch

Validation:  20%|▏| 150/743 [38:06<2:28:20, 15.01s/batch, batch_loss=14, batch_i

Validation:  20%|▏| 151/743 [38:06<2:24:38, 14.66s/batch, batch_loss=14, batch_i

Validation:  20%|▏| 151/743 [38:21<2:24:38, 14.66s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [38:21<2:23:39, 14.59s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [38:37<2:23:39, 14.59s/batch, batch_loss=16.6, batch

Validation:  21%|▏| 153/743 [38:37<2:28:56, 15.15s/batch, batch_loss=16.6, batch

Validation:  21%|▏| 153/743 [38:51<2:28:56, 15.15s/batch, batch_loss=15.5, batch

Validation:  21%|▏| 154/743 [38:51<2:26:20, 14.91s/batch, batch_loss=15.5, batch

Validation:  21%|▏| 154/743 [39:07<2:26:20, 14.91s/batch, batch_loss=18.7, batch

Validation:  21%|▏| 155/743 [39:07<2:27:56, 15.10s/batch, batch_loss=18.7, batch

Validation:  21%|▏| 155/743 [39:21<2:27:56, 15.10s/batch, batch_loss=17.4, batch

Validation:  21%|▏| 156/743 [39:21<2:25:56, 14.92s/batch, batch_loss=17.4, batch

Validation:  21%|▏| 156/743 [39:36<2:25:56, 14.92s/batch, batch_loss=18, batch_i

Validation:  21%|▏| 157/743 [39:36<2:25:57, 14.94s/batch, batch_loss=18, batch_i

Validation:  21%|▏| 157/743 [39:51<2:25:57, 14.94s/batch, batch_loss=22.7, batch

Validation:  21%|▏| 158/743 [39:51<2:25:39, 14.94s/batch, batch_loss=22.7, batch

Validation:  21%|▏| 158/743 [40:07<2:25:39, 14.94s/batch, batch_loss=25.5, batch

Validation:  21%|▏| 159/743 [40:07<2:27:22, 15.14s/batch, batch_loss=25.5, batch

Validation:  21%|▏| 159/743 [40:24<2:27:22, 15.14s/batch, batch_loss=15.2, batch

Validation:  22%|▏| 160/743 [40:24<2:33:36, 15.81s/batch, batch_loss=15.2, batch

Validation:  22%|▏| 160/743 [40:39<2:33:36, 15.81s/batch, batch_loss=16.6, batch

Validation:  22%|▏| 161/743 [40:39<2:29:31, 15.42s/batch, batch_loss=16.6, batch

Validation:  22%|▏| 161/743 [40:54<2:29:31, 15.42s/batch, batch_loss=22.6, batch

Validation:  22%|▏| 162/743 [40:54<2:28:11, 15.30s/batch, batch_loss=22.6, batch

Validation:  22%|▏| 162/743 [41:08<2:28:11, 15.30s/batch, batch_loss=11.1, batch

Validation:  22%|▏| 163/743 [41:08<2:25:34, 15.06s/batch, batch_loss=11.1, batch

Validation:  22%|▏| 163/743 [41:23<2:25:34, 15.06s/batch, batch_loss=10.2, batch

Validation:  22%|▏| 164/743 [41:23<2:23:45, 14.90s/batch, batch_loss=10.2, batch

Validation:  22%|▏| 164/743 [41:38<2:23:45, 14.90s/batch, batch_loss=12.3, batch

Validation:  22%|▏| 165/743 [41:38<2:23:41, 14.92s/batch, batch_loss=12.3, batch

Validation:  22%|▏| 165/743 [41:53<2:23:41, 14.92s/batch, batch_loss=10.2, batch

Validation:  22%|▏| 166/743 [41:53<2:24:28, 15.02s/batch, batch_loss=10.2, batch

Validation:  22%|▏| 166/743 [42:08<2:24:28, 15.02s/batch, batch_loss=13.1, batch

Validation:  22%|▏| 167/743 [42:08<2:24:22, 15.04s/batch, batch_loss=13.1, batch

Validation:  22%|▏| 167/743 [42:23<2:24:22, 15.04s/batch, batch_loss=24.4, batch

Validation:  23%|▏| 168/743 [42:23<2:22:50, 14.91s/batch, batch_loss=24.4, batch

Validation:  23%|▏| 168/743 [42:38<2:22:50, 14.91s/batch, batch_loss=21.9, batch

Validation:  23%|▏| 169/743 [42:38<2:22:24, 14.89s/batch, batch_loss=21.9, batch

Validation:  23%|▏| 169/743 [42:52<2:22:24, 14.89s/batch, batch_loss=21.1, batch

Validation:  23%|▏| 170/743 [42:52<2:21:00, 14.77s/batch, batch_loss=21.1, batch

Validation:  23%|▏| 170/743 [43:07<2:21:00, 14.77s/batch, batch_loss=21.2, batch

Validation:  23%|▏| 171/743 [43:07<2:22:12, 14.92s/batch, batch_loss=21.2, batch

Validation:  23%|▏| 171/743 [43:22<2:22:12, 14.92s/batch, batch_loss=15.5, batch

Validation:  23%|▏| 172/743 [43:22<2:21:34, 14.88s/batch, batch_loss=15.5, batch

Validation:  23%|▏| 172/743 [43:38<2:21:34, 14.88s/batch, batch_loss=19.7, batch

Validation:  23%|▏| 173/743 [43:38<2:23:04, 15.06s/batch, batch_loss=19.7, batch

Validation:  23%|▏| 173/743 [43:52<2:23:04, 15.06s/batch, batch_loss=12.6, batch

Validation:  23%|▏| 174/743 [43:52<2:20:55, 14.86s/batch, batch_loss=12.6, batch

Validation:  23%|▏| 174/743 [44:09<2:20:55, 14.86s/batch, batch_loss=20.6, batch

Validation:  24%|▏| 175/743 [44:09<2:26:06, 15.43s/batch, batch_loss=20.6, batch

Validation:  24%|▏| 175/743 [44:23<2:26:06, 15.43s/batch, batch_loss=14.7, batch

Validation:  24%|▏| 176/743 [44:23<2:23:04, 15.14s/batch, batch_loss=14.7, batch

Validation:  24%|▏| 176/743 [44:38<2:23:04, 15.14s/batch, batch_loss=17.6, batch

Validation:  24%|▏| 177/743 [44:38<2:21:41, 15.02s/batch, batch_loss=17.6, batch

Validation:  24%|▏| 177/743 [44:54<2:21:41, 15.02s/batch, batch_loss=19.5, batch

Validation:  24%|▏| 178/743 [44:54<2:22:57, 15.18s/batch, batch_loss=19.5, batch

Validation:  24%|▏| 178/743 [45:08<2:22:57, 15.18s/batch, batch_loss=16.8, batch

Validation:  24%|▏| 179/743 [45:08<2:21:38, 15.07s/batch, batch_loss=16.8, batch

Validation:  24%|▏| 179/743 [45:22<2:21:38, 15.07s/batch, batch_loss=7.22e+3, ba

Validation:  24%|▏| 180/743 [45:22<2:17:39, 14.67s/batch, batch_loss=7.22e+3, ba

Validation:  24%|▏| 180/743 [45:35<2:17:39, 14.67s/batch, batch_loss=14.3, batch

Validation:  24%|▏| 181/743 [45:35<2:11:57, 14.09s/batch, batch_loss=14.3, batch

Validation:  24%|▏| 181/743 [45:48<2:11:57, 14.09s/batch, batch_loss=20.4, batch

Validation:  24%|▏| 182/743 [45:48<2:09:03, 13.80s/batch, batch_loss=20.4, batch

Validation:  24%|▏| 182/743 [46:02<2:09:03, 13.80s/batch, batch_loss=15.2, batch

Validation:  25%|▏| 183/743 [46:02<2:09:15, 13.85s/batch, batch_loss=15.2, batch

Validation:  25%|▏| 183/743 [46:17<2:09:15, 13.85s/batch, batch_loss=9.94, batch

Validation:  25%|▏| 184/743 [46:17<2:11:38, 14.13s/batch, batch_loss=9.94, batch

Validation:  25%|▏| 184/743 [46:32<2:11:38, 14.13s/batch, batch_loss=15.7, batch

Validation:  25%|▏| 185/743 [46:32<2:14:22, 14.45s/batch, batch_loss=15.7, batch

Validation:  25%|▏| 185/743 [46:47<2:14:22, 14.45s/batch, batch_loss=25.5, batch

Validation:  25%|▎| 186/743 [46:47<2:16:55, 14.75s/batch, batch_loss=25.5, batch

Validation:  25%|▎| 186/743 [47:03<2:16:55, 14.75s/batch, batch_loss=27.5, batch

Validation:  25%|▎| 187/743 [47:03<2:18:54, 14.99s/batch, batch_loss=27.5, batch

Validation:  25%|▎| 187/743 [47:18<2:18:54, 14.99s/batch, batch_loss=14.4, batch

Validation:  25%|▎| 188/743 [47:18<2:19:13, 15.05s/batch, batch_loss=14.4, batch

Validation:  25%|▎| 188/743 [47:33<2:19:13, 15.05s/batch, batch_loss=14.4, batch

Validation:  25%|▎| 189/743 [47:33<2:19:00, 15.05s/batch, batch_loss=14.4, batch

Validation:  25%|▎| 189/743 [47:48<2:19:00, 15.05s/batch, batch_loss=981, batch_

Validation:  26%|▎| 190/743 [47:48<2:18:22, 15.01s/batch, batch_loss=981, batch_

Validation:  26%|▎| 190/743 [48:03<2:18:22, 15.01s/batch, batch_loss=19.2, batch

Validation:  26%|▎| 191/743 [48:03<2:16:46, 14.87s/batch, batch_loss=19.2, batch

Validation:  26%|▎| 191/743 [48:18<2:16:46, 14.87s/batch, batch_loss=11.4, batch

Validation:  26%|▎| 192/743 [48:18<2:17:27, 14.97s/batch, batch_loss=11.4, batch

Validation:  26%|▎| 192/743 [48:35<2:17:27, 14.97s/batch, batch_loss=17.9, batch

Validation:  26%|▎| 193/743 [48:35<2:23:08, 15.62s/batch, batch_loss=17.9, batch

Validation:  26%|▎| 193/743 [48:50<2:23:08, 15.62s/batch, batch_loss=17.4, batch

Validation:  26%|▎| 194/743 [48:50<2:21:48, 15.50s/batch, batch_loss=17.4, batch

Validation:  26%|▎| 194/743 [49:06<2:21:48, 15.50s/batch, batch_loss=9.34, batch

Validation:  26%|▎| 195/743 [49:06<2:21:23, 15.48s/batch, batch_loss=9.34, batch

Validation:  26%|▎| 195/743 [49:21<2:21:23, 15.48s/batch, batch_loss=16.1, batch

Validation:  26%|▎| 196/743 [49:21<2:20:30, 15.41s/batch, batch_loss=16.1, batch

Validation:  26%|▎| 196/743 [49:34<2:20:30, 15.41s/batch, batch_loss=9.12, batch

Validation:  27%|▎| 197/743 [49:34<2:14:55, 14.83s/batch, batch_loss=9.12, batch

Validation:  27%|▎| 197/743 [49:49<2:14:55, 14.83s/batch, batch_loss=18.5, batch

Validation:  27%|▎| 198/743 [49:49<2:13:23, 14.69s/batch, batch_loss=18.5, batch

Validation:  27%|▎| 198/743 [50:02<2:13:23, 14.69s/batch, batch_loss=18.2, batch

Validation:  27%|▎| 199/743 [50:02<2:10:28, 14.39s/batch, batch_loss=18.2, batch

Validation:  27%|▎| 199/743 [50:16<2:10:28, 14.39s/batch, batch_loss=283, batch_

Validation:  27%|▎| 200/743 [50:16<2:09:24, 14.30s/batch, batch_loss=283, batch_

Validation:  27%|▎| 200/743 [50:32<2:09:24, 14.30s/batch, batch_loss=30.2, batch

Validation:  27%|▎| 201/743 [50:32<2:12:42, 14.69s/batch, batch_loss=30.2, batch

Validation:  27%|▎| 201/743 [50:47<2:12:42, 14.69s/batch, batch_loss=18.8, batch

Validation:  27%|▎| 202/743 [50:47<2:12:42, 14.72s/batch, batch_loss=18.8, batch

Validation:  27%|▎| 202/743 [51:02<2:12:42, 14.72s/batch, batch_loss=15.8, batch

Validation:  27%|▎| 203/743 [51:02<2:13:22, 14.82s/batch, batch_loss=15.8, batch

Validation:  27%|▎| 203/743 [51:17<2:13:22, 14.82s/batch, batch_loss=17.5, batch

Validation:  27%|▎| 204/743 [51:17<2:12:44, 14.78s/batch, batch_loss=17.5, batch

Validation:  27%|▎| 204/743 [51:31<2:12:44, 14.78s/batch, batch_loss=16.9, batch

Validation:  28%|▎| 205/743 [51:31<2:11:12, 14.63s/batch, batch_loss=16.9, batch

Validation:  28%|▎| 205/743 [51:46<2:11:12, 14.63s/batch, batch_loss=11, batch_i

Validation:  28%|▎| 206/743 [51:46<2:12:25, 14.80s/batch, batch_loss=11, batch_i

Validation:  28%|▎| 206/743 [52:01<2:12:25, 14.80s/batch, batch_loss=19.2, batch

Validation:  28%|▎| 207/743 [52:01<2:13:36, 14.96s/batch, batch_loss=19.2, batch

Validation:  28%|▎| 207/743 [52:16<2:13:36, 14.96s/batch, batch_loss=16.4, batch

Validation:  28%|▎| 208/743 [52:16<2:13:00, 14.92s/batch, batch_loss=16.4, batch

Validation:  28%|▎| 208/743 [52:32<2:13:00, 14.92s/batch, batch_loss=8.05, batch

Validation:  28%|▎| 209/743 [52:32<2:15:00, 15.17s/batch, batch_loss=8.05, batch

Validation:  28%|▎| 209/743 [52:46<2:15:00, 15.17s/batch, batch_loss=9.04, batch

Validation:  28%|▎| 210/743 [52:46<2:13:01, 14.98s/batch, batch_loss=9.04, batch

Validation:  28%|▎| 210/743 [53:02<2:13:01, 14.98s/batch, batch_loss=12.1, batch

Validation:  28%|▎| 211/743 [53:02<2:13:10, 15.02s/batch, batch_loss=12.1, batch

Validation:  28%|▎| 211/743 [53:16<2:13:10, 15.02s/batch, batch_loss=12.4, batch

Validation:  29%|▎| 212/743 [53:16<2:12:24, 14.96s/batch, batch_loss=12.4, batch

Validation:  29%|▎| 212/743 [53:31<2:12:24, 14.96s/batch, batch_loss=541, batch_

Validation:  29%|▎| 213/743 [53:31<2:11:36, 14.90s/batch, batch_loss=541, batch_

Validation:  29%|▎| 213/743 [53:46<2:11:36, 14.90s/batch, batch_loss=11.6, batch

Validation:  29%|▎| 214/743 [53:46<2:10:22, 14.79s/batch, batch_loss=11.6, batch

Validation:  29%|▎| 214/743 [54:00<2:10:22, 14.79s/batch, batch_loss=17.8, batch

Validation:  29%|▎| 215/743 [54:00<2:09:41, 14.74s/batch, batch_loss=17.8, batch

Validation:  29%|▎| 215/743 [54:14<2:09:41, 14.74s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [54:14<2:06:24, 14.39s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [54:29<2:06:24, 14.39s/batch, batch_loss=18.4, batch

Validation:  29%|▎| 217/743 [54:29<2:07:39, 14.56s/batch, batch_loss=18.4, batch

Validation:  29%|▎| 217/743 [54:43<2:07:39, 14.56s/batch, batch_loss=12.5, batch

Validation:  29%|▎| 218/743 [54:43<2:07:07, 14.53s/batch, batch_loss=12.5, batch

Validation:  29%|▎| 218/743 [54:58<2:07:07, 14.53s/batch, batch_loss=26.6, batch

Validation:  29%|▎| 219/743 [54:58<2:07:15, 14.57s/batch, batch_loss=26.6, batch

Validation:  29%|▎| 219/743 [55:13<2:07:15, 14.57s/batch, batch_loss=23.7, batch

Validation:  30%|▎| 220/743 [55:13<2:08:35, 14.75s/batch, batch_loss=23.7, batch

Validation:  30%|▎| 220/743 [55:28<2:08:35, 14.75s/batch, batch_loss=16, batch_i

Validation:  30%|▎| 221/743 [55:28<2:08:53, 14.82s/batch, batch_loss=16, batch_i

Validation:  30%|▎| 221/743 [55:42<2:08:53, 14.82s/batch, batch_loss=11.6, batch

Validation:  30%|▎| 222/743 [55:42<2:06:35, 14.58s/batch, batch_loss=11.6, batch

Validation:  30%|▎| 222/743 [55:56<2:06:35, 14.58s/batch, batch_loss=11.2, batch

Validation:  30%|▎| 223/743 [55:56<2:05:42, 14.50s/batch, batch_loss=11.2, batch

Validation:  30%|▎| 223/743 [56:11<2:05:42, 14.50s/batch, batch_loss=9.79, batch

Validation:  30%|▎| 224/743 [56:11<2:06:40, 14.64s/batch, batch_loss=9.79, batch

Validation:  30%|▎| 224/743 [56:26<2:06:40, 14.64s/batch, batch_loss=4.93e+3, ba

Validation:  30%|▎| 225/743 [56:26<2:06:37, 14.67s/batch, batch_loss=4.93e+3, ba

Validation:  30%|▎| 225/743 [56:43<2:06:37, 14.67s/batch, batch_loss=15.5, batch

Validation:  30%|▎| 226/743 [56:43<2:11:04, 15.21s/batch, batch_loss=15.5, batch

Validation:  30%|▎| 226/743 [56:57<2:11:04, 15.21s/batch, batch_loss=16, batch_i

Validation:  31%|▎| 227/743 [56:57<2:08:05, 14.89s/batch, batch_loss=16, batch_i

Validation:  31%|▎| 227/743 [57:11<2:08:05, 14.89s/batch, batch_loss=17.4, batch

Validation:  31%|▎| 228/743 [57:11<2:04:51, 14.55s/batch, batch_loss=17.4, batch

Validation:  31%|▎| 228/743 [57:25<2:04:51, 14.55s/batch, batch_loss=14.8, batch

Validation:  31%|▎| 229/743 [57:25<2:04:51, 14.57s/batch, batch_loss=14.8, batch

Validation:  31%|▎| 229/743 [57:40<2:04:51, 14.57s/batch, batch_loss=20.6, batch

Validation:  31%|▎| 230/743 [57:40<2:05:03, 14.63s/batch, batch_loss=20.6, batch

Validation:  31%|▎| 230/743 [57:54<2:05:03, 14.63s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [57:54<2:04:17, 14.56s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [58:09<2:04:17, 14.56s/batch, batch_loss=16.2, batch

Validation:  31%|▎| 232/743 [58:09<2:05:03, 14.68s/batch, batch_loss=16.2, batch

Validation:  31%|▎| 232/743 [58:24<2:05:03, 14.68s/batch, batch_loss=8.49, batch

Validation:  31%|▎| 233/743 [58:24<2:05:51, 14.81s/batch, batch_loss=8.49, batch

Validation:  31%|▎| 233/743 [58:39<2:05:51, 14.81s/batch, batch_loss=12.7, batch

Validation:  31%|▎| 234/743 [58:39<2:05:16, 14.77s/batch, batch_loss=12.7, batch

Validation:  31%|▎| 234/743 [58:54<2:05:16, 14.77s/batch, batch_loss=16.7, batch

Validation:  32%|▎| 235/743 [58:54<2:05:12, 14.79s/batch, batch_loss=16.7, batch

Validation:  32%|▎| 235/743 [59:09<2:05:12, 14.79s/batch, batch_loss=2.78, batch

Validation:  32%|▎| 236/743 [59:09<2:05:40, 14.87s/batch, batch_loss=2.78, batch

Validation:  32%|▎| 236/743 [59:23<2:05:40, 14.87s/batch, batch_loss=19.1, batch

Validation:  32%|▎| 237/743 [59:23<2:03:50, 14.68s/batch, batch_loss=19.1, batch

Validation:  32%|▎| 237/743 [59:37<2:03:50, 14.68s/batch, batch_loss=14.4, batch

Validation:  32%|▎| 238/743 [59:37<2:00:34, 14.33s/batch, batch_loss=14.4, batch

Validation:  32%|▎| 238/743 [59:51<2:00:34, 14.33s/batch, batch_loss=4.5e+3, bat

Validation:  32%|▎| 239/743 [59:51<1:59:02, 14.17s/batch, batch_loss=4.5e+3, bat

Validation:  32%|▎| 239/743 [1:00:05<1:59:02, 14.17s/batch, batch_loss=18.6, bat

Validation:  32%|▎| 240/743 [1:00:05<1:59:17, 14.23s/batch, batch_loss=18.6, bat

Validation:  32%|▎| 240/743 [1:00:20<1:59:17, 14.23s/batch, batch_loss=18, batch

Validation:  32%|▎| 241/743 [1:00:20<2:01:27, 14.52s/batch, batch_loss=18, batch

Validation:  32%|▎| 241/743 [1:00:35<2:01:27, 14.52s/batch, batch_loss=229, batc

Validation:  33%|▎| 242/743 [1:00:35<2:00:56, 14.48s/batch, batch_loss=229, batc

Validation:  33%|▎| 242/743 [1:00:51<2:00:56, 14.48s/batch, batch_loss=8.96, bat

Validation:  33%|▎| 243/743 [1:00:51<2:06:24, 15.17s/batch, batch_loss=8.96, bat

Validation:  33%|▎| 243/743 [1:01:07<2:06:24, 15.17s/batch, batch_loss=14.4, bat

Validation:  33%|▎| 244/743 [1:01:07<2:06:22, 15.20s/batch, batch_loss=14.4, bat

Validation:  33%|▎| 244/743 [1:01:20<2:06:22, 15.20s/batch, batch_loss=17.4, bat

Validation:  33%|▎| 245/743 [1:01:20<2:02:56, 14.81s/batch, batch_loss=17.4, bat

Validation:  33%|▎| 245/743 [1:01:35<2:02:56, 14.81s/batch, batch_loss=6.51, bat

Validation:  33%|▎| 246/743 [1:01:35<2:02:08, 14.75s/batch, batch_loss=6.51, bat

Validation:  33%|▎| 246/743 [1:01:50<2:02:08, 14.75s/batch, batch_loss=14.6, bat

Validation:  33%|▎| 247/743 [1:01:50<2:01:44, 14.73s/batch, batch_loss=14.6, bat

Validation:  33%|▎| 247/743 [1:02:05<2:01:44, 14.73s/batch, batch_loss=33.2, bat

Validation:  33%|▎| 248/743 [1:02:05<2:01:56, 14.78s/batch, batch_loss=33.2, bat

Validation:  33%|▎| 248/743 [1:02:21<2:01:56, 14.78s/batch, batch_loss=12.5, bat

Validation:  34%|▎| 249/743 [1:02:21<2:06:13, 15.33s/batch, batch_loss=12.5, bat

Validation:  34%|▎| 249/743 [1:02:36<2:06:13, 15.33s/batch, batch_loss=20.4, bat

Validation:  34%|▎| 250/743 [1:02:36<2:03:39, 15.05s/batch, batch_loss=20.4, bat

Validation:  34%|▎| 250/743 [1:02:51<2:03:39, 15.05s/batch, batch_loss=18.9, bat

Validation:  34%|▎| 251/743 [1:02:51<2:03:57, 15.12s/batch, batch_loss=18.9, bat

Validation:  34%|▎| 251/743 [1:03:06<2:03:57, 15.12s/batch, batch_loss=22, batch

Validation:  34%|▎| 252/743 [1:03:06<2:03:44, 15.12s/batch, batch_loss=22, batch

Validation:  34%|▎| 252/743 [1:03:20<2:03:44, 15.12s/batch, batch_loss=19.8, bat

Validation:  34%|▎| 253/743 [1:03:20<2:01:44, 14.91s/batch, batch_loss=19.8, bat

Validation:  34%|▎| 253/743 [1:03:35<2:01:44, 14.91s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:03:35<1:59:43, 14.69s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:03:49<1:59:43, 14.69s/batch, batch_loss=2.43e+3, 

Validation:  34%|▎| 255/743 [1:03:49<1:59:24, 14.68s/batch, batch_loss=2.43e+3, 

Validation:  34%|▎| 255/743 [1:04:04<1:59:24, 14.68s/batch, batch_loss=20.4, bat

Validation:  34%|▎| 256/743 [1:04:04<1:58:22, 14.58s/batch, batch_loss=20.4, bat

Validation:  34%|▎| 256/743 [1:04:18<1:58:22, 14.58s/batch, batch_loss=17.1, bat

Validation:  35%|▎| 257/743 [1:04:18<1:57:04, 14.45s/batch, batch_loss=17.1, bat

Validation:  35%|▎| 257/743 [1:04:31<1:57:04, 14.45s/batch, batch_loss=12.4, bat

Validation:  35%|▎| 258/743 [1:04:31<1:54:30, 14.17s/batch, batch_loss=12.4, bat

Validation:  35%|▎| 258/743 [1:04:47<1:54:30, 14.17s/batch, batch_loss=2.93, bat

Validation:  35%|▎| 259/743 [1:04:47<1:56:56, 14.50s/batch, batch_loss=2.93, bat

Validation:  35%|▎| 259/743 [1:05:01<1:56:56, 14.50s/batch, batch_loss=1.66, bat

Validation:  35%|▎| 260/743 [1:05:01<1:57:10, 14.56s/batch, batch_loss=1.66, bat

Validation:  35%|▎| 260/743 [1:05:19<1:57:10, 14.56s/batch, batch_loss=7.29, bat

Validation:  35%|▎| 261/743 [1:05:19<2:03:25, 15.36s/batch, batch_loss=7.29, bat

Validation:  35%|▎| 261/743 [1:05:34<2:03:25, 15.36s/batch, batch_loss=29.5, bat

Validation:  35%|▎| 262/743 [1:05:34<2:02:27, 15.28s/batch, batch_loss=29.5, bat

Validation:  35%|▎| 262/743 [1:05:48<2:02:27, 15.28s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:05:48<2:01:05, 15.14s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:06:03<2:01:05, 15.14s/batch, batch_loss=11.3, bat

Validation:  36%|▎| 264/743 [1:06:03<1:59:41, 14.99s/batch, batch_loss=11.3, bat

Validation:  36%|▎| 264/743 [1:06:17<1:59:41, 14.99s/batch, batch_loss=19.2, bat

Validation:  36%|▎| 265/743 [1:06:17<1:57:39, 14.77s/batch, batch_loss=19.2, bat

Validation:  36%|▎| 265/743 [1:06:32<1:57:39, 14.77s/batch, batch_loss=24.5, bat

Validation:  36%|▎| 266/743 [1:06:32<1:57:44, 14.81s/batch, batch_loss=24.5, bat

Validation:  36%|▎| 266/743 [1:06:47<1:57:44, 14.81s/batch, batch_loss=21.7, bat

Validation:  36%|▎| 267/743 [1:06:47<1:57:38, 14.83s/batch, batch_loss=21.7, bat

Validation:  36%|▎| 267/743 [1:07:02<1:57:38, 14.83s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:07:02<1:58:34, 14.98s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:07:17<1:58:34, 14.98s/batch, batch_loss=41.3, bat

Validation:  36%|▎| 269/743 [1:07:17<1:58:01, 14.94s/batch, batch_loss=41.3, bat

Validation:  36%|▎| 269/743 [1:07:31<1:58:01, 14.94s/batch, batch_loss=27.7, bat

Validation:  36%|▎| 270/743 [1:07:31<1:55:43, 14.68s/batch, batch_loss=27.7, bat

Validation:  36%|▎| 270/743 [1:07:45<1:55:43, 14.68s/batch, batch_loss=27.8, bat

Validation:  36%|▎| 271/743 [1:07:45<1:54:10, 14.51s/batch, batch_loss=27.8, bat

Validation:  36%|▎| 271/743 [1:07:58<1:54:10, 14.51s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:07:58<1:49:40, 13.97s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:08:13<1:49:40, 13.97s/batch, batch_loss=16.3, bat

Validation:  37%|▎| 273/743 [1:08:13<1:51:21, 14.22s/batch, batch_loss=16.3, bat

Validation:  37%|▎| 273/743 [1:08:27<1:51:21, 14.22s/batch, batch_loss=20.9, bat

Validation:  37%|▎| 274/743 [1:08:27<1:51:39, 14.28s/batch, batch_loss=20.9, bat

Validation:  37%|▎| 274/743 [1:08:42<1:51:39, 14.28s/batch, batch_loss=17.2, bat

Validation:  37%|▎| 275/743 [1:08:42<1:52:07, 14.38s/batch, batch_loss=17.2, bat

Validation:  37%|▎| 275/743 [1:08:57<1:52:07, 14.38s/batch, batch_loss=14, batch

Validation:  37%|▎| 276/743 [1:08:57<1:53:45, 14.62s/batch, batch_loss=14, batch

Validation:  37%|▎| 276/743 [1:09:11<1:53:45, 14.62s/batch, batch_loss=24.2, bat

Validation:  37%|▎| 277/743 [1:09:11<1:52:42, 14.51s/batch, batch_loss=24.2, bat

Validation:  37%|▎| 277/743 [1:09:26<1:52:42, 14.51s/batch, batch_loss=21.5, bat

Validation:  37%|▎| 278/743 [1:09:26<1:53:00, 14.58s/batch, batch_loss=21.5, bat

Validation:  37%|▎| 278/743 [1:09:41<1:53:00, 14.58s/batch, batch_loss=7.8, batc

Validation:  38%|▍| 279/743 [1:09:41<1:54:07, 14.76s/batch, batch_loss=7.8, batc

Validation:  38%|▍| 279/743 [1:09:56<1:54:07, 14.76s/batch, batch_loss=14.4, bat

Validation:  38%|▍| 280/743 [1:09:56<1:53:40, 14.73s/batch, batch_loss=14.4, bat

Validation:  38%|▍| 280/743 [1:10:10<1:53:40, 14.73s/batch, batch_loss=17.2, bat

Validation:  38%|▍| 281/743 [1:10:10<1:51:45, 14.51s/batch, batch_loss=17.2, bat

Validation:  38%|▍| 281/743 [1:10:24<1:51:45, 14.51s/batch, batch_loss=24.7, bat

Validation:  38%|▍| 282/743 [1:10:24<1:51:10, 14.47s/batch, batch_loss=24.7, bat

Validation:  38%|▍| 282/743 [1:10:39<1:51:10, 14.47s/batch, batch_loss=16.8, bat

Validation:  38%|▍| 283/743 [1:10:39<1:50:53, 14.46s/batch, batch_loss=16.8, bat

Validation:  38%|▍| 283/743 [1:10:54<1:50:53, 14.46s/batch, batch_loss=16.5, bat

Validation:  38%|▍| 284/743 [1:10:54<1:51:12, 14.54s/batch, batch_loss=16.5, bat

Validation:  38%|▍| 284/743 [1:11:07<1:51:12, 14.54s/batch, batch_loss=13.9, bat

Validation:  38%|▍| 285/743 [1:11:07<1:49:23, 14.33s/batch, batch_loss=13.9, bat

Validation:  38%|▍| 285/743 [1:11:22<1:49:23, 14.33s/batch, batch_loss=15.6, bat

Validation:  38%|▍| 286/743 [1:11:22<1:49:17, 14.35s/batch, batch_loss=15.6, bat

Validation:  38%|▍| 286/743 [1:11:37<1:49:17, 14.35s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:11:37<1:50:01, 14.48s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:11:52<1:50:01, 14.48s/batch, batch_loss=23.7, bat

Validation:  39%|▍| 288/743 [1:11:52<1:50:55, 14.63s/batch, batch_loss=23.7, bat

Validation:  39%|▍| 288/743 [1:12:07<1:50:55, 14.63s/batch, batch_loss=22, batch

Validation:  39%|▍| 289/743 [1:12:07<1:51:30, 14.74s/batch, batch_loss=22, batch

Validation:  39%|▍| 289/743 [1:12:21<1:51:30, 14.74s/batch, batch_loss=489, batc

Validation:  39%|▍| 290/743 [1:12:21<1:51:12, 14.73s/batch, batch_loss=489, batc

Validation:  39%|▍| 290/743 [1:12:35<1:51:12, 14.73s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:12:35<1:48:13, 14.37s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:12:49<1:48:13, 14.37s/batch, batch_loss=1.2e+3, b

Validation:  39%|▍| 292/743 [1:12:49<1:48:23, 14.42s/batch, batch_loss=1.2e+3, b

Validation:  39%|▍| 292/743 [1:13:04<1:48:23, 14.42s/batch, batch_loss=27, batch

Validation:  39%|▍| 293/743 [1:13:04<1:48:04, 14.41s/batch, batch_loss=27, batch

Validation:  39%|▍| 293/743 [1:13:18<1:48:04, 14.41s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:13:18<1:46:45, 14.27s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:13:32<1:46:45, 14.27s/batch, batch_loss=17.6, bat

Validation:  40%|▍| 295/743 [1:13:32<1:47:22, 14.38s/batch, batch_loss=17.6, bat

Validation:  40%|▍| 295/743 [1:13:47<1:47:22, 14.38s/batch, batch_loss=16, batch

Validation:  40%|▍| 296/743 [1:13:47<1:46:59, 14.36s/batch, batch_loss=16, batch

Validation:  40%|▍| 296/743 [1:14:01<1:46:59, 14.36s/batch, batch_loss=12.3, bat

Validation:  40%|▍| 297/743 [1:14:01<1:46:18, 14.30s/batch, batch_loss=12.3, bat

Validation:  40%|▍| 297/743 [1:14:17<1:46:18, 14.30s/batch, batch_loss=23.3, bat

Validation:  40%|▍| 298/743 [1:14:17<1:50:48, 14.94s/batch, batch_loss=23.3, bat

Validation:  40%|▍| 298/743 [1:14:31<1:50:48, 14.94s/batch, batch_loss=28.7, bat

Validation:  40%|▍| 299/743 [1:14:31<1:48:51, 14.71s/batch, batch_loss=28.7, bat

Validation:  40%|▍| 299/743 [1:14:45<1:48:51, 14.71s/batch, batch_loss=30, batch

Validation:  40%|▍| 300/743 [1:14:45<1:46:56, 14.48s/batch, batch_loss=30, batch

Validation:  40%|▍| 300/743 [1:14:59<1:46:56, 14.48s/batch, batch_loss=841, batc

Validation:  41%|▍| 301/743 [1:14:59<1:45:36, 14.34s/batch, batch_loss=841, batc

Validation:  41%|▍| 301/743 [1:15:14<1:45:36, 14.34s/batch, batch_loss=9.9, batc

Validation:  41%|▍| 302/743 [1:15:14<1:46:04, 14.43s/batch, batch_loss=9.9, batc

Validation:  41%|▍| 302/743 [1:15:27<1:46:04, 14.43s/batch, batch_loss=15.1, bat

Validation:  41%|▍| 303/743 [1:15:27<1:42:24, 13.97s/batch, batch_loss=15.1, bat

Validation:  41%|▍| 303/743 [1:15:40<1:42:24, 13.97s/batch, batch_loss=16.6, bat

Validation:  41%|▍| 304/743 [1:15:40<1:39:24, 13.59s/batch, batch_loss=16.6, bat

Validation:  41%|▍| 304/743 [1:15:55<1:39:24, 13.59s/batch, batch_loss=12.8, bat

Validation:  41%|▍| 305/743 [1:15:55<1:42:26, 14.03s/batch, batch_loss=12.8, bat

Validation:  41%|▍| 305/743 [1:16:12<1:42:26, 14.03s/batch, batch_loss=20.2, bat

Validation:  41%|▍| 306/743 [1:16:12<1:49:37, 15.05s/batch, batch_loss=20.2, bat

Validation:  41%|▍| 306/743 [1:16:26<1:49:37, 15.05s/batch, batch_loss=18.1, bat

Validation:  41%|▍| 307/743 [1:16:26<1:47:08, 14.74s/batch, batch_loss=18.1, bat

Validation:  41%|▍| 307/743 [1:16:41<1:47:08, 14.74s/batch, batch_loss=887, batc

Validation:  41%|▍| 308/743 [1:16:41<1:46:25, 14.68s/batch, batch_loss=887, batc

Validation:  41%|▍| 308/743 [1:16:55<1:46:25, 14.68s/batch, batch_loss=27.2, bat

Validation:  42%|▍| 309/743 [1:16:55<1:44:58, 14.51s/batch, batch_loss=27.2, bat

Validation:  42%|▍| 309/743 [1:17:10<1:44:58, 14.51s/batch, batch_loss=17.5, bat

Validation:  42%|▍| 310/743 [1:17:10<1:46:32, 14.76s/batch, batch_loss=17.5, bat

Validation:  42%|▍| 310/743 [1:17:25<1:46:32, 14.76s/batch, batch_loss=18.3, bat

Validation:  42%|▍| 311/743 [1:17:25<1:46:55, 14.85s/batch, batch_loss=18.3, bat

Validation:  42%|▍| 311/743 [1:17:40<1:46:55, 14.85s/batch, batch_loss=17.5, bat

Validation:  42%|▍| 312/743 [1:17:40<1:46:51, 14.88s/batch, batch_loss=17.5, bat

Validation:  42%|▍| 312/743 [1:17:56<1:46:51, 14.88s/batch, batch_loss=7.75, bat

Validation:  42%|▍| 313/743 [1:17:56<1:49:16, 15.25s/batch, batch_loss=7.75, bat

Validation:  42%|▍| 313/743 [1:18:15<1:49:16, 15.25s/batch, batch_loss=8.98, bat

Validation:  42%|▍| 314/743 [1:18:15<1:56:21, 16.27s/batch, batch_loss=8.98, bat

Validation:  42%|▍| 314/743 [1:18:30<1:56:21, 16.27s/batch, batch_loss=20, batch

Validation:  42%|▍| 315/743 [1:18:30<1:52:39, 15.79s/batch, batch_loss=20, batch

Validation:  42%|▍| 315/743 [1:18:44<1:52:39, 15.79s/batch, batch_loss=18.7, bat

Validation:  43%|▍| 316/743 [1:18:44<1:50:02, 15.46s/batch, batch_loss=18.7, bat

Validation:  43%|▍| 316/743 [1:18:59<1:50:02, 15.46s/batch, batch_loss=18.7, bat

Validation:  43%|▍| 317/743 [1:18:59<1:48:50, 15.33s/batch, batch_loss=18.7, bat

Validation:  43%|▍| 317/743 [1:19:13<1:48:50, 15.33s/batch, batch_loss=15.4, bat

Validation:  43%|▍| 318/743 [1:19:13<1:44:56, 14.82s/batch, batch_loss=15.4, bat

Validation:  43%|▍| 318/743 [1:19:27<1:44:56, 14.82s/batch, batch_loss=21.8, bat

Validation:  43%|▍| 319/743 [1:19:27<1:44:00, 14.72s/batch, batch_loss=21.8, bat

Validation:  43%|▍| 319/743 [1:19:42<1:44:00, 14.72s/batch, batch_loss=18.5, bat

Validation:  43%|▍| 320/743 [1:19:42<1:43:51, 14.73s/batch, batch_loss=18.5, bat

Validation:  43%|▍| 320/743 [1:19:57<1:43:51, 14.73s/batch, batch_loss=16.2, bat

Validation:  43%|▍| 321/743 [1:19:57<1:43:48, 14.76s/batch, batch_loss=16.2, bat

Validation:  43%|▍| 321/743 [1:20:12<1:43:48, 14.76s/batch, batch_loss=16.5, bat

Validation:  43%|▍| 322/743 [1:20:12<1:44:31, 14.90s/batch, batch_loss=16.5, bat

Validation:  43%|▍| 322/743 [1:20:29<1:44:31, 14.90s/batch, batch_loss=19.9, bat

Validation:  43%|▍| 323/743 [1:20:29<1:48:26, 15.49s/batch, batch_loss=19.9, bat

Validation:  43%|▍| 323/743 [1:20:46<1:48:26, 15.49s/batch, batch_loss=295, batc

Validation:  44%|▍| 324/743 [1:20:46<1:51:43, 16.00s/batch, batch_loss=295, batc

Validation:  44%|▍| 324/743 [1:21:01<1:51:43, 16.00s/batch, batch_loss=21.6, bat

Validation:  44%|▍| 325/743 [1:21:01<1:49:45, 15.75s/batch, batch_loss=21.6, bat

Validation:  44%|▍| 325/743 [1:21:17<1:49:45, 15.75s/batch, batch_loss=17.7, bat

Validation:  44%|▍| 326/743 [1:21:17<1:48:27, 15.61s/batch, batch_loss=17.7, bat

Validation:  44%|▍| 326/743 [1:21:31<1:48:27, 15.61s/batch, batch_loss=20.1, bat

Validation:  44%|▍| 327/743 [1:21:31<1:45:53, 15.27s/batch, batch_loss=20.1, bat

Validation:  44%|▍| 327/743 [1:21:46<1:45:53, 15.27s/batch, batch_loss=18, batch

Validation:  44%|▍| 328/743 [1:21:46<1:45:26, 15.25s/batch, batch_loss=18, batch

Validation:  44%|▍| 328/743 [1:22:02<1:45:26, 15.25s/batch, batch_loss=6.9, batc

Validation:  44%|▍| 329/743 [1:22:02<1:46:45, 15.47s/batch, batch_loss=6.9, batc

Validation:  44%|▍| 329/743 [1:22:17<1:46:45, 15.47s/batch, batch_loss=15.4, bat

Validation:  44%|▍| 330/743 [1:22:17<1:45:31, 15.33s/batch, batch_loss=15.4, bat

Validation:  44%|▍| 330/743 [1:22:32<1:45:31, 15.33s/batch, batch_loss=23.8, bat

Validation:  45%|▍| 331/743 [1:22:32<1:43:54, 15.13s/batch, batch_loss=23.8, bat

Validation:  45%|▍| 331/743 [1:22:47<1:43:54, 15.13s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:22:47<1:43:49, 15.16s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:23:02<1:43:49, 15.16s/batch, batch_loss=31.8, bat

Validation:  45%|▍| 333/743 [1:23:02<1:43:13, 15.11s/batch, batch_loss=31.8, bat

Validation:  45%|▍| 333/743 [1:23:19<1:43:13, 15.11s/batch, batch_loss=22.6, bat

Validation:  45%|▍| 334/743 [1:23:19<1:45:29, 15.47s/batch, batch_loss=22.6, bat

Validation:  45%|▍| 334/743 [1:23:32<1:45:29, 15.47s/batch, batch_loss=33.3, bat

Validation:  45%|▍| 335/743 [1:23:32<1:41:14, 14.89s/batch, batch_loss=33.3, bat

Validation:  45%|▍| 335/743 [1:23:47<1:41:14, 14.89s/batch, batch_loss=12.8, bat

Validation:  45%|▍| 336/743 [1:23:47<1:40:35, 14.83s/batch, batch_loss=12.8, bat

Validation:  45%|▍| 336/743 [1:24:01<1:40:35, 14.83s/batch, batch_loss=20.1, bat

Validation:  45%|▍| 337/743 [1:24:01<1:39:21, 14.68s/batch, batch_loss=20.1, bat

Validation:  45%|▍| 337/743 [1:24:16<1:39:21, 14.68s/batch, batch_loss=28.9, bat

Validation:  45%|▍| 338/743 [1:24:16<1:39:32, 14.75s/batch, batch_loss=28.9, bat

Validation:  45%|▍| 338/743 [1:24:30<1:39:32, 14.75s/batch, batch_loss=28, batch

Validation:  46%|▍| 339/743 [1:24:30<1:38:36, 14.65s/batch, batch_loss=28, batch

Validation:  46%|▍| 339/743 [1:24:46<1:38:36, 14.65s/batch, batch_loss=24.5, bat

Validation:  46%|▍| 340/743 [1:24:46<1:39:51, 14.87s/batch, batch_loss=24.5, bat

Validation:  46%|▍| 340/743 [1:25:03<1:39:51, 14.87s/batch, batch_loss=15.5, bat

Validation:  46%|▍| 341/743 [1:25:03<1:44:12, 15.55s/batch, batch_loss=15.5, bat

Validation:  46%|▍| 341/743 [1:25:18<1:44:12, 15.55s/batch, batch_loss=21.1, bat

Validation:  46%|▍| 342/743 [1:25:18<1:42:10, 15.29s/batch, batch_loss=21.1, bat

Validation:  46%|▍| 342/743 [1:25:32<1:42:10, 15.29s/batch, batch_loss=26.2, bat

Validation:  46%|▍| 343/743 [1:25:32<1:40:17, 15.04s/batch, batch_loss=26.2, bat

Validation:  46%|▍| 343/743 [1:25:47<1:40:17, 15.04s/batch, batch_loss=21.3, bat

Validation:  46%|▍| 344/743 [1:25:47<1:39:23, 14.95s/batch, batch_loss=21.3, bat

Validation:  46%|▍| 344/743 [1:26:02<1:39:23, 14.95s/batch, batch_loss=18.3, bat

Validation:  46%|▍| 345/743 [1:26:02<1:38:50, 14.90s/batch, batch_loss=18.3, bat

Validation:  46%|▍| 345/743 [1:26:17<1:38:50, 14.90s/batch, batch_loss=25.8, bat

Validation:  47%|▍| 346/743 [1:26:17<1:38:43, 14.92s/batch, batch_loss=25.8, bat

Validation:  47%|▍| 346/743 [1:26:31<1:38:43, 14.92s/batch, batch_loss=18.4, bat

Validation:  47%|▍| 347/743 [1:26:31<1:38:19, 14.90s/batch, batch_loss=18.4, bat

Validation:  47%|▍| 347/743 [1:26:45<1:38:19, 14.90s/batch, batch_loss=27.8, bat

Validation:  47%|▍| 348/743 [1:26:45<1:36:02, 14.59s/batch, batch_loss=27.8, bat

Validation:  47%|▍| 348/743 [1:27:00<1:36:02, 14.59s/batch, batch_loss=31, batch

Validation:  47%|▍| 349/743 [1:27:00<1:36:16, 14.66s/batch, batch_loss=31, batch

Validation:  47%|▍| 349/743 [1:27:15<1:36:16, 14.66s/batch, batch_loss=22.2, bat

Validation:  47%|▍| 350/743 [1:27:15<1:37:12, 14.84s/batch, batch_loss=22.2, bat

Validation:  47%|▍| 350/743 [1:27:31<1:37:12, 14.84s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:27:31<1:38:31, 15.08s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:27:46<1:38:31, 15.08s/batch, batch_loss=27.5, bat

Validation:  47%|▍| 352/743 [1:27:46<1:39:06, 15.21s/batch, batch_loss=27.5, bat

Validation:  47%|▍| 352/743 [1:28:01<1:39:06, 15.21s/batch, batch_loss=13.5, bat

Validation:  48%|▍| 353/743 [1:28:01<1:37:52, 15.06s/batch, batch_loss=13.5, bat

Validation:  48%|▍| 353/743 [1:28:16<1:37:52, 15.06s/batch, batch_loss=19.8, bat

Validation:  48%|▍| 354/743 [1:28:16<1:37:21, 15.02s/batch, batch_loss=19.8, bat

Validation:  48%|▍| 354/743 [1:28:31<1:37:21, 15.02s/batch, batch_loss=23.4, bat

Validation:  48%|▍| 355/743 [1:28:31<1:37:10, 15.03s/batch, batch_loss=23.4, bat

Validation:  48%|▍| 355/743 [1:28:46<1:37:10, 15.03s/batch, batch_loss=32.1, bat

Validation:  48%|▍| 356/743 [1:28:46<1:36:54, 15.02s/batch, batch_loss=32.1, bat

Validation:  48%|▍| 356/743 [1:29:03<1:36:54, 15.02s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:29:03<1:39:44, 15.50s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:29:18<1:39:44, 15.50s/batch, batch_loss=14.8, bat

Validation:  48%|▍| 358/743 [1:29:18<1:38:54, 15.41s/batch, batch_loss=14.8, bat

Validation:  48%|▍| 358/743 [1:29:33<1:38:54, 15.41s/batch, batch_loss=10.6, bat

Validation:  48%|▍| 359/743 [1:29:33<1:37:26, 15.22s/batch, batch_loss=10.6, bat

Validation:  48%|▍| 359/743 [1:29:48<1:37:26, 15.22s/batch, batch_loss=23, batch

Validation:  48%|▍| 360/743 [1:29:48<1:36:25, 15.10s/batch, batch_loss=23, batch

Validation:  48%|▍| 360/743 [1:30:02<1:36:25, 15.10s/batch, batch_loss=13.8, bat

Validation:  49%|▍| 361/743 [1:30:02<1:35:31, 15.00s/batch, batch_loss=13.8, bat

Validation:  49%|▍| 361/743 [1:30:17<1:35:31, 15.00s/batch, batch_loss=19.8, bat

Validation:  49%|▍| 362/743 [1:30:17<1:35:00, 14.96s/batch, batch_loss=19.8, bat

Validation:  49%|▍| 362/743 [1:30:32<1:35:00, 14.96s/batch, batch_loss=24.9, bat

Validation:  49%|▍| 363/743 [1:30:32<1:34:53, 14.98s/batch, batch_loss=24.9, bat

Validation:  49%|▍| 363/743 [1:30:48<1:34:53, 14.98s/batch, batch_loss=21.6, bat

Validation:  49%|▍| 364/743 [1:30:48<1:35:23, 15.10s/batch, batch_loss=21.6, bat

Validation:  49%|▍| 364/743 [1:31:05<1:35:23, 15.10s/batch, batch_loss=18, batch

Validation:  49%|▍| 365/743 [1:31:05<1:39:00, 15.72s/batch, batch_loss=18, batch

Validation:  49%|▍| 365/743 [1:31:20<1:39:00, 15.72s/batch, batch_loss=13.3, bat

Validation:  49%|▍| 366/743 [1:31:20<1:37:32, 15.52s/batch, batch_loss=13.3, bat

Validation:  49%|▍| 366/743 [1:31:36<1:37:32, 15.52s/batch, batch_loss=18.1, bat

Validation:  49%|▍| 367/743 [1:31:36<1:38:16, 15.68s/batch, batch_loss=18.1, bat

Validation:  49%|▍| 367/743 [1:31:52<1:38:16, 15.68s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:31:52<1:38:00, 15.68s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:32:08<1:38:00, 15.68s/batch, batch_loss=16.7, bat

Validation:  50%|▍| 369/743 [1:32:08<1:38:58, 15.88s/batch, batch_loss=16.7, bat

Validation:  50%|▍| 369/743 [1:32:23<1:38:58, 15.88s/batch, batch_loss=29.5, bat

Validation:  50%|▍| 370/743 [1:32:23<1:36:22, 15.50s/batch, batch_loss=29.5, bat

Validation:  50%|▍| 370/743 [1:32:38<1:36:22, 15.50s/batch, batch_loss=21.2, bat

Validation:  50%|▍| 371/743 [1:32:38<1:35:58, 15.48s/batch, batch_loss=21.2, bat

Validation:  50%|▍| 371/743 [1:32:52<1:35:58, 15.48s/batch, batch_loss=18, batch

Validation:  50%|▌| 372/743 [1:32:52<1:32:46, 15.00s/batch, batch_loss=18, batch

Validation:  50%|▌| 372/743 [1:33:07<1:32:46, 15.00s/batch, batch_loss=26.6, bat

Validation:  50%|▌| 373/743 [1:33:07<1:32:46, 15.05s/batch, batch_loss=26.6, bat

Validation:  50%|▌| 373/743 [1:33:22<1:32:46, 15.05s/batch, batch_loss=14, batch

Validation:  50%|▌| 374/743 [1:33:22<1:32:42, 15.08s/batch, batch_loss=14, batch

Validation:  50%|▌| 374/743 [1:33:38<1:32:42, 15.08s/batch, batch_loss=8.93, bat

Validation:  50%|▌| 375/743 [1:33:38<1:33:06, 15.18s/batch, batch_loss=8.93, bat

Validation:  50%|▌| 375/743 [1:33:52<1:33:06, 15.18s/batch, batch_loss=23.4, bat

Validation:  51%|▌| 376/743 [1:33:52<1:31:32, 14.97s/batch, batch_loss=23.4, bat

Validation:  51%|▌| 376/743 [1:34:06<1:31:32, 14.97s/batch, batch_loss=10.7, bat

Validation:  51%|▌| 377/743 [1:34:06<1:29:54, 14.74s/batch, batch_loss=10.7, bat

Validation:  51%|▌| 377/743 [1:34:21<1:29:54, 14.74s/batch, batch_loss=18.7, bat

Validation:  51%|▌| 378/743 [1:34:21<1:30:24, 14.86s/batch, batch_loss=18.7, bat

Validation:  51%|▌| 378/743 [1:34:37<1:30:24, 14.86s/batch, batch_loss=7.43, bat

Validation:  51%|▌| 379/743 [1:34:37<1:32:16, 15.21s/batch, batch_loss=7.43, bat

Validation:  51%|▌| 379/743 [1:34:52<1:32:16, 15.21s/batch, batch_loss=7.39, bat

Validation:  51%|▌| 380/743 [1:34:52<1:31:08, 15.06s/batch, batch_loss=7.39, bat

Validation:  51%|▌| 380/743 [1:35:07<1:31:08, 15.06s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:35:07<1:29:48, 14.89s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:35:21<1:29:48, 14.89s/batch, batch_loss=911, batc

Validation:  51%|▌| 382/743 [1:35:21<1:29:15, 14.83s/batch, batch_loss=911, batc

Validation:  51%|▌| 382/743 [1:35:36<1:29:15, 14.83s/batch, batch_loss=210, batc

Validation:  52%|▌| 383/743 [1:35:36<1:28:48, 14.80s/batch, batch_loss=210, batc

Validation:  52%|▌| 383/743 [1:35:51<1:28:48, 14.80s/batch, batch_loss=278, batc

Validation:  52%|▌| 384/743 [1:35:51<1:29:27, 14.95s/batch, batch_loss=278, batc

Validation:  52%|▌| 384/743 [1:36:06<1:29:27, 14.95s/batch, batch_loss=18.5, bat

Validation:  52%|▌| 385/743 [1:36:06<1:29:08, 14.94s/batch, batch_loss=18.5, bat

Validation:  52%|▌| 385/743 [1:36:20<1:29:08, 14.94s/batch, batch_loss=10.7, bat

Validation:  52%|▌| 386/743 [1:36:20<1:27:26, 14.70s/batch, batch_loss=10.7, bat

Validation:  52%|▌| 386/743 [1:36:37<1:27:26, 14.70s/batch, batch_loss=7.63, bat

Validation:  52%|▌| 387/743 [1:36:37<1:30:33, 15.26s/batch, batch_loss=7.63, bat

Validation:  52%|▌| 387/743 [1:36:52<1:30:33, 15.26s/batch, batch_loss=17.5, bat

Validation:  52%|▌| 388/743 [1:36:52<1:29:53, 15.19s/batch, batch_loss=17.5, bat

Validation:  52%|▌| 388/743 [1:37:07<1:29:53, 15.19s/batch, batch_loss=14, batch

Validation:  52%|▌| 389/743 [1:37:07<1:29:18, 15.14s/batch, batch_loss=14, batch

Validation:  52%|▌| 389/743 [1:37:22<1:29:18, 15.14s/batch, batch_loss=18.1, bat

Validation:  52%|▌| 390/743 [1:37:22<1:28:49, 15.10s/batch, batch_loss=18.1, bat

Validation:  52%|▌| 390/743 [1:37:37<1:28:49, 15.10s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 391/743 [1:37:37<1:27:44, 14.96s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 391/743 [1:37:51<1:27:44, 14.96s/batch, batch_loss=16.1, bat

Validation:  53%|▌| 392/743 [1:37:51<1:26:46, 14.83s/batch, batch_loss=16.1, bat

Validation:  53%|▌| 392/743 [1:38:07<1:26:46, 14.83s/batch, batch_loss=17.2, bat

Validation:  53%|▌| 393/743 [1:38:07<1:28:14, 15.13s/batch, batch_loss=17.2, bat

Validation:  53%|▌| 393/743 [1:38:22<1:28:14, 15.13s/batch, batch_loss=17, batch

Validation:  53%|▌| 394/743 [1:38:22<1:27:16, 15.01s/batch, batch_loss=17, batch

Validation:  53%|▌| 394/743 [1:38:39<1:27:16, 15.01s/batch, batch_loss=12.2, bat

Validation:  53%|▌| 395/743 [1:38:39<1:31:22, 15.76s/batch, batch_loss=12.2, bat

Validation:  53%|▌| 395/743 [1:38:54<1:31:22, 15.76s/batch, batch_loss=16.9, bat

Validation:  53%|▌| 396/743 [1:38:54<1:30:05, 15.58s/batch, batch_loss=16.9, bat

Validation:  53%|▌| 396/743 [1:39:09<1:30:05, 15.58s/batch, batch_loss=10.2, bat

Validation:  53%|▌| 397/743 [1:39:09<1:28:48, 15.40s/batch, batch_loss=10.2, bat

Validation:  53%|▌| 397/743 [1:39:24<1:28:48, 15.40s/batch, batch_loss=21.8, bat

Validation:  54%|▌| 398/743 [1:39:24<1:26:59, 15.13s/batch, batch_loss=21.8, bat

Validation:  54%|▌| 398/743 [1:39:38<1:26:59, 15.13s/batch, batch_loss=13.7, bat

Validation:  54%|▌| 399/743 [1:39:38<1:25:17, 14.88s/batch, batch_loss=13.7, bat

Validation:  54%|▌| 399/743 [1:39:53<1:25:17, 14.88s/batch, batch_loss=22.4, bat

Validation:  54%|▌| 400/743 [1:39:53<1:25:41, 14.99s/batch, batch_loss=22.4, bat

Validation:  54%|▌| 400/743 [1:40:09<1:25:41, 14.99s/batch, batch_loss=18.7, bat

Validation:  54%|▌| 401/743 [1:40:09<1:25:36, 15.02s/batch, batch_loss=18.7, bat

Validation:  54%|▌| 401/743 [1:40:24<1:25:36, 15.02s/batch, batch_loss=6.15, bat

Validation:  54%|▌| 402/743 [1:40:24<1:25:48, 15.10s/batch, batch_loss=6.15, bat

Validation:  54%|▌| 402/743 [1:40:38<1:25:48, 15.10s/batch, batch_loss=17.2, bat

Validation:  54%|▌| 403/743 [1:40:38<1:24:26, 14.90s/batch, batch_loss=17.2, bat

Validation:  54%|▌| 403/743 [1:40:56<1:24:26, 14.90s/batch, batch_loss=13.7, bat

Validation:  54%|▌| 404/743 [1:40:56<1:28:36, 15.68s/batch, batch_loss=13.7, bat

Validation:  54%|▌| 404/743 [1:41:11<1:28:36, 15.68s/batch, batch_loss=9.59, bat

Validation:  55%|▌| 405/743 [1:41:11<1:26:50, 15.42s/batch, batch_loss=9.59, bat

Validation:  55%|▌| 405/743 [1:41:25<1:26:50, 15.42s/batch, batch_loss=11.4, bat

Validation:  55%|▌| 406/743 [1:41:25<1:24:20, 15.02s/batch, batch_loss=11.4, bat

Validation:  55%|▌| 406/743 [1:41:39<1:24:20, 15.02s/batch, batch_loss=16.6, bat

Validation:  55%|▌| 407/743 [1:41:39<1:23:19, 14.88s/batch, batch_loss=16.6, bat

Validation:  55%|▌| 407/743 [1:41:54<1:23:19, 14.88s/batch, batch_loss=21.8, bat

Validation:  55%|▌| 408/743 [1:41:54<1:22:39, 14.80s/batch, batch_loss=21.8, bat

Validation:  55%|▌| 408/743 [1:42:08<1:22:39, 14.80s/batch, batch_loss=11.2, bat

Validation:  55%|▌| 409/743 [1:42:08<1:21:21, 14.62s/batch, batch_loss=11.2, bat

Validation:  55%|▌| 409/743 [1:42:22<1:21:21, 14.62s/batch, batch_loss=15.2, bat

Validation:  55%|▌| 410/743 [1:42:22<1:20:01, 14.42s/batch, batch_loss=15.2, bat

Validation:  55%|▌| 410/743 [1:42:36<1:20:01, 14.42s/batch, batch_loss=19.8, bat

Validation:  55%|▌| 411/743 [1:42:36<1:19:46, 14.42s/batch, batch_loss=19.8, bat

Validation:  55%|▌| 411/743 [1:42:52<1:19:46, 14.42s/batch, batch_loss=17.1, bat

Validation:  55%|▌| 412/743 [1:42:52<1:21:54, 14.85s/batch, batch_loss=17.1, bat

Validation:  55%|▌| 412/743 [1:43:06<1:21:54, 14.85s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [1:43:06<1:19:16, 14.41s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [1:43:20<1:19:16, 14.41s/batch, batch_loss=25.1, bat

Validation:  56%|▌| 414/743 [1:43:20<1:18:57, 14.40s/batch, batch_loss=25.1, bat

Validation:  56%|▌| 414/743 [1:43:35<1:18:57, 14.40s/batch, batch_loss=22.3, bat

Validation:  56%|▌| 415/743 [1:43:35<1:19:21, 14.52s/batch, batch_loss=22.3, bat

Validation:  56%|▌| 415/743 [1:43:50<1:19:21, 14.52s/batch, batch_loss=6.48e+3, 

Validation:  56%|▌| 416/743 [1:43:50<1:19:31, 14.59s/batch, batch_loss=6.48e+3, 

Validation:  56%|▌| 416/743 [1:44:04<1:19:31, 14.59s/batch, batch_loss=16.3, bat

Validation:  56%|▌| 417/743 [1:44:04<1:19:25, 14.62s/batch, batch_loss=16.3, bat

Validation:  56%|▌| 417/743 [1:44:21<1:19:25, 14.62s/batch, batch_loss=14, batch

Validation:  56%|▌| 418/743 [1:44:21<1:22:57, 15.32s/batch, batch_loss=14, batch

Validation:  56%|▌| 418/743 [1:44:36<1:22:57, 15.32s/batch, batch_loss=15, batch

Validation:  56%|▌| 419/743 [1:44:36<1:22:37, 15.30s/batch, batch_loss=15, batch

Validation:  56%|▌| 419/743 [1:44:51<1:22:37, 15.30s/batch, batch_loss=14.4, bat

Validation:  57%|▌| 420/743 [1:44:51<1:21:18, 15.10s/batch, batch_loss=14.4, bat

Validation:  57%|▌| 420/743 [1:45:06<1:21:18, 15.10s/batch, batch_loss=28.1, bat

Validation:  57%|▌| 421/743 [1:45:06<1:20:27, 14.99s/batch, batch_loss=28.1, bat

Validation:  57%|▌| 421/743 [1:45:20<1:20:27, 14.99s/batch, batch_loss=8.45, bat

Validation:  57%|▌| 422/743 [1:45:20<1:19:20, 14.83s/batch, batch_loss=8.45, bat

Validation:  57%|▌| 422/743 [1:45:34<1:19:20, 14.83s/batch, batch_loss=17.2, bat

Validation:  57%|▌| 423/743 [1:45:34<1:17:22, 14.51s/batch, batch_loss=17.2, bat

Validation:  57%|▌| 423/743 [1:45:48<1:17:22, 14.51s/batch, batch_loss=323, batc

Validation:  57%|▌| 424/743 [1:45:48<1:16:11, 14.33s/batch, batch_loss=323, batc

Validation:  57%|▌| 424/743 [1:46:03<1:16:11, 14.33s/batch, batch_loss=17.5, bat

Validation:  57%|▌| 425/743 [1:46:03<1:17:08, 14.56s/batch, batch_loss=17.5, bat

Validation:  57%|▌| 425/743 [1:46:17<1:17:08, 14.56s/batch, batch_loss=20.4, bat

Validation:  57%|▌| 426/743 [1:46:17<1:16:15, 14.43s/batch, batch_loss=20.4, bat

Validation:  57%|▌| 426/743 [1:46:33<1:16:15, 14.43s/batch, batch_loss=18.7, bat

Validation:  57%|▌| 427/743 [1:46:33<1:18:19, 14.87s/batch, batch_loss=18.7, bat

Validation:  57%|▌| 427/743 [1:46:49<1:18:19, 14.87s/batch, batch_loss=5.29e+3, 

Validation:  58%|▌| 428/743 [1:46:49<1:19:48, 15.20s/batch, batch_loss=5.29e+3, 

Validation:  58%|▌| 428/743 [1:47:04<1:19:48, 15.20s/batch, batch_loss=17.2, bat

Validation:  58%|▌| 429/743 [1:47:04<1:18:30, 15.00s/batch, batch_loss=17.2, bat

Validation:  58%|▌| 429/743 [1:47:19<1:18:30, 15.00s/batch, batch_loss=5.37e+3, 

Validation:  58%|▌| 430/743 [1:47:19<1:19:09, 15.17s/batch, batch_loss=5.37e+3, 

Validation:  58%|▌| 430/743 [1:47:34<1:19:09, 15.17s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:47:34<1:19:04, 15.21s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:47:49<1:19:04, 15.21s/batch, batch_loss=964, batc

Validation:  58%|▌| 432/743 [1:47:49<1:18:25, 15.13s/batch, batch_loss=964, batc

Validation:  58%|▌| 432/743 [1:48:04<1:18:25, 15.13s/batch, batch_loss=15.6, bat

Validation:  58%|▌| 433/743 [1:48:04<1:17:09, 14.93s/batch, batch_loss=15.6, bat

Validation:  58%|▌| 433/743 [1:48:19<1:17:09, 14.93s/batch, batch_loss=11.1, bat

Validation:  58%|▌| 434/743 [1:48:19<1:17:56, 15.13s/batch, batch_loss=11.1, bat

Validation:  58%|▌| 434/743 [1:48:37<1:17:56, 15.13s/batch, batch_loss=14.1, bat

Validation:  59%|▌| 435/743 [1:48:37<1:21:31, 15.88s/batch, batch_loss=14.1, bat

Validation:  59%|▌| 435/743 [1:48:53<1:21:31, 15.88s/batch, batch_loss=14.5, bat

Validation:  59%|▌| 436/743 [1:48:53<1:20:58, 15.83s/batch, batch_loss=14.5, bat

Validation:  59%|▌| 436/743 [1:49:07<1:20:58, 15.83s/batch, batch_loss=21.2, bat

Validation:  59%|▌| 437/743 [1:49:07<1:17:50, 15.26s/batch, batch_loss=21.2, bat

Validation:  59%|▌| 437/743 [1:49:22<1:17:50, 15.26s/batch, batch_loss=977, batc

Validation:  59%|▌| 438/743 [1:49:22<1:18:11, 15.38s/batch, batch_loss=977, batc

Validation:  59%|▌| 438/743 [1:49:37<1:18:11, 15.38s/batch, batch_loss=904, batc

Validation:  59%|▌| 439/743 [1:49:37<1:16:39, 15.13s/batch, batch_loss=904, batc

Validation:  59%|▌| 439/743 [1:49:51<1:16:39, 15.13s/batch, batch_loss=19.6, bat

Validation:  59%|▌| 440/743 [1:49:51<1:14:29, 14.75s/batch, batch_loss=19.6, bat

Validation:  59%|▌| 440/743 [1:50:06<1:14:29, 14.75s/batch, batch_loss=15.2, bat

Validation:  59%|▌| 441/743 [1:50:06<1:15:02, 14.91s/batch, batch_loss=15.2, bat

Validation:  59%|▌| 441/743 [1:50:21<1:15:02, 14.91s/batch, batch_loss=16.2, bat

Validation:  59%|▌| 442/743 [1:50:21<1:14:09, 14.78s/batch, batch_loss=16.2, bat

Validation:  59%|▌| 442/743 [1:50:37<1:14:09, 14.78s/batch, batch_loss=10.9, bat

Validation:  60%|▌| 443/743 [1:50:37<1:16:03, 15.21s/batch, batch_loss=10.9, bat

Validation:  60%|▌| 443/743 [1:50:51<1:16:03, 15.21s/batch, batch_loss=16.8, bat

Validation:  60%|▌| 444/743 [1:50:51<1:14:34, 14.96s/batch, batch_loss=16.8, bat

Validation:  60%|▌| 444/743 [1:51:06<1:14:34, 14.96s/batch, batch_loss=8.03, bat

Validation:  60%|▌| 445/743 [1:51:06<1:14:18, 14.96s/batch, batch_loss=8.03, bat

Validation:  60%|▌| 445/743 [1:51:22<1:14:18, 14.96s/batch, batch_loss=15.7, bat

Validation:  60%|▌| 446/743 [1:51:22<1:15:03, 15.16s/batch, batch_loss=15.7, bat

Validation:  60%|▌| 446/743 [1:51:37<1:15:03, 15.16s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:51:37<1:15:25, 15.29s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [1:51:52<1:15:25, 15.29s/batch, batch_loss=6.08, bat

Validation:  60%|▌| 448/743 [1:51:52<1:14:52, 15.23s/batch, batch_loss=6.08, bat

Validation:  60%|▌| 448/743 [1:52:08<1:14:52, 15.23s/batch, batch_loss=10.5, bat

Validation:  60%|▌| 449/743 [1:52:08<1:14:25, 15.19s/batch, batch_loss=10.5, bat

Validation:  60%|▌| 449/743 [1:52:23<1:14:25, 15.19s/batch, batch_loss=16.4, bat

Validation:  61%|▌| 450/743 [1:52:23<1:14:13, 15.20s/batch, batch_loss=16.4, bat

Validation:  61%|▌| 450/743 [1:52:40<1:14:13, 15.20s/batch, batch_loss=13.6, bat

Validation:  61%|▌| 451/743 [1:52:40<1:17:13, 15.87s/batch, batch_loss=13.6, bat

Validation:  61%|▌| 451/743 [1:52:54<1:17:13, 15.87s/batch, batch_loss=19.1, bat

Validation:  61%|▌| 452/743 [1:52:54<1:14:08, 15.29s/batch, batch_loss=19.1, bat

Validation:  61%|▌| 452/743 [1:53:08<1:14:08, 15.29s/batch, batch_loss=13.2, bat

Validation:  61%|▌| 453/743 [1:53:08<1:12:35, 15.02s/batch, batch_loss=13.2, bat

Validation:  61%|▌| 453/743 [1:53:23<1:12:35, 15.02s/batch, batch_loss=6.07, bat

Validation:  61%|▌| 454/743 [1:53:23<1:12:11, 14.99s/batch, batch_loss=6.07, bat

Validation:  61%|▌| 454/743 [1:53:38<1:12:11, 14.99s/batch, batch_loss=10.1, bat

Validation:  61%|▌| 455/743 [1:53:38<1:11:00, 14.79s/batch, batch_loss=10.1, bat

Validation:  61%|▌| 455/743 [1:53:51<1:11:00, 14.79s/batch, batch_loss=10.3, bat

Validation:  61%|▌| 456/743 [1:53:51<1:09:13, 14.47s/batch, batch_loss=10.3, bat

Validation:  61%|▌| 456/743 [1:54:07<1:09:13, 14.47s/batch, batch_loss=13.1, bat

Validation:  62%|▌| 457/743 [1:54:07<1:10:17, 14.75s/batch, batch_loss=13.1, bat

Validation:  62%|▌| 457/743 [1:54:22<1:10:17, 14.75s/batch, batch_loss=19.9, bat

Validation:  62%|▌| 458/743 [1:54:22<1:10:32, 14.85s/batch, batch_loss=19.9, bat

Validation:  62%|▌| 458/743 [1:54:37<1:10:32, 14.85s/batch, batch_loss=16.1, bat

Validation:  62%|▌| 459/743 [1:54:37<1:10:32, 14.90s/batch, batch_loss=16.1, bat

Validation:  62%|▌| 459/743 [1:54:52<1:10:32, 14.90s/batch, batch_loss=19.6, bat

Validation:  62%|▌| 460/743 [1:54:52<1:10:36, 14.97s/batch, batch_loss=19.6, bat

Validation:  62%|▌| 460/743 [1:55:08<1:10:36, 14.97s/batch, batch_loss=15.5, bat

Validation:  62%|▌| 461/743 [1:55:08<1:12:06, 15.34s/batch, batch_loss=15.5, bat

Validation:  62%|▌| 461/743 [1:55:23<1:12:06, 15.34s/batch, batch_loss=13.6, bat

Validation:  62%|▌| 462/743 [1:55:23<1:11:20, 15.23s/batch, batch_loss=13.6, bat

Validation:  62%|▌| 462/743 [1:55:38<1:11:20, 15.23s/batch, batch_loss=11.1, bat

Validation:  62%|▌| 463/743 [1:55:38<1:10:54, 15.19s/batch, batch_loss=11.1, bat

Validation:  62%|▌| 463/743 [1:55:54<1:10:54, 15.19s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:55:54<1:10:36, 15.19s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:56:08<1:10:36, 15.19s/batch, batch_loss=20.7, bat

Validation:  63%|▋| 465/743 [1:56:08<1:09:50, 15.08s/batch, batch_loss=20.7, bat

Validation:  63%|▋| 465/743 [1:56:24<1:09:50, 15.08s/batch, batch_loss=15.1, bat

Validation:  63%|▋| 466/743 [1:56:24<1:10:07, 15.19s/batch, batch_loss=15.1, bat

Validation:  63%|▋| 466/743 [1:56:41<1:10:07, 15.19s/batch, batch_loss=26.3, bat

Validation:  63%|▋| 467/743 [1:56:41<1:12:28, 15.75s/batch, batch_loss=26.3, bat

Validation:  63%|▋| 467/743 [1:56:57<1:12:28, 15.75s/batch, batch_loss=13.4, bat

Validation:  63%|▋| 468/743 [1:56:57<1:12:02, 15.72s/batch, batch_loss=13.4, bat

Validation:  63%|▋| 468/743 [1:57:13<1:12:02, 15.72s/batch, batch_loss=19.3, bat

Validation:  63%|▋| 469/743 [1:57:13<1:12:43, 15.92s/batch, batch_loss=19.3, bat

Validation:  63%|▋| 469/743 [1:57:29<1:12:43, 15.92s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [1:57:29<1:12:52, 16.02s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [1:57:45<1:12:52, 16.02s/batch, batch_loss=13.1, bat

Validation:  63%|▋| 471/743 [1:57:45<1:12:46, 16.05s/batch, batch_loss=13.1, bat

Validation:  63%|▋| 471/743 [1:58:00<1:12:46, 16.05s/batch, batch_loss=18, batch

Validation:  64%|▋| 472/743 [1:58:00<1:10:55, 15.70s/batch, batch_loss=18, batch

Validation:  64%|▋| 472/743 [1:58:15<1:10:55, 15.70s/batch, batch_loss=585, batc

Validation:  64%|▋| 473/743 [1:58:15<1:10:05, 15.57s/batch, batch_loss=585, batc

Validation:  64%|▋| 473/743 [1:58:31<1:10:05, 15.57s/batch, batch_loss=16.2, bat

Validation:  64%|▋| 474/743 [1:58:31<1:09:51, 15.58s/batch, batch_loss=16.2, bat

Validation:  64%|▋| 474/743 [1:58:47<1:09:51, 15.58s/batch, batch_loss=19.7, bat

Validation:  64%|▋| 475/743 [1:58:47<1:09:27, 15.55s/batch, batch_loss=19.7, bat

Validation:  64%|▋| 475/743 [1:59:03<1:09:27, 15.55s/batch, batch_loss=9.66, bat

Validation:  64%|▋| 476/743 [1:59:03<1:10:15, 15.79s/batch, batch_loss=9.66, bat

Validation:  64%|▋| 476/743 [1:59:19<1:10:15, 15.79s/batch, batch_loss=12, batch

Validation:  64%|▋| 477/743 [1:59:19<1:10:00, 15.79s/batch, batch_loss=12, batch

Validation:  64%|▋| 477/743 [1:59:33<1:10:00, 15.79s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [1:59:33<1:08:15, 15.46s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [1:59:49<1:08:15, 15.46s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [1:59:49<1:07:52, 15.43s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [2:00:04<1:07:52, 15.43s/batch, batch_loss=11.1, bat

Validation:  65%|▋| 480/743 [2:00:04<1:07:15, 15.34s/batch, batch_loss=11.1, bat

Validation:  65%|▋| 480/743 [2:00:18<1:07:15, 15.34s/batch, batch_loss=12.1, bat

Validation:  65%|▋| 481/743 [2:00:18<1:05:58, 15.11s/batch, batch_loss=12.1, bat

Validation:  65%|▋| 481/743 [2:00:35<1:05:58, 15.11s/batch, batch_loss=6.97e+3, 

Validation:  65%|▋| 482/743 [2:00:35<1:07:06, 15.43s/batch, batch_loss=6.97e+3, 

Validation:  65%|▋| 482/743 [2:00:49<1:07:06, 15.43s/batch, batch_loss=19.5, bat

Validation:  65%|▋| 483/743 [2:00:49<1:05:50, 15.19s/batch, batch_loss=19.5, bat

Validation:  65%|▋| 483/743 [2:01:04<1:05:50, 15.19s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:01:04<1:04:27, 14.93s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:01:19<1:04:27, 14.93s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:01:19<1:04:50, 15.08s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:01:34<1:04:50, 15.08s/batch, batch_loss=16.7, bat

Validation:  65%|▋| 486/743 [2:01:34<1:04:34, 15.08s/batch, batch_loss=16.7, bat

Validation:  65%|▋| 486/743 [2:01:49<1:04:34, 15.08s/batch, batch_loss=34, batch

Validation:  66%|▋| 487/743 [2:01:49<1:03:50, 14.96s/batch, batch_loss=34, batch

Validation:  66%|▋| 487/743 [2:02:04<1:03:50, 14.96s/batch, batch_loss=24.6, bat

Validation:  66%|▋| 488/743 [2:02:04<1:04:03, 15.07s/batch, batch_loss=24.6, bat

Validation:  66%|▋| 488/743 [2:02:21<1:04:03, 15.07s/batch, batch_loss=10.8, bat

Validation:  66%|▋| 489/743 [2:02:21<1:06:00, 15.59s/batch, batch_loss=10.8, bat

Validation:  66%|▋| 489/743 [2:02:37<1:06:00, 15.59s/batch, batch_loss=18.1, bat

Validation:  66%|▋| 490/743 [2:02:37<1:05:49, 15.61s/batch, batch_loss=18.1, bat

Validation:  66%|▋| 490/743 [2:02:51<1:05:49, 15.61s/batch, batch_loss=16.9, bat

Validation:  66%|▋| 491/743 [2:02:51<1:04:42, 15.40s/batch, batch_loss=16.9, bat

Validation:  66%|▋| 491/743 [2:03:06<1:04:42, 15.40s/batch, batch_loss=1.05e+3, 

Validation:  66%|▋| 492/743 [2:03:06<1:03:54, 15.28s/batch, batch_loss=1.05e+3, 

Validation:  66%|▋| 492/743 [2:03:22<1:03:54, 15.28s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:03:22<1:03:20, 15.20s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:03:37<1:03:20, 15.20s/batch, batch_loss=7.55, bat

Validation:  66%|▋| 494/743 [2:03:37<1:02:54, 15.16s/batch, batch_loss=7.55, bat

Validation:  66%|▋| 494/743 [2:03:51<1:02:54, 15.16s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:03:51<1:02:06, 15.02s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:04:05<1:02:06, 15.02s/batch, batch_loss=16.1, bat

Validation:  67%|▋| 496/743 [2:04:05<1:00:39, 14.74s/batch, batch_loss=16.1, bat

Validation:  67%|▋| 496/743 [2:04:20<1:00:39, 14.74s/batch, batch_loss=12.9, bat

Validation:  67%|▋| 497/743 [2:04:20<1:00:46, 14.82s/batch, batch_loss=12.9, bat

Validation:  67%|▋| 497/743 [2:04:35<1:00:46, 14.82s/batch, batch_loss=15.1, bat

Validation:  67%|▋| 498/743 [2:04:35<1:00:11, 14.74s/batch, batch_loss=15.1, bat

Validation:  67%|▋| 498/743 [2:04:50<1:00:11, 14.74s/batch, batch_loss=4.05, bat

Validation:  67%|▋| 499/743 [2:04:50<1:00:00, 14.76s/batch, batch_loss=4.05, bat

Validation:  67%|▋| 499/743 [2:05:05<1:00:00, 14.76s/batch, batch_loss=2.51e+4, 

Validation:  67%|▋| 500/743 [2:05:05<1:00:27, 14.93s/batch, batch_loss=2.51e+4, 

Validation:  67%|▋| 500/743 [2:05:20<1:00:27, 14.93s/batch, batch_loss=20.9, bat

Validation:  67%|▋| 501/743 [2:05:20<1:00:14, 14.93s/batch, batch_loss=20.9, bat

Validation:  67%|▋| 501/743 [2:05:35<1:00:14, 14.93s/batch, batch_loss=3.15e+3, 

Validation:  68%|▋| 502/743 [2:05:35<1:00:26, 15.05s/batch, batch_loss=3.15e+3, 

Validation:  68%|▋| 502/743 [2:05:51<1:00:26, 15.05s/batch, batch_loss=13.7, bat

Validation:  68%|▋| 503/743 [2:05:51<1:00:27, 15.11s/batch, batch_loss=13.7, bat

Validation:  68%|▋| 503/743 [2:06:05<1:00:27, 15.11s/batch, batch_loss=12.8, bat

Validation:  68%|▋| 504/743 [2:06:05<59:36, 14.96s/batch, batch_loss=12.8, batch

Validation:  68%|▋| 504/743 [2:06:22<59:36, 14.96s/batch, batch_loss=15.6, batch

Validation:  68%|▋| 505/743 [2:06:22<1:01:20, 15.46s/batch, batch_loss=15.6, bat

Validation:  68%|▋| 505/743 [2:06:36<1:01:20, 15.46s/batch, batch_loss=2.84e+3, 

Validation:  68%|▋| 506/743 [2:06:36<1:00:05, 15.21s/batch, batch_loss=2.84e+3, 

Validation:  68%|▋| 506/743 [2:06:51<1:00:05, 15.21s/batch, batch_loss=1.99e+3, 

Validation:  68%|▋| 507/743 [2:06:51<59:00, 15.00s/batch, batch_loss=1.99e+3, ba

Validation:  68%|▋| 507/743 [2:07:06<59:00, 15.00s/batch, batch_loss=8.37e+3, ba

Validation:  68%|▋| 508/743 [2:07:06<58:15, 14.88s/batch, batch_loss=8.37e+3, ba

Validation:  68%|▋| 508/743 [2:07:22<58:15, 14.88s/batch, batch_loss=8.46e+3, ba

Validation:  69%|▋| 509/743 [2:07:22<59:23, 15.23s/batch, batch_loss=8.46e+3, ba

Validation:  69%|▋| 509/743 [2:07:37<59:23, 15.23s/batch, batch_loss=15.8, batch

Validation:  69%|▋| 510/743 [2:07:37<59:02, 15.20s/batch, batch_loss=15.8, batch

Validation:  69%|▋| 510/743 [2:07:51<59:02, 15.20s/batch, batch_loss=18.3, batch

Validation:  69%|▋| 511/743 [2:07:51<58:00, 15.00s/batch, batch_loss=18.3, batch

Validation:  69%|▋| 511/743 [2:08:06<58:00, 15.00s/batch, batch_loss=16.7, batch

Validation:  69%|▋| 512/743 [2:08:06<57:50, 15.02s/batch, batch_loss=16.7, batch

Validation:  69%|▋| 512/743 [2:08:21<57:50, 15.02s/batch, batch_loss=16.7, batch

Validation:  69%|▋| 513/743 [2:08:21<57:28, 14.99s/batch, batch_loss=16.7, batch

Validation:  69%|▋| 513/743 [2:08:37<57:28, 14.99s/batch, batch_loss=12.6, batch

Validation:  69%|▋| 514/743 [2:08:37<57:32, 15.07s/batch, batch_loss=12.6, batch

Validation:  69%|▋| 514/743 [2:08:51<57:32, 15.07s/batch, batch_loss=11.5, batch

Validation:  69%|▋| 515/743 [2:08:51<56:47, 14.94s/batch, batch_loss=11.5, batch

Validation:  69%|▋| 515/743 [2:09:06<56:47, 14.94s/batch, batch_loss=11, batch_i

Validation:  69%|▋| 516/743 [2:09:06<56:19, 14.89s/batch, batch_loss=11, batch_i

Validation:  69%|▋| 516/743 [2:09:21<56:19, 14.89s/batch, batch_loss=6.15e+4, ba

Validation:  70%|▋| 517/743 [2:09:21<55:49, 14.82s/batch, batch_loss=6.15e+4, ba

Validation:  70%|▋| 517/743 [2:09:35<55:49, 14.82s/batch, batch_loss=506, batch_

Validation:  70%|▋| 518/743 [2:09:35<55:18, 14.75s/batch, batch_loss=506, batch_

Validation:  70%|▋| 518/743 [2:09:50<55:18, 14.75s/batch, batch_loss=11.1, batch

Validation:  70%|▋| 519/743 [2:09:50<55:09, 14.77s/batch, batch_loss=11.1, batch

Validation:  70%|▋| 519/743 [2:10:05<55:09, 14.77s/batch, batch_loss=17.2, batch

Validation:  70%|▋| 520/743 [2:10:05<55:26, 14.92s/batch, batch_loss=17.2, batch

Validation:  70%|▋| 520/743 [2:10:20<55:26, 14.92s/batch, batch_loss=13.6, batch

Validation:  70%|▋| 521/743 [2:10:20<55:07, 14.90s/batch, batch_loss=13.6, batch

Validation:  70%|▋| 521/743 [2:10:34<55:07, 14.90s/batch, batch_loss=14.1, batch

Validation:  70%|▋| 522/743 [2:10:34<53:29, 14.52s/batch, batch_loss=14.1, batch

Validation:  70%|▋| 522/743 [2:10:48<53:29, 14.52s/batch, batch_loss=430, batch_

Validation:  70%|▋| 523/743 [2:10:48<53:15, 14.52s/batch, batch_loss=430, batch_

Validation:  70%|▋| 523/743 [2:11:03<53:15, 14.52s/batch, batch_loss=15.7, batch

Validation:  71%|▋| 524/743 [2:11:03<53:19, 14.61s/batch, batch_loss=15.7, batch

Validation:  71%|▋| 524/743 [2:11:18<53:19, 14.61s/batch, batch_loss=22.2, batch

Validation:  71%|▋| 525/743 [2:11:18<53:26, 14.71s/batch, batch_loss=22.2, batch

Validation:  71%|▋| 525/743 [2:11:33<53:26, 14.71s/batch, batch_loss=9.87, batch

Validation:  71%|▋| 526/743 [2:11:33<53:55, 14.91s/batch, batch_loss=9.87, batch

Validation:  71%|▋| 526/743 [2:11:49<53:55, 14.91s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:11:49<54:17, 15.08s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:12:04<54:17, 15.08s/batch, batch_loss=512, batch_

Validation:  71%|▋| 528/743 [2:12:04<53:37, 14.96s/batch, batch_loss=512, batch_

Validation:  71%|▋| 528/743 [2:12:18<53:37, 14.96s/batch, batch_loss=6.52e+3, ba

Validation:  71%|▋| 529/743 [2:12:18<52:55, 14.84s/batch, batch_loss=6.52e+3, ba

Validation:  71%|▋| 529/743 [2:12:32<52:55, 14.84s/batch, batch_loss=207, batch_

Validation:  71%|▋| 530/743 [2:12:32<52:11, 14.70s/batch, batch_loss=207, batch_

Validation:  71%|▋| 530/743 [2:12:47<52:11, 14.70s/batch, batch_loss=41.5, batch

Validation:  71%|▋| 531/743 [2:12:47<51:35, 14.60s/batch, batch_loss=41.5, batch

Validation:  71%|▋| 531/743 [2:13:02<51:35, 14.60s/batch, batch_loss=250, batch_

Validation:  72%|▋| 532/743 [2:13:02<52:07, 14.82s/batch, batch_loss=250, batch_

Validation:  72%|▋| 532/743 [2:13:17<52:07, 14.82s/batch, batch_loss=7.79, batch

Validation:  72%|▋| 533/743 [2:13:17<51:39, 14.76s/batch, batch_loss=7.79, batch

Validation:  72%|▋| 533/743 [2:13:31<51:39, 14.76s/batch, batch_loss=11.9, batch

Validation:  72%|▋| 534/743 [2:13:31<50:47, 14.58s/batch, batch_loss=11.9, batch

Validation:  72%|▋| 534/743 [2:13:45<50:47, 14.58s/batch, batch_loss=15.3, batch

Validation:  72%|▋| 535/743 [2:13:45<50:27, 14.56s/batch, batch_loss=15.3, batch

Validation:  72%|▋| 535/743 [2:14:01<50:27, 14.56s/batch, batch_loss=14.3, batch

Validation:  72%|▋| 536/743 [2:14:01<51:03, 14.80s/batch, batch_loss=14.3, batch

Validation:  72%|▋| 536/743 [2:14:18<51:03, 14.80s/batch, batch_loss=13.1, batch

Validation:  72%|▋| 537/743 [2:14:18<53:32, 15.59s/batch, batch_loss=13.1, batch

Validation:  72%|▋| 537/743 [2:14:32<53:32, 15.59s/batch, batch_loss=15.6, batch

Validation:  72%|▋| 538/743 [2:14:32<51:43, 15.14s/batch, batch_loss=15.6, batch

Validation:  72%|▋| 538/743 [2:14:47<51:43, 15.14s/batch, batch_loss=252, batch_

Validation:  73%|▋| 539/743 [2:14:47<50:48, 14.95s/batch, batch_loss=252, batch_

Validation:  73%|▋| 539/743 [2:15:02<50:48, 14.95s/batch, batch_loss=18.7, batch

Validation:  73%|▋| 540/743 [2:15:02<50:31, 14.93s/batch, batch_loss=18.7, batch

Validation:  73%|▋| 540/743 [2:15:17<50:31, 14.93s/batch, batch_loss=28, batch_i

Validation:  73%|▋| 541/743 [2:15:17<50:12, 14.92s/batch, batch_loss=28, batch_i

Validation:  73%|▋| 541/743 [2:15:29<50:12, 14.92s/batch, batch_loss=1.95e+3, ba

Validation:  73%|▋| 542/743 [2:15:29<47:33, 14.19s/batch, batch_loss=1.95e+3, ba

Validation:  73%|▋| 542/743 [2:15:42<47:33, 14.19s/batch, batch_loss=15.5, batch

Validation:  73%|▋| 543/743 [2:15:42<46:04, 13.82s/batch, batch_loss=15.5, batch

Validation:  73%|▋| 543/743 [2:15:56<46:04, 13.82s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:15:56<46:22, 13.98s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:16:13<46:22, 13.98s/batch, batch_loss=2.74e+3, ba

Validation:  73%|▋| 545/743 [2:16:13<48:54, 14.82s/batch, batch_loss=2.74e+3, ba

Validation:  73%|▋| 545/743 [2:16:28<48:54, 14.82s/batch, batch_loss=6.73, batch

Validation:  73%|▋| 546/743 [2:16:28<49:01, 14.93s/batch, batch_loss=6.73, batch

Validation:  73%|▋| 546/743 [2:16:43<49:01, 14.93s/batch, batch_loss=258, batch_

Validation:  74%|▋| 547/743 [2:16:43<48:37, 14.88s/batch, batch_loss=258, batch_

Validation:  74%|▋| 547/743 [2:16:58<48:37, 14.88s/batch, batch_loss=25.9, batch

Validation:  74%|▋| 548/743 [2:16:58<48:05, 14.80s/batch, batch_loss=25.9, batch

Validation:  74%|▋| 548/743 [2:17:13<48:05, 14.80s/batch, batch_loss=4.1e+3, bat

Validation:  74%|▋| 549/743 [2:17:13<48:40, 15.06s/batch, batch_loss=4.1e+3, bat

Validation:  74%|▋| 549/743 [2:17:29<48:40, 15.06s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:17:29<48:34, 15.10s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:17:43<48:34, 15.10s/batch, batch_loss=15, batch_i

Validation:  74%|▋| 551/743 [2:17:43<47:56, 14.98s/batch, batch_loss=15, batch_i

Validation:  74%|▋| 551/743 [2:17:58<47:56, 14.98s/batch, batch_loss=6.75e+3, ba

Validation:  74%|▋| 552/743 [2:17:58<47:24, 14.89s/batch, batch_loss=6.75e+3, ba

Validation:  74%|▋| 552/743 [2:18:15<47:24, 14.89s/batch, batch_loss=20.8, batch

Validation:  74%|▋| 553/743 [2:18:15<49:23, 15.60s/batch, batch_loss=20.8, batch

Validation:  74%|▋| 553/743 [2:18:31<49:23, 15.60s/batch, batch_loss=22.1, batch

Validation:  75%|▋| 554/743 [2:18:31<49:06, 15.59s/batch, batch_loss=22.1, batch

Validation:  75%|▋| 554/743 [2:18:45<49:06, 15.59s/batch, batch_loss=2.46e+3, ba

Validation:  75%|▋| 555/743 [2:18:45<47:32, 15.17s/batch, batch_loss=2.46e+3, ba

Validation:  75%|▋| 555/743 [2:19:00<47:32, 15.17s/batch, batch_loss=26.5, batch

Validation:  75%|▋| 556/743 [2:19:00<46:59, 15.08s/batch, batch_loss=26.5, batch

Validation:  75%|▋| 556/743 [2:19:15<46:59, 15.08s/batch, batch_loss=7.95, batch

Validation:  75%|▋| 557/743 [2:19:15<47:01, 15.17s/batch, batch_loss=7.95, batch

Validation:  75%|▋| 557/743 [2:19:30<47:01, 15.17s/batch, batch_loss=1.51e+4, ba

Validation:  75%|▊| 558/743 [2:19:30<46:35, 15.11s/batch, batch_loss=1.51e+4, ba

Validation:  75%|▊| 558/743 [2:19:44<46:35, 15.11s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:19:44<45:27, 14.82s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:19:59<45:27, 14.82s/batch, batch_loss=2.94e+3, ba

Validation:  75%|▊| 560/743 [2:19:59<45:16, 14.85s/batch, batch_loss=2.94e+3, ba

Validation:  75%|▊| 560/743 [2:20:14<45:16, 14.85s/batch, batch_loss=10.1, batch

Validation:  76%|▊| 561/743 [2:20:14<44:26, 14.65s/batch, batch_loss=10.1, batch

Validation:  76%|▊| 561/743 [2:20:30<44:26, 14.65s/batch, batch_loss=14.6, batch

Validation:  76%|▊| 562/743 [2:20:30<45:54, 15.22s/batch, batch_loss=14.6, batch

Validation:  76%|▊| 562/743 [2:20:45<45:54, 15.22s/batch, batch_loss=19.4, batch

Validation:  76%|▊| 563/743 [2:20:45<45:09, 15.05s/batch, batch_loss=19.4, batch

Validation:  76%|▊| 563/743 [2:20:59<45:09, 15.05s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:20:59<44:23, 14.88s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:21:14<44:23, 14.88s/batch, batch_loss=3.69e+3, ba

Validation:  76%|▊| 565/743 [2:21:14<44:09, 14.88s/batch, batch_loss=3.69e+3, ba

Validation:  76%|▊| 565/743 [2:21:28<44:09, 14.88s/batch, batch_loss=12.3, batch

Validation:  76%|▊| 566/743 [2:21:28<43:13, 14.66s/batch, batch_loss=12.3, batch

Validation:  76%|▊| 566/743 [2:21:43<43:13, 14.66s/batch, batch_loss=13.2, batch

Validation:  76%|▊| 567/743 [2:21:43<43:29, 14.83s/batch, batch_loss=13.2, batch

Validation:  76%|▊| 567/743 [2:21:58<43:29, 14.83s/batch, batch_loss=10.3, batch

Validation:  76%|▊| 568/743 [2:21:58<43:11, 14.81s/batch, batch_loss=10.3, batch

Validation:  76%|▊| 568/743 [2:22:16<43:11, 14.81s/batch, batch_loss=16.6, batch

Validation:  77%|▊| 569/743 [2:22:16<45:15, 15.61s/batch, batch_loss=16.6, batch

Validation:  77%|▊| 569/743 [2:22:30<45:15, 15.61s/batch, batch_loss=18.6, batch

Validation:  77%|▊| 570/743 [2:22:30<44:03, 15.28s/batch, batch_loss=18.6, batch

Validation:  77%|▊| 570/743 [2:22:45<44:03, 15.28s/batch, batch_loss=11.4, batch

Validation:  77%|▊| 571/743 [2:22:45<43:11, 15.07s/batch, batch_loss=11.4, batch

Validation:  77%|▊| 571/743 [2:23:00<43:11, 15.07s/batch, batch_loss=20.6, batch

Validation:  77%|▊| 572/743 [2:23:00<43:09, 15.14s/batch, batch_loss=20.6, batch

Validation:  77%|▊| 572/743 [2:23:15<43:09, 15.14s/batch, batch_loss=14.4, batch

Validation:  77%|▊| 573/743 [2:23:15<42:30, 15.01s/batch, batch_loss=14.4, batch

Validation:  77%|▊| 573/743 [2:23:29<42:30, 15.01s/batch, batch_loss=16.6, batch

Validation:  77%|▊| 574/743 [2:23:29<41:41, 14.80s/batch, batch_loss=16.6, batch

Validation:  77%|▊| 574/743 [2:23:43<41:41, 14.80s/batch, batch_loss=14.6, batch

Validation:  77%|▊| 575/743 [2:23:43<41:00, 14.65s/batch, batch_loss=14.6, batch

Validation:  77%|▊| 575/743 [2:23:58<41:00, 14.65s/batch, batch_loss=19.8, batch

Validation:  78%|▊| 576/743 [2:23:58<40:26, 14.53s/batch, batch_loss=19.8, batch

Validation:  78%|▊| 576/743 [2:24:13<40:26, 14.53s/batch, batch_loss=17.6, batch

Validation:  78%|▊| 577/743 [2:24:13<40:45, 14.73s/batch, batch_loss=17.6, batch

Validation:  78%|▊| 577/743 [2:24:27<40:45, 14.73s/batch, batch_loss=21.3, batch

Validation:  78%|▊| 578/743 [2:24:27<40:26, 14.70s/batch, batch_loss=21.3, batch

Validation:  78%|▊| 578/743 [2:24:44<40:26, 14.70s/batch, batch_loss=312, batch_

Validation:  78%|▊| 579/743 [2:24:44<41:50, 15.31s/batch, batch_loss=312, batch_

Validation:  78%|▊| 579/743 [2:24:59<41:50, 15.31s/batch, batch_loss=6.1, batch_

Validation:  78%|▊| 580/743 [2:24:59<40:48, 15.02s/batch, batch_loss=6.1, batch_

Validation:  78%|▊| 580/743 [2:25:13<40:48, 15.02s/batch, batch_loss=9.82, batch

Validation:  78%|▊| 581/743 [2:25:13<39:54, 14.78s/batch, batch_loss=9.82, batch

Validation:  78%|▊| 581/743 [2:25:28<39:54, 14.78s/batch, batch_loss=14.9, batch

Validation:  78%|▊| 582/743 [2:25:28<39:54, 14.88s/batch, batch_loss=14.9, batch

Validation:  78%|▊| 582/743 [2:25:42<39:54, 14.88s/batch, batch_loss=2.39e+3, ba

Validation:  78%|▊| 583/743 [2:25:42<39:25, 14.78s/batch, batch_loss=2.39e+3, ba

Validation:  78%|▊| 583/743 [2:25:58<39:25, 14.78s/batch, batch_loss=2.07, batch

Validation:  79%|▊| 584/743 [2:25:58<39:56, 15.07s/batch, batch_loss=2.07, batch

Validation:  79%|▊| 584/743 [2:26:13<39:56, 15.07s/batch, batch_loss=18.5, batch

Validation:  79%|▊| 585/743 [2:26:13<39:06, 14.85s/batch, batch_loss=18.5, batch

Validation:  79%|▊| 585/743 [2:26:27<39:06, 14.85s/batch, batch_loss=547, batch_

Validation:  79%|▊| 586/743 [2:26:27<38:33, 14.74s/batch, batch_loss=547, batch_

Validation:  79%|▊| 586/743 [2:26:41<38:33, 14.74s/batch, batch_loss=7.23, batch

Validation:  79%|▊| 587/743 [2:26:41<37:36, 14.46s/batch, batch_loss=7.23, batch

Validation:  79%|▊| 587/743 [2:26:55<37:36, 14.46s/batch, batch_loss=397, batch_

Validation:  79%|▊| 588/743 [2:26:55<37:07, 14.37s/batch, batch_loss=397, batch_

Validation:  79%|▊| 588/743 [2:27:09<37:07, 14.37s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:27:09<36:57, 14.40s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:27:24<36:57, 14.40s/batch, batch_loss=19.5, batch

Validation:  79%|▊| 590/743 [2:27:24<36:49, 14.44s/batch, batch_loss=19.5, batch

Validation:  79%|▊| 590/743 [2:27:38<36:49, 14.44s/batch, batch_loss=14, batch_i

Validation:  80%|▊| 591/743 [2:27:38<36:17, 14.32s/batch, batch_loss=14, batch_i

Validation:  80%|▊| 591/743 [2:27:53<36:17, 14.32s/batch, batch_loss=10.7, batch

Validation:  80%|▊| 592/743 [2:27:53<36:13, 14.40s/batch, batch_loss=10.7, batch

Validation:  80%|▊| 592/743 [2:28:07<36:13, 14.40s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:28:07<35:43, 14.29s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:28:21<35:43, 14.29s/batch, batch_loss=3.47, batch

Validation:  80%|▊| 594/743 [2:28:21<35:51, 14.44s/batch, batch_loss=3.47, batch

Validation:  80%|▊| 594/743 [2:28:36<35:51, 14.44s/batch, batch_loss=3.34, batch

Validation:  80%|▊| 595/743 [2:28:36<35:53, 14.55s/batch, batch_loss=3.34, batch

Validation:  80%|▊| 595/743 [2:28:53<35:53, 14.55s/batch, batch_loss=4.58, batch

Validation:  80%|▊| 596/743 [2:28:53<37:24, 15.27s/batch, batch_loss=4.58, batch

Validation:  80%|▊| 596/743 [2:29:08<37:24, 15.27s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:29:08<37:02, 15.22s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:29:23<37:02, 15.22s/batch, batch_loss=13.5, batch

Validation:  80%|▊| 598/743 [2:29:23<36:42, 15.19s/batch, batch_loss=13.5, batch

Validation:  80%|▊| 598/743 [2:29:39<36:42, 15.19s/batch, batch_loss=13.3, batch

Validation:  81%|▊| 599/743 [2:29:39<36:35, 15.24s/batch, batch_loss=13.3, batch

Validation:  81%|▊| 599/743 [2:29:55<36:35, 15.24s/batch, batch_loss=23.1, batch

Validation:  81%|▊| 600/743 [2:29:55<36:46, 15.43s/batch, batch_loss=23.1, batch

Validation:  81%|▊| 600/743 [2:30:09<36:46, 15.43s/batch, batch_loss=13.9, batch

Validation:  81%|▊| 601/743 [2:30:09<35:48, 15.13s/batch, batch_loss=13.9, batch

Validation:  81%|▊| 601/743 [2:30:23<35:48, 15.13s/batch, batch_loss=19, batch_i

Validation:  81%|▊| 602/743 [2:30:23<34:50, 14.83s/batch, batch_loss=19, batch_i

Validation:  81%|▊| 602/743 [2:30:37<34:50, 14.83s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:30:37<33:56, 14.55s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:30:51<33:56, 14.55s/batch, batch_loss=18.7, batch

Validation:  81%|▊| 604/743 [2:30:51<33:20, 14.40s/batch, batch_loss=18.7, batch

Validation:  81%|▊| 604/743 [2:31:08<33:20, 14.40s/batch, batch_loss=24.9, batch

Validation:  81%|▊| 605/743 [2:31:08<34:37, 15.05s/batch, batch_loss=24.9, batch

Validation:  81%|▊| 605/743 [2:31:23<34:37, 15.05s/batch, batch_loss=248, batch_

Validation:  82%|▊| 606/743 [2:31:23<34:32, 15.13s/batch, batch_loss=248, batch_

Validation:  82%|▊| 606/743 [2:31:37<34:32, 15.13s/batch, batch_loss=26.9, batch

Validation:  82%|▊| 607/743 [2:31:37<33:47, 14.91s/batch, batch_loss=26.9, batch

Validation:  82%|▊| 607/743 [2:31:52<33:47, 14.91s/batch, batch_loss=19.2, batch

Validation:  82%|▊| 608/743 [2:31:52<33:17, 14.79s/batch, batch_loss=19.2, batch

Validation:  82%|▊| 608/743 [2:32:06<33:17, 14.79s/batch, batch_loss=17.4, batch

Validation:  82%|▊| 609/743 [2:32:06<32:44, 14.66s/batch, batch_loss=17.4, batch

Validation:  82%|▊| 609/743 [2:32:21<32:44, 14.66s/batch, batch_loss=15.8, batch

Validation:  82%|▊| 610/743 [2:32:21<32:14, 14.54s/batch, batch_loss=15.8, batch

Validation:  82%|▊| 610/743 [2:32:34<32:14, 14.54s/batch, batch_loss=17, batch_i

Validation:  82%|▊| 611/743 [2:32:34<31:14, 14.20s/batch, batch_loss=17, batch_i

Validation:  82%|▊| 611/743 [2:32:48<31:14, 14.20s/batch, batch_loss=10.5, batch

Validation:  82%|▊| 612/743 [2:32:48<31:02, 14.22s/batch, batch_loss=10.5, batch

Validation:  82%|▊| 612/743 [2:33:03<31:02, 14.22s/batch, batch_loss=15.1, batch

Validation:  83%|▊| 613/743 [2:33:03<31:15, 14.43s/batch, batch_loss=15.1, batch

Validation:  83%|▊| 613/743 [2:33:18<31:15, 14.43s/batch, batch_loss=5.64e+3, ba

Validation:  83%|▊| 614/743 [2:33:18<30:59, 14.42s/batch, batch_loss=5.64e+3, ba

Validation:  83%|▊| 614/743 [2:33:33<30:59, 14.42s/batch, batch_loss=12.7, batch

Validation:  83%|▊| 615/743 [2:33:33<31:14, 14.65s/batch, batch_loss=12.7, batch

Validation:  83%|▊| 615/743 [2:33:46<31:14, 14.65s/batch, batch_loss=15.1, batch

Validation:  83%|▊| 616/743 [2:33:46<30:15, 14.29s/batch, batch_loss=15.1, batch

Validation:  83%|▊| 616/743 [2:34:01<30:15, 14.29s/batch, batch_loss=6.38, batch

Validation:  83%|▊| 617/743 [2:34:01<30:20, 14.45s/batch, batch_loss=6.38, batch

Validation:  83%|▊| 617/743 [2:34:15<30:20, 14.45s/batch, batch_loss=8.67, batch

Validation:  83%|▊| 618/743 [2:34:15<29:59, 14.39s/batch, batch_loss=8.67, batch

Validation:  83%|▊| 618/743 [2:34:30<29:59, 14.39s/batch, batch_loss=342, batch_

Validation:  83%|▊| 619/743 [2:34:30<30:04, 14.55s/batch, batch_loss=342, batch_

Validation:  83%|▊| 619/743 [2:34:44<30:04, 14.55s/batch, batch_loss=13.7, batch

Validation:  83%|▊| 620/743 [2:34:44<29:22, 14.33s/batch, batch_loss=13.7, batch

Validation:  83%|▊| 620/743 [2:34:58<29:22, 14.33s/batch, batch_loss=7.38, batch

Validation:  84%|▊| 621/743 [2:34:58<29:12, 14.36s/batch, batch_loss=7.38, batch

Validation:  84%|▊| 621/743 [2:35:13<29:12, 14.36s/batch, batch_loss=13.9, batch

Validation:  84%|▊| 622/743 [2:35:13<29:22, 14.57s/batch, batch_loss=13.9, batch

Validation:  84%|▊| 622/743 [2:35:29<29:22, 14.57s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:35:29<29:30, 14.76s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:35:44<29:30, 14.76s/batch, batch_loss=14.5, batch

Validation:  84%|▊| 624/743 [2:35:44<29:38, 14.95s/batch, batch_loss=14.5, batch

Validation:  84%|▊| 624/743 [2:35:59<29:38, 14.95s/batch, batch_loss=2.4e+3, bat

Validation:  84%|▊| 625/743 [2:35:59<29:35, 15.05s/batch, batch_loss=2.4e+3, bat

Validation:  84%|▊| 625/743 [2:36:14<29:35, 15.05s/batch, batch_loss=18, batch_i

Validation:  84%|▊| 626/743 [2:36:14<29:11, 14.97s/batch, batch_loss=18, batch_i

Validation:  84%|▊| 626/743 [2:36:29<29:11, 14.97s/batch, batch_loss=17.4, batch

Validation:  84%|▊| 627/743 [2:36:29<29:00, 15.00s/batch, batch_loss=17.4, batch

Validation:  84%|▊| 627/743 [2:36:44<29:00, 15.00s/batch, batch_loss=15.2, batch

Validation:  85%|▊| 628/743 [2:36:44<28:41, 14.97s/batch, batch_loss=15.2, batch

Validation:  85%|▊| 628/743 [2:36:58<28:41, 14.97s/batch, batch_loss=10.8, batch

Validation:  85%|▊| 629/743 [2:36:58<28:02, 14.76s/batch, batch_loss=10.8, batch

Validation:  85%|▊| 629/743 [2:37:13<28:02, 14.76s/batch, batch_loss=17.9, batch

Validation:  85%|▊| 630/743 [2:37:13<27:43, 14.72s/batch, batch_loss=17.9, batch

Validation:  85%|▊| 630/743 [2:37:27<27:43, 14.72s/batch, batch_loss=241, batch_

Validation:  85%|▊| 631/743 [2:37:27<27:21, 14.65s/batch, batch_loss=241, batch_

Validation:  85%|▊| 631/743 [2:37:42<27:21, 14.65s/batch, batch_loss=18.5, batch

Validation:  85%|▊| 632/743 [2:37:42<27:04, 14.63s/batch, batch_loss=18.5, batch

Validation:  85%|▊| 632/743 [2:37:57<27:04, 14.63s/batch, batch_loss=13.4, batch

Validation:  85%|▊| 633/743 [2:37:57<27:14, 14.86s/batch, batch_loss=13.4, batch

Validation:  85%|▊| 633/743 [2:38:14<27:14, 14.86s/batch, batch_loss=9.93, batch

Validation:  85%|▊| 634/743 [2:38:14<27:51, 15.33s/batch, batch_loss=9.93, batch

Validation:  85%|▊| 634/743 [2:38:29<27:51, 15.33s/batch, batch_loss=7.56, batch

Validation:  85%|▊| 635/743 [2:38:29<27:26, 15.25s/batch, batch_loss=7.56, batch

Validation:  85%|▊| 635/743 [2:38:41<27:26, 15.25s/batch, batch_loss=798, batch_

Validation:  86%|▊| 636/743 [2:38:41<25:22, 14.23s/batch, batch_loss=798, batch_

Validation:  86%|▊| 636/743 [2:38:53<25:22, 14.23s/batch, batch_loss=709, batch_

Validation:  86%|▊| 637/743 [2:38:53<24:15, 13.73s/batch, batch_loss=709, batch_

Validation:  86%|▊| 637/743 [2:39:06<24:15, 13.73s/batch, batch_loss=19.6, batch

Validation:  86%|▊| 638/743 [2:39:06<23:23, 13.37s/batch, batch_loss=19.6, batch

Validation:  86%|▊| 638/743 [2:39:20<23:23, 13.37s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:39:20<23:37, 13.63s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:39:35<23:37, 13.63s/batch, batch_loss=22.2, batch

Validation:  86%|▊| 640/743 [2:39:35<23:51, 13.90s/batch, batch_loss=22.2, batch

Validation:  86%|▊| 640/743 [2:39:48<23:51, 13.90s/batch, batch_loss=29, batch_i

Validation:  86%|▊| 641/743 [2:39:48<23:27, 13.80s/batch, batch_loss=29, batch_i

Validation:  86%|▊| 641/743 [2:40:02<23:27, 13.80s/batch, batch_loss=29.1, batch

Validation:  86%|▊| 642/743 [2:40:02<23:01, 13.68s/batch, batch_loss=29.1, batch

Validation:  86%|▊| 642/743 [2:40:14<23:01, 13.68s/batch, batch_loss=1.04e+3, ba

Validation:  87%|▊| 643/743 [2:40:14<22:16, 13.37s/batch, batch_loss=1.04e+3, ba

Validation:  87%|▊| 643/743 [2:40:29<22:16, 13.37s/batch, batch_loss=18.5, batch

Validation:  87%|▊| 644/743 [2:40:29<22:58, 13.93s/batch, batch_loss=18.5, batch

Validation:  87%|▊| 644/743 [2:40:42<22:58, 13.93s/batch, batch_loss=15.6, batch

Validation:  87%|▊| 645/743 [2:40:42<22:11, 13.59s/batch, batch_loss=15.6, batch

Validation:  87%|▊| 645/743 [2:40:55<22:11, 13.59s/batch, batch_loss=6.26e+3, ba

Validation:  87%|▊| 646/743 [2:40:55<21:32, 13.32s/batch, batch_loss=6.26e+3, ba

Validation:  87%|▊| 646/743 [2:41:08<21:32, 13.32s/batch, batch_loss=16.8, batch

Validation:  87%|▊| 647/743 [2:41:08<21:13, 13.26s/batch, batch_loss=16.8, batch

Validation:  87%|▊| 647/743 [2:41:21<21:13, 13.26s/batch, batch_loss=5.67, batch

Validation:  87%|▊| 648/743 [2:41:21<20:46, 13.12s/batch, batch_loss=5.67, batch

Validation:  87%|▊| 648/743 [2:41:34<20:46, 13.12s/batch, batch_loss=8.93, batch

Validation:  87%|▊| 649/743 [2:41:34<20:40, 13.20s/batch, batch_loss=8.93, batch

Validation:  87%|▊| 649/743 [2:41:49<20:40, 13.20s/batch, batch_loss=15.2, batch

Validation:  87%|▊| 650/743 [2:41:49<21:01, 13.56s/batch, batch_loss=15.2, batch

Validation:  87%|▊| 650/743 [2:42:04<21:01, 13.56s/batch, batch_loss=23.9, batch

Validation:  88%|▉| 651/743 [2:42:04<21:23, 13.95s/batch, batch_loss=23.9, batch

Validation:  88%|▉| 651/743 [2:42:18<21:23, 13.95s/batch, batch_loss=24.6, batch

Validation:  88%|▉| 652/743 [2:42:18<21:33, 14.22s/batch, batch_loss=24.6, batch

Validation:  88%|▉| 652/743 [2:42:33<21:33, 14.22s/batch, batch_loss=14.4, batch

Validation:  88%|▉| 653/743 [2:42:33<21:22, 14.25s/batch, batch_loss=14.4, batch

Validation:  88%|▉| 653/743 [2:42:47<21:22, 14.25s/batch, batch_loss=18.1, batch

Validation:  88%|▉| 654/743 [2:42:47<21:05, 14.22s/batch, batch_loss=18.1, batch

Validation:  88%|▉| 654/743 [2:43:02<21:05, 14.22s/batch, batch_loss=27.8, batch

Validation:  88%|▉| 655/743 [2:43:02<21:07, 14.40s/batch, batch_loss=27.8, batch

Validation:  88%|▉| 655/743 [2:43:17<21:07, 14.40s/batch, batch_loss=17.9, batch

Validation:  88%|▉| 656/743 [2:43:17<21:16, 14.67s/batch, batch_loss=17.9, batch

Validation:  88%|▉| 656/743 [2:43:32<21:16, 14.67s/batch, batch_loss=13.5, batch

Validation:  88%|▉| 657/743 [2:43:32<21:12, 14.80s/batch, batch_loss=13.5, batch

Validation:  88%|▉| 657/743 [2:43:47<21:12, 14.80s/batch, batch_loss=17.2, batch

Validation:  89%|▉| 658/743 [2:43:47<20:48, 14.69s/batch, batch_loss=17.2, batch

Validation:  89%|▉| 658/743 [2:44:02<20:48, 14.69s/batch, batch_loss=21.3, batch

Validation:  89%|▉| 659/743 [2:44:02<20:41, 14.79s/batch, batch_loss=21.3, batch

Validation:  89%|▉| 659/743 [2:44:18<20:41, 14.79s/batch, batch_loss=18.9, batch

Validation:  89%|▉| 660/743 [2:44:18<21:12, 15.33s/batch, batch_loss=18.9, batch

Validation:  89%|▉| 660/743 [2:44:32<21:12, 15.33s/batch, batch_loss=18, batch_i

Validation:  89%|▉| 661/743 [2:44:32<20:12, 14.79s/batch, batch_loss=18, batch_i

Validation:  89%|▉| 661/743 [2:44:47<20:12, 14.79s/batch, batch_loss=5.97, batch

Validation:  89%|▉| 662/743 [2:44:47<20:09, 14.93s/batch, batch_loss=5.97, batch

Validation:  89%|▉| 662/743 [2:45:02<20:09, 14.93s/batch, batch_loss=3.57e+3, ba

Validation:  89%|▉| 663/743 [2:45:02<19:51, 14.90s/batch, batch_loss=3.57e+3, ba

Validation:  89%|▉| 663/743 [2:45:16<19:51, 14.90s/batch, batch_loss=17.1, batch

Validation:  89%|▉| 664/743 [2:45:16<19:27, 14.78s/batch, batch_loss=17.1, batch

Validation:  89%|▉| 664/743 [2:45:29<19:27, 14.78s/batch, batch_loss=17.3, batch

Validation:  90%|▉| 665/743 [2:45:29<18:32, 14.26s/batch, batch_loss=17.3, batch

Validation:  90%|▉| 665/743 [2:45:42<18:32, 14.26s/batch, batch_loss=12.1, batch

Validation:  90%|▉| 666/743 [2:45:42<17:42, 13.80s/batch, batch_loss=12.1, batch

Validation:  90%|▉| 666/743 [2:45:57<17:42, 13.80s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:45:57<17:45, 14.02s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:46:14<17:45, 14.02s/batch, batch_loss=18.2, batch

Validation:  90%|▉| 668/743 [2:46:14<18:45, 15.00s/batch, batch_loss=18.2, batch

Validation:  90%|▉| 668/743 [2:46:28<18:45, 15.00s/batch, batch_loss=21.9, batch

Validation:  90%|▉| 669/743 [2:46:28<18:19, 14.86s/batch, batch_loss=21.9, batch

Validation:  90%|▉| 669/743 [2:46:42<18:19, 14.86s/batch, batch_loss=24.4, batch

Validation:  90%|▉| 670/743 [2:46:42<17:48, 14.63s/batch, batch_loss=24.4, batch

Validation:  90%|▉| 670/743 [2:46:57<17:48, 14.63s/batch, batch_loss=3.1e+3, bat

Validation:  90%|▉| 671/743 [2:46:57<17:27, 14.55s/batch, batch_loss=3.1e+3, bat

Validation:  90%|▉| 671/743 [2:47:11<17:27, 14.55s/batch, batch_loss=19.7, batch

Validation:  90%|▉| 672/743 [2:47:11<17:13, 14.56s/batch, batch_loss=19.7, batch

Validation:  90%|▉| 672/743 [2:47:26<17:13, 14.56s/batch, batch_loss=14.8, batch

Validation:  91%|▉| 673/743 [2:47:26<17:10, 14.72s/batch, batch_loss=14.8, batch

Validation:  91%|▉| 673/743 [2:47:42<17:10, 14.72s/batch, batch_loss=12.4, batch

Validation:  91%|▉| 674/743 [2:47:42<17:14, 14.99s/batch, batch_loss=12.4, batch

Validation:  91%|▉| 674/743 [2:47:57<17:14, 14.99s/batch, batch_loss=21.7, batch

Validation:  91%|▉| 675/743 [2:47:57<17:00, 15.01s/batch, batch_loss=21.7, batch

Validation:  91%|▉| 675/743 [2:48:12<17:00, 15.01s/batch, batch_loss=20.3, batch

Validation:  91%|▉| 676/743 [2:48:12<16:42, 14.96s/batch, batch_loss=20.3, batch

Validation:  91%|▉| 676/743 [2:48:27<16:42, 14.96s/batch, batch_loss=24.6, batch

Validation:  91%|▉| 677/743 [2:48:27<16:28, 14.97s/batch, batch_loss=24.6, batch

Validation:  91%|▉| 677/743 [2:48:43<16:28, 14.97s/batch, batch_loss=15.8, batch

Validation:  91%|▉| 678/743 [2:48:43<16:34, 15.30s/batch, batch_loss=15.8, batch

Validation:  91%|▉| 678/743 [2:48:56<16:34, 15.30s/batch, batch_loss=13.9, batch

Validation:  91%|▉| 679/743 [2:48:56<15:37, 14.65s/batch, batch_loss=13.9, batch

Validation:  91%|▉| 679/743 [2:49:09<15:37, 14.65s/batch, batch_loss=16.1, batch

Validation:  92%|▉| 680/743 [2:49:09<14:53, 14.18s/batch, batch_loss=16.1, batch

Validation:  92%|▉| 680/743 [2:49:23<14:53, 14.18s/batch, batch_loss=20.3, batch

Validation:  92%|▉| 681/743 [2:49:23<14:34, 14.10s/batch, batch_loss=20.3, batch

Validation:  92%|▉| 681/743 [2:49:36<14:34, 14.10s/batch, batch_loss=18.5, batch

Validation:  92%|▉| 682/743 [2:49:36<13:55, 13.69s/batch, batch_loss=18.5, batch

Validation:  92%|▉| 682/743 [2:49:49<13:55, 13.69s/batch, batch_loss=21.5, batch

Validation:  92%|▉| 683/743 [2:49:49<13:23, 13.40s/batch, batch_loss=21.5, batch

Validation:  92%|▉| 683/743 [2:50:01<13:23, 13.40s/batch, batch_loss=14, batch_i

Validation:  92%|▉| 684/743 [2:50:01<12:58, 13.20s/batch, batch_loss=14, batch_i

Validation:  92%|▉| 684/743 [2:50:14<12:58, 13.20s/batch, batch_loss=13.7, batch

Validation:  92%|▉| 685/743 [2:50:14<12:34, 13.01s/batch, batch_loss=13.7, batch

Validation:  92%|▉| 685/743 [2:50:27<12:34, 13.01s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:50:27<12:24, 13.07s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:50:42<12:24, 13.07s/batch, batch_loss=22.6, batch

Validation:  92%|▉| 687/743 [2:50:42<12:48, 13.72s/batch, batch_loss=22.6, batch

Validation:  92%|▉| 687/743 [2:50:55<12:48, 13.72s/batch, batch_loss=13.2, batch

Validation:  93%|▉| 688/743 [2:50:55<12:18, 13.42s/batch, batch_loss=13.2, batch

Validation:  93%|▉| 688/743 [2:51:08<12:18, 13.42s/batch, batch_loss=15.1, batch

Validation:  93%|▉| 689/743 [2:51:08<11:50, 13.16s/batch, batch_loss=15.1, batch

Validation:  93%|▉| 689/743 [2:51:20<11:50, 13.16s/batch, batch_loss=19.1, batch

Validation:  93%|▉| 690/743 [2:51:20<11:31, 13.04s/batch, batch_loss=19.1, batch

Validation:  93%|▉| 690/743 [2:51:34<11:31, 13.04s/batch, batch_loss=13.1, batch

Validation:  93%|▉| 691/743 [2:51:34<11:21, 13.10s/batch, batch_loss=13.1, batch

Validation:  93%|▉| 691/743 [2:51:47<11:21, 13.10s/batch, batch_loss=18.6, batch

Validation:  93%|▉| 692/743 [2:51:47<11:07, 13.08s/batch, batch_loss=18.6, batch

Validation:  93%|▉| 692/743 [2:51:59<11:07, 13.08s/batch, batch_loss=21.6, batch

Validation:  93%|▉| 693/743 [2:51:59<10:48, 12.98s/batch, batch_loss=21.6, batch

Validation:  93%|▉| 693/743 [2:52:12<10:48, 12.98s/batch, batch_loss=24.7, batch

Validation:  93%|▉| 694/743 [2:52:12<10:35, 12.98s/batch, batch_loss=24.7, batch

Validation:  93%|▉| 694/743 [2:52:25<10:35, 12.98s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [2:52:25<10:21, 12.95s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [2:52:41<10:21, 12.95s/batch, batch_loss=7.81, batch

Validation:  94%|▉| 696/743 [2:52:41<10:42, 13.66s/batch, batch_loss=7.81, batch

Validation:  94%|▉| 696/743 [2:52:54<10:42, 13.66s/batch, batch_loss=34.8, batch

Validation:  94%|▉| 697/743 [2:52:54<10:17, 13.43s/batch, batch_loss=34.8, batch

Validation:  94%|▉| 697/743 [2:53:06<10:17, 13.43s/batch, batch_loss=755, batch_

Validation:  94%|▉| 698/743 [2:53:06<09:53, 13.18s/batch, batch_loss=755, batch_

Validation:  94%|▉| 698/743 [2:53:19<09:53, 13.18s/batch, batch_loss=5.8, batch_

Validation:  94%|▉| 699/743 [2:53:19<09:35, 13.08s/batch, batch_loss=5.8, batch_

Validation:  94%|▉| 699/743 [2:53:32<09:35, 13.08s/batch, batch_loss=940, batch_

Validation:  94%|▉| 700/743 [2:53:32<09:19, 13.00s/batch, batch_loss=940, batch_

Validation:  94%|▉| 700/743 [2:53:45<09:19, 13.00s/batch, batch_loss=6.97, batch

Validation:  94%|▉| 701/743 [2:53:45<09:06, 13.01s/batch, batch_loss=6.97, batch

Validation:  94%|▉| 701/743 [2:53:57<09:06, 13.01s/batch, batch_loss=7.21, batch

Validation:  94%|▉| 702/743 [2:53:57<08:42, 12.75s/batch, batch_loss=7.21, batch

Validation:  94%|▉| 702/743 [2:54:10<08:42, 12.75s/batch, batch_loss=178, batch_

Validation:  95%|▉| 703/743 [2:54:10<08:27, 12.69s/batch, batch_loss=178, batch_

Validation:  95%|▉| 703/743 [2:54:22<08:27, 12.69s/batch, batch_loss=469, batch_

Validation:  95%|▉| 704/743 [2:54:22<08:13, 12.66s/batch, batch_loss=469, batch_

Validation:  95%|▉| 704/743 [2:54:35<08:13, 12.66s/batch, batch_loss=8.71, batch

Validation:  95%|▉| 705/743 [2:54:35<08:03, 12.73s/batch, batch_loss=8.71, batch

Validation:  95%|▉| 705/743 [2:54:48<08:03, 12.73s/batch, batch_loss=18, batch_i

Validation:  95%|▉| 706/743 [2:54:48<07:49, 12.70s/batch, batch_loss=18, batch_i

Validation:  95%|▉| 706/743 [2:55:00<07:49, 12.70s/batch, batch_loss=407, batch_

Validation:  95%|▉| 707/743 [2:55:00<07:35, 12.66s/batch, batch_loss=407, batch_

Validation:  95%|▉| 707/743 [2:55:13<07:35, 12.66s/batch, batch_loss=18.4, batch

Validation:  95%|▉| 708/743 [2:55:13<07:25, 12.72s/batch, batch_loss=18.4, batch

Validation:  95%|▉| 708/743 [2:55:26<07:25, 12.72s/batch, batch_loss=24.6, batch

Validation:  95%|▉| 709/743 [2:55:26<07:15, 12.80s/batch, batch_loss=24.6, batch

Validation:  95%|▉| 709/743 [2:55:39<07:15, 12.80s/batch, batch_loss=17.7, batch

Validation:  96%|▉| 710/743 [2:55:39<06:59, 12.71s/batch, batch_loss=17.7, batch

Validation:  96%|▉| 710/743 [2:55:51<06:59, 12.71s/batch, batch_loss=12.9, batch

Validation:  96%|▉| 711/743 [2:55:51<06:44, 12.65s/batch, batch_loss=12.9, batch

Validation:  96%|▉| 711/743 [2:56:04<06:44, 12.65s/batch, batch_loss=20.4, batch

Validation:  96%|▉| 712/743 [2:56:04<06:31, 12.62s/batch, batch_loss=20.4, batch

Validation:  96%|▉| 712/743 [2:56:16<06:31, 12.62s/batch, batch_loss=16.8, batch

Validation:  96%|▉| 713/743 [2:56:16<06:18, 12.60s/batch, batch_loss=16.8, batch

Validation:  96%|▉| 713/743 [2:56:28<06:18, 12.60s/batch, batch_loss=5.64, batch

Validation:  96%|▉| 714/743 [2:56:28<06:01, 12.47s/batch, batch_loss=5.64, batch

Validation:  96%|▉| 714/743 [2:56:44<06:01, 12.47s/batch, batch_loss=10.2, batch

Validation:  96%|▉| 715/743 [2:56:44<06:12, 13.29s/batch, batch_loss=10.2, batch

Validation:  96%|▉| 715/743 [2:56:56<06:12, 13.29s/batch, batch_loss=20.1, batch

Validation:  96%|▉| 716/743 [2:56:56<05:50, 12.99s/batch, batch_loss=20.1, batch

Validation:  96%|▉| 716/743 [2:57:08<05:50, 12.99s/batch, batch_loss=384, batch_

Validation:  97%|▉| 717/743 [2:57:08<05:33, 12.81s/batch, batch_loss=384, batch_

Validation:  97%|▉| 717/743 [2:57:21<05:33, 12.81s/batch, batch_loss=17, batch_i

Validation:  97%|▉| 718/743 [2:57:21<05:21, 12.86s/batch, batch_loss=17, batch_i

Validation:  97%|▉| 718/743 [2:57:34<05:21, 12.86s/batch, batch_loss=14, batch_i

Validation:  97%|▉| 719/743 [2:57:34<05:09, 12.89s/batch, batch_loss=14, batch_i

Validation:  97%|▉| 719/743 [2:57:47<05:09, 12.89s/batch, batch_loss=14.8, batch

Validation:  97%|▉| 720/743 [2:57:47<04:55, 12.85s/batch, batch_loss=14.8, batch

Validation:  97%|▉| 720/743 [2:58:00<04:55, 12.85s/batch, batch_loss=12.3, batch

Validation:  97%|▉| 721/743 [2:58:00<04:42, 12.84s/batch, batch_loss=12.3, batch

Validation:  97%|▉| 721/743 [2:58:12<04:42, 12.84s/batch, batch_loss=23.8, batch

Validation:  97%|▉| 722/743 [2:58:12<04:27, 12.76s/batch, batch_loss=23.8, batch

Validation:  97%|▉| 722/743 [2:58:25<04:27, 12.76s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [2:58:25<04:14, 12.72s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [2:58:38<04:14, 12.72s/batch, batch_loss=20.3, batch

Validation:  97%|▉| 724/743 [2:58:38<04:02, 12.78s/batch, batch_loss=20.3, batch

Validation:  97%|▉| 724/743 [2:58:50<04:02, 12.78s/batch, batch_loss=14.9, batch

Validation:  98%|▉| 725/743 [2:58:50<03:48, 12.72s/batch, batch_loss=14.9, batch

Validation:  98%|▉| 725/743 [2:59:03<03:48, 12.72s/batch, batch_loss=18.7, batch

Validation:  98%|▉| 726/743 [2:59:03<03:35, 12.69s/batch, batch_loss=18.7, batch

Validation:  98%|▉| 726/743 [2:59:17<03:35, 12.69s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:59:17<03:29, 13.10s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:59:34<03:29, 13.10s/batch, batch_loss=23.6, batch

Validation:  98%|▉| 728/743 [2:59:34<03:33, 14.26s/batch, batch_loss=23.6, batch

Validation:  98%|▉| 728/743 [2:59:51<03:33, 14.26s/batch, batch_loss=38.8, batch

Validation:  98%|▉| 729/743 [2:59:51<03:32, 15.18s/batch, batch_loss=38.8, batch

Validation:  98%|▉| 729/743 [3:00:09<03:32, 15.18s/batch, batch_loss=23, batch_i

Validation:  98%|▉| 730/743 [3:00:09<03:26, 15.92s/batch, batch_loss=23, batch_i

Validation:  98%|▉| 730/743 [3:00:42<03:26, 15.92s/batch, batch_loss=14.1, batch

Validation:  98%|▉| 731/743 [3:00:42<04:13, 21.15s/batch, batch_loss=14.1, batch

Validation:  98%|▉| 731/743 [3:01:17<04:13, 21.15s/batch, batch_loss=8.72, batch

Validation:  99%|▉| 732/743 [3:01:17<04:38, 25.30s/batch, batch_loss=8.72, batch

Validation:  99%|▉| 732/743 [3:01:52<04:38, 25.30s/batch, batch_loss=25.8, batch

Validation:  99%|▉| 733/743 [3:01:52<04:40, 28.02s/batch, batch_loss=25.8, batch

Validation:  99%|▉| 733/743 [3:02:28<04:40, 28.02s/batch, batch_loss=3.32, batch

Validation:  99%|▉| 734/743 [3:02:28<04:33, 30.37s/batch, batch_loss=3.32, batch

Validation:  99%|▉| 734/743 [3:03:03<04:33, 30.37s/batch, batch_loss=6.9, batch_

Validation:  99%|▉| 735/743 [3:03:03<04:14, 31.82s/batch, batch_loss=6.9, batch_

Validation:  99%|▉| 735/743 [3:03:37<04:14, 31.82s/batch, batch_loss=1.18, batch

Validation:  99%|▉| 736/743 [3:03:37<03:46, 32.39s/batch, batch_loss=1.18, batch

Validation:  99%|▉| 736/743 [3:04:10<03:46, 32.39s/batch, batch_loss=0.0412, bat

Validation:  99%|▉| 737/743 [3:04:10<03:16, 32.81s/batch, batch_loss=0.0412, bat

Validation:  99%|▉| 737/743 [3:04:44<03:16, 32.81s/batch, batch_loss=0.0412, bat

Validation:  99%|▉| 738/743 [3:04:44<02:45, 33.12s/batch, batch_loss=0.0412, bat

Validation:  99%|▉| 738/743 [3:05:18<02:45, 33.12s/batch, batch_loss=0.0412, bat

Validation:  99%|▉| 739/743 [3:05:18<02:13, 33.27s/batch, batch_loss=0.0412, bat

Validation:  99%|▉| 739/743 [3:05:51<02:13, 33.27s/batch, batch_loss=0.0412, bat

Validation: 100%|▉| 740/743 [3:05:51<01:40, 33.34s/batch, batch_loss=0.0412, bat

Validation: 100%|▉| 740/743 [3:06:24<01:40, 33.34s/batch, batch_loss=0.0412, bat

Validation: 100%|▉| 741/743 [3:06:24<01:06, 33.04s/batch, batch_loss=0.0412, bat

Validation: 100%|▉| 741/743 [3:06:35<01:06, 33.04s/batch, batch_loss=0.0412, bat

Validation: 100%|▉| 742/743 [3:06:35<00:26, 26.61s/batch, batch_loss=0.0412, bat

Validation: 100%|▉| 742/743 [3:06:47<00:26, 26.61s/batch, batch_loss=0.0401, bat

Validation: 100%|█| 743/743 [3:06:47<00:00, 22.07s/batch, batch_loss=0.0401, bat

Validation: 100%|█| 743/743 [3:06:47<00:00, 15.08s/batch, batch_loss=0.0401, bat




Val Loss: 1295.9433


Epoch 7/10:   0%|                                    | 0/991 [00:00<?, ?batch/s]

Epoch 7/10:   0%| | 0/991 [00:14<?, ?batch/s, batch_loss=18.3, batch_index=1, ba

Epoch 7/10:   0%| | 1/991 [00:14<3:54:42, 14.23s/batch, batch_loss=18.3, batch_i

Epoch 7/10:   0%| | 1/991 [00:27<3:54:42, 14.23s/batch, batch_loss=18, batch_ind

Epoch 7/10:   0%| | 2/991 [00:27<3:47:22, 13.79s/batch, batch_loss=18, batch_ind

Epoch 7/10:   0%| | 2/991 [00:41<3:47:22, 13.79s/batch, batch_loss=10.8, batch_i

Epoch 7/10:   0%| | 3/991 [00:41<3:44:08, 13.61s/batch, batch_loss=10.8, batch_i

Epoch 7/10:   0%| | 3/991 [00:53<3:44:08, 13.61s/batch, batch_loss=6.84, batch_i

Epoch 7/10:   0%| | 4/991 [00:53<3:39:11, 13.32s/batch, batch_loss=6.84, batch_i

Epoch 7/10:   0%| | 4/991 [01:07<3:39:11, 13.32s/batch, batch_loss=23.9, batch_i

Epoch 7/10:   1%| | 5/991 [01:07<3:41:12, 13.46s/batch, batch_loss=23.9, batch_i

Epoch 7/10:   1%| | 5/991 [01:21<3:41:12, 13.46s/batch, batch_loss=21, batch_ind

Epoch 7/10:   1%| | 6/991 [01:21<3:40:20, 13.42s/batch, batch_loss=21, batch_ind

Epoch 7/10:   1%| | 6/991 [01:35<3:40:20, 13.42s/batch, batch_loss=18.1, batch_i

Epoch 7/10:   1%| | 7/991 [01:35<3:46:42, 13.82s/batch, batch_loss=18.1, batch_i

Epoch 7/10:   1%| | 7/991 [01:51<3:46:42, 13.82s/batch, batch_loss=603, batch_in

Epoch 7/10:   1%| | 8/991 [01:51<3:56:23, 14.43s/batch, batch_loss=603, batch_in

Epoch 7/10:   1%| | 8/991 [02:06<3:56:23, 14.43s/batch, batch_loss=14.2, batch_i

Epoch 7/10:   1%| | 9/991 [02:06<4:01:38, 14.76s/batch, batch_loss=14.2, batch_i

Epoch 7/10:   1%| | 9/991 [02:22<4:01:38, 14.76s/batch, batch_loss=16.6, batch_i

Epoch 7/10:   1%| | 10/991 [02:22<4:04:15, 14.94s/batch, batch_loss=16.6, batch_

Epoch 7/10:   1%| | 10/991 [02:38<4:04:15, 14.94s/batch, batch_loss=10.6, batch_

Epoch 7/10:   1%| | 11/991 [02:38<4:10:07, 15.31s/batch, batch_loss=10.6, batch_

Epoch 7/10:   1%| | 11/991 [02:53<4:10:07, 15.31s/batch, batch_loss=1.99e+3, bat

Epoch 7/10:   1%| | 12/991 [02:53<4:11:09, 15.39s/batch, batch_loss=1.99e+3, bat

Epoch 7/10:   1%| | 12/991 [03:10<4:11:09, 15.39s/batch, batch_loss=18.3, batch_

Epoch 7/10:   1%| | 13/991 [03:10<4:14:26, 15.61s/batch, batch_loss=18.3, batch_

Epoch 7/10:   1%| | 13/991 [03:25<4:14:26, 15.61s/batch, batch_loss=10.6, batch_

Epoch 7/10:   1%| | 14/991 [03:25<4:14:26, 15.63s/batch, batch_loss=10.6, batch_

Epoch 7/10:   1%| | 14/991 [03:41<4:14:26, 15.63s/batch, batch_loss=10.1, batch_

Epoch 7/10:   2%| | 15/991 [03:41<4:17:06, 15.81s/batch, batch_loss=10.1, batch_

Epoch 7/10:   2%| | 15/991 [03:57<4:17:06, 15.81s/batch, batch_loss=12.4, batch_

Epoch 7/10:   2%| | 16/991 [03:57<4:17:25, 15.84s/batch, batch_loss=12.4, batch_

Epoch 7/10:   2%| | 16/991 [04:12<4:17:25, 15.84s/batch, batch_loss=12.9, batch_

Epoch 7/10:   2%| | 17/991 [04:12<4:11:59, 15.52s/batch, batch_loss=12.9, batch_

Epoch 7/10:   2%| | 17/991 [04:28<4:11:59, 15.52s/batch, batch_loss=9.1, batch_i

Epoch 7/10:   2%| | 18/991 [04:28<4:13:02, 15.60s/batch, batch_loss=9.1, batch_i

Epoch 7/10:   2%| | 18/991 [04:43<4:13:02, 15.60s/batch, batch_loss=8.96e+3, bat

Epoch 7/10:   2%| | 19/991 [04:43<4:09:41, 15.41s/batch, batch_loss=8.96e+3, bat

Epoch 7/10:   2%| | 19/991 [04:59<4:09:41, 15.41s/batch, batch_loss=10.7, batch_

Epoch 7/10:   2%| | 20/991 [04:59<4:14:49, 15.75s/batch, batch_loss=10.7, batch_

Epoch 7/10:   2%| | 20/991 [05:15<4:14:49, 15.75s/batch, batch_loss=16.5, batch_

Epoch 7/10:   2%| | 21/991 [05:15<4:12:40, 15.63s/batch, batch_loss=16.5, batch_

Epoch 7/10:   2%| | 21/991 [05:30<4:12:40, 15.63s/batch, batch_loss=1.07e+3, bat

Epoch 7/10:   2%| | 22/991 [05:30<4:09:26, 15.45s/batch, batch_loss=1.07e+3, bat

Epoch 7/10:   2%| | 22/991 [05:46<4:09:26, 15.45s/batch, batch_loss=7.07, batch_

Epoch 7/10:   2%| | 23/991 [05:46<4:13:08, 15.69s/batch, batch_loss=7.07, batch_

Epoch 7/10:   2%| | 23/991 [06:02<4:13:08, 15.69s/batch, batch_loss=13.4, batch_

Epoch 7/10:   2%| | 24/991 [06:02<4:16:00, 15.88s/batch, batch_loss=13.4, batch_

Epoch 7/10:   2%| | 24/991 [06:18<4:16:00, 15.88s/batch, batch_loss=12.7, batch_

Epoch 7/10:   3%| | 25/991 [06:18<4:12:25, 15.68s/batch, batch_loss=12.7, batch_

Epoch 7/10:   3%| | 25/991 [06:33<4:12:25, 15.68s/batch, batch_loss=19, batch_in

Epoch 7/10:   3%| | 26/991 [06:33<4:09:46, 15.53s/batch, batch_loss=19, batch_in

Epoch 7/10:   3%| | 26/991 [06:48<4:09:46, 15.53s/batch, batch_loss=15.7, batch_

Epoch 7/10:   3%| | 27/991 [06:48<4:07:49, 15.43s/batch, batch_loss=15.7, batch_

Epoch 7/10:   3%| | 27/991 [07:02<4:07:49, 15.43s/batch, batch_loss=1.12e+3, bat

Epoch 7/10:   3%| | 28/991 [07:02<4:01:21, 15.04s/batch, batch_loss=1.12e+3, bat

Epoch 7/10:   3%| | 28/991 [07:17<4:01:21, 15.04s/batch, batch_loss=11.8, batch_

Epoch 7/10:   3%| | 29/991 [07:17<4:01:52, 15.09s/batch, batch_loss=11.8, batch_

Epoch 7/10:   3%| | 29/991 [07:35<4:01:52, 15.09s/batch, batch_loss=10.6, batch_

Epoch 7/10:   3%| | 30/991 [07:35<4:16:15, 16.00s/batch, batch_loss=10.6, batch_

Epoch 7/10:   3%| | 30/991 [07:51<4:16:15, 16.00s/batch, batch_loss=9.44, batch_

Epoch 7/10:   3%| | 31/991 [07:51<4:13:57, 15.87s/batch, batch_loss=9.44, batch_

Epoch 7/10:   3%| | 31/991 [08:08<4:13:57, 15.87s/batch, batch_loss=1.3e+4, batc

Epoch 7/10:   3%| | 32/991 [08:08<4:16:37, 16.06s/batch, batch_loss=1.3e+4, batc

Epoch 7/10:   3%| | 32/991 [08:27<4:16:37, 16.06s/batch, batch_loss=12.5, batch_

Epoch 7/10:   3%| | 33/991 [08:27<4:32:38, 17.08s/batch, batch_loss=12.5, batch_

Epoch 7/10:   3%| | 33/991 [08:42<4:32:38, 17.08s/batch, batch_loss=8.66, batch_

Epoch 7/10:   3%| | 34/991 [08:42<4:23:03, 16.49s/batch, batch_loss=8.66, batch_

Epoch 7/10:   3%| | 34/991 [08:56<4:23:03, 16.49s/batch, batch_loss=12.8, batch_

Epoch 7/10:   4%| | 35/991 [08:56<4:12:37, 15.85s/batch, batch_loss=12.8, batch_

Epoch 7/10:   4%| | 35/991 [09:12<4:12:37, 15.85s/batch, batch_loss=10.1, batch_

Epoch 7/10:   4%| | 36/991 [09:12<4:11:56, 15.83s/batch, batch_loss=10.1, batch_

Epoch 7/10:   4%| | 36/991 [09:30<4:11:56, 15.83s/batch, batch_loss=8.66, batch_

Epoch 7/10:   4%| | 37/991 [09:30<4:18:40, 16.27s/batch, batch_loss=8.66, batch_

Epoch 7/10:   4%| | 37/991 [09:45<4:18:40, 16.27s/batch, batch_loss=7.28, batch_

Epoch 7/10:   4%| | 38/991 [09:45<4:15:34, 16.09s/batch, batch_loss=7.28, batch_

Epoch 7/10:   4%| | 38/991 [10:00<4:15:34, 16.09s/batch, batch_loss=1.72e+3, bat

Epoch 7/10:   4%| | 39/991 [10:00<4:10:54, 15.81s/batch, batch_loss=1.72e+3, bat

Epoch 7/10:   4%| | 39/991 [10:16<4:10:54, 15.81s/batch, batch_loss=12.2, batch_

Epoch 7/10:   4%| | 40/991 [10:16<4:11:22, 15.86s/batch, batch_loss=12.2, batch_

Epoch 7/10:   4%| | 40/991 [10:32<4:11:22, 15.86s/batch, batch_loss=6.22e+3, bat

Epoch 7/10:   4%| | 41/991 [10:32<4:08:08, 15.67s/batch, batch_loss=6.22e+3, bat

Epoch 7/10:   4%| | 41/991 [10:48<4:08:08, 15.67s/batch, batch_loss=14.3, batch_

Epoch 7/10:   4%| | 42/991 [10:48<4:09:27, 15.77s/batch, batch_loss=14.3, batch_

Epoch 7/10:   4%| | 42/991 [11:03<4:09:27, 15.77s/batch, batch_loss=10.1, batch_

Epoch 7/10:   4%| | 43/991 [11:03<4:08:44, 15.74s/batch, batch_loss=10.1, batch_

Epoch 7/10:   4%| | 43/991 [11:20<4:08:44, 15.74s/batch, batch_loss=15, batch_in

Epoch 7/10:   4%| | 44/991 [11:20<4:14:29, 16.12s/batch, batch_loss=15, batch_in

Epoch 7/10:   4%| | 44/991 [11:36<4:14:29, 16.12s/batch, batch_loss=16.5, batch_

Epoch 7/10:   5%| | 45/991 [11:36<4:14:12, 16.12s/batch, batch_loss=16.5, batch_

Epoch 7/10:   5%| | 45/991 [11:52<4:14:12, 16.12s/batch, batch_loss=13.4, batch_

Epoch 7/10:   5%| | 46/991 [11:52<4:12:55, 16.06s/batch, batch_loss=13.4, batch_

Epoch 7/10:   5%| | 46/991 [12:09<4:12:55, 16.06s/batch, batch_loss=6.18, batch_

Epoch 7/10:   5%| | 47/991 [12:09<4:13:56, 16.14s/batch, batch_loss=6.18, batch_

Epoch 7/10:   5%| | 47/991 [12:26<4:13:56, 16.14s/batch, batch_loss=12.5, batch_

Epoch 7/10:   5%| | 48/991 [12:26<4:18:48, 16.47s/batch, batch_loss=12.5, batch_

Epoch 7/10:   5%| | 48/991 [12:42<4:18:48, 16.47s/batch, batch_loss=11.7, batch_

Epoch 7/10:   5%| | 49/991 [12:42<4:14:42, 16.22s/batch, batch_loss=11.7, batch_

Epoch 7/10:   5%| | 49/991 [12:58<4:14:42, 16.22s/batch, batch_loss=14.7, batch_

Epoch 7/10:   5%| | 50/991 [12:58<4:13:43, 16.18s/batch, batch_loss=14.7, batch_

Epoch 7/10:   5%| | 50/991 [13:13<4:13:43, 16.18s/batch, batch_loss=8.3, batch_i

Epoch 7/10:   5%| | 51/991 [13:13<4:08:46, 15.88s/batch, batch_loss=8.3, batch_i

Epoch 7/10:   5%| | 51/991 [13:28<4:08:46, 15.88s/batch, batch_loss=13.8, batch_

Epoch 7/10:   5%| | 52/991 [13:28<4:03:26, 15.56s/batch, batch_loss=13.8, batch_

Epoch 7/10:   5%| | 52/991 [13:43<4:03:26, 15.56s/batch, batch_loss=12.2, batch_

Epoch 7/10:   5%| | 53/991 [13:43<4:03:08, 15.55s/batch, batch_loss=12.2, batch_

Epoch 7/10:   5%| | 53/991 [14:02<4:03:08, 15.55s/batch, batch_loss=9.01, batch_

Epoch 7/10:   5%| | 54/991 [14:02<4:17:00, 16.46s/batch, batch_loss=9.01, batch_

Epoch 7/10:   5%| | 54/991 [14:18<4:17:00, 16.46s/batch, batch_loss=9.46, batch_

Epoch 7/10:   6%| | 55/991 [14:18<4:16:47, 16.46s/batch, batch_loss=9.46, batch_

Epoch 7/10:   6%| | 55/991 [14:34<4:16:47, 16.46s/batch, batch_loss=10.3, batch_

Epoch 7/10:   6%| | 56/991 [14:34<4:15:26, 16.39s/batch, batch_loss=10.3, batch_

Epoch 7/10:   6%| | 56/991 [14:50<4:15:26, 16.39s/batch, batch_loss=6.76, batch_

Epoch 7/10:   6%| | 57/991 [14:50<4:09:47, 16.05s/batch, batch_loss=6.76, batch_

Epoch 7/10:   6%| | 57/991 [15:05<4:09:47, 16.05s/batch, batch_loss=19.8, batch_

Epoch 7/10:   6%| | 58/991 [15:05<4:07:02, 15.89s/batch, batch_loss=19.8, batch_

Epoch 7/10:   6%| | 58/991 [15:23<4:07:02, 15.89s/batch, batch_loss=9.82, batch_

Epoch 7/10:   6%| | 59/991 [15:23<4:14:51, 16.41s/batch, batch_loss=9.82, batch_

Epoch 7/10:   6%| | 59/991 [15:38<4:14:51, 16.41s/batch, batch_loss=15.7, batch_

Epoch 7/10:   6%| | 60/991 [15:38<4:10:36, 16.15s/batch, batch_loss=15.7, batch_

Epoch 7/10:   6%| | 60/991 [15:55<4:10:36, 16.15s/batch, batch_loss=10.6, batch_

Epoch 7/10:   6%| | 61/991 [15:55<4:11:27, 16.22s/batch, batch_loss=10.6, batch_

Epoch 7/10:   6%| | 61/991 [16:11<4:11:27, 16.22s/batch, batch_loss=11.2, batch_

Epoch 7/10:   6%| | 62/991 [16:11<4:10:15, 16.16s/batch, batch_loss=11.2, batch_

Epoch 7/10:   6%| | 62/991 [16:28<4:10:15, 16.16s/batch, batch_loss=420, batch_i

Epoch 7/10:   6%| | 63/991 [16:28<4:13:05, 16.36s/batch, batch_loss=420, batch_i

Epoch 7/10:   6%| | 63/991 [16:43<4:13:05, 16.36s/batch, batch_loss=794, batch_i

Epoch 7/10:   6%| | 64/991 [16:43<4:08:56, 16.11s/batch, batch_loss=794, batch_i

Epoch 7/10:   6%| | 64/991 [16:59<4:08:56, 16.11s/batch, batch_loss=2.55e+3, bat

Epoch 7/10:   7%| | 65/991 [16:59<4:08:15, 16.09s/batch, batch_loss=2.55e+3, bat

Epoch 7/10:   7%| | 65/991 [17:15<4:08:15, 16.09s/batch, batch_loss=3.8, batch_i

Epoch 7/10:   7%| | 66/991 [17:15<4:08:55, 16.15s/batch, batch_loss=3.8, batch_i

Epoch 7/10:   7%| | 66/991 [17:31<4:08:55, 16.15s/batch, batch_loss=10.4, batch_

Epoch 7/10:   7%| | 67/991 [17:31<4:07:26, 16.07s/batch, batch_loss=10.4, batch_

Epoch 7/10:   7%| | 67/991 [17:46<4:07:26, 16.07s/batch, batch_loss=8.23, batch_

Epoch 7/10:   7%| | 68/991 [17:46<4:00:26, 15.63s/batch, batch_loss=8.23, batch_

Epoch 7/10:   7%| | 68/991 [18:00<4:00:26, 15.63s/batch, batch_loss=16.4, batch_

Epoch 7/10:   7%| | 69/991 [18:00<3:55:12, 15.31s/batch, batch_loss=16.4, batch_

Epoch 7/10:   7%| | 69/991 [18:18<3:55:12, 15.31s/batch, batch_loss=8.09, batch_

Epoch 7/10:   7%| | 70/991 [18:18<4:03:57, 15.89s/batch, batch_loss=8.09, batch_

Epoch 7/10:   7%| | 70/991 [18:33<4:03:57, 15.89s/batch, batch_loss=12.2, batch_

Epoch 7/10:   7%| | 71/991 [18:33<3:59:27, 15.62s/batch, batch_loss=12.2, batch_

Epoch 7/10:   7%| | 71/991 [18:48<3:59:27, 15.62s/batch, batch_loss=13.3, batch_

Epoch 7/10:   7%| | 72/991 [18:48<3:58:49, 15.59s/batch, batch_loss=13.3, batch_

Epoch 7/10:   7%| | 72/991 [19:04<3:58:49, 15.59s/batch, batch_loss=23.9, batch_

Epoch 7/10:   7%| | 73/991 [19:04<3:59:42, 15.67s/batch, batch_loss=23.9, batch_

Epoch 7/10:   7%| | 73/991 [19:19<3:59:42, 15.67s/batch, batch_loss=1.74e+3, bat

Epoch 7/10:   7%| | 74/991 [19:19<3:57:06, 15.51s/batch, batch_loss=1.74e+3, bat

Epoch 7/10:   7%| | 74/991 [19:35<3:57:06, 15.51s/batch, batch_loss=15, batch_in

Epoch 7/10:   8%| | 75/991 [19:35<3:55:48, 15.45s/batch, batch_loss=15, batch_in

Epoch 7/10:   8%| | 75/991 [19:50<3:55:48, 15.45s/batch, batch_loss=11.2, batch_

Epoch 7/10:   8%| | 76/991 [19:50<3:54:31, 15.38s/batch, batch_loss=11.2, batch_

Epoch 7/10:   8%| | 76/991 [20:05<3:54:31, 15.38s/batch, batch_loss=11.2, batch_

Epoch 7/10:   8%| | 77/991 [20:05<3:52:50, 15.29s/batch, batch_loss=11.2, batch_

Epoch 7/10:   8%| | 77/991 [20:20<3:52:50, 15.29s/batch, batch_loss=13.2, batch_

Epoch 7/10:   8%| | 78/991 [20:20<3:50:53, 15.17s/batch, batch_loss=13.2, batch_

Epoch 7/10:   8%| | 78/991 [20:35<3:50:53, 15.17s/batch, batch_loss=9.15, batch_

Epoch 7/10:   8%| | 79/991 [20:35<3:52:46, 15.31s/batch, batch_loss=9.15, batch_

Epoch 7/10:   8%| | 79/991 [20:51<3:52:46, 15.31s/batch, batch_loss=11.3, batch_

Epoch 7/10:   8%| | 80/991 [20:51<3:53:15, 15.36s/batch, batch_loss=11.3, batch_

Epoch 7/10:   8%| | 80/991 [21:07<3:53:15, 15.36s/batch, batch_loss=14.6, batch_

Epoch 7/10:   8%| | 81/991 [21:07<3:55:43, 15.54s/batch, batch_loss=14.6, batch_

Epoch 7/10:   8%| | 81/991 [21:22<3:55:43, 15.54s/batch, batch_loss=12.2, batch_

Epoch 7/10:   8%| | 82/991 [21:22<3:54:11, 15.46s/batch, batch_loss=12.2, batch_

Epoch 7/10:   8%| | 82/991 [21:37<3:54:11, 15.46s/batch, batch_loss=8.5, batch_i

Epoch 7/10:   8%| | 83/991 [21:37<3:51:48, 15.32s/batch, batch_loss=8.5, batch_i

Epoch 7/10:   8%| | 83/991 [21:50<3:51:48, 15.32s/batch, batch_loss=10.6, batch_

Epoch 7/10:   8%| | 84/991 [21:50<3:41:58, 14.68s/batch, batch_loss=10.6, batch_

Epoch 7/10:   8%| | 84/991 [22:04<3:41:58, 14.68s/batch, batch_loss=9.01, batch_

Epoch 7/10:   9%| | 85/991 [22:04<3:39:04, 14.51s/batch, batch_loss=9.01, batch_

Epoch 7/10:   9%| | 85/991 [22:20<3:39:04, 14.51s/batch, batch_loss=12.3, batch_

Epoch 7/10:   9%| | 86/991 [22:20<3:44:56, 14.91s/batch, batch_loss=12.3, batch_

Epoch 7/10:   9%| | 86/991 [22:36<3:44:56, 14.91s/batch, batch_loss=10.4, batch_

Epoch 7/10:   9%| | 87/991 [22:36<3:46:58, 15.06s/batch, batch_loss=10.4, batch_

Epoch 7/10:   9%| | 87/991 [23:00<3:46:58, 15.06s/batch, batch_loss=10.3, batch_

Epoch 7/10:   9%| | 88/991 [23:00<4:28:36, 17.85s/batch, batch_loss=10.3, batch_

Epoch 7/10:   9%| | 88/991 [23:34<4:28:36, 17.85s/batch, batch_loss=5.8, batch_i

Epoch 7/10:   9%| | 89/991 [23:34<5:42:33, 22.79s/batch, batch_loss=5.8, batch_i

Epoch 7/10:   9%| | 89/991 [24:08<5:42:33, 22.79s/batch, batch_loss=247, batch_i

Epoch 7/10:   9%| | 90/991 [24:08<6:31:44, 26.09s/batch, batch_loss=247, batch_i

Epoch 7/10:   9%| | 90/991 [24:41<6:31:44, 26.09s/batch, batch_loss=1.54e+3, bat

Epoch 7/10:   9%| | 91/991 [24:41<7:02:09, 28.14s/batch, batch_loss=1.54e+3, bat

Epoch 7/10:   9%| | 91/991 [25:16<7:02:09, 28.14s/batch, batch_loss=18.1, batch_

Epoch 7/10:   9%| | 92/991 [25:16<7:31:06, 30.11s/batch, batch_loss=18.1, batch_

Epoch 7/10:   9%| | 92/991 [25:50<7:31:06, 30.11s/batch, batch_loss=19.8, batch_

Epoch 7/10:   9%| | 93/991 [25:50<7:47:28, 31.23s/batch, batch_loss=19.8, batch_

Epoch 7/10:   9%| | 93/991 [26:22<7:47:28, 31.23s/batch, batch_loss=17.2, batch_

Epoch 7/10:   9%| | 94/991 [26:22<7:51:42, 31.55s/batch, batch_loss=17.2, batch_

Epoch 7/10:   9%| | 94/991 [26:54<7:51:42, 31.55s/batch, batch_loss=18.2, batch_

Epoch 7/10:  10%| | 95/991 [26:54<7:53:30, 31.71s/batch, batch_loss=18.2, batch_

Epoch 7/10:  10%| | 95/991 [27:27<7:53:30, 31.71s/batch, batch_loss=17.3, batch_

Epoch 7/10:  10%| | 96/991 [27:27<7:57:18, 32.00s/batch, batch_loss=17.3, batch_

Epoch 7/10:  10%| | 96/991 [27:59<7:57:18, 32.00s/batch, batch_loss=19.3, batch_

Epoch 7/10:  10%| | 97/991 [27:59<8:00:27, 32.25s/batch, batch_loss=19.3, batch_

Epoch 7/10:  10%| | 97/991 [28:33<8:00:27, 32.25s/batch, batch_loss=14.8, batch_

Epoch 7/10:  10%| | 98/991 [28:33<8:04:41, 32.57s/batch, batch_loss=14.8, batch_

Epoch 7/10:  10%| | 98/991 [29:06<8:04:41, 32.57s/batch, batch_loss=18.6, batch_

Epoch 7/10:  10%| | 99/991 [29:06<8:09:16, 32.91s/batch, batch_loss=18.6, batch_

Epoch 7/10:  10%| | 99/991 [29:41<8:09:16, 32.91s/batch, batch_loss=16.5, batch_

Epoch 7/10:  10%| | 100/991 [29:41<8:14:15, 33.28s/batch, batch_loss=16.5, batch

Epoch 7/10:  10%| | 100/991 [30:15<8:14:15, 33.28s/batch, batch_loss=13.5, batch

Epoch 7/10:  10%| | 101/991 [30:15<8:18:18, 33.59s/batch, batch_loss=13.5, batch

Epoch 7/10:  10%| | 101/991 [30:49<8:18:18, 33.59s/batch, batch_loss=18, batch_i

Epoch 7/10:  10%| | 102/991 [30:49<8:18:59, 33.68s/batch, batch_loss=18, batch_i

Epoch 7/10:  10%| | 102/991 [31:21<8:18:59, 33.68s/batch, batch_loss=907, batch_

Epoch 7/10:  10%| | 103/991 [31:21<8:13:41, 33.36s/batch, batch_loss=907, batch_

Epoch 7/10:  10%| | 103/991 [31:56<8:13:41, 33.36s/batch, batch_loss=14.8, batch

Epoch 7/10:  10%| | 104/991 [31:56<8:19:13, 33.77s/batch, batch_loss=14.8, batch

Epoch 7/10:  10%| | 104/991 [32:30<8:19:13, 33.77s/batch, batch_loss=10.1, batch

Epoch 7/10:  11%| | 105/991 [32:30<8:20:08, 33.87s/batch, batch_loss=10.1, batch

Epoch 7/10:  11%| | 105/991 [33:02<8:20:08, 33.87s/batch, batch_loss=11.3, batch

Epoch 7/10:  11%| | 106/991 [33:02<8:10:03, 33.22s/batch, batch_loss=11.3, batch

Epoch 7/10:  11%| | 106/991 [33:35<8:10:03, 33.22s/batch, batch_loss=15.6, batch

Epoch 7/10:  11%| | 107/991 [33:35<8:09:09, 33.20s/batch, batch_loss=15.6, batch

Epoch 7/10:  11%| | 107/991 [34:10<8:09:09, 33.20s/batch, batch_loss=25.3, batch

Epoch 7/10:  11%| | 108/991 [34:10<8:15:48, 33.69s/batch, batch_loss=25.3, batch

Epoch 7/10:  11%| | 108/991 [34:44<8:15:48, 33.69s/batch, batch_loss=15.2, batch

Epoch 7/10:  11%| | 109/991 [34:44<8:14:39, 33.65s/batch, batch_loss=15.2, batch

Epoch 7/10:  11%| | 109/991 [35:17<8:14:39, 33.65s/batch, batch_loss=17.3, batch

Epoch 7/10:  11%| | 110/991 [35:17<8:12:50, 33.56s/batch, batch_loss=17.3, batch

Epoch 7/10:  11%| | 110/991 [35:50<8:12:50, 33.56s/batch, batch_loss=16, batch_i

Epoch 7/10:  11%| | 111/991 [35:50<8:09:33, 33.38s/batch, batch_loss=16, batch_i

Epoch 7/10:  11%| | 111/991 [36:23<8:09:33, 33.38s/batch, batch_loss=18.6, batch

Epoch 7/10:  11%| | 112/991 [36:23<8:06:59, 33.24s/batch, batch_loss=18.6, batch

Epoch 7/10:  11%| | 112/991 [36:56<8:06:59, 33.24s/batch, batch_loss=8.92, batch

Epoch 7/10:  11%| | 113/991 [36:56<8:07:33, 33.32s/batch, batch_loss=8.92, batch

Epoch 7/10:  11%| | 113/991 [37:31<8:07:33, 33.32s/batch, batch_loss=14.3, batch

Epoch 7/10:  12%| | 114/991 [37:31<8:14:45, 33.85s/batch, batch_loss=14.3, batch

Epoch 7/10:  12%| | 114/991 [38:05<8:14:45, 33.85s/batch, batch_loss=19.2, batch

Epoch 7/10:  12%| | 115/991 [38:05<8:15:35, 33.94s/batch, batch_loss=19.2, batch

Epoch 7/10:  12%| | 115/991 [38:43<8:15:35, 33.94s/batch, batch_loss=10.1, batch

Epoch 7/10:  12%| | 116/991 [38:43<8:31:02, 35.04s/batch, batch_loss=10.1, batch

Epoch 7/10:  12%| | 116/991 [39:23<8:31:02, 35.04s/batch, batch_loss=16.5, batch

Epoch 7/10:  12%| | 117/991 [39:23<8:53:02, 36.59s/batch, batch_loss=16.5, batch

Epoch 7/10:  12%| | 117/991 [39:55<8:53:02, 36.59s/batch, batch_loss=15.6, batch

Epoch 7/10:  12%| | 118/991 [39:55<8:33:04, 35.26s/batch, batch_loss=15.6, batch

Epoch 7/10:  12%| | 118/991 [40:29<8:33:04, 35.26s/batch, batch_loss=25, batch_i

Epoch 7/10:  12%| | 119/991 [40:29<8:25:21, 34.77s/batch, batch_loss=25, batch_i

Epoch 7/10:  12%| | 119/991 [41:03<8:25:21, 34.77s/batch, batch_loss=16.1, batch

Epoch 7/10:  12%| | 120/991 [41:03<8:18:59, 34.37s/batch, batch_loss=16.1, batch

Epoch 7/10:  12%| | 120/991 [41:35<8:18:59, 34.37s/batch, batch_loss=22.3, batch

Epoch 7/10:  12%| | 121/991 [41:35<8:11:54, 33.92s/batch, batch_loss=22.3, batch

Epoch 7/10:  12%| | 121/991 [42:12<8:11:54, 33.92s/batch, batch_loss=9.32, batch

Epoch 7/10:  12%| | 122/991 [42:12<8:22:08, 34.67s/batch, batch_loss=9.32, batch

Epoch 7/10:  12%| | 122/991 [42:45<8:22:08, 34.67s/batch, batch_loss=16.7, batch

Epoch 7/10:  12%| | 123/991 [42:45<8:13:06, 34.09s/batch, batch_loss=16.7, batch

Epoch 7/10:  12%| | 123/991 [43:17<8:13:06, 34.09s/batch, batch_loss=3.47e+3, ba

Epoch 7/10:  13%|▏| 124/991 [43:17<8:07:02, 33.70s/batch, batch_loss=3.47e+3, ba

Epoch 7/10:  13%|▏| 124/991 [43:53<8:07:02, 33.70s/batch, batch_loss=8.11, batch

Epoch 7/10:  13%|▏| 125/991 [43:53<8:13:41, 34.21s/batch, batch_loss=8.11, batch

Epoch 7/10:  13%|▏| 125/991 [44:26<8:13:41, 34.21s/batch, batch_loss=11, batch_i

Epoch 7/10:  13%|▏| 126/991 [44:26<8:09:30, 33.95s/batch, batch_loss=11, batch_i

Epoch 7/10:  13%|▏| 126/991 [45:00<8:09:30, 33.95s/batch, batch_loss=1.9e+3, bat

Epoch 7/10:  13%|▏| 127/991 [45:00<8:08:54, 33.95s/batch, batch_loss=1.9e+3, bat

Epoch 7/10:  13%|▏| 127/991 [45:33<8:08:54, 33.95s/batch, batch_loss=1.56e+3, ba

Epoch 7/10:  13%|▏| 128/991 [45:33<8:03:30, 33.62s/batch, batch_loss=1.56e+3, ba

Epoch 7/10:  13%|▏| 128/991 [46:08<8:03:30, 33.62s/batch, batch_loss=225, batch_

Epoch 7/10:  13%|▏| 129/991 [46:08<8:09:25, 34.07s/batch, batch_loss=225, batch_

Epoch 7/10:  13%|▏| 129/991 [46:41<8:09:25, 34.07s/batch, batch_loss=990, batch_

Epoch 7/10:  13%|▏| 130/991 [46:41<8:05:04, 33.80s/batch, batch_loss=990, batch_

Epoch 7/10:  13%|▏| 130/991 [47:15<8:05:04, 33.80s/batch, batch_loss=8.33e+3, ba

Epoch 7/10:  13%|▏| 131/991 [47:15<8:04:47, 33.82s/batch, batch_loss=8.33e+3, ba

Epoch 7/10:  13%|▏| 131/991 [47:48<8:04:47, 33.82s/batch, batch_loss=16.3, batch

Epoch 7/10:  13%|▏| 132/991 [47:48<7:58:50, 33.45s/batch, batch_loss=16.3, batch

Epoch 7/10:  13%|▏| 132/991 [48:21<7:58:50, 33.45s/batch, batch_loss=8.76, batch

Epoch 7/10:  13%|▏| 133/991 [48:21<7:56:06, 33.29s/batch, batch_loss=8.76, batch

Epoch 7/10:  13%|▏| 133/991 [48:53<7:56:06, 33.29s/batch, batch_loss=12.6, batch

Epoch 7/10:  14%|▏| 134/991 [48:53<7:53:38, 33.16s/batch, batch_loss=12.6, batch

Epoch 7/10:  14%|▏| 134/991 [49:26<7:53:38, 33.16s/batch, batch_loss=17.5, batch

Epoch 7/10:  14%|▏| 135/991 [49:26<7:51:57, 33.08s/batch, batch_loss=17.5, batch

Epoch 7/10:  14%|▏| 135/991 [50:02<7:51:57, 33.08s/batch, batch_loss=9.73, batch

Epoch 7/10:  14%|▏| 136/991 [50:02<8:00:40, 33.73s/batch, batch_loss=9.73, batch

Epoch 7/10:  14%|▏| 136/991 [50:35<8:00:40, 33.73s/batch, batch_loss=14.6, batch

Epoch 7/10:  14%|▏| 137/991 [50:35<7:57:33, 33.55s/batch, batch_loss=14.6, batch

Epoch 7/10:  14%|▏| 137/991 [51:08<7:57:33, 33.55s/batch, batch_loss=17, batch_i

Epoch 7/10:  14%|▏| 138/991 [51:08<7:55:55, 33.48s/batch, batch_loss=17, batch_i

Epoch 7/10:  14%|▏| 138/991 [51:41<7:55:55, 33.48s/batch, batch_loss=6.16, batch

Epoch 7/10:  14%|▏| 139/991 [51:41<7:53:36, 33.35s/batch, batch_loss=6.16, batch

Epoch 7/10:  14%|▏| 139/991 [52:14<7:53:36, 33.35s/batch, batch_loss=10.6, batch

Epoch 7/10:  14%|▏| 140/991 [52:14<7:51:59, 33.28s/batch, batch_loss=10.6, batch

Epoch 7/10:  14%|▏| 140/991 [52:48<7:51:59, 33.28s/batch, batch_loss=5.13, batch

Epoch 7/10:  14%|▏| 141/991 [52:48<7:52:10, 33.33s/batch, batch_loss=5.13, batch

Epoch 7/10:  14%|▏| 141/991 [53:20<7:52:10, 33.33s/batch, batch_loss=6.36, batch

Epoch 7/10:  14%|▏| 142/991 [53:20<7:48:10, 33.09s/batch, batch_loss=6.36, batch

Epoch 7/10:  14%|▏| 142/991 [53:56<7:48:10, 33.09s/batch, batch_loss=13.5, batch

Epoch 7/10:  14%|▏| 143/991 [53:56<7:58:55, 33.89s/batch, batch_loss=13.5, batch

Epoch 7/10:  14%|▏| 143/991 [54:28<7:58:55, 33.89s/batch, batch_loss=16.2, batch

Epoch 7/10:  15%|▏| 144/991 [54:28<7:50:03, 33.30s/batch, batch_loss=16.2, batch

Epoch 7/10:  15%|▏| 144/991 [55:01<7:50:03, 33.30s/batch, batch_loss=17.6, batch

Epoch 7/10:  15%|▏| 145/991 [55:01<7:48:55, 33.26s/batch, batch_loss=17.6, batch

Epoch 7/10:  15%|▏| 145/991 [55:36<7:48:55, 33.26s/batch, batch_loss=15.3, batch

Epoch 7/10:  15%|▏| 146/991 [55:36<7:57:03, 33.87s/batch, batch_loss=15.3, batch

Epoch 7/10:  15%|▏| 146/991 [56:09<7:57:03, 33.87s/batch, batch_loss=7.77, batch

Epoch 7/10:  15%|▏| 147/991 [56:09<7:52:33, 33.59s/batch, batch_loss=7.77, batch

Epoch 7/10:  15%|▏| 147/991 [56:42<7:52:33, 33.59s/batch, batch_loss=21.5, batch

Epoch 7/10:  15%|▏| 148/991 [56:42<7:50:17, 33.47s/batch, batch_loss=21.5, batch

Epoch 7/10:  15%|▏| 148/991 [57:16<7:50:17, 33.47s/batch, batch_loss=11.9, batch

Epoch 7/10:  15%|▏| 149/991 [57:16<7:49:54, 33.49s/batch, batch_loss=11.9, batch

Epoch 7/10:  15%|▏| 149/991 [57:49<7:49:54, 33.49s/batch, batch_loss=11.5, batch

Epoch 7/10:  15%|▏| 150/991 [57:49<7:49:29, 33.50s/batch, batch_loss=11.5, batch

Epoch 7/10:  15%|▏| 150/991 [58:24<7:49:29, 33.50s/batch, batch_loss=20.2, batch

Epoch 7/10:  15%|▏| 151/991 [58:24<7:52:30, 33.75s/batch, batch_loss=20.2, batch

Epoch 7/10:  15%|▏| 151/991 [58:59<7:52:30, 33.75s/batch, batch_loss=17.1, batch

Epoch 7/10:  15%|▏| 152/991 [58:59<7:57:59, 34.18s/batch, batch_loss=17.1, batch

Epoch 7/10:  15%|▏| 152/991 [59:33<7:57:59, 34.18s/batch, batch_loss=21.9, batch

Epoch 7/10:  15%|▏| 153/991 [59:33<7:56:11, 34.09s/batch, batch_loss=21.9, batch

Epoch 7/10:  15%|▏| 153/991 [1:00:06<7:56:11, 34.09s/batch, batch_loss=22.9, bat

Epoch 7/10:  16%|▏| 154/991 [1:00:06<7:52:44, 33.89s/batch, batch_loss=22.9, bat

Epoch 7/10:  16%|▏| 154/991 [1:00:41<7:52:44, 33.89s/batch, batch_loss=24.2, bat

Epoch 7/10:  16%|▏| 155/991 [1:00:41<7:53:45, 34.00s/batch, batch_loss=24.2, bat

Epoch 7/10:  16%|▏| 155/991 [1:01:13<7:53:45, 34.00s/batch, batch_loss=8.45, bat

Epoch 7/10:  16%|▏| 156/991 [1:01:13<7:48:44, 33.68s/batch, batch_loss=8.45, bat

Epoch 7/10:  16%|▏| 156/991 [1:01:47<7:48:44, 33.68s/batch, batch_loss=25.8, bat

Epoch 7/10:  16%|▏| 157/991 [1:01:47<7:45:37, 33.50s/batch, batch_loss=25.8, bat

Epoch 7/10:  16%|▏| 157/991 [1:02:01<7:45:37, 33.50s/batch, batch_loss=7.52, bat

Epoch 7/10:  16%|▏| 158/991 [1:02:01<6:26:11, 27.82s/batch, batch_loss=7.52, bat

Epoch 7/10:  16%|▏| 158/991 [1:02:16<6:26:11, 27.82s/batch, batch_loss=5.3, batc

Epoch 7/10:  16%|▏| 159/991 [1:02:16<5:30:41, 23.85s/batch, batch_loss=5.3, batc

Epoch 7/10:  16%|▏| 159/991 [1:02:30<5:30:41, 23.85s/batch, batch_loss=12.2, bat

Epoch 7/10:  16%|▏| 160/991 [1:02:30<4:48:49, 20.85s/batch, batch_loss=12.2, bat

Epoch 7/10:  16%|▏| 160/991 [1:02:43<4:48:49, 20.85s/batch, batch_loss=460, batc

Epoch 7/10:  16%|▏| 161/991 [1:02:43<4:18:25, 18.68s/batch, batch_loss=460, batc

Epoch 7/10:  16%|▏| 161/991 [1:02:57<4:18:25, 18.68s/batch, batch_loss=17.1, bat

Epoch 7/10:  16%|▏| 162/991 [1:02:57<3:59:44, 17.35s/batch, batch_loss=17.1, bat

Epoch 7/10:  16%|▏| 162/991 [1:03:12<3:59:44, 17.35s/batch, batch_loss=8.5, batc

Epoch 7/10:  16%|▏| 163/991 [1:03:12<3:46:10, 16.39s/batch, batch_loss=8.5, batc

Epoch 7/10:  16%|▏| 163/991 [1:03:26<3:46:10, 16.39s/batch, batch_loss=12.9, bat

Epoch 7/10:  17%|▏| 164/991 [1:03:26<3:37:36, 15.79s/batch, batch_loss=12.9, bat

Epoch 7/10:  17%|▏| 164/991 [1:03:40<3:37:36, 15.79s/batch, batch_loss=11.7, bat

Epoch 7/10:  17%|▏| 165/991 [1:03:40<3:30:44, 15.31s/batch, batch_loss=11.7, bat

Epoch 7/10:  17%|▏| 165/991 [1:03:54<3:30:44, 15.31s/batch, batch_loss=9.45, bat

Epoch 7/10:  17%|▏| 166/991 [1:03:54<3:24:57, 14.91s/batch, batch_loss=9.45, bat

Epoch 7/10:  17%|▏| 166/991 [1:04:10<3:24:57, 14.91s/batch, batch_loss=19.2, bat

Epoch 7/10:  17%|▏| 167/991 [1:04:10<3:27:41, 15.12s/batch, batch_loss=19.2, bat

Epoch 7/10:  17%|▏| 167/991 [1:04:26<3:27:41, 15.12s/batch, batch_loss=13.4, bat

Epoch 7/10:  17%|▏| 168/991 [1:04:26<3:34:04, 15.61s/batch, batch_loss=13.4, bat

Epoch 7/10:  17%|▏| 168/991 [1:04:43<3:34:04, 15.61s/batch, batch_loss=13.2, bat

Epoch 7/10:  17%|▏| 169/991 [1:04:43<3:38:30, 15.95s/batch, batch_loss=13.2, bat

Epoch 7/10:  17%|▏| 169/991 [1:05:00<3:38:30, 15.95s/batch, batch_loss=8.99, bat

Epoch 7/10:  17%|▏| 170/991 [1:05:00<3:40:16, 16.10s/batch, batch_loss=8.99, bat

Epoch 7/10:  17%|▏| 170/991 [1:05:15<3:40:16, 16.10s/batch, batch_loss=5.43, bat

Epoch 7/10:  17%|▏| 171/991 [1:05:15<3:36:26, 15.84s/batch, batch_loss=5.43, bat

Epoch 7/10:  17%|▏| 171/991 [1:05:30<3:36:26, 15.84s/batch, batch_loss=8.49, bat

Epoch 7/10:  17%|▏| 172/991 [1:05:30<3:34:01, 15.68s/batch, batch_loss=8.49, bat

Epoch 7/10:  17%|▏| 172/991 [1:05:45<3:34:01, 15.68s/batch, batch_loss=6.91, bat

Epoch 7/10:  17%|▏| 173/991 [1:05:45<3:30:03, 15.41s/batch, batch_loss=6.91, bat

Epoch 7/10:  17%|▏| 173/991 [1:05:59<3:30:03, 15.41s/batch, batch_loss=3.02e+4, 

Epoch 7/10:  18%|▏| 174/991 [1:05:59<3:24:41, 15.03s/batch, batch_loss=3.02e+4, 

Epoch 7/10:  18%|▏| 174/991 [1:06:14<3:24:41, 15.03s/batch, batch_loss=20.7, bat

Epoch 7/10:  18%|▏| 175/991 [1:06:14<3:23:41, 14.98s/batch, batch_loss=20.7, bat

Epoch 7/10:  18%|▏| 175/991 [1:06:29<3:23:41, 14.98s/batch, batch_loss=23.9, bat

Epoch 7/10:  18%|▏| 176/991 [1:06:29<3:22:30, 14.91s/batch, batch_loss=23.9, bat

Epoch 7/10:  18%|▏| 176/991 [1:06:44<3:22:30, 14.91s/batch, batch_loss=25.2, bat

Epoch 7/10:  18%|▏| 177/991 [1:06:44<3:22:21, 14.92s/batch, batch_loss=25.2, bat

Epoch 7/10:  18%|▏| 177/991 [1:07:00<3:22:21, 14.92s/batch, batch_loss=20.5, bat

Epoch 7/10:  18%|▏| 178/991 [1:07:00<3:26:05, 15.21s/batch, batch_loss=20.5, bat

Epoch 7/10:  18%|▏| 178/991 [1:07:15<3:26:05, 15.21s/batch, batch_loss=13.2, bat

Epoch 7/10:  18%|▏| 179/991 [1:07:15<3:28:19, 15.39s/batch, batch_loss=13.2, bat

Epoch 7/10:  18%|▏| 179/991 [1:07:33<3:28:19, 15.39s/batch, batch_loss=6.72, bat

Epoch 7/10:  18%|▏| 180/991 [1:07:33<3:37:25, 16.09s/batch, batch_loss=6.72, bat

Epoch 7/10:  18%|▏| 180/991 [1:07:47<3:37:25, 16.09s/batch, batch_loss=2.51e+4, 

Epoch 7/10:  18%|▏| 181/991 [1:07:47<3:28:10, 15.42s/batch, batch_loss=2.51e+4, 

Epoch 7/10:  18%|▏| 181/991 [1:08:01<3:28:10, 15.42s/batch, batch_loss=12.8, bat

Epoch 7/10:  18%|▏| 182/991 [1:08:01<3:20:57, 14.90s/batch, batch_loss=12.8, bat

Epoch 7/10:  18%|▏| 182/991 [1:08:15<3:20:57, 14.90s/batch, batch_loss=19.4, bat

Epoch 7/10:  18%|▏| 183/991 [1:08:15<3:16:59, 14.63s/batch, batch_loss=19.4, bat

Epoch 7/10:  18%|▏| 183/991 [1:08:33<3:16:59, 14.63s/batch, batch_loss=21, batch

Epoch 7/10:  19%|▏| 184/991 [1:08:33<3:32:09, 15.77s/batch, batch_loss=21, batch

Epoch 7/10:  19%|▏| 184/991 [1:08:52<3:32:09, 15.77s/batch, batch_loss=12.2, bat

Epoch 7/10:  19%|▏| 185/991 [1:08:52<3:44:41, 16.73s/batch, batch_loss=12.2, bat

Epoch 7/10:  19%|▏| 185/991 [1:09:09<3:44:41, 16.73s/batch, batch_loss=17.6, bat

Epoch 7/10:  19%|▏| 186/991 [1:09:09<3:47:20, 16.95s/batch, batch_loss=17.6, bat

Epoch 7/10:  19%|▏| 186/991 [1:09:26<3:47:20, 16.95s/batch, batch_loss=14.8, bat

Epoch 7/10:  19%|▏| 187/991 [1:09:26<3:47:02, 16.94s/batch, batch_loss=14.8, bat

Epoch 7/10:  19%|▏| 187/991 [1:09:41<3:47:02, 16.94s/batch, batch_loss=17.9, bat

Epoch 7/10:  19%|▏| 188/991 [1:09:41<3:35:18, 16.09s/batch, batch_loss=17.9, bat

Epoch 7/10:  19%|▏| 188/991 [1:09:55<3:35:18, 16.09s/batch, batch_loss=19, batch

Epoch 7/10:  19%|▏| 189/991 [1:09:55<3:27:02, 15.49s/batch, batch_loss=19, batch

Epoch 7/10:  19%|▏| 189/991 [1:10:08<3:27:02, 15.49s/batch, batch_loss=21.4, bat

Epoch 7/10:  19%|▏| 190/991 [1:10:08<3:19:12, 14.92s/batch, batch_loss=21.4, bat

Epoch 7/10:  19%|▏| 190/991 [1:10:22<3:19:12, 14.92s/batch, batch_loss=18.9, bat

Epoch 7/10:  19%|▏| 191/991 [1:10:22<3:13:49, 14.54s/batch, batch_loss=18.9, bat

Epoch 7/10:  19%|▏| 191/991 [1:10:35<3:13:49, 14.54s/batch, batch_loss=11.6, bat

Epoch 7/10:  19%|▏| 192/991 [1:10:35<3:09:16, 14.21s/batch, batch_loss=11.6, bat

Epoch 7/10:  19%|▏| 192/991 [1:10:49<3:09:16, 14.21s/batch, batch_loss=18.4, bat

Epoch 7/10:  19%|▏| 193/991 [1:10:49<3:06:31, 14.02s/batch, batch_loss=18.4, bat

Epoch 7/10:  19%|▏| 193/991 [1:11:03<3:06:31, 14.02s/batch, batch_loss=8.61, bat

Epoch 7/10:  20%|▏| 194/991 [1:11:03<3:05:25, 13.96s/batch, batch_loss=8.61, bat

Epoch 7/10:  20%|▏| 194/991 [1:11:17<3:05:25, 13.96s/batch, batch_loss=3.35, bat

Epoch 7/10:  20%|▏| 195/991 [1:11:17<3:05:41, 14.00s/batch, batch_loss=3.35, bat

Epoch 7/10:  20%|▏| 195/991 [1:11:33<3:05:41, 14.00s/batch, batch_loss=5.8, batc

Epoch 7/10:  20%|▏| 196/991 [1:11:33<3:14:12, 14.66s/batch, batch_loss=5.8, batc

Epoch 7/10:  20%|▏| 196/991 [1:11:47<3:14:12, 14.66s/batch, batch_loss=12.9, bat

Epoch 7/10:  20%|▏| 197/991 [1:11:47<3:11:14, 14.45s/batch, batch_loss=12.9, bat

Epoch 7/10:  20%|▏| 197/991 [1:12:01<3:11:14, 14.45s/batch, batch_loss=8.91, bat

Epoch 7/10:  20%|▏| 198/991 [1:12:01<3:09:50, 14.36s/batch, batch_loss=8.91, bat

Epoch 7/10:  20%|▏| 198/991 [1:12:15<3:09:50, 14.36s/batch, batch_loss=16.3, bat

Epoch 7/10:  20%|▏| 199/991 [1:12:15<3:06:58, 14.16s/batch, batch_loss=16.3, bat

Epoch 7/10:  20%|▏| 199/991 [1:12:29<3:06:58, 14.16s/batch, batch_loss=7.81, bat

Epoch 7/10:  20%|▏| 200/991 [1:12:29<3:06:11, 14.12s/batch, batch_loss=7.81, bat

Epoch 7/10:  20%|▏| 200/991 [1:12:43<3:06:11, 14.12s/batch, batch_loss=12.6, bat

Epoch 7/10:  20%|▏| 201/991 [1:12:43<3:05:03, 14.05s/batch, batch_loss=12.6, bat

Epoch 7/10:  20%|▏| 201/991 [1:12:57<3:05:03, 14.05s/batch, batch_loss=10, batch

Epoch 7/10:  20%|▏| 202/991 [1:12:57<3:04:56, 14.06s/batch, batch_loss=10, batch

Epoch 7/10:  20%|▏| 202/991 [1:13:11<3:04:56, 14.06s/batch, batch_loss=14, batch

Epoch 7/10:  20%|▏| 203/991 [1:13:11<3:06:09, 14.17s/batch, batch_loss=14, batch

Epoch 7/10:  20%|▏| 203/991 [1:13:25<3:06:09, 14.17s/batch, batch_loss=19.4, bat

Epoch 7/10:  21%|▏| 204/991 [1:13:25<3:04:39, 14.08s/batch, batch_loss=19.4, bat

Epoch 7/10:  21%|▏| 204/991 [1:13:41<3:04:39, 14.08s/batch, batch_loss=18.1, bat

Epoch 7/10:  21%|▏| 205/991 [1:13:41<3:10:51, 14.57s/batch, batch_loss=18.1, bat

Epoch 7/10:  21%|▏| 205/991 [1:13:55<3:10:51, 14.57s/batch, batch_loss=7.88, bat

Epoch 7/10:  21%|▏| 206/991 [1:13:55<3:07:16, 14.31s/batch, batch_loss=7.88, bat

Epoch 7/10:  21%|▏| 206/991 [1:14:08<3:07:16, 14.31s/batch, batch_loss=9.1, batc

Epoch 7/10:  21%|▏| 207/991 [1:14:08<3:05:06, 14.17s/batch, batch_loss=9.1, batc

Epoch 7/10:  21%|▏| 207/991 [1:14:22<3:05:06, 14.17s/batch, batch_loss=11.2, bat

Epoch 7/10:  21%|▏| 208/991 [1:14:22<3:03:37, 14.07s/batch, batch_loss=11.2, bat

Epoch 7/10:  21%|▏| 208/991 [1:14:36<3:03:37, 14.07s/batch, batch_loss=8.86, bat

Epoch 7/10:  21%|▏| 209/991 [1:14:36<3:02:47, 14.02s/batch, batch_loss=8.86, bat

Epoch 7/10:  21%|▏| 209/991 [1:14:50<3:02:47, 14.02s/batch, batch_loss=20, batch

Epoch 7/10:  21%|▏| 210/991 [1:14:50<3:01:36, 13.95s/batch, batch_loss=20, batch

Epoch 7/10:  21%|▏| 210/991 [1:15:04<3:01:36, 13.95s/batch, batch_loss=11.9, bat

Epoch 7/10:  21%|▏| 211/991 [1:15:04<3:00:27, 13.88s/batch, batch_loss=11.9, bat

Epoch 7/10:  21%|▏| 211/991 [1:15:17<3:00:27, 13.88s/batch, batch_loss=14.5, bat

Epoch 7/10:  21%|▏| 212/991 [1:15:17<2:59:52, 13.85s/batch, batch_loss=14.5, bat

Epoch 7/10:  21%|▏| 212/991 [1:15:31<2:59:52, 13.85s/batch, batch_loss=2.89, bat

Epoch 7/10:  21%|▏| 213/991 [1:15:31<2:59:15, 13.82s/batch, batch_loss=2.89, bat

Epoch 7/10:  21%|▏| 213/991 [1:15:45<2:59:15, 13.82s/batch, batch_loss=12.6, bat

Epoch 7/10:  22%|▏| 214/991 [1:15:45<2:58:39, 13.80s/batch, batch_loss=12.6, bat

Epoch 7/10:  22%|▏| 214/991 [1:16:00<2:58:39, 13.80s/batch, batch_loss=16.8, bat

Epoch 7/10:  22%|▏| 215/991 [1:16:00<3:02:17, 14.09s/batch, batch_loss=16.8, bat

Epoch 7/10:  22%|▏| 215/991 [1:16:15<3:02:17, 14.09s/batch, batch_loss=9.35, bat

Epoch 7/10:  22%|▏| 216/991 [1:16:15<3:06:28, 14.44s/batch, batch_loss=9.35, bat

Epoch 7/10:  22%|▏| 216/991 [1:16:29<3:06:28, 14.44s/batch, batch_loss=11.9, bat

Epoch 7/10:  22%|▏| 217/991 [1:16:29<3:05:27, 14.38s/batch, batch_loss=11.9, bat

Epoch 7/10:  22%|▏| 217/991 [1:16:43<3:05:27, 14.38s/batch, batch_loss=19.6, bat

Epoch 7/10:  22%|▏| 218/991 [1:16:43<3:02:49, 14.19s/batch, batch_loss=19.6, bat

Epoch 7/10:  22%|▏| 218/991 [1:16:58<3:02:49, 14.19s/batch, batch_loss=18.8, bat

Epoch 7/10:  22%|▏| 219/991 [1:16:58<3:05:57, 14.45s/batch, batch_loss=18.8, bat

Epoch 7/10:  22%|▏| 219/991 [1:17:12<3:05:57, 14.45s/batch, batch_loss=21.4, bat

Epoch 7/10:  22%|▏| 220/991 [1:17:12<3:03:50, 14.31s/batch, batch_loss=21.4, bat

Epoch 7/10:  22%|▏| 220/991 [1:17:25<3:03:50, 14.31s/batch, batch_loss=21.1, bat

Epoch 7/10:  22%|▏| 221/991 [1:17:25<3:00:21, 14.05s/batch, batch_loss=21.1, bat

Epoch 7/10:  22%|▏| 221/991 [1:17:43<3:00:21, 14.05s/batch, batch_loss=13.7, bat

Epoch 7/10:  22%|▏| 222/991 [1:17:43<3:14:41, 15.19s/batch, batch_loss=13.7, bat

Epoch 7/10:  22%|▏| 222/991 [1:17:58<3:14:41, 15.19s/batch, batch_loss=19.5, bat

Epoch 7/10:  23%|▏| 223/991 [1:17:58<3:13:40, 15.13s/batch, batch_loss=19.5, bat

Epoch 7/10:  23%|▏| 223/991 [1:18:14<3:13:40, 15.13s/batch, batch_loss=13.5, bat

Epoch 7/10:  23%|▏| 224/991 [1:18:14<3:16:25, 15.37s/batch, batch_loss=13.5, bat

Epoch 7/10:  23%|▏| 224/991 [1:18:30<3:16:25, 15.37s/batch, batch_loss=10, batch

Epoch 7/10:  23%|▏| 225/991 [1:18:30<3:16:51, 15.42s/batch, batch_loss=10, batch

Epoch 7/10:  23%|▏| 225/991 [1:18:45<3:16:51, 15.42s/batch, batch_loss=20.5, bat

Epoch 7/10:  23%|▏| 226/991 [1:18:45<3:14:33, 15.26s/batch, batch_loss=20.5, bat

Epoch 7/10:  23%|▏| 226/991 [1:19:01<3:14:33, 15.26s/batch, batch_loss=2.41e+3, 

Epoch 7/10:  23%|▏| 227/991 [1:19:01<3:17:42, 15.53s/batch, batch_loss=2.41e+3, 

Epoch 7/10:  23%|▏| 227/991 [1:19:16<3:17:42, 15.53s/batch, batch_loss=3.6e+3, b

Epoch 7/10:  23%|▏| 228/991 [1:19:16<3:15:46, 15.39s/batch, batch_loss=3.6e+3, b

Epoch 7/10:  23%|▏| 228/991 [1:19:31<3:15:46, 15.39s/batch, batch_loss=12.8, bat

Epoch 7/10:  23%|▏| 229/991 [1:19:31<3:14:56, 15.35s/batch, batch_loss=12.8, bat

Epoch 7/10:  23%|▏| 229/991 [1:19:46<3:14:56, 15.35s/batch, batch_loss=9.24, bat

Epoch 7/10:  23%|▏| 230/991 [1:19:46<3:14:45, 15.36s/batch, batch_loss=9.24, bat

Epoch 7/10:  23%|▏| 230/991 [1:20:02<3:14:45, 15.36s/batch, batch_loss=13.4, bat

Epoch 7/10:  23%|▏| 231/991 [1:20:02<3:13:51, 15.30s/batch, batch_loss=13.4, bat

Epoch 7/10:  23%|▏| 231/991 [1:20:17<3:13:51, 15.30s/batch, batch_loss=9.01, bat

Epoch 7/10:  23%|▏| 232/991 [1:20:17<3:15:36, 15.46s/batch, batch_loss=9.01, bat

Epoch 7/10:  23%|▏| 232/991 [1:20:33<3:15:36, 15.46s/batch, batch_loss=8.79, bat

Epoch 7/10:  24%|▏| 233/991 [1:20:33<3:16:00, 15.51s/batch, batch_loss=8.79, bat

Epoch 7/10:  24%|▏| 233/991 [1:20:48<3:16:00, 15.51s/batch, batch_loss=14.8, bat

Epoch 7/10:  24%|▏| 234/991 [1:20:48<3:14:48, 15.44s/batch, batch_loss=14.8, bat

Epoch 7/10:  24%|▏| 234/991 [1:21:04<3:14:48, 15.44s/batch, batch_loss=14.7, bat

Epoch 7/10:  24%|▏| 235/991 [1:21:04<3:16:35, 15.60s/batch, batch_loss=14.7, bat

Epoch 7/10:  24%|▏| 235/991 [1:21:20<3:16:35, 15.60s/batch, batch_loss=23.5, bat

Epoch 7/10:  24%|▏| 236/991 [1:21:20<3:15:26, 15.53s/batch, batch_loss=23.5, bat

Epoch 7/10:  24%|▏| 236/991 [1:21:35<3:15:26, 15.53s/batch, batch_loss=23, batch

Epoch 7/10:  24%|▏| 237/991 [1:21:35<3:13:27, 15.39s/batch, batch_loss=23, batch

Epoch 7/10:  24%|▏| 237/991 [1:21:53<3:13:27, 15.39s/batch, batch_loss=17, batch

Epoch 7/10:  24%|▏| 238/991 [1:21:53<3:23:43, 16.23s/batch, batch_loss=17, batch

Epoch 7/10:  24%|▏| 238/991 [1:22:08<3:23:43, 16.23s/batch, batch_loss=5.96, bat

Epoch 7/10:  24%|▏| 239/991 [1:22:08<3:19:21, 15.91s/batch, batch_loss=5.96, bat

Epoch 7/10:  24%|▏| 239/991 [1:22:22<3:19:21, 15.91s/batch, batch_loss=6.38, bat

Epoch 7/10:  24%|▏| 240/991 [1:22:22<3:12:47, 15.40s/batch, batch_loss=6.38, bat

Epoch 7/10:  24%|▏| 240/991 [1:22:38<3:12:47, 15.40s/batch, batch_loss=10.1, bat

Epoch 7/10:  24%|▏| 241/991 [1:22:38<3:12:07, 15.37s/batch, batch_loss=10.1, bat

Epoch 7/10:  24%|▏| 241/991 [1:22:54<3:12:07, 15.37s/batch, batch_loss=21, batch

Epoch 7/10:  24%|▏| 242/991 [1:22:54<3:14:16, 15.56s/batch, batch_loss=21, batch

Epoch 7/10:  24%|▏| 242/991 [1:23:09<3:14:16, 15.56s/batch, batch_loss=267, batc

Epoch 7/10:  25%|▏| 243/991 [1:23:09<3:14:38, 15.61s/batch, batch_loss=267, batc

Epoch 7/10:  25%|▏| 243/991 [1:23:26<3:14:38, 15.61s/batch, batch_loss=15.1, bat

Epoch 7/10:  25%|▏| 244/991 [1:23:26<3:18:36, 15.95s/batch, batch_loss=15.1, bat

Epoch 7/10:  25%|▏| 244/991 [1:23:42<3:18:36, 15.95s/batch, batch_loss=7.22, bat

Epoch 7/10:  25%|▏| 245/991 [1:23:42<3:16:46, 15.83s/batch, batch_loss=7.22, bat

Epoch 7/10:  25%|▏| 245/991 [1:23:56<3:16:46, 15.83s/batch, batch_loss=6.29, bat

Epoch 7/10:  25%|▏| 246/991 [1:23:56<3:12:39, 15.52s/batch, batch_loss=6.29, bat

Epoch 7/10:  25%|▏| 246/991 [1:24:11<3:12:39, 15.52s/batch, batch_loss=14.1, bat

Epoch 7/10:  25%|▏| 247/991 [1:24:11<3:10:16, 15.34s/batch, batch_loss=14.1, bat

Epoch 7/10:  25%|▏| 247/991 [1:24:26<3:10:16, 15.34s/batch, batch_loss=4.33, bat

Epoch 7/10:  25%|▎| 248/991 [1:24:26<3:07:47, 15.16s/batch, batch_loss=4.33, bat

Epoch 7/10:  25%|▎| 248/991 [1:24:42<3:07:47, 15.16s/batch, batch_loss=13.3, bat

Epoch 7/10:  25%|▎| 249/991 [1:24:42<3:10:26, 15.40s/batch, batch_loss=13.3, bat

Epoch 7/10:  25%|▎| 249/991 [1:24:56<3:10:26, 15.40s/batch, batch_loss=10.1, bat

Epoch 7/10:  25%|▎| 250/991 [1:24:56<3:04:04, 14.90s/batch, batch_loss=10.1, bat

Epoch 7/10:  25%|▎| 250/991 [1:25:10<3:04:04, 14.90s/batch, batch_loss=6.47, bat

Epoch 7/10:  25%|▎| 251/991 [1:25:10<2:59:41, 14.57s/batch, batch_loss=6.47, bat

Epoch 7/10:  25%|▎| 251/991 [1:25:23<2:59:41, 14.57s/batch, batch_loss=13.2, bat

Epoch 7/10:  25%|▎| 252/991 [1:25:23<2:56:42, 14.35s/batch, batch_loss=13.2, bat

Epoch 7/10:  25%|▎| 252/991 [1:25:38<2:56:42, 14.35s/batch, batch_loss=7.9, batc

Epoch 7/10:  26%|▎| 253/991 [1:25:38<2:55:55, 14.30s/batch, batch_loss=7.9, batc

Epoch 7/10:  26%|▎| 253/991 [1:25:51<2:55:55, 14.30s/batch, batch_loss=19.1, bat

Epoch 7/10:  26%|▎| 254/991 [1:25:51<2:51:54, 14.00s/batch, batch_loss=19.1, bat

Epoch 7/10:  26%|▎| 254/991 [1:26:05<2:51:54, 14.00s/batch, batch_loss=15.1, bat

Epoch 7/10:  26%|▎| 255/991 [1:26:05<2:50:46, 13.92s/batch, batch_loss=15.1, bat

Epoch 7/10:  26%|▎| 255/991 [1:26:17<2:50:46, 13.92s/batch, batch_loss=697, batc

Epoch 7/10:  26%|▎| 256/991 [1:26:17<2:45:56, 13.55s/batch, batch_loss=697, batc

Epoch 7/10:  26%|▎| 256/991 [1:26:30<2:45:56, 13.55s/batch, batch_loss=19.1, bat

Epoch 7/10:  26%|▎| 257/991 [1:26:30<2:42:58, 13.32s/batch, batch_loss=19.1, bat

Epoch 7/10:  26%|▎| 257/991 [1:26:44<2:42:58, 13.32s/batch, batch_loss=204, batc

Epoch 7/10:  26%|▎| 258/991 [1:26:44<2:43:05, 13.35s/batch, batch_loss=204, batc

Epoch 7/10:  26%|▎| 258/991 [1:26:57<2:43:05, 13.35s/batch, batch_loss=16.7, bat

Epoch 7/10:  26%|▎| 259/991 [1:26:57<2:43:00, 13.36s/batch, batch_loss=16.7, bat

Epoch 7/10:  26%|▎| 259/991 [1:27:10<2:43:00, 13.36s/batch, batch_loss=14.7, bat

Epoch 7/10:  26%|▎| 260/991 [1:27:10<2:42:23, 13.33s/batch, batch_loss=14.7, bat

Epoch 7/10:  26%|▎| 260/991 [1:27:24<2:42:23, 13.33s/batch, batch_loss=12.5, bat

Epoch 7/10:  26%|▎| 261/991 [1:27:24<2:42:10, 13.33s/batch, batch_loss=12.5, bat

Epoch 7/10:  26%|▎| 261/991 [1:27:37<2:42:10, 13.33s/batch, batch_loss=10.3, bat

Epoch 7/10:  26%|▎| 262/991 [1:27:37<2:42:01, 13.34s/batch, batch_loss=10.3, bat

Epoch 7/10:  26%|▎| 262/991 [1:27:51<2:42:01, 13.34s/batch, batch_loss=11.6, bat

Epoch 7/10:  27%|▎| 263/991 [1:27:51<2:44:33, 13.56s/batch, batch_loss=11.6, bat

Epoch 7/10:  27%|▎| 263/991 [1:28:05<2:44:33, 13.56s/batch, batch_loss=14.1, bat

Epoch 7/10:  27%|▎| 264/991 [1:28:05<2:46:13, 13.72s/batch, batch_loss=14.1, bat

Epoch 7/10:  27%|▎| 264/991 [1:28:19<2:46:13, 13.72s/batch, batch_loss=16, batch

Epoch 7/10:  27%|▎| 265/991 [1:28:19<2:45:33, 13.68s/batch, batch_loss=16, batch

Epoch 7/10:  27%|▎| 265/991 [1:28:32<2:45:33, 13.68s/batch, batch_loss=14.3, bat

Epoch 7/10:  27%|▎| 266/991 [1:28:32<2:45:37, 13.71s/batch, batch_loss=14.3, bat

Epoch 7/10:  27%|▎| 266/991 [1:28:46<2:45:37, 13.71s/batch, batch_loss=11, batch

Epoch 7/10:  27%|▎| 267/991 [1:28:46<2:44:56, 13.67s/batch, batch_loss=11, batch

Epoch 7/10:  27%|▎| 267/991 [1:29:00<2:44:56, 13.67s/batch, batch_loss=7.01, bat

Epoch 7/10:  27%|▎| 268/991 [1:29:00<2:46:45, 13.84s/batch, batch_loss=7.01, bat

Epoch 7/10:  27%|▎| 268/991 [1:29:14<2:46:45, 13.84s/batch, batch_loss=12.4, bat

Epoch 7/10:  27%|▎| 269/991 [1:29:14<2:45:38, 13.77s/batch, batch_loss=12.4, bat

Epoch 7/10:  27%|▎| 269/991 [1:29:27<2:45:38, 13.77s/batch, batch_loss=1.22, bat

Epoch 7/10:  27%|▎| 270/991 [1:29:27<2:44:30, 13.69s/batch, batch_loss=1.22, bat

Epoch 7/10:  27%|▎| 270/991 [1:29:41<2:44:30, 13.69s/batch, batch_loss=9.83, bat

Epoch 7/10:  27%|▎| 271/991 [1:29:41<2:42:38, 13.55s/batch, batch_loss=9.83, bat

Epoch 7/10:  27%|▎| 271/991 [1:29:55<2:42:38, 13.55s/batch, batch_loss=9.71, bat

Epoch 7/10:  27%|▎| 272/991 [1:29:55<2:44:40, 13.74s/batch, batch_loss=9.71, bat

Epoch 7/10:  27%|▎| 272/991 [1:30:12<2:44:40, 13.74s/batch, batch_loss=16.7, bat

Epoch 7/10:  28%|▎| 273/991 [1:30:12<2:55:25, 14.66s/batch, batch_loss=16.7, bat

Epoch 7/10:  28%|▎| 273/991 [1:30:25<2:55:25, 14.66s/batch, batch_loss=12.6, bat

Epoch 7/10:  28%|▎| 274/991 [1:30:25<2:51:56, 14.39s/batch, batch_loss=12.6, bat

Epoch 7/10:  28%|▎| 274/991 [1:30:39<2:51:56, 14.39s/batch, batch_loss=3.32e+3, 

Epoch 7/10:  28%|▎| 275/991 [1:30:39<2:49:53, 14.24s/batch, batch_loss=3.32e+3, 

Epoch 7/10:  28%|▎| 275/991 [1:30:53<2:49:53, 14.24s/batch, batch_loss=12.7, bat

Epoch 7/10:  28%|▎| 276/991 [1:30:53<2:47:15, 14.04s/batch, batch_loss=12.7, bat

Epoch 7/10:  28%|▎| 276/991 [1:31:07<2:47:15, 14.04s/batch, batch_loss=4.77e+3, 

Epoch 7/10:  28%|▎| 277/991 [1:31:07<2:46:28, 13.99s/batch, batch_loss=4.77e+3, 

Epoch 7/10:  28%|▎| 277/991 [1:31:19<2:46:28, 13.99s/batch, batch_loss=10.9, bat

Epoch 7/10:  28%|▎| 278/991 [1:31:19<2:41:52, 13.62s/batch, batch_loss=10.9, bat

Epoch 7/10:  28%|▎| 278/991 [1:31:34<2:41:52, 13.62s/batch, batch_loss=17.8, bat

Epoch 7/10:  28%|▎| 279/991 [1:31:34<2:44:18, 13.85s/batch, batch_loss=17.8, bat

Epoch 7/10:  28%|▎| 279/991 [1:31:50<2:44:18, 13.85s/batch, batch_loss=12.9, bat

Epoch 7/10:  28%|▎| 280/991 [1:31:50<2:52:41, 14.57s/batch, batch_loss=12.9, bat

Epoch 7/10:  28%|▎| 280/991 [1:32:04<2:52:41, 14.57s/batch, batch_loss=11.5, bat

Epoch 7/10:  28%|▎| 281/991 [1:32:04<2:50:05, 14.37s/batch, batch_loss=11.5, bat

Epoch 7/10:  28%|▎| 281/991 [1:32:18<2:50:05, 14.37s/batch, batch_loss=6.56, bat

Epoch 7/10:  28%|▎| 282/991 [1:32:18<2:49:15, 14.32s/batch, batch_loss=6.56, bat

Epoch 7/10:  28%|▎| 282/991 [1:32:32<2:49:15, 14.32s/batch, batch_loss=14, batch

Epoch 7/10:  29%|▎| 283/991 [1:32:32<2:46:05, 14.08s/batch, batch_loss=14, batch

Epoch 7/10:  29%|▎| 283/991 [1:32:46<2:46:05, 14.08s/batch, batch_loss=15, batch

Epoch 7/10:  29%|▎| 284/991 [1:32:46<2:46:48, 14.16s/batch, batch_loss=15, batch

Epoch 7/10:  29%|▎| 284/991 [1:33:00<2:46:48, 14.16s/batch, batch_loss=12.4, bat

Epoch 7/10:  29%|▎| 285/991 [1:33:00<2:46:17, 14.13s/batch, batch_loss=12.4, bat

Epoch 7/10:  29%|▎| 285/991 [1:33:14<2:46:17, 14.13s/batch, batch_loss=7.42, bat

Epoch 7/10:  29%|▎| 286/991 [1:33:14<2:44:16, 13.98s/batch, batch_loss=7.42, bat

Epoch 7/10:  29%|▎| 286/991 [1:33:27<2:44:16, 13.98s/batch, batch_loss=7.7, batc

Epoch 7/10:  29%|▎| 287/991 [1:33:27<2:42:49, 13.88s/batch, batch_loss=7.7, batc

Epoch 7/10:  29%|▎| 287/991 [1:33:43<2:42:49, 13.88s/batch, batch_loss=2.59e+3, 

Epoch 7/10:  29%|▎| 288/991 [1:33:43<2:50:07, 14.52s/batch, batch_loss=2.59e+3, 

Epoch 7/10:  29%|▎| 288/991 [1:33:57<2:50:07, 14.52s/batch, batch_loss=1.25e+3, 

Epoch 7/10:  29%|▎| 289/991 [1:33:57<2:46:42, 14.25s/batch, batch_loss=1.25e+3, 

Epoch 7/10:  29%|▎| 289/991 [1:34:11<2:46:42, 14.25s/batch, batch_loss=10.6, bat

Epoch 7/10:  29%|▎| 290/991 [1:34:11<2:44:13, 14.06s/batch, batch_loss=10.6, bat

Epoch 7/10:  29%|▎| 290/991 [1:34:24<2:44:13, 14.06s/batch, batch_loss=5.45, bat

Epoch 7/10:  29%|▎| 291/991 [1:34:24<2:42:07, 13.90s/batch, batch_loss=5.45, bat

Epoch 7/10:  29%|▎| 291/991 [1:34:38<2:42:07, 13.90s/batch, batch_loss=10.7, bat

Epoch 7/10:  29%|▎| 292/991 [1:34:38<2:41:34, 13.87s/batch, batch_loss=10.7, bat

Epoch 7/10:  29%|▎| 292/991 [1:34:52<2:41:34, 13.87s/batch, batch_loss=13.4, bat

Epoch 7/10:  30%|▎| 293/991 [1:34:52<2:40:41, 13.81s/batch, batch_loss=13.4, bat

Epoch 7/10:  30%|▎| 293/991 [1:35:05<2:40:41, 13.81s/batch, batch_loss=13, batch

Epoch 7/10:  30%|▎| 294/991 [1:35:05<2:37:57, 13.60s/batch, batch_loss=13, batch

Epoch 7/10:  30%|▎| 294/991 [1:35:19<2:37:57, 13.60s/batch, batch_loss=9.79, bat

Epoch 7/10:  30%|▎| 295/991 [1:35:19<2:38:39, 13.68s/batch, batch_loss=9.79, bat

Epoch 7/10:  30%|▎| 295/991 [1:35:32<2:38:39, 13.68s/batch, batch_loss=16.3, bat

Epoch 7/10:  30%|▎| 296/991 [1:35:32<2:38:15, 13.66s/batch, batch_loss=16.3, bat

Epoch 7/10:  30%|▎| 296/991 [1:35:45<2:38:15, 13.66s/batch, batch_loss=12.1, bat

Epoch 7/10:  30%|▎| 297/991 [1:35:45<2:36:46, 13.55s/batch, batch_loss=12.1, bat

Epoch 7/10:  30%|▎| 297/991 [1:35:59<2:36:46, 13.55s/batch, batch_loss=3.24e+4, 

Epoch 7/10:  30%|▎| 298/991 [1:35:59<2:35:33, 13.47s/batch, batch_loss=3.24e+4, 

Epoch 7/10:  30%|▎| 298/991 [1:36:12<2:35:33, 13.47s/batch, batch_loss=12.4, bat

Epoch 7/10:  30%|▎| 299/991 [1:36:12<2:35:59, 13.53s/batch, batch_loss=12.4, bat

Epoch 7/10:  30%|▎| 299/991 [1:36:26<2:35:59, 13.53s/batch, batch_loss=6.23, bat

Epoch 7/10:  30%|▎| 300/991 [1:36:26<2:36:32, 13.59s/batch, batch_loss=6.23, bat

Epoch 7/10:  30%|▎| 300/991 [1:36:40<2:36:32, 13.59s/batch, batch_loss=8.15, bat

Epoch 7/10:  30%|▎| 301/991 [1:36:40<2:36:23, 13.60s/batch, batch_loss=8.15, bat

Epoch 7/10:  30%|▎| 301/991 [1:36:53<2:36:23, 13.60s/batch, batch_loss=12.2, bat

Epoch 7/10:  30%|▎| 302/991 [1:36:53<2:35:51, 13.57s/batch, batch_loss=12.2, bat

Epoch 7/10:  30%|▎| 302/991 [1:37:07<2:35:51, 13.57s/batch, batch_loss=9.4, batc

Epoch 7/10:  31%|▎| 303/991 [1:37:07<2:36:44, 13.67s/batch, batch_loss=9.4, batc

Epoch 7/10:  31%|▎| 303/991 [1:37:23<2:36:44, 13.67s/batch, batch_loss=3.03, bat

Epoch 7/10:  31%|▎| 304/991 [1:37:23<2:42:37, 14.20s/batch, batch_loss=3.03, bat

Epoch 7/10:  31%|▎| 304/991 [1:37:36<2:42:37, 14.20s/batch, batch_loss=14.7, bat

Epoch 7/10:  31%|▎| 305/991 [1:37:36<2:39:20, 13.94s/batch, batch_loss=14.7, bat

Epoch 7/10:  31%|▎| 305/991 [1:37:49<2:39:20, 13.94s/batch, batch_loss=8.32, bat

Epoch 7/10:  31%|▎| 306/991 [1:37:49<2:36:27, 13.70s/batch, batch_loss=8.32, bat

Epoch 7/10:  31%|▎| 306/991 [1:38:02<2:36:27, 13.70s/batch, batch_loss=6.28e+3, 

Epoch 7/10:  31%|▎| 307/991 [1:38:02<2:32:15, 13.36s/batch, batch_loss=6.28e+3, 

Epoch 7/10:  31%|▎| 307/991 [1:38:15<2:32:15, 13.36s/batch, batch_loss=11.8, bat

Epoch 7/10:  31%|▎| 308/991 [1:38:15<2:31:02, 13.27s/batch, batch_loss=11.8, bat

Epoch 7/10:  31%|▎| 308/991 [1:38:28<2:31:02, 13.27s/batch, batch_loss=18.7, bat

Epoch 7/10:  31%|▎| 309/991 [1:38:28<2:30:41, 13.26s/batch, batch_loss=18.7, bat

Epoch 7/10:  31%|▎| 309/991 [1:38:41<2:30:41, 13.26s/batch, batch_loss=11.8, bat

Epoch 7/10:  31%|▎| 310/991 [1:38:41<2:28:55, 13.12s/batch, batch_loss=11.8, bat

Epoch 7/10:  31%|▎| 310/991 [1:38:54<2:28:55, 13.12s/batch, batch_loss=11.3, bat

Epoch 7/10:  31%|▎| 311/991 [1:38:54<2:28:30, 13.10s/batch, batch_loss=11.3, bat

Epoch 7/10:  31%|▎| 311/991 [1:39:08<2:28:30, 13.10s/batch, batch_loss=11.3, bat

Epoch 7/10:  31%|▎| 312/991 [1:39:08<2:31:23, 13.38s/batch, batch_loss=11.3, bat

Epoch 7/10:  31%|▎| 312/991 [1:39:24<2:31:23, 13.38s/batch, batch_loss=1.06e+4, 

Epoch 7/10:  32%|▎| 313/991 [1:39:24<2:41:23, 14.28s/batch, batch_loss=1.06e+4, 

Epoch 7/10:  32%|▎| 313/991 [1:39:37<2:41:23, 14.28s/batch, batch_loss=7.55, bat

Epoch 7/10:  32%|▎| 314/991 [1:39:37<2:37:38, 13.97s/batch, batch_loss=7.55, bat

Epoch 7/10:  32%|▎| 314/991 [1:39:51<2:37:38, 13.97s/batch, batch_loss=13.8, bat

Epoch 7/10:  32%|▎| 315/991 [1:39:51<2:35:49, 13.83s/batch, batch_loss=13.8, bat

Epoch 7/10:  32%|▎| 315/991 [1:40:04<2:35:49, 13.83s/batch, batch_loss=21.1, bat

Epoch 7/10:  32%|▎| 316/991 [1:40:04<2:34:21, 13.72s/batch, batch_loss=21.1, bat

Epoch 7/10:  32%|▎| 316/991 [1:40:18<2:34:21, 13.72s/batch, batch_loss=20, batch

Epoch 7/10:  32%|▎| 317/991 [1:40:18<2:35:01, 13.80s/batch, batch_loss=20, batch

Epoch 7/10:  32%|▎| 317/991 [1:40:32<2:35:01, 13.80s/batch, batch_loss=18.8, bat

Epoch 7/10:  32%|▎| 318/991 [1:40:32<2:33:24, 13.68s/batch, batch_loss=18.8, bat

Epoch 7/10:  32%|▎| 318/991 [1:40:45<2:33:24, 13.68s/batch, batch_loss=12.8, bat

Epoch 7/10:  32%|▎| 319/991 [1:40:45<2:31:57, 13.57s/batch, batch_loss=12.8, bat

Epoch 7/10:  32%|▎| 319/991 [1:40:59<2:31:57, 13.57s/batch, batch_loss=13.8, bat

Epoch 7/10:  32%|▎| 320/991 [1:40:59<2:31:16, 13.53s/batch, batch_loss=13.8, bat

Epoch 7/10:  32%|▎| 320/991 [1:41:12<2:31:16, 13.53s/batch, batch_loss=20.1, bat

Epoch 7/10:  32%|▎| 321/991 [1:41:12<2:30:35, 13.49s/batch, batch_loss=20.1, bat

Epoch 7/10:  32%|▎| 321/991 [1:41:26<2:30:35, 13.49s/batch, batch_loss=6.34, bat

Epoch 7/10:  32%|▎| 322/991 [1:41:26<2:31:09, 13.56s/batch, batch_loss=6.34, bat

Epoch 7/10:  32%|▎| 322/991 [1:41:42<2:31:09, 13.56s/batch, batch_loss=8.99, bat

Epoch 7/10:  33%|▎| 323/991 [1:41:42<2:38:59, 14.28s/batch, batch_loss=8.99, bat

Epoch 7/10:  33%|▎| 323/991 [1:41:55<2:38:59, 14.28s/batch, batch_loss=22, batch

Epoch 7/10:  33%|▎| 324/991 [1:41:55<2:34:50, 13.93s/batch, batch_loss=22, batch

Epoch 7/10:  33%|▎| 324/991 [1:42:08<2:34:50, 13.93s/batch, batch_loss=9.16, bat

Epoch 7/10:  33%|▎| 325/991 [1:42:08<2:31:58, 13.69s/batch, batch_loss=9.16, bat

Epoch 7/10:  33%|▎| 325/991 [1:42:22<2:31:58, 13.69s/batch, batch_loss=21.6, bat

Epoch 7/10:  33%|▎| 326/991 [1:42:22<2:33:45, 13.87s/batch, batch_loss=21.6, bat

Epoch 7/10:  33%|▎| 326/991 [1:42:36<2:33:45, 13.87s/batch, batch_loss=3.04e+3, 

Epoch 7/10:  33%|▎| 327/991 [1:42:36<2:32:38, 13.79s/batch, batch_loss=3.04e+3, 

Epoch 7/10:  33%|▎| 327/991 [1:42:49<2:32:38, 13.79s/batch, batch_loss=7.83, bat

Epoch 7/10:  33%|▎| 328/991 [1:42:49<2:31:09, 13.68s/batch, batch_loss=7.83, bat

Epoch 7/10:  33%|▎| 328/991 [1:43:03<2:31:09, 13.68s/batch, batch_loss=18.3, bat

Epoch 7/10:  33%|▎| 329/991 [1:43:03<2:31:30, 13.73s/batch, batch_loss=18.3, bat

Epoch 7/10:  33%|▎| 329/991 [1:43:16<2:31:30, 13.73s/batch, batch_loss=12.5, bat

Epoch 7/10:  33%|▎| 330/991 [1:43:16<2:30:04, 13.62s/batch, batch_loss=12.5, bat

Epoch 7/10:  33%|▎| 330/991 [1:43:30<2:30:04, 13.62s/batch, batch_loss=12.2, bat

Epoch 7/10:  33%|▎| 331/991 [1:43:30<2:28:12, 13.47s/batch, batch_loss=12.2, bat

Epoch 7/10:  33%|▎| 331/991 [1:43:45<2:28:12, 13.47s/batch, batch_loss=16, batch

Epoch 7/10:  34%|▎| 332/991 [1:43:45<2:35:37, 14.17s/batch, batch_loss=16, batch

Epoch 7/10:  34%|▎| 332/991 [1:43:59<2:35:37, 14.17s/batch, batch_loss=12.7, bat

Epoch 7/10:  34%|▎| 333/991 [1:43:59<2:32:35, 13.91s/batch, batch_loss=12.7, bat

Epoch 7/10:  34%|▎| 333/991 [1:44:12<2:32:35, 13.91s/batch, batch_loss=13.7, bat

Epoch 7/10:  34%|▎| 334/991 [1:44:12<2:31:24, 13.83s/batch, batch_loss=13.7, bat

Epoch 7/10:  34%|▎| 334/991 [1:44:26<2:31:24, 13.83s/batch, batch_loss=4.14, bat

Epoch 7/10:  34%|▎| 335/991 [1:44:26<2:29:46, 13.70s/batch, batch_loss=4.14, bat

Epoch 7/10:  34%|▎| 335/991 [1:44:40<2:29:46, 13.70s/batch, batch_loss=8.47e+3, 

Epoch 7/10:  34%|▎| 336/991 [1:44:40<2:29:59, 13.74s/batch, batch_loss=8.47e+3, 

Epoch 7/10:  34%|▎| 336/991 [1:44:53<2:29:59, 13.74s/batch, batch_loss=2.35e+3, 

Epoch 7/10:  34%|▎| 337/991 [1:44:53<2:29:34, 13.72s/batch, batch_loss=2.35e+3, 

Epoch 7/10:  34%|▎| 337/991 [1:45:07<2:29:34, 13.72s/batch, batch_loss=8.74, bat

Epoch 7/10:  34%|▎| 338/991 [1:45:07<2:28:21, 13.63s/batch, batch_loss=8.74, bat

Epoch 7/10:  34%|▎| 338/991 [1:45:20<2:28:21, 13.63s/batch, batch_loss=18.1, bat

Epoch 7/10:  34%|▎| 339/991 [1:45:20<2:25:59, 13.44s/batch, batch_loss=18.1, bat

Epoch 7/10:  34%|▎| 339/991 [1:45:33<2:25:59, 13.44s/batch, batch_loss=10.1, bat

Epoch 7/10:  34%|▎| 340/991 [1:45:33<2:24:38, 13.33s/batch, batch_loss=10.1, bat

Epoch 7/10:  34%|▎| 340/991 [1:45:48<2:24:38, 13.33s/batch, batch_loss=9.39, bat

Epoch 7/10:  34%|▎| 341/991 [1:45:48<2:30:17, 13.87s/batch, batch_loss=9.39, bat

Epoch 7/10:  34%|▎| 341/991 [1:46:01<2:30:17, 13.87s/batch, batch_loss=0.926, ba

Epoch 7/10:  35%|▎| 342/991 [1:46:01<2:27:17, 13.62s/batch, batch_loss=0.926, ba

Epoch 7/10:  35%|▎| 342/991 [1:46:14<2:27:17, 13.62s/batch, batch_loss=6.61, bat

Epoch 7/10:  35%|▎| 343/991 [1:46:14<2:26:07, 13.53s/batch, batch_loss=6.61, bat

Epoch 7/10:  35%|▎| 343/991 [1:46:27<2:26:07, 13.53s/batch, batch_loss=14.6, bat

Epoch 7/10:  35%|▎| 344/991 [1:46:27<2:24:57, 13.44s/batch, batch_loss=14.6, bat

Epoch 7/10:  35%|▎| 344/991 [1:46:41<2:24:57, 13.44s/batch, batch_loss=112, batc

Epoch 7/10:  35%|▎| 345/991 [1:46:41<2:24:11, 13.39s/batch, batch_loss=112, batc

Epoch 7/10:  35%|▎| 345/991 [1:46:54<2:24:11, 13.39s/batch, batch_loss=14.9, bat

Epoch 7/10:  35%|▎| 346/991 [1:46:54<2:25:15, 13.51s/batch, batch_loss=14.9, bat

Epoch 7/10:  35%|▎| 346/991 [1:47:08<2:25:15, 13.51s/batch, batch_loss=12.4, bat

Epoch 7/10:  35%|▎| 347/991 [1:47:08<2:25:10, 13.53s/batch, batch_loss=12.4, bat

Epoch 7/10:  35%|▎| 347/991 [1:47:21<2:25:10, 13.53s/batch, batch_loss=13.9, bat

Epoch 7/10:  35%|▎| 348/991 [1:47:21<2:24:26, 13.48s/batch, batch_loss=13.9, bat

Epoch 7/10:  35%|▎| 348/991 [1:47:35<2:24:26, 13.48s/batch, batch_loss=9.36, bat

Epoch 7/10:  35%|▎| 349/991 [1:47:35<2:25:27, 13.59s/batch, batch_loss=9.36, bat

Epoch 7/10:  35%|▎| 349/991 [1:47:50<2:25:27, 13.59s/batch, batch_loss=15, batch

Epoch 7/10:  35%|▎| 350/991 [1:47:50<2:27:16, 13.79s/batch, batch_loss=15, batch

Epoch 7/10:  35%|▎| 350/991 [1:48:04<2:27:16, 13.79s/batch, batch_loss=9.79, bat

Epoch 7/10:  35%|▎| 351/991 [1:48:04<2:27:58, 13.87s/batch, batch_loss=9.79, bat

Epoch 7/10:  35%|▎| 351/991 [1:48:18<2:27:58, 13.87s/batch, batch_loss=14.5, bat

Epoch 7/10:  36%|▎| 352/991 [1:48:18<2:28:11, 13.91s/batch, batch_loss=14.5, bat

Epoch 7/10:  36%|▎| 352/991 [1:48:31<2:28:11, 13.91s/batch, batch_loss=13.4, bat

Epoch 7/10:  36%|▎| 353/991 [1:48:31<2:27:30, 13.87s/batch, batch_loss=13.4, bat

Epoch 7/10:  36%|▎| 353/991 [1:48:45<2:27:30, 13.87s/batch, batch_loss=20, batch

Epoch 7/10:  36%|▎| 354/991 [1:48:45<2:27:04, 13.85s/batch, batch_loss=20, batch

Epoch 7/10:  36%|▎| 354/991 [1:48:59<2:27:04, 13.85s/batch, batch_loss=8.85, bat

Epoch 7/10:  36%|▎| 355/991 [1:48:59<2:26:30, 13.82s/batch, batch_loss=8.85, bat

Epoch 7/10:  36%|▎| 355/991 [1:49:13<2:26:30, 13.82s/batch, batch_loss=15.7, bat

Epoch 7/10:  36%|▎| 356/991 [1:49:13<2:28:38, 14.04s/batch, batch_loss=15.7, bat

Epoch 7/10:  36%|▎| 356/991 [1:49:27<2:28:38, 14.04s/batch, batch_loss=13.9, bat

Epoch 7/10:  36%|▎| 357/991 [1:49:27<2:26:38, 13.88s/batch, batch_loss=13.9, bat

Epoch 7/10:  36%|▎| 357/991 [1:49:41<2:26:38, 13.88s/batch, batch_loss=13, batch

Epoch 7/10:  36%|▎| 358/991 [1:49:41<2:25:54, 13.83s/batch, batch_loss=13, batch

Epoch 7/10:  36%|▎| 358/991 [1:49:56<2:25:54, 13.83s/batch, batch_loss=5.32, bat

Epoch 7/10:  36%|▎| 359/991 [1:49:56<2:29:41, 14.21s/batch, batch_loss=5.32, bat

Epoch 7/10:  36%|▎| 359/991 [1:50:09<2:29:41, 14.21s/batch, batch_loss=8.89, bat

Epoch 7/10:  36%|▎| 360/991 [1:50:09<2:26:55, 13.97s/batch, batch_loss=8.89, bat

Epoch 7/10:  36%|▎| 360/991 [1:50:23<2:26:55, 13.97s/batch, batch_loss=26.3, bat

Epoch 7/10:  36%|▎| 361/991 [1:50:23<2:24:46, 13.79s/batch, batch_loss=26.3, bat

Epoch 7/10:  36%|▎| 361/991 [1:50:37<2:24:46, 13.79s/batch, batch_loss=17.3, bat

Epoch 7/10:  37%|▎| 362/991 [1:50:37<2:25:02, 13.83s/batch, batch_loss=17.3, bat

Epoch 7/10:  37%|▎| 362/991 [1:50:50<2:25:02, 13.83s/batch, batch_loss=13.4, bat

Epoch 7/10:  37%|▎| 363/991 [1:50:50<2:23:48, 13.74s/batch, batch_loss=13.4, bat

Epoch 7/10:  37%|▎| 363/991 [1:51:03<2:23:48, 13.74s/batch, batch_loss=11.2, bat

Epoch 7/10:  37%|▎| 364/991 [1:51:03<2:20:42, 13.46s/batch, batch_loss=11.2, bat

Epoch 7/10:  37%|▎| 364/991 [1:51:17<2:20:42, 13.46s/batch, batch_loss=10.6, bat

Epoch 7/10:  37%|▎| 365/991 [1:51:17<2:21:36, 13.57s/batch, batch_loss=10.6, bat

Epoch 7/10:  37%|▎| 365/991 [1:51:30<2:21:36, 13.57s/batch, batch_loss=16.2, bat

Epoch 7/10:  37%|▎| 366/991 [1:51:30<2:19:35, 13.40s/batch, batch_loss=16.2, bat

Epoch 7/10:  37%|▎| 366/991 [1:51:44<2:19:35, 13.40s/batch, batch_loss=10, batch

Epoch 7/10:  37%|▎| 367/991 [1:51:44<2:21:05, 13.57s/batch, batch_loss=10, batch

Epoch 7/10:  37%|▎| 367/991 [1:51:57<2:21:05, 13.57s/batch, batch_loss=11.6, bat

Epoch 7/10:  37%|▎| 368/991 [1:51:57<2:20:03, 13.49s/batch, batch_loss=11.6, bat

Epoch 7/10:  37%|▎| 368/991 [1:52:10<2:20:03, 13.49s/batch, batch_loss=14.6, bat

Epoch 7/10:  37%|▎| 369/991 [1:52:10<2:20:01, 13.51s/batch, batch_loss=14.6, bat

Epoch 7/10:  37%|▎| 369/991 [1:52:24<2:20:01, 13.51s/batch, batch_loss=1.21e+4, 

Epoch 7/10:  37%|▎| 370/991 [1:52:24<2:20:17, 13.56s/batch, batch_loss=1.21e+4, 

Epoch 7/10:  37%|▎| 370/991 [1:52:38<2:20:17, 13.56s/batch, batch_loss=18.5, bat

Epoch 7/10:  37%|▎| 371/991 [1:52:38<2:19:24, 13.49s/batch, batch_loss=18.5, bat

Epoch 7/10:  37%|▎| 371/991 [1:52:51<2:19:24, 13.49s/batch, batch_loss=16.8, bat

Epoch 7/10:  38%|▍| 372/991 [1:52:51<2:19:53, 13.56s/batch, batch_loss=16.8, bat

Epoch 7/10:  38%|▍| 372/991 [1:53:05<2:19:53, 13.56s/batch, batch_loss=24.8, bat

Epoch 7/10:  38%|▍| 373/991 [1:53:05<2:20:00, 13.59s/batch, batch_loss=24.8, bat

Epoch 7/10:  38%|▍| 373/991 [1:53:19<2:20:00, 13.59s/batch, batch_loss=467, batc

Epoch 7/10:  38%|▍| 374/991 [1:53:19<2:20:35, 13.67s/batch, batch_loss=467, batc

Epoch 7/10:  38%|▍| 374/991 [1:53:33<2:20:35, 13.67s/batch, batch_loss=1.43e+3, 

Epoch 7/10:  38%|▍| 375/991 [1:53:33<2:20:45, 13.71s/batch, batch_loss=1.43e+3, 

Epoch 7/10:  38%|▍| 375/991 [1:53:49<2:20:45, 13.71s/batch, batch_loss=1.23e+3, 

Epoch 7/10:  38%|▍| 376/991 [1:53:49<2:29:13, 14.56s/batch, batch_loss=1.23e+3, 

Epoch 7/10:  38%|▍| 376/991 [1:54:03<2:29:13, 14.56s/batch, batch_loss=20.3, bat

Epoch 7/10:  38%|▍| 377/991 [1:54:03<2:27:11, 14.38s/batch, batch_loss=20.3, bat

Epoch 7/10:  38%|▍| 377/991 [1:54:16<2:27:11, 14.38s/batch, batch_loss=1.19e+3, 

Epoch 7/10:  38%|▍| 378/991 [1:54:16<2:23:22, 14.03s/batch, batch_loss=1.19e+3, 

Epoch 7/10:  38%|▍| 378/991 [1:54:30<2:23:22, 14.03s/batch, batch_loss=11.6, bat

Epoch 7/10:  38%|▍| 379/991 [1:54:30<2:22:42, 13.99s/batch, batch_loss=11.6, bat

Epoch 7/10:  38%|▍| 379/991 [1:54:44<2:22:42, 13.99s/batch, batch_loss=12.7, bat

Epoch 7/10:  38%|▍| 380/991 [1:54:44<2:20:49, 13.83s/batch, batch_loss=12.7, bat

Epoch 7/10:  38%|▍| 380/991 [1:54:57<2:20:49, 13.83s/batch, batch_loss=25.7, bat

Epoch 7/10:  38%|▍| 381/991 [1:54:57<2:20:12, 13.79s/batch, batch_loss=25.7, bat

Epoch 7/10:  38%|▍| 381/991 [1:55:10<2:20:12, 13.79s/batch, batch_loss=12.2, bat

Epoch 7/10:  39%|▍| 382/991 [1:55:10<2:17:58, 13.59s/batch, batch_loss=12.2, bat

Epoch 7/10:  39%|▍| 382/991 [1:55:27<2:17:58, 13.59s/batch, batch_loss=9.86, bat

Epoch 7/10:  39%|▍| 383/991 [1:55:27<2:25:23, 14.35s/batch, batch_loss=9.86, bat

Epoch 7/10:  39%|▍| 383/991 [1:55:40<2:25:23, 14.35s/batch, batch_loss=26, batch

Epoch 7/10:  39%|▍| 384/991 [1:55:40<2:22:09, 14.05s/batch, batch_loss=26, batch

Epoch 7/10:  39%|▍| 384/991 [1:55:53<2:22:09, 14.05s/batch, batch_loss=10.5, bat

Epoch 7/10:  39%|▍| 385/991 [1:55:53<2:19:15, 13.79s/batch, batch_loss=10.5, bat

Epoch 7/10:  39%|▍| 385/991 [1:56:06<2:19:15, 13.79s/batch, batch_loss=19.3, bat

Epoch 7/10:  39%|▍| 386/991 [1:56:06<2:17:50, 13.67s/batch, batch_loss=19.3, bat

Epoch 7/10:  39%|▍| 386/991 [1:56:20<2:17:50, 13.67s/batch, batch_loss=24.4, bat

Epoch 7/10:  39%|▍| 387/991 [1:56:20<2:17:41, 13.68s/batch, batch_loss=24.4, bat

Epoch 7/10:  39%|▍| 387/991 [1:56:33<2:17:41, 13.68s/batch, batch_loss=791, batc

Epoch 7/10:  39%|▍| 388/991 [1:56:33<2:15:53, 13.52s/batch, batch_loss=791, batc

Epoch 7/10:  39%|▍| 388/991 [1:56:47<2:15:53, 13.52s/batch, batch_loss=17.2, bat

Epoch 7/10:  39%|▍| 389/991 [1:56:47<2:14:39, 13.42s/batch, batch_loss=17.2, bat

Epoch 7/10:  39%|▍| 389/991 [1:57:00<2:14:39, 13.42s/batch, batch_loss=866, batc

Epoch 7/10:  39%|▍| 390/991 [1:57:00<2:14:11, 13.40s/batch, batch_loss=866, batc

Epoch 7/10:  39%|▍| 390/991 [1:57:13<2:14:11, 13.40s/batch, batch_loss=16.9, bat

Epoch 7/10:  39%|▍| 391/991 [1:57:13<2:13:57, 13.40s/batch, batch_loss=16.9, bat

Epoch 7/10:  39%|▍| 391/991 [1:57:26<2:13:57, 13.40s/batch, batch_loss=17.8, bat

Epoch 7/10:  40%|▍| 392/991 [1:57:26<2:12:41, 13.29s/batch, batch_loss=17.8, bat

Epoch 7/10:  40%|▍| 392/991 [1:57:40<2:12:41, 13.29s/batch, batch_loss=17.6, bat

Epoch 7/10:  40%|▍| 393/991 [1:57:40<2:12:56, 13.34s/batch, batch_loss=17.6, bat

Epoch 7/10:  40%|▍| 393/991 [1:57:53<2:12:56, 13.34s/batch, batch_loss=607, batc

Epoch 7/10:  40%|▍| 394/991 [1:57:53<2:12:59, 13.37s/batch, batch_loss=607, batc

Epoch 7/10:  40%|▍| 394/991 [1:58:09<2:12:59, 13.37s/batch, batch_loss=19.7, bat

Epoch 7/10:  40%|▍| 395/991 [1:58:09<2:18:37, 13.96s/batch, batch_loss=19.7, bat

Epoch 7/10:  40%|▍| 395/991 [1:58:22<2:18:37, 13.96s/batch, batch_loss=11.8, bat

Epoch 7/10:  40%|▍| 396/991 [1:58:22<2:18:00, 13.92s/batch, batch_loss=11.8, bat

Epoch 7/10:  40%|▍| 396/991 [1:58:36<2:18:00, 13.92s/batch, batch_loss=15.3, bat

Epoch 7/10:  40%|▍| 397/991 [1:58:36<2:16:28, 13.79s/batch, batch_loss=15.3, bat

Epoch 7/10:  40%|▍| 397/991 [1:58:49<2:16:28, 13.79s/batch, batch_loss=12.5, bat

Epoch 7/10:  40%|▍| 398/991 [1:58:49<2:15:41, 13.73s/batch, batch_loss=12.5, bat

Epoch 7/10:  40%|▍| 398/991 [1:59:03<2:15:41, 13.73s/batch, batch_loss=19.6, bat

Epoch 7/10:  40%|▍| 399/991 [1:59:03<2:15:11, 13.70s/batch, batch_loss=19.6, bat

Epoch 7/10:  40%|▍| 399/991 [1:59:17<2:15:11, 13.70s/batch, batch_loss=11.4, bat

Epoch 7/10:  40%|▍| 400/991 [1:59:17<2:16:30, 13.86s/batch, batch_loss=11.4, bat

Epoch 7/10:  40%|▍| 400/991 [1:59:31<2:16:30, 13.86s/batch, batch_loss=11.8, bat

Epoch 7/10:  40%|▍| 401/991 [1:59:31<2:15:29, 13.78s/batch, batch_loss=11.8, bat

Epoch 7/10:  40%|▍| 401/991 [1:59:44<2:15:29, 13.78s/batch, batch_loss=16, batch

Epoch 7/10:  41%|▍| 402/991 [1:59:44<2:14:16, 13.68s/batch, batch_loss=16, batch

Epoch 7/10:  41%|▍| 402/991 [1:59:57<2:14:16, 13.68s/batch, batch_loss=16.1, bat

Epoch 7/10:  41%|▍| 403/991 [1:59:57<2:11:40, 13.44s/batch, batch_loss=16.1, bat

Epoch 7/10:  41%|▍| 403/991 [2:00:11<2:11:40, 13.44s/batch, batch_loss=11, batch

Epoch 7/10:  41%|▍| 404/991 [2:00:11<2:11:36, 13.45s/batch, batch_loss=11, batch

Epoch 7/10:  41%|▍| 404/991 [2:00:25<2:11:36, 13.45s/batch, batch_loss=13.1, bat

Epoch 7/10:  41%|▍| 405/991 [2:00:25<2:12:49, 13.60s/batch, batch_loss=13.1, bat

Epoch 7/10:  41%|▍| 405/991 [2:00:38<2:12:49, 13.60s/batch, batch_loss=5.09, bat

Epoch 7/10:  41%|▍| 406/991 [2:00:38<2:12:47, 13.62s/batch, batch_loss=5.09, bat

Epoch 7/10:  41%|▍| 406/991 [2:00:52<2:12:47, 13.62s/batch, batch_loss=25.8, bat

Epoch 7/10:  41%|▍| 407/991 [2:00:52<2:14:00, 13.77s/batch, batch_loss=25.8, bat

Epoch 7/10:  41%|▍| 407/991 [2:01:06<2:14:00, 13.77s/batch, batch_loss=6.66, bat

Epoch 7/10:  41%|▍| 408/991 [2:01:06<2:14:15, 13.82s/batch, batch_loss=6.66, bat

Epoch 7/10:  41%|▍| 408/991 [2:01:20<2:14:15, 13.82s/batch, batch_loss=21.4, bat

Epoch 7/10:  41%|▍| 409/991 [2:01:20<2:12:34, 13.67s/batch, batch_loss=21.4, bat

Epoch 7/10:  41%|▍| 409/991 [2:01:33<2:12:34, 13.67s/batch, batch_loss=20.7, bat

Epoch 7/10:  41%|▍| 410/991 [2:01:33<2:11:09, 13.54s/batch, batch_loss=20.7, bat

Epoch 7/10:  41%|▍| 410/991 [2:01:46<2:11:09, 13.54s/batch, batch_loss=13, batch

Epoch 7/10:  41%|▍| 411/991 [2:01:46<2:10:03, 13.45s/batch, batch_loss=13, batch

Epoch 7/10:  41%|▍| 411/991 [2:01:59<2:10:03, 13.45s/batch, batch_loss=13.2, bat

Epoch 7/10:  42%|▍| 412/991 [2:01:59<2:08:26, 13.31s/batch, batch_loss=13.2, bat

Epoch 7/10:  42%|▍| 412/991 [2:02:12<2:08:26, 13.31s/batch, batch_loss=16.2, bat

Epoch 7/10:  42%|▍| 413/991 [2:02:12<2:08:06, 13.30s/batch, batch_loss=16.2, bat

Epoch 7/10:  42%|▍| 413/991 [2:02:26<2:08:06, 13.30s/batch, batch_loss=13.2, bat

Epoch 7/10:  42%|▍| 414/991 [2:02:26<2:07:43, 13.28s/batch, batch_loss=13.2, bat

Epoch 7/10:  42%|▍| 414/991 [2:02:39<2:07:43, 13.28s/batch, batch_loss=7.97, bat

Epoch 7/10:  42%|▍| 415/991 [2:02:39<2:09:03, 13.44s/batch, batch_loss=7.97, bat

Epoch 7/10:  42%|▍| 415/991 [2:02:53<2:09:03, 13.44s/batch, batch_loss=9.44, bat

Epoch 7/10:  42%|▍| 416/991 [2:02:53<2:09:05, 13.47s/batch, batch_loss=9.44, bat

Epoch 7/10:  42%|▍| 416/991 [2:03:06<2:09:05, 13.47s/batch, batch_loss=10.8, bat

Epoch 7/10:  42%|▍| 417/991 [2:03:06<2:07:58, 13.38s/batch, batch_loss=10.8, bat

Epoch 7/10:  42%|▍| 417/991 [2:03:19<2:07:58, 13.38s/batch, batch_loss=10.6, bat

Epoch 7/10:  42%|▍| 418/991 [2:03:19<2:07:23, 13.34s/batch, batch_loss=10.6, bat

Epoch 7/10:  42%|▍| 418/991 [2:03:33<2:07:23, 13.34s/batch, batch_loss=1.3e+3, b

Epoch 7/10:  42%|▍| 419/991 [2:03:33<2:06:56, 13.32s/batch, batch_loss=1.3e+3, b

Epoch 7/10:  42%|▍| 419/991 [2:03:46<2:06:56, 13.32s/batch, batch_loss=14.8, bat

Epoch 7/10:  42%|▍| 420/991 [2:03:46<2:07:19, 13.38s/batch, batch_loss=14.8, bat

Epoch 7/10:  42%|▍| 420/991 [2:04:00<2:07:19, 13.38s/batch, batch_loss=13.1, bat

Epoch 7/10:  42%|▍| 421/991 [2:04:00<2:07:55, 13.47s/batch, batch_loss=13.1, bat

Epoch 7/10:  42%|▍| 421/991 [2:04:14<2:07:55, 13.47s/batch, batch_loss=7.96, bat

Epoch 7/10:  43%|▍| 422/991 [2:04:14<2:08:51, 13.59s/batch, batch_loss=7.96, bat

Epoch 7/10:  43%|▍| 422/991 [2:04:27<2:08:51, 13.59s/batch, batch_loss=9.55, bat

Epoch 7/10:  43%|▍| 423/991 [2:04:27<2:07:56, 13.52s/batch, batch_loss=9.55, bat

Epoch 7/10:  43%|▍| 423/991 [2:04:41<2:07:56, 13.52s/batch, batch_loss=11.2, bat

Epoch 7/10:  43%|▍| 424/991 [2:04:41<2:07:28, 13.49s/batch, batch_loss=11.2, bat

Epoch 7/10:  43%|▍| 424/991 [2:04:54<2:07:28, 13.49s/batch, batch_loss=6.46, bat

Epoch 7/10:  43%|▍| 425/991 [2:04:54<2:07:02, 13.47s/batch, batch_loss=6.46, bat

Epoch 7/10:  43%|▍| 425/991 [2:05:07<2:07:02, 13.47s/batch, batch_loss=2.24, bat

Epoch 7/10:  43%|▍| 426/991 [2:05:07<2:06:26, 13.43s/batch, batch_loss=2.24, bat

Epoch 7/10:  43%|▍| 426/991 [2:05:21<2:06:26, 13.43s/batch, batch_loss=10, batch

Epoch 7/10:  43%|▍| 427/991 [2:05:21<2:07:19, 13.55s/batch, batch_loss=10, batch

Epoch 7/10:  43%|▍| 427/991 [2:05:35<2:07:19, 13.55s/batch, batch_loss=16.6, bat

Epoch 7/10:  43%|▍| 428/991 [2:05:35<2:08:10, 13.66s/batch, batch_loss=16.6, bat

Epoch 7/10:  43%|▍| 428/991 [2:05:49<2:08:10, 13.66s/batch, batch_loss=17.3, bat

Epoch 7/10:  43%|▍| 429/991 [2:05:49<2:08:25, 13.71s/batch, batch_loss=17.3, bat

Epoch 7/10:  43%|▍| 429/991 [2:06:02<2:08:25, 13.71s/batch, batch_loss=9.27e+3, 

Epoch 7/10:  43%|▍| 430/991 [2:06:02<2:08:00, 13.69s/batch, batch_loss=9.27e+3, 

Epoch 7/10:  43%|▍| 430/991 [2:06:16<2:08:00, 13.69s/batch, batch_loss=22.8, bat

Epoch 7/10:  43%|▍| 431/991 [2:06:16<2:07:28, 13.66s/batch, batch_loss=22.8, bat

Epoch 7/10:  43%|▍| 431/991 [2:06:30<2:07:28, 13.66s/batch, batch_loss=17.3, bat

Epoch 7/10:  44%|▍| 432/991 [2:06:30<2:08:25, 13.78s/batch, batch_loss=17.3, bat

Epoch 7/10:  44%|▍| 432/991 [2:06:44<2:08:25, 13.78s/batch, batch_loss=10.4, bat

Epoch 7/10:  44%|▍| 433/991 [2:06:44<2:09:10, 13.89s/batch, batch_loss=10.4, bat

Epoch 7/10:  44%|▍| 433/991 [2:06:58<2:09:10, 13.89s/batch, batch_loss=15.8, bat

Epoch 7/10:  44%|▍| 434/991 [2:06:58<2:07:57, 13.78s/batch, batch_loss=15.8, bat

Epoch 7/10:  44%|▍| 434/991 [2:07:11<2:07:57, 13.78s/batch, batch_loss=12.3, bat

Epoch 7/10:  44%|▍| 435/991 [2:07:11<2:06:09, 13.61s/batch, batch_loss=12.3, bat

Epoch 7/10:  44%|▍| 435/991 [2:07:27<2:06:09, 13.61s/batch, batch_loss=13.9, bat

Epoch 7/10:  44%|▍| 436/991 [2:07:27<2:13:27, 14.43s/batch, batch_loss=13.9, bat

Epoch 7/10:  44%|▍| 436/991 [2:07:41<2:13:27, 14.43s/batch, batch_loss=15.8, bat

Epoch 7/10:  44%|▍| 437/991 [2:07:41<2:10:57, 14.18s/batch, batch_loss=15.8, bat

Epoch 7/10:  44%|▍| 437/991 [2:07:54<2:10:57, 14.18s/batch, batch_loss=16.9, bat

Epoch 7/10:  44%|▍| 438/991 [2:07:54<2:08:53, 13.98s/batch, batch_loss=16.9, bat

Epoch 7/10:  44%|▍| 438/991 [2:08:08<2:08:53, 13.98s/batch, batch_loss=12.9, bat

Epoch 7/10:  44%|▍| 439/991 [2:08:08<2:08:26, 13.96s/batch, batch_loss=12.9, bat

Epoch 7/10:  44%|▍| 439/991 [2:08:23<2:08:26, 13.96s/batch, batch_loss=20.4, bat

Epoch 7/10:  44%|▍| 440/991 [2:08:23<2:10:38, 14.23s/batch, batch_loss=20.4, bat

Epoch 7/10:  44%|▍| 440/991 [2:08:36<2:10:38, 14.23s/batch, batch_loss=20.7, bat

Epoch 7/10:  45%|▍| 441/991 [2:08:36<2:07:11, 13.88s/batch, batch_loss=20.7, bat

Epoch 7/10:  45%|▍| 441/991 [2:08:50<2:07:11, 13.88s/batch, batch_loss=13.8, bat

Epoch 7/10:  45%|▍| 442/991 [2:08:50<2:05:21, 13.70s/batch, batch_loss=13.8, bat

Epoch 7/10:  45%|▍| 442/991 [2:09:04<2:05:21, 13.70s/batch, batch_loss=19.6, bat

Epoch 7/10:  45%|▍| 443/991 [2:09:04<2:08:15, 14.04s/batch, batch_loss=19.6, bat

Epoch 7/10:  45%|▍| 443/991 [2:09:21<2:08:15, 14.04s/batch, batch_loss=16.9, bat

Epoch 7/10:  45%|▍| 444/991 [2:09:21<2:15:06, 14.82s/batch, batch_loss=16.9, bat

Epoch 7/10:  45%|▍| 444/991 [2:09:36<2:15:06, 14.82s/batch, batch_loss=18.4, bat

Epoch 7/10:  45%|▍| 445/991 [2:09:36<2:15:39, 14.91s/batch, batch_loss=18.4, bat

Epoch 7/10:  45%|▍| 445/991 [2:09:52<2:15:39, 14.91s/batch, batch_loss=24.9, bat

Epoch 7/10:  45%|▍| 446/991 [2:09:52<2:17:55, 15.18s/batch, batch_loss=24.9, bat

Epoch 7/10:  45%|▍| 446/991 [2:10:06<2:17:55, 15.18s/batch, batch_loss=13.9, bat

Epoch 7/10:  45%|▍| 447/991 [2:10:06<2:15:44, 14.97s/batch, batch_loss=13.9, bat

Epoch 7/10:  45%|▍| 447/991 [2:10:23<2:15:44, 14.97s/batch, batch_loss=19.9, bat

Epoch 7/10:  45%|▍| 448/991 [2:10:23<2:19:08, 15.37s/batch, batch_loss=19.9, bat

Epoch 7/10:  45%|▍| 448/991 [2:10:37<2:19:08, 15.37s/batch, batch_loss=17, batch

Epoch 7/10:  45%|▍| 449/991 [2:10:37<2:16:28, 15.11s/batch, batch_loss=17, batch

Epoch 7/10:  45%|▍| 449/991 [2:10:53<2:16:28, 15.11s/batch, batch_loss=22.3, bat

Epoch 7/10:  45%|▍| 450/991 [2:10:53<2:17:38, 15.27s/batch, batch_loss=22.3, bat

Epoch 7/10:  45%|▍| 450/991 [2:11:08<2:17:38, 15.27s/batch, batch_loss=20.2, bat

Epoch 7/10:  46%|▍| 451/991 [2:11:08<2:18:01, 15.34s/batch, batch_loss=20.2, bat

Epoch 7/10:  46%|▍| 451/991 [2:11:26<2:18:01, 15.34s/batch, batch_loss=15.2, bat

Epoch 7/10:  46%|▍| 452/991 [2:11:26<2:25:09, 16.16s/batch, batch_loss=15.2, bat

Epoch 7/10:  46%|▍| 452/991 [2:11:41<2:25:09, 16.16s/batch, batch_loss=17, batch

Epoch 7/10:  46%|▍| 453/991 [2:11:41<2:21:39, 15.80s/batch, batch_loss=17, batch

Epoch 7/10:  46%|▍| 453/991 [2:11:57<2:21:39, 15.80s/batch, batch_loss=7.24e+3, 

Epoch 7/10:  46%|▍| 454/991 [2:11:57<2:20:44, 15.72s/batch, batch_loss=7.24e+3, 

Epoch 7/10:  46%|▍| 454/991 [2:12:13<2:20:44, 15.72s/batch, batch_loss=23.4, bat

Epoch 7/10:  46%|▍| 455/991 [2:12:13<2:20:03, 15.68s/batch, batch_loss=23.4, bat

Epoch 7/10:  46%|▍| 455/991 [2:12:28<2:20:03, 15.68s/batch, batch_loss=21.8, bat

Epoch 7/10:  46%|▍| 456/991 [2:12:28<2:18:22, 15.52s/batch, batch_loss=21.8, bat

Epoch 7/10:  46%|▍| 456/991 [2:12:43<2:18:22, 15.52s/batch, batch_loss=13.9, bat

Epoch 7/10:  46%|▍| 457/991 [2:12:43<2:17:06, 15.41s/batch, batch_loss=13.9, bat

Epoch 7/10:  46%|▍| 457/991 [2:12:59<2:17:06, 15.41s/batch, batch_loss=14.8, bat

Epoch 7/10:  46%|▍| 458/991 [2:12:59<2:17:55, 15.53s/batch, batch_loss=14.8, bat

Epoch 7/10:  46%|▍| 458/991 [2:13:14<2:17:55, 15.53s/batch, batch_loss=21.1, bat

Epoch 7/10:  46%|▍| 459/991 [2:13:14<2:17:12, 15.48s/batch, batch_loss=21.1, bat

Epoch 7/10:  46%|▍| 459/991 [2:13:32<2:17:12, 15.48s/batch, batch_loss=18.6, bat

Epoch 7/10:  46%|▍| 460/991 [2:13:32<2:24:42, 16.35s/batch, batch_loss=18.6, bat

Epoch 7/10:  46%|▍| 460/991 [2:13:48<2:24:42, 16.35s/batch, batch_loss=54, batch

Epoch 7/10:  47%|▍| 461/991 [2:13:48<2:21:38, 16.04s/batch, batch_loss=54, batch

Epoch 7/10:  47%|▍| 461/991 [2:14:03<2:21:38, 16.04s/batch, batch_loss=13.6, bat

Epoch 7/10:  47%|▍| 462/991 [2:14:03<2:19:13, 15.79s/batch, batch_loss=13.6, bat

Epoch 7/10:  47%|▍| 462/991 [2:14:18<2:19:13, 15.79s/batch, batch_loss=6.22e+4, 

Epoch 7/10:  47%|▍| 463/991 [2:14:18<2:16:17, 15.49s/batch, batch_loss=6.22e+4, 

Epoch 7/10:  47%|▍| 463/991 [2:14:33<2:16:17, 15.49s/batch, batch_loss=14.5, bat

Epoch 7/10:  47%|▍| 464/991 [2:14:33<2:14:11, 15.28s/batch, batch_loss=14.5, bat

Epoch 7/10:  47%|▍| 464/991 [2:14:49<2:14:11, 15.28s/batch, batch_loss=13.9, bat

Epoch 7/10:  47%|▍| 465/991 [2:14:49<2:16:04, 15.52s/batch, batch_loss=13.9, bat

Epoch 7/10:  47%|▍| 465/991 [2:15:04<2:16:04, 15.52s/batch, batch_loss=19.5, bat

Epoch 7/10:  47%|▍| 466/991 [2:15:04<2:16:01, 15.55s/batch, batch_loss=19.5, bat

Epoch 7/10:  47%|▍| 466/991 [2:15:19<2:16:01, 15.55s/batch, batch_loss=16.7, bat

Epoch 7/10:  47%|▍| 467/991 [2:15:19<2:14:47, 15.43s/batch, batch_loss=16.7, bat

Epoch 7/10:  47%|▍| 467/991 [2:15:35<2:14:47, 15.43s/batch, batch_loss=23.6, bat

Epoch 7/10:  47%|▍| 468/991 [2:15:35<2:14:35, 15.44s/batch, batch_loss=23.6, bat

Epoch 7/10:  47%|▍| 468/991 [2:15:54<2:14:35, 15.44s/batch, batch_loss=38.8, bat

Epoch 7/10:  47%|▍| 469/991 [2:15:54<2:24:20, 16.59s/batch, batch_loss=38.8, bat

Epoch 7/10:  47%|▍| 469/991 [2:16:09<2:24:20, 16.59s/batch, batch_loss=33.7, bat

Epoch 7/10:  47%|▍| 470/991 [2:16:09<2:20:30, 16.18s/batch, batch_loss=33.7, bat

Epoch 7/10:  47%|▍| 470/991 [2:16:25<2:20:30, 16.18s/batch, batch_loss=28.7, bat

Epoch 7/10:  48%|▍| 471/991 [2:16:25<2:17:57, 15.92s/batch, batch_loss=28.7, bat

Epoch 7/10:  48%|▍| 471/991 [2:16:40<2:17:57, 15.92s/batch, batch_loss=59.7, bat

Epoch 7/10:  48%|▍| 472/991 [2:16:40<2:16:12, 15.75s/batch, batch_loss=59.7, bat

Epoch 7/10:  48%|▍| 472/991 [2:16:55<2:16:12, 15.75s/batch, batch_loss=40.9, bat

Epoch 7/10:  48%|▍| 473/991 [2:16:55<2:14:47, 15.61s/batch, batch_loss=40.9, bat

Epoch 7/10:  48%|▍| 473/991 [2:17:11<2:14:47, 15.61s/batch, batch_loss=33.8, bat

Epoch 7/10:  48%|▍| 474/991 [2:17:11<2:13:33, 15.50s/batch, batch_loss=33.8, bat

Epoch 7/10:  48%|▍| 474/991 [2:17:26<2:13:33, 15.50s/batch, batch_loss=2.44e+3, 

Epoch 7/10:  48%|▍| 475/991 [2:17:26<2:12:00, 15.35s/batch, batch_loss=2.44e+3, 

Epoch 7/10:  48%|▍| 475/991 [2:17:41<2:12:00, 15.35s/batch, batch_loss=35.9, bat

Epoch 7/10:  48%|▍| 476/991 [2:17:41<2:11:08, 15.28s/batch, batch_loss=35.9, bat

Epoch 7/10:  48%|▍| 476/991 [2:17:58<2:11:08, 15.28s/batch, batch_loss=41.2, bat

Epoch 7/10:  48%|▍| 477/991 [2:17:58<2:16:44, 15.96s/batch, batch_loss=41.2, bat

Epoch 7/10:  48%|▍| 477/991 [2:18:14<2:16:44, 15.96s/batch, batch_loss=31, batch

Epoch 7/10:  48%|▍| 478/991 [2:18:14<2:16:20, 15.95s/batch, batch_loss=31, batch

Epoch 7/10:  48%|▍| 478/991 [2:18:30<2:16:20, 15.95s/batch, batch_loss=29, batch

Epoch 7/10:  48%|▍| 479/991 [2:18:30<2:15:14, 15.85s/batch, batch_loss=29, batch

Epoch 7/10:  48%|▍| 479/991 [2:18:45<2:15:14, 15.85s/batch, batch_loss=27.6, bat

Epoch 7/10:  48%|▍| 480/991 [2:18:45<2:14:07, 15.75s/batch, batch_loss=27.6, bat

Epoch 7/10:  48%|▍| 480/991 [2:19:01<2:14:07, 15.75s/batch, batch_loss=32.6, bat

Epoch 7/10:  49%|▍| 481/991 [2:19:01<2:13:14, 15.68s/batch, batch_loss=32.6, bat

Epoch 7/10:  49%|▍| 481/991 [2:19:16<2:13:14, 15.68s/batch, batch_loss=31.5, bat

Epoch 7/10:  49%|▍| 482/991 [2:19:16<2:10:48, 15.42s/batch, batch_loss=31.5, bat

Epoch 7/10:  49%|▍| 482/991 [2:19:31<2:10:48, 15.42s/batch, batch_loss=17.9, bat

Epoch 7/10:  49%|▍| 483/991 [2:19:31<2:11:26, 15.52s/batch, batch_loss=17.9, bat

Epoch 7/10:  49%|▍| 483/991 [2:19:46<2:11:26, 15.52s/batch, batch_loss=27.1, bat

Epoch 7/10:  49%|▍| 484/991 [2:19:46<2:09:17, 15.30s/batch, batch_loss=27.1, bat

Epoch 7/10:  49%|▍| 484/991 [2:20:02<2:09:17, 15.30s/batch, batch_loss=19, batch

Epoch 7/10:  49%|▍| 485/991 [2:20:02<2:11:08, 15.55s/batch, batch_loss=19, batch

Epoch 7/10:  49%|▍| 485/991 [2:20:18<2:11:08, 15.55s/batch, batch_loss=41.1, bat

Epoch 7/10:  49%|▍| 486/991 [2:20:18<2:11:01, 15.57s/batch, batch_loss=41.1, bat

Epoch 7/10:  49%|▍| 486/991 [2:20:34<2:11:01, 15.57s/batch, batch_loss=20.1, bat

Epoch 7/10:  49%|▍| 487/991 [2:20:34<2:11:29, 15.65s/batch, batch_loss=20.1, bat

Epoch 7/10:  49%|▍| 487/991 [2:20:49<2:11:29, 15.65s/batch, batch_loss=17.1, bat

Epoch 7/10:  49%|▍| 488/991 [2:20:49<2:10:39, 15.59s/batch, batch_loss=17.1, bat

Epoch 7/10:  49%|▍| 488/991 [2:21:04<2:10:39, 15.59s/batch, batch_loss=14.8, bat

Epoch 7/10:  49%|▍| 489/991 [2:21:04<2:08:39, 15.38s/batch, batch_loss=14.8, bat

Epoch 7/10:  49%|▍| 489/991 [2:21:19<2:08:39, 15.38s/batch, batch_loss=13.1, bat

Epoch 7/10:  49%|▍| 490/991 [2:21:19<2:08:13, 15.36s/batch, batch_loss=13.1, bat

Epoch 7/10:  49%|▍| 490/991 [2:21:35<2:08:13, 15.36s/batch, batch_loss=33.2, bat

Epoch 7/10:  50%|▍| 491/991 [2:21:35<2:08:26, 15.41s/batch, batch_loss=33.2, bat

Epoch 7/10:  50%|▍| 491/991 [2:21:52<2:08:26, 15.41s/batch, batch_loss=48.8, bat

Epoch 7/10:  50%|▍| 492/991 [2:21:52<2:13:16, 16.03s/batch, batch_loss=48.8, bat

Epoch 7/10:  50%|▍| 492/991 [2:22:07<2:13:16, 16.03s/batch, batch_loss=40, batch

Epoch 7/10:  50%|▍| 493/991 [2:22:07<2:09:24, 15.59s/batch, batch_loss=40, batch

Epoch 7/10:  50%|▍| 493/991 [2:22:22<2:09:24, 15.59s/batch, batch_loss=14.8, bat

Epoch 7/10:  50%|▍| 494/991 [2:22:22<2:07:34, 15.40s/batch, batch_loss=14.8, bat

Epoch 7/10:  50%|▍| 494/991 [2:22:36<2:07:34, 15.40s/batch, batch_loss=8.53e+4, 

Epoch 7/10:  50%|▍| 495/991 [2:22:36<2:05:22, 15.17s/batch, batch_loss=8.53e+4, 

Epoch 7/10:  50%|▍| 495/991 [2:22:52<2:05:22, 15.17s/batch, batch_loss=23.3, bat

Epoch 7/10:  50%|▌| 496/991 [2:22:52<2:04:52, 15.14s/batch, batch_loss=23.3, bat

Epoch 7/10:  50%|▌| 496/991 [2:23:06<2:04:52, 15.14s/batch, batch_loss=178, batc

Epoch 7/10:  50%|▌| 497/991 [2:23:06<2:02:29, 14.88s/batch, batch_loss=178, batc

Epoch 7/10:  50%|▌| 497/991 [2:23:21<2:02:29, 14.88s/batch, batch_loss=18.6, bat

Epoch 7/10:  50%|▌| 498/991 [2:23:21<2:02:15, 14.88s/batch, batch_loss=18.6, bat

Epoch 7/10:  50%|▌| 498/991 [2:23:36<2:02:15, 14.88s/batch, batch_loss=406, batc

Epoch 7/10:  50%|▌| 499/991 [2:23:36<2:02:57, 14.99s/batch, batch_loss=406, batc

Epoch 7/10:  50%|▌| 499/991 [2:23:51<2:02:57, 14.99s/batch, batch_loss=21.2, bat

Epoch 7/10:  50%|▌| 500/991 [2:23:51<2:02:11, 14.93s/batch, batch_loss=21.2, bat

Epoch 7/10:  50%|▌| 500/991 [2:24:09<2:02:11, 14.93s/batch, batch_loss=10.2, bat

Epoch 7/10:  51%|▌| 501/991 [2:24:09<2:10:56, 16.03s/batch, batch_loss=10.2, bat

Epoch 7/10:  51%|▌| 501/991 [2:24:25<2:10:56, 16.03s/batch, batch_loss=14, batch

Epoch 7/10:  51%|▌| 502/991 [2:24:25<2:08:59, 15.83s/batch, batch_loss=14, batch

Epoch 7/10:  51%|▌| 502/991 [2:24:40<2:08:59, 15.83s/batch, batch_loss=34.7, bat

Epoch 7/10:  51%|▌| 503/991 [2:24:40<2:07:11, 15.64s/batch, batch_loss=34.7, bat

Epoch 7/10:  51%|▌| 503/991 [2:24:56<2:07:11, 15.64s/batch, batch_loss=15, batch

Epoch 7/10:  51%|▌| 504/991 [2:24:56<2:06:59, 15.65s/batch, batch_loss=15, batch

Epoch 7/10:  51%|▌| 504/991 [2:25:11<2:06:59, 15.65s/batch, batch_loss=8.28, bat

Epoch 7/10:  51%|▌| 505/991 [2:25:11<2:06:46, 15.65s/batch, batch_loss=8.28, bat

Epoch 7/10:  51%|▌| 505/991 [2:25:26<2:06:46, 15.65s/batch, batch_loss=19.4, bat

Epoch 7/10:  51%|▌| 506/991 [2:25:26<2:05:13, 15.49s/batch, batch_loss=19.4, bat

Epoch 7/10:  51%|▌| 506/991 [2:25:42<2:05:13, 15.49s/batch, batch_loss=13.1, bat

Epoch 7/10:  51%|▌| 507/991 [2:25:42<2:06:16, 15.65s/batch, batch_loss=13.1, bat

Epoch 7/10:  51%|▌| 507/991 [2:25:56<2:06:16, 15.65s/batch, batch_loss=16.9, bat

Epoch 7/10:  51%|▌| 508/991 [2:25:56<2:02:01, 15.16s/batch, batch_loss=16.9, bat

Epoch 7/10:  51%|▌| 508/991 [2:26:11<2:02:01, 15.16s/batch, batch_loss=21.6, bat

Epoch 7/10:  51%|▌| 509/991 [2:26:11<2:01:08, 15.08s/batch, batch_loss=21.6, bat

Epoch 7/10:  51%|▌| 509/991 [2:26:27<2:01:08, 15.08s/batch, batch_loss=18.9, bat

Epoch 7/10:  51%|▌| 510/991 [2:26:27<2:03:21, 15.39s/batch, batch_loss=18.9, bat

Epoch 7/10:  51%|▌| 510/991 [2:26:43<2:03:21, 15.39s/batch, batch_loss=19, batch

Epoch 7/10:  52%|▌| 511/991 [2:26:43<2:02:36, 15.33s/batch, batch_loss=19, batch

Epoch 7/10:  52%|▌| 511/991 [2:26:58<2:02:36, 15.33s/batch, batch_loss=12.2, bat

Epoch 7/10:  52%|▌| 512/991 [2:26:58<2:02:30, 15.35s/batch, batch_loss=12.2, bat

Epoch 7/10:  52%|▌| 512/991 [2:27:13<2:02:30, 15.35s/batch, batch_loss=12.3, bat

Epoch 7/10:  52%|▌| 513/991 [2:27:13<2:01:07, 15.20s/batch, batch_loss=12.3, bat

Epoch 7/10:  52%|▌| 513/991 [2:27:28<2:01:07, 15.20s/batch, batch_loss=22.8, bat

Epoch 7/10:  52%|▌| 514/991 [2:27:28<2:01:42, 15.31s/batch, batch_loss=22.8, bat

Epoch 7/10:  52%|▌| 514/991 [2:27:44<2:01:42, 15.31s/batch, batch_loss=22.8, bat

Epoch 7/10:  52%|▌| 515/991 [2:27:44<2:02:26, 15.43s/batch, batch_loss=22.8, bat

Epoch 7/10:  52%|▌| 515/991 [2:28:00<2:02:26, 15.43s/batch, batch_loss=23.4, bat

Epoch 7/10:  52%|▌| 516/991 [2:28:00<2:02:52, 15.52s/batch, batch_loss=23.4, bat

Epoch 7/10:  52%|▌| 516/991 [2:28:15<2:02:52, 15.52s/batch, batch_loss=13.6, bat

Epoch 7/10:  52%|▌| 517/991 [2:28:15<2:02:43, 15.54s/batch, batch_loss=13.6, bat

Epoch 7/10:  52%|▌| 517/991 [2:28:30<2:02:43, 15.54s/batch, batch_loss=30.1, bat

Epoch 7/10:  52%|▌| 518/991 [2:28:30<2:00:31, 15.29s/batch, batch_loss=30.1, bat

Epoch 7/10:  52%|▌| 518/991 [2:28:45<2:00:31, 15.29s/batch, batch_loss=17.3, bat

Epoch 7/10:  52%|▌| 519/991 [2:28:45<1:59:36, 15.21s/batch, batch_loss=17.3, bat

Epoch 7/10:  52%|▌| 519/991 [2:29:01<1:59:36, 15.21s/batch, batch_loss=18.9, bat

Epoch 7/10:  52%|▌| 520/991 [2:29:01<2:00:24, 15.34s/batch, batch_loss=18.9, bat

Epoch 7/10:  52%|▌| 520/991 [2:29:16<2:00:24, 15.34s/batch, batch_loss=11.9, bat

Epoch 7/10:  53%|▌| 521/991 [2:29:16<1:59:24, 15.24s/batch, batch_loss=11.9, bat

Epoch 7/10:  53%|▌| 521/991 [2:29:32<1:59:24, 15.24s/batch, batch_loss=10.5, bat

Epoch 7/10:  53%|▌| 522/991 [2:29:32<2:01:05, 15.49s/batch, batch_loss=10.5, bat

Epoch 7/10:  53%|▌| 522/991 [2:29:48<2:01:05, 15.49s/batch, batch_loss=3.79, bat

Epoch 7/10:  53%|▌| 523/991 [2:29:48<2:02:33, 15.71s/batch, batch_loss=3.79, bat

Epoch 7/10:  53%|▌| 523/991 [2:30:03<2:02:33, 15.71s/batch, batch_loss=10.5, bat

Epoch 7/10:  53%|▌| 524/991 [2:30:03<1:59:54, 15.41s/batch, batch_loss=10.5, bat

Epoch 7/10:  53%|▌| 524/991 [2:30:18<1:59:54, 15.41s/batch, batch_loss=7.4, batc

Epoch 7/10:  53%|▌| 525/991 [2:30:18<2:00:19, 15.49s/batch, batch_loss=7.4, batc

Epoch 7/10:  53%|▌| 525/991 [2:30:34<2:00:19, 15.49s/batch, batch_loss=9.37, bat

Epoch 7/10:  53%|▌| 526/991 [2:30:34<1:59:02, 15.36s/batch, batch_loss=9.37, bat

Epoch 7/10:  53%|▌| 526/991 [2:30:48<1:59:02, 15.36s/batch, batch_loss=20.5, bat

Epoch 7/10:  53%|▌| 527/991 [2:30:48<1:57:18, 15.17s/batch, batch_loss=20.5, bat

Epoch 7/10:  53%|▌| 527/991 [2:31:03<1:57:18, 15.17s/batch, batch_loss=17.9, bat

Epoch 7/10:  53%|▌| 528/991 [2:31:03<1:56:56, 15.15s/batch, batch_loss=17.9, bat

Epoch 7/10:  53%|▌| 528/991 [2:31:19<1:56:56, 15.15s/batch, batch_loss=11, batch

Epoch 7/10:  53%|▌| 529/991 [2:31:19<1:56:50, 15.18s/batch, batch_loss=11, batch

Epoch 7/10:  53%|▌| 529/991 [2:31:33<1:56:50, 15.18s/batch, batch_loss=18, batch

Epoch 7/10:  53%|▌| 530/991 [2:31:33<1:55:39, 15.05s/batch, batch_loss=18, batch

Epoch 7/10:  53%|▌| 530/991 [2:31:51<1:55:39, 15.05s/batch, batch_loss=16.8, bat

Epoch 7/10:  54%|▌| 531/991 [2:31:51<2:01:04, 15.79s/batch, batch_loss=16.8, bat

Epoch 7/10:  54%|▌| 531/991 [2:32:06<2:01:04, 15.79s/batch, batch_loss=17.9, bat

Epoch 7/10:  54%|▌| 532/991 [2:32:06<1:59:58, 15.68s/batch, batch_loss=17.9, bat

Epoch 7/10:  54%|▌| 532/991 [2:32:22<1:59:58, 15.68s/batch, batch_loss=14.7, bat

Epoch 7/10:  54%|▌| 533/991 [2:32:22<1:59:36, 15.67s/batch, batch_loss=14.7, bat

Epoch 7/10:  54%|▌| 533/991 [2:32:37<1:59:36, 15.67s/batch, batch_loss=16.1, bat

Epoch 7/10:  54%|▌| 534/991 [2:32:37<1:58:04, 15.50s/batch, batch_loss=16.1, bat

Epoch 7/10:  54%|▌| 534/991 [2:32:51<1:58:04, 15.50s/batch, batch_loss=29.1, bat

Epoch 7/10:  54%|▌| 535/991 [2:32:51<1:55:08, 15.15s/batch, batch_loss=29.1, bat

Epoch 7/10:  54%|▌| 535/991 [2:33:06<1:55:08, 15.15s/batch, batch_loss=25.2, bat

Epoch 7/10:  54%|▌| 536/991 [2:33:06<1:53:48, 15.01s/batch, batch_loss=25.2, bat

Epoch 7/10:  54%|▌| 536/991 [2:33:20<1:53:48, 15.01s/batch, batch_loss=13.4, bat

Epoch 7/10:  54%|▌| 537/991 [2:33:20<1:52:03, 14.81s/batch, batch_loss=13.4, bat

Epoch 7/10:  54%|▌| 537/991 [2:33:35<1:52:03, 14.81s/batch, batch_loss=1.79e+3, 

Epoch 7/10:  54%|▌| 538/991 [2:33:35<1:51:41, 14.79s/batch, batch_loss=1.79e+3, 

Epoch 7/10:  54%|▌| 538/991 [2:33:53<1:51:41, 14.79s/batch, batch_loss=27.1, bat

Epoch 7/10:  54%|▌| 539/991 [2:33:53<1:57:31, 15.60s/batch, batch_loss=27.1, bat

Epoch 7/10:  54%|▌| 539/991 [2:34:07<1:57:31, 15.60s/batch, batch_loss=29.9, bat

Epoch 7/10:  54%|▌| 540/991 [2:34:07<1:55:16, 15.34s/batch, batch_loss=29.9, bat

Epoch 7/10:  54%|▌| 540/991 [2:34:22<1:55:16, 15.34s/batch, batch_loss=1.3e+4, b

Epoch 7/10:  55%|▌| 541/991 [2:34:22<1:53:03, 15.07s/batch, batch_loss=1.3e+4, b

Epoch 7/10:  55%|▌| 541/991 [2:34:37<1:53:03, 15.07s/batch, batch_loss=2.86e+3, 

Epoch 7/10:  55%|▌| 542/991 [2:34:37<1:53:47, 15.21s/batch, batch_loss=2.86e+3, 

Epoch 7/10:  55%|▌| 542/991 [2:34:52<1:53:47, 15.21s/batch, batch_loss=63.1, bat

Epoch 7/10:  55%|▌| 543/991 [2:34:52<1:52:47, 15.11s/batch, batch_loss=63.1, bat

Epoch 7/10:  55%|▌| 543/991 [2:35:07<1:52:47, 15.11s/batch, batch_loss=25.6, bat

Epoch 7/10:  55%|▌| 544/991 [2:35:07<1:51:57, 15.03s/batch, batch_loss=25.6, bat

Epoch 7/10:  55%|▌| 544/991 [2:35:21<1:51:57, 15.03s/batch, batch_loss=17.7, bat

Epoch 7/10:  55%|▌| 545/991 [2:35:21<1:50:00, 14.80s/batch, batch_loss=17.7, bat

Epoch 7/10:  55%|▌| 545/991 [2:35:37<1:50:00, 14.80s/batch, batch_loss=300, batc

Epoch 7/10:  55%|▌| 546/991 [2:35:37<1:50:45, 14.93s/batch, batch_loss=300, batc

Epoch 7/10:  55%|▌| 546/991 [2:35:52<1:50:45, 14.93s/batch, batch_loss=15.3, bat

Epoch 7/10:  55%|▌| 547/991 [2:35:52<1:50:40, 14.96s/batch, batch_loss=15.3, bat

Epoch 7/10:  55%|▌| 547/991 [2:36:07<1:50:40, 14.96s/batch, batch_loss=14.1, bat

Epoch 7/10:  55%|▌| 548/991 [2:36:07<1:50:29, 14.96s/batch, batch_loss=14.1, bat

Epoch 7/10:  55%|▌| 548/991 [2:36:23<1:50:29, 14.96s/batch, batch_loss=9.99, bat

Epoch 7/10:  55%|▌| 549/991 [2:36:23<1:52:21, 15.25s/batch, batch_loss=9.99, bat

Epoch 7/10:  55%|▌| 549/991 [2:36:38<1:52:21, 15.25s/batch, batch_loss=27.1, bat

Epoch 7/10:  55%|▌| 550/991 [2:36:38<1:51:53, 15.22s/batch, batch_loss=27.1, bat

Epoch 7/10:  55%|▌| 550/991 [2:36:53<1:51:53, 15.22s/batch, batch_loss=22.5, bat

Epoch 7/10:  56%|▌| 551/991 [2:36:53<1:52:24, 15.33s/batch, batch_loss=22.5, bat

Epoch 7/10:  56%|▌| 551/991 [2:37:08<1:52:24, 15.33s/batch, batch_loss=16, batch

Epoch 7/10:  56%|▌| 552/991 [2:37:08<1:50:53, 15.16s/batch, batch_loss=16, batch

Epoch 7/10:  56%|▌| 552/991 [2:37:26<1:50:53, 15.16s/batch, batch_loss=17.6, bat

Epoch 7/10:  56%|▌| 553/991 [2:37:26<1:57:33, 16.10s/batch, batch_loss=17.6, bat

Epoch 7/10:  56%|▌| 553/991 [2:37:42<1:57:33, 16.10s/batch, batch_loss=5.75e+3, 

Epoch 7/10:  56%|▌| 554/991 [2:37:42<1:55:56, 15.92s/batch, batch_loss=5.75e+3, 

Epoch 7/10:  56%|▌| 554/991 [2:37:58<1:55:56, 15.92s/batch, batch_loss=2.57e+3, 

Epoch 7/10:  56%|▌| 555/991 [2:37:58<1:55:20, 15.87s/batch, batch_loss=2.57e+3, 

Epoch 7/10:  56%|▌| 555/991 [2:38:13<1:55:20, 15.87s/batch, batch_loss=19.4, bat

Epoch 7/10:  56%|▌| 556/991 [2:38:13<1:54:44, 15.83s/batch, batch_loss=19.4, bat

Epoch 7/10:  56%|▌| 556/991 [2:38:27<1:54:44, 15.83s/batch, batch_loss=1.27e+4, 

Epoch 7/10:  56%|▌| 557/991 [2:38:27<1:50:25, 15.27s/batch, batch_loss=1.27e+4, 

Epoch 7/10:  56%|▌| 557/991 [2:38:41<1:50:25, 15.27s/batch, batch_loss=12.9, bat

Epoch 7/10:  56%|▌| 558/991 [2:38:41<1:46:36, 14.77s/batch, batch_loss=12.9, bat

Epoch 7/10:  56%|▌| 558/991 [2:38:55<1:46:36, 14.77s/batch, batch_loss=19, batch

Epoch 7/10:  56%|▌| 559/991 [2:38:55<1:44:28, 14.51s/batch, batch_loss=19, batch

Epoch 7/10:  56%|▌| 559/991 [2:39:11<1:44:28, 14.51s/batch, batch_loss=6.78, bat

Epoch 7/10:  57%|▌| 560/991 [2:39:11<1:47:06, 14.91s/batch, batch_loss=6.78, bat

Epoch 7/10:  57%|▌| 560/991 [2:39:26<1:47:06, 14.91s/batch, batch_loss=10.2, bat

Epoch 7/10:  57%|▌| 561/991 [2:39:26<1:47:59, 15.07s/batch, batch_loss=10.2, bat

Epoch 7/10:  57%|▌| 561/991 [2:39:41<1:47:59, 15.07s/batch, batch_loss=17.9, bat

Epoch 7/10:  57%|▌| 562/991 [2:39:41<1:48:28, 15.17s/batch, batch_loss=17.9, bat

Epoch 7/10:  57%|▌| 562/991 [2:39:57<1:48:28, 15.17s/batch, batch_loss=7.09, bat

Epoch 7/10:  57%|▌| 563/991 [2:39:57<1:48:52, 15.26s/batch, batch_loss=7.09, bat

Epoch 7/10:  57%|▌| 563/991 [2:40:12<1:48:52, 15.26s/batch, batch_loss=12.8, bat

Epoch 7/10:  57%|▌| 564/991 [2:40:12<1:48:41, 15.27s/batch, batch_loss=12.8, bat

Epoch 7/10:  57%|▌| 564/991 [2:40:30<1:48:41, 15.27s/batch, batch_loss=503, batc

Epoch 7/10:  57%|▌| 565/991 [2:40:30<1:53:36, 16.00s/batch, batch_loss=503, batc

Epoch 7/10:  57%|▌| 565/991 [2:40:45<1:53:36, 16.00s/batch, batch_loss=18.2, bat

Epoch 7/10:  57%|▌| 566/991 [2:40:45<1:52:25, 15.87s/batch, batch_loss=18.2, bat

Epoch 7/10:  57%|▌| 566/991 [2:41:02<1:52:25, 15.87s/batch, batch_loss=20.9, bat

Epoch 7/10:  57%|▌| 567/991 [2:41:02<1:52:39, 15.94s/batch, batch_loss=20.9, bat

Epoch 7/10:  57%|▌| 567/991 [2:41:17<1:52:39, 15.94s/batch, batch_loss=297, batc

Epoch 7/10:  57%|▌| 568/991 [2:41:17<1:50:14, 15.64s/batch, batch_loss=297, batc

Epoch 7/10:  57%|▌| 568/991 [2:41:31<1:50:14, 15.64s/batch, batch_loss=33, batch

Epoch 7/10:  57%|▌| 569/991 [2:41:31<1:47:57, 15.35s/batch, batch_loss=33, batch

Epoch 7/10:  57%|▌| 569/991 [2:41:48<1:47:57, 15.35s/batch, batch_loss=8.49e+3, 

Epoch 7/10:  58%|▌| 570/991 [2:41:48<1:51:41, 15.92s/batch, batch_loss=8.49e+3, 

Epoch 7/10:  58%|▌| 570/991 [2:42:03<1:51:41, 15.92s/batch, batch_loss=11.1, bat

Epoch 7/10:  58%|▌| 571/991 [2:42:03<1:47:47, 15.40s/batch, batch_loss=11.1, bat

Epoch 7/10:  58%|▌| 571/991 [2:42:18<1:47:47, 15.40s/batch, batch_loss=11.6, bat

Epoch 7/10:  58%|▌| 572/991 [2:42:18<1:47:11, 15.35s/batch, batch_loss=11.6, bat

Epoch 7/10:  58%|▌| 572/991 [2:42:34<1:47:11, 15.35s/batch, batch_loss=7.61, bat

Epoch 7/10:  58%|▌| 573/991 [2:42:34<1:48:45, 15.61s/batch, batch_loss=7.61, bat

Epoch 7/10:  58%|▌| 573/991 [2:42:50<1:48:45, 15.61s/batch, batch_loss=10.3, bat

Epoch 7/10:  58%|▌| 574/991 [2:42:50<1:48:38, 15.63s/batch, batch_loss=10.3, bat

Epoch 7/10:  58%|▌| 574/991 [2:43:05<1:48:38, 15.63s/batch, batch_loss=29.7, bat

Epoch 7/10:  58%|▌| 575/991 [2:43:05<1:47:23, 15.49s/batch, batch_loss=29.7, bat

Epoch 7/10:  58%|▌| 575/991 [2:43:20<1:47:23, 15.49s/batch, batch_loss=33.6, bat

Epoch 7/10:  58%|▌| 576/991 [2:43:20<1:46:31, 15.40s/batch, batch_loss=33.6, bat

Epoch 7/10:  58%|▌| 576/991 [2:43:35<1:46:31, 15.40s/batch, batch_loss=12.3, bat

Epoch 7/10:  58%|▌| 577/991 [2:43:35<1:45:14, 15.25s/batch, batch_loss=12.3, bat

Epoch 7/10:  58%|▌| 577/991 [2:43:53<1:45:14, 15.25s/batch, batch_loss=7.7, batc

Epoch 7/10:  58%|▌| 578/991 [2:43:53<1:50:48, 16.10s/batch, batch_loss=7.7, batc

Epoch 7/10:  58%|▌| 578/991 [2:44:08<1:50:48, 16.10s/batch, batch_loss=14.8, bat

Epoch 7/10:  58%|▌| 579/991 [2:44:08<1:48:51, 15.85s/batch, batch_loss=14.8, bat

Epoch 7/10:  58%|▌| 579/991 [2:44:24<1:48:51, 15.85s/batch, batch_loss=19.3, bat

Epoch 7/10:  59%|▌| 580/991 [2:44:24<1:47:57, 15.76s/batch, batch_loss=19.3, bat

Epoch 7/10:  59%|▌| 580/991 [2:44:39<1:47:57, 15.76s/batch, batch_loss=8.2, batc

Epoch 7/10:  59%|▌| 581/991 [2:44:39<1:45:46, 15.48s/batch, batch_loss=8.2, batc

Epoch 7/10:  59%|▌| 581/991 [2:44:54<1:45:46, 15.48s/batch, batch_loss=0.411, ba

Epoch 7/10:  59%|▌| 582/991 [2:44:54<1:45:02, 15.41s/batch, batch_loss=0.411, ba

Epoch 7/10:  59%|▌| 582/991 [2:45:08<1:45:02, 15.41s/batch, batch_loss=6.61e+3, 

Epoch 7/10:  59%|▌| 583/991 [2:45:08<1:42:48, 15.12s/batch, batch_loss=6.61e+3, 

Epoch 7/10:  59%|▌| 583/991 [2:45:23<1:42:48, 15.12s/batch, batch_loss=12.8, bat

Epoch 7/10:  59%|▌| 584/991 [2:45:23<1:42:00, 15.04s/batch, batch_loss=12.8, bat

Epoch 7/10:  59%|▌| 584/991 [2:45:39<1:42:00, 15.04s/batch, batch_loss=10.2, bat

Epoch 7/10:  59%|▌| 585/991 [2:45:39<1:43:02, 15.23s/batch, batch_loss=10.2, bat

Epoch 7/10:  59%|▌| 585/991 [2:45:56<1:43:02, 15.23s/batch, batch_loss=26.1, bat

Epoch 7/10:  59%|▌| 586/991 [2:45:56<1:47:19, 15.90s/batch, batch_loss=26.1, bat

Epoch 7/10:  59%|▌| 586/991 [2:46:12<1:47:19, 15.90s/batch, batch_loss=22, batch

Epoch 7/10:  59%|▌| 587/991 [2:46:12<1:46:36, 15.83s/batch, batch_loss=22, batch

Epoch 7/10:  59%|▌| 587/991 [2:46:27<1:46:36, 15.83s/batch, batch_loss=21.9, bat

Epoch 7/10:  59%|▌| 588/991 [2:46:27<1:43:31, 15.41s/batch, batch_loss=21.9, bat

Epoch 7/10:  59%|▌| 588/991 [2:46:41<1:43:31, 15.41s/batch, batch_loss=7.85, bat

Epoch 7/10:  59%|▌| 589/991 [2:46:41<1:41:29, 15.15s/batch, batch_loss=7.85, bat

Epoch 7/10:  59%|▌| 589/991 [2:46:56<1:41:29, 15.15s/batch, batch_loss=17.1, bat

Epoch 7/10:  60%|▌| 590/991 [2:46:56<1:41:19, 15.16s/batch, batch_loss=17.1, bat

Epoch 7/10:  60%|▌| 590/991 [2:47:11<1:41:19, 15.16s/batch, batch_loss=15.6, bat

Epoch 7/10:  60%|▌| 591/991 [2:47:11<1:40:01, 15.00s/batch, batch_loss=15.6, bat

Epoch 7/10:  60%|▌| 591/991 [2:47:26<1:40:01, 15.00s/batch, batch_loss=7.65, bat

Epoch 7/10:  60%|▌| 592/991 [2:47:26<1:40:03, 15.05s/batch, batch_loss=7.65, bat

Epoch 7/10:  60%|▌| 592/991 [2:47:41<1:40:03, 15.05s/batch, batch_loss=11.8, bat

Epoch 7/10:  60%|▌| 593/991 [2:47:41<1:40:22, 15.13s/batch, batch_loss=11.8, bat

Epoch 7/10:  60%|▌| 593/991 [2:47:57<1:40:22, 15.13s/batch, batch_loss=13.8, bat

Epoch 7/10:  60%|▌| 594/991 [2:47:57<1:40:22, 15.17s/batch, batch_loss=13.8, bat

Epoch 7/10:  60%|▌| 594/991 [2:48:11<1:40:22, 15.17s/batch, batch_loss=8.9, batc

Epoch 7/10:  60%|▌| 595/991 [2:48:11<1:39:06, 15.02s/batch, batch_loss=8.9, batc

Epoch 7/10:  60%|▌| 595/991 [2:48:27<1:39:06, 15.02s/batch, batch_loss=6.88, bat

Epoch 7/10:  60%|▌| 596/991 [2:48:27<1:40:38, 15.29s/batch, batch_loss=6.88, bat

Epoch 7/10:  60%|▌| 596/991 [2:48:42<1:40:38, 15.29s/batch, batch_loss=22.1, bat

Epoch 7/10:  60%|▌| 597/991 [2:48:42<1:40:18, 15.28s/batch, batch_loss=22.1, bat

Epoch 7/10:  60%|▌| 597/991 [2:48:58<1:40:18, 15.28s/batch, batch_loss=9.68, bat

Epoch 7/10:  60%|▌| 598/991 [2:48:58<1:40:10, 15.29s/batch, batch_loss=9.68, bat

Epoch 7/10:  60%|▌| 598/991 [2:49:13<1:40:10, 15.29s/batch, batch_loss=18.3, bat

Epoch 7/10:  60%|▌| 599/991 [2:49:13<1:39:27, 15.22s/batch, batch_loss=18.3, bat

Epoch 7/10:  60%|▌| 599/991 [2:49:29<1:39:27, 15.22s/batch, batch_loss=14.5, bat

Epoch 7/10:  61%|▌| 600/991 [2:49:29<1:40:12, 15.38s/batch, batch_loss=14.5, bat

Epoch 7/10:  61%|▌| 600/991 [2:49:44<1:40:12, 15.38s/batch, batch_loss=16.3, bat

Epoch 7/10:  61%|▌| 601/991 [2:49:44<1:39:33, 15.32s/batch, batch_loss=16.3, bat

Epoch 7/10:  61%|▌| 601/991 [2:50:02<1:39:33, 15.32s/batch, batch_loss=11, batch

Epoch 7/10:  61%|▌| 602/991 [2:50:02<1:45:51, 16.33s/batch, batch_loss=11, batch

Epoch 7/10:  61%|▌| 602/991 [2:50:17<1:45:51, 16.33s/batch, batch_loss=8.64, bat

Epoch 7/10:  61%|▌| 603/991 [2:50:17<1:43:03, 15.94s/batch, batch_loss=8.64, bat

Epoch 7/10:  61%|▌| 603/991 [2:50:32<1:43:03, 15.94s/batch, batch_loss=1.01e+4, 

Epoch 7/10:  61%|▌| 604/991 [2:50:32<1:39:28, 15.42s/batch, batch_loss=1.01e+4, 

Epoch 7/10:  61%|▌| 604/991 [2:50:47<1:39:28, 15.42s/batch, batch_loss=11, batch

Epoch 7/10:  61%|▌| 605/991 [2:50:47<1:38:20, 15.29s/batch, batch_loss=11, batch

Epoch 7/10:  61%|▌| 605/991 [2:51:02<1:38:20, 15.29s/batch, batch_loss=7.31, bat

Epoch 7/10:  61%|▌| 606/991 [2:51:02<1:37:30, 15.20s/batch, batch_loss=7.31, bat

Epoch 7/10:  61%|▌| 606/991 [2:51:17<1:37:30, 15.20s/batch, batch_loss=12.7, bat

Epoch 7/10:  61%|▌| 607/991 [2:51:17<1:36:43, 15.11s/batch, batch_loss=12.7, bat

Epoch 7/10:  61%|▌| 607/991 [2:51:33<1:36:43, 15.11s/batch, batch_loss=17.9, bat

Epoch 7/10:  61%|▌| 608/991 [2:51:33<1:38:22, 15.41s/batch, batch_loss=17.9, bat

Epoch 7/10:  61%|▌| 608/991 [2:51:49<1:38:22, 15.41s/batch, batch_loss=15.9, bat

Epoch 7/10:  61%|▌| 609/991 [2:51:49<1:39:09, 15.57s/batch, batch_loss=15.9, bat

Epoch 7/10:  61%|▌| 609/991 [2:52:04<1:39:09, 15.57s/batch, batch_loss=16.2, bat

Epoch 7/10:  62%|▌| 610/991 [2:52:04<1:38:07, 15.45s/batch, batch_loss=16.2, bat

Epoch 7/10:  62%|▌| 610/991 [2:52:19<1:38:07, 15.45s/batch, batch_loss=31.7, bat

Epoch 7/10:  62%|▌| 611/991 [2:52:19<1:37:24, 15.38s/batch, batch_loss=31.7, bat

Epoch 7/10:  62%|▌| 611/991 [2:52:34<1:37:24, 15.38s/batch, batch_loss=8.75, bat

Epoch 7/10:  62%|▌| 612/991 [2:52:34<1:36:30, 15.28s/batch, batch_loss=8.75, bat

Epoch 7/10:  62%|▌| 612/991 [2:52:49<1:36:30, 15.28s/batch, batch_loss=14.2, bat

Epoch 7/10:  62%|▌| 613/991 [2:52:49<1:36:33, 15.33s/batch, batch_loss=14.2, bat

Epoch 7/10:  62%|▌| 613/991 [2:53:05<1:36:33, 15.33s/batch, batch_loss=1.73e+4, 

Epoch 7/10:  62%|▌| 614/991 [2:53:05<1:36:19, 15.33s/batch, batch_loss=1.73e+4, 

Epoch 7/10:  62%|▌| 614/991 [2:53:20<1:36:19, 15.33s/batch, batch_loss=995, batc

Epoch 7/10:  62%|▌| 615/991 [2:53:20<1:36:05, 15.33s/batch, batch_loss=995, batc

Epoch 7/10:  62%|▌| 615/991 [2:53:37<1:36:05, 15.33s/batch, batch_loss=7.97, bat

Epoch 7/10:  62%|▌| 616/991 [2:53:37<1:38:28, 15.76s/batch, batch_loss=7.97, bat

Epoch 7/10:  62%|▌| 616/991 [2:53:53<1:38:28, 15.76s/batch, batch_loss=17.8, bat

Epoch 7/10:  62%|▌| 617/991 [2:53:53<1:39:27, 15.96s/batch, batch_loss=17.8, bat

Epoch 7/10:  62%|▌| 617/991 [2:54:08<1:39:27, 15.96s/batch, batch_loss=12.2, bat

Epoch 7/10:  62%|▌| 618/991 [2:54:08<1:37:13, 15.64s/batch, batch_loss=12.2, bat

Epoch 7/10:  62%|▌| 618/991 [2:54:24<1:37:13, 15.64s/batch, batch_loss=22.5, bat

Epoch 7/10:  62%|▌| 619/991 [2:54:24<1:37:25, 15.71s/batch, batch_loss=22.5, bat

Epoch 7/10:  62%|▌| 619/991 [2:54:39<1:37:25, 15.71s/batch, batch_loss=12.5, bat

Epoch 7/10:  63%|▋| 620/991 [2:54:39<1:36:11, 15.56s/batch, batch_loss=12.5, bat

Epoch 7/10:  63%|▋| 620/991 [2:54:54<1:36:11, 15.56s/batch, batch_loss=10.1, bat

Epoch 7/10:  63%|▋| 621/991 [2:54:54<1:34:43, 15.36s/batch, batch_loss=10.1, bat

Epoch 7/10:  63%|▋| 621/991 [2:55:09<1:34:43, 15.36s/batch, batch_loss=5.48e+3, 

Epoch 7/10:  63%|▋| 622/991 [2:55:09<1:33:44, 15.24s/batch, batch_loss=5.48e+3, 

Epoch 7/10:  63%|▋| 622/991 [2:55:27<1:33:44, 15.24s/batch, batch_loss=23.5, bat

Epoch 7/10:  63%|▋| 623/991 [2:55:27<1:38:18, 16.03s/batch, batch_loss=23.5, bat

Epoch 7/10:  63%|▋| 623/991 [2:55:42<1:38:18, 16.03s/batch, batch_loss=1.6e+4, b

Epoch 7/10:  63%|▋| 624/991 [2:55:42<1:36:38, 15.80s/batch, batch_loss=1.6e+4, b

Epoch 7/10:  63%|▋| 624/991 [2:55:57<1:36:38, 15.80s/batch, batch_loss=10, batch

Epoch 7/10:  63%|▋| 625/991 [2:55:57<1:34:57, 15.57s/batch, batch_loss=10, batch

Epoch 7/10:  63%|▋| 625/991 [2:56:13<1:34:57, 15.57s/batch, batch_loss=5.84, bat

Epoch 7/10:  63%|▋| 626/991 [2:56:13<1:35:39, 15.72s/batch, batch_loss=5.84, bat

Epoch 7/10:  63%|▋| 626/991 [2:56:28<1:35:39, 15.72s/batch, batch_loss=4.32e+3, 

Epoch 7/10:  63%|▋| 627/991 [2:56:28<1:34:08, 15.52s/batch, batch_loss=4.32e+3, 

Epoch 7/10:  63%|▋| 627/991 [2:56:42<1:34:08, 15.52s/batch, batch_loss=1.05e+3, 

Epoch 7/10:  63%|▋| 628/991 [2:56:42<1:31:09, 15.07s/batch, batch_loss=1.05e+3, 

Epoch 7/10:  63%|▋| 628/991 [2:56:59<1:31:09, 15.07s/batch, batch_loss=14.4, bat

Epoch 7/10:  63%|▋| 629/991 [2:56:59<1:34:10, 15.61s/batch, batch_loss=14.4, bat

Epoch 7/10:  63%|▋| 629/991 [2:57:14<1:34:10, 15.61s/batch, batch_loss=21.3, bat

Epoch 7/10:  64%|▋| 630/991 [2:57:14<1:32:50, 15.43s/batch, batch_loss=21.3, bat

Epoch 7/10:  64%|▋| 630/991 [2:57:29<1:32:50, 15.43s/batch, batch_loss=18.7, bat

Epoch 7/10:  64%|▋| 631/991 [2:57:29<1:31:01, 15.17s/batch, batch_loss=18.7, bat

Epoch 7/10:  64%|▋| 631/991 [2:57:44<1:31:01, 15.17s/batch, batch_loss=3.04, bat

Epoch 7/10:  64%|▋| 632/991 [2:57:44<1:29:40, 14.99s/batch, batch_loss=3.04, bat

Epoch 7/10:  64%|▋| 632/991 [2:57:58<1:29:40, 14.99s/batch, batch_loss=24.1, bat

Epoch 7/10:  64%|▋| 633/991 [2:57:58<1:29:09, 14.94s/batch, batch_loss=24.1, bat

Epoch 7/10:  64%|▋| 633/991 [2:58:13<1:29:09, 14.94s/batch, batch_loss=30.5, bat

Epoch 7/10:  64%|▋| 634/991 [2:58:13<1:28:06, 14.81s/batch, batch_loss=30.5, bat

Epoch 7/10:  64%|▋| 634/991 [2:58:27<1:28:06, 14.81s/batch, batch_loss=24.6, bat

Epoch 7/10:  64%|▋| 635/991 [2:58:27<1:27:18, 14.72s/batch, batch_loss=24.6, bat

Epoch 7/10:  64%|▋| 635/991 [2:58:42<1:27:18, 14.72s/batch, batch_loss=19.6, bat

Epoch 7/10:  64%|▋| 636/991 [2:58:42<1:27:26, 14.78s/batch, batch_loss=19.6, bat

Epoch 7/10:  64%|▋| 636/991 [2:58:58<1:27:26, 14.78s/batch, batch_loss=17.8, bat

Epoch 7/10:  64%|▋| 637/991 [2:58:58<1:28:16, 14.96s/batch, batch_loss=17.8, bat

Epoch 7/10:  64%|▋| 637/991 [2:59:12<1:28:16, 14.96s/batch, batch_loss=17.6, bat

Epoch 7/10:  64%|▋| 638/991 [2:59:12<1:27:47, 14.92s/batch, batch_loss=17.6, bat

Epoch 7/10:  64%|▋| 638/991 [2:59:29<1:27:47, 14.92s/batch, batch_loss=13.8, bat

Epoch 7/10:  64%|▋| 639/991 [2:59:29<1:29:30, 15.26s/batch, batch_loss=13.8, bat

Epoch 7/10:  64%|▋| 639/991 [2:59:43<1:29:30, 15.26s/batch, batch_loss=676, batc

Epoch 7/10:  65%|▋| 640/991 [2:59:43<1:28:19, 15.10s/batch, batch_loss=676, batc

Epoch 7/10:  65%|▋| 640/991 [2:59:58<1:28:19, 15.10s/batch, batch_loss=12.9, bat

Epoch 7/10:  65%|▋| 641/991 [2:59:58<1:26:44, 14.87s/batch, batch_loss=12.9, bat

Epoch 7/10:  65%|▋| 641/991 [3:00:13<1:26:44, 14.87s/batch, batch_loss=8.79, bat

Epoch 7/10:  65%|▋| 642/991 [3:00:13<1:27:47, 15.09s/batch, batch_loss=8.79, bat

Epoch 7/10:  65%|▋| 642/991 [3:00:29<1:27:47, 15.09s/batch, batch_loss=2.13e+4, 

Epoch 7/10:  65%|▋| 643/991 [3:00:29<1:28:26, 15.25s/batch, batch_loss=2.13e+4, 

Epoch 7/10:  65%|▋| 643/991 [3:00:44<1:28:26, 15.25s/batch, batch_loss=1.76e+4, 

Epoch 7/10:  65%|▋| 644/991 [3:00:44<1:28:51, 15.36s/batch, batch_loss=1.76e+4, 

Epoch 7/10:  65%|▋| 644/991 [3:01:00<1:28:51, 15.36s/batch, batch_loss=2.2e+3, b

Epoch 7/10:  65%|▋| 645/991 [3:01:00<1:28:37, 15.37s/batch, batch_loss=2.2e+3, b

Epoch 7/10:  65%|▋| 645/991 [3:01:15<1:28:37, 15.37s/batch, batch_loss=11.1, bat

Epoch 7/10:  65%|▋| 646/991 [3:01:15<1:28:36, 15.41s/batch, batch_loss=11.1, bat

Epoch 7/10:  65%|▋| 646/991 [3:01:31<1:28:36, 15.41s/batch, batch_loss=12.3, bat

Epoch 7/10:  65%|▋| 647/991 [3:01:31<1:28:04, 15.36s/batch, batch_loss=12.3, bat

Epoch 7/10:  65%|▋| 647/991 [3:01:45<1:28:04, 15.36s/batch, batch_loss=13.9, bat

Epoch 7/10:  65%|▋| 648/991 [3:01:45<1:26:32, 15.14s/batch, batch_loss=13.9, bat

Epoch 7/10:  65%|▋| 648/991 [3:02:00<1:26:32, 15.14s/batch, batch_loss=14.6, bat

Epoch 7/10:  65%|▋| 649/991 [3:02:00<1:26:21, 15.15s/batch, batch_loss=14.6, bat

Epoch 7/10:  65%|▋| 649/991 [3:02:16<1:26:21, 15.15s/batch, batch_loss=1.34e+4, 

Epoch 7/10:  66%|▋| 650/991 [3:02:16<1:26:59, 15.31s/batch, batch_loss=1.34e+4, 

Epoch 7/10:  66%|▋| 650/991 [3:02:31<1:26:59, 15.31s/batch, batch_loss=8.56, bat

Epoch 7/10:  66%|▋| 651/991 [3:02:31<1:25:36, 15.11s/batch, batch_loss=8.56, bat

Epoch 7/10:  66%|▋| 651/991 [3:02:45<1:25:36, 15.11s/batch, batch_loss=11.9, bat

Epoch 7/10:  66%|▋| 652/991 [3:02:45<1:24:25, 14.94s/batch, batch_loss=11.9, bat

Epoch 7/10:  66%|▋| 652/991 [3:03:00<1:24:25, 14.94s/batch, batch_loss=18.3, bat

Epoch 7/10:  66%|▋| 653/991 [3:03:00<1:24:10, 14.94s/batch, batch_loss=18.3, bat

Epoch 7/10:  66%|▋| 653/991 [3:03:15<1:24:10, 14.94s/batch, batch_loss=20.2, bat

Epoch 7/10:  66%|▋| 654/991 [3:03:15<1:22:56, 14.77s/batch, batch_loss=20.2, bat

Epoch 7/10:  66%|▋| 654/991 [3:03:29<1:22:56, 14.77s/batch, batch_loss=3.84e+3, 

Epoch 7/10:  66%|▋| 655/991 [3:03:29<1:22:18, 14.70s/batch, batch_loss=3.84e+3, 

Epoch 7/10:  66%|▋| 655/991 [3:03:44<1:22:18, 14.70s/batch, batch_loss=5.19e+3, 

Epoch 7/10:  66%|▋| 656/991 [3:03:44<1:22:26, 14.77s/batch, batch_loss=5.19e+3, 

Epoch 7/10:  66%|▋| 656/991 [3:03:59<1:22:26, 14.77s/batch, batch_loss=4.22e+3, 

Epoch 7/10:  66%|▋| 657/991 [3:03:59<1:22:09, 14.76s/batch, batch_loss=4.22e+3, 

Epoch 7/10:  66%|▋| 657/991 [3:04:14<1:22:09, 14.76s/batch, batch_loss=2.2e+4, b

Epoch 7/10:  66%|▋| 658/991 [3:04:14<1:23:09, 14.98s/batch, batch_loss=2.2e+4, b

Epoch 7/10:  66%|▋| 658/991 [3:04:30<1:23:09, 14.98s/batch, batch_loss=4, batch_

Epoch 7/10:  66%|▋| 659/991 [3:04:30<1:23:50, 15.15s/batch, batch_loss=4, batch_

Epoch 7/10:  66%|▋| 659/991 [3:04:46<1:23:50, 15.15s/batch, batch_loss=5.2, batc

Epoch 7/10:  67%|▋| 660/991 [3:04:46<1:24:42, 15.36s/batch, batch_loss=5.2, batc

Epoch 7/10:  67%|▋| 660/991 [3:05:01<1:24:42, 15.36s/batch, batch_loss=13.1, bat

Epoch 7/10:  67%|▋| 661/991 [3:05:01<1:24:34, 15.38s/batch, batch_loss=13.1, bat

Epoch 7/10:  67%|▋| 661/991 [3:05:16<1:24:34, 15.38s/batch, batch_loss=14.8, bat

Epoch 7/10:  67%|▋| 662/991 [3:05:16<1:23:46, 15.28s/batch, batch_loss=14.8, bat

Epoch 7/10:  67%|▋| 662/991 [3:05:31<1:23:46, 15.28s/batch, batch_loss=18, batch

Epoch 7/10:  67%|▋| 663/991 [3:05:31<1:22:28, 15.09s/batch, batch_loss=18, batch

Epoch 7/10:  67%|▋| 663/991 [3:05:45<1:22:28, 15.09s/batch, batch_loss=3.03e+3, 

Epoch 7/10:  67%|▋| 664/991 [3:05:45<1:21:15, 14.91s/batch, batch_loss=3.03e+3, 

Epoch 7/10:  67%|▋| 664/991 [3:06:00<1:21:15, 14.91s/batch, batch_loss=13.2, bat

Epoch 7/10:  67%|▋| 665/991 [3:06:00<1:21:30, 15.00s/batch, batch_loss=13.2, bat

Epoch 7/10:  67%|▋| 665/991 [3:06:16<1:21:30, 15.00s/batch, batch_loss=3.05e+3, 

Epoch 7/10:  67%|▋| 666/991 [3:06:16<1:21:41, 15.08s/batch, batch_loss=3.05e+3, 

Epoch 7/10:  67%|▋| 666/991 [3:06:31<1:21:41, 15.08s/batch, batch_loss=18.9, bat

Epoch 7/10:  67%|▋| 667/991 [3:06:31<1:21:51, 15.16s/batch, batch_loss=18.9, bat

Epoch 7/10:  67%|▋| 667/991 [3:06:46<1:21:51, 15.16s/batch, batch_loss=375, batc

Epoch 7/10:  67%|▋| 668/991 [3:06:46<1:21:40, 15.17s/batch, batch_loss=375, batc

Epoch 7/10:  67%|▋| 668/991 [3:07:02<1:21:40, 15.17s/batch, batch_loss=2.98e+3, 

Epoch 7/10:  68%|▋| 669/991 [3:07:02<1:22:07, 15.30s/batch, batch_loss=2.98e+3, 

Epoch 7/10:  68%|▋| 669/991 [3:07:17<1:22:07, 15.30s/batch, batch_loss=1.02e+3, 

Epoch 7/10:  68%|▋| 670/991 [3:07:17<1:21:40, 15.27s/batch, batch_loss=1.02e+3, 

Epoch 7/10:  68%|▋| 670/991 [3:07:32<1:21:40, 15.27s/batch, batch_loss=8.53, bat

Epoch 7/10:  68%|▋| 671/991 [3:07:32<1:21:25, 15.27s/batch, batch_loss=8.53, bat

Epoch 7/10:  68%|▋| 671/991 [3:07:47<1:21:25, 15.27s/batch, batch_loss=16.1, bat

Epoch 7/10:  68%|▋| 672/991 [3:07:47<1:20:15, 15.10s/batch, batch_loss=16.1, bat

Epoch 7/10:  68%|▋| 672/991 [3:08:02<1:20:15, 15.10s/batch, batch_loss=18.3, bat

Epoch 7/10:  68%|▋| 673/991 [3:08:02<1:19:44, 15.04s/batch, batch_loss=18.3, bat

Epoch 7/10:  68%|▋| 673/991 [3:08:17<1:19:44, 15.04s/batch, batch_loss=14.7, bat

Epoch 7/10:  68%|▋| 674/991 [3:08:17<1:19:24, 15.03s/batch, batch_loss=14.7, bat

Epoch 7/10:  68%|▋| 674/991 [3:08:31<1:19:24, 15.03s/batch, batch_loss=3.98, bat

Epoch 7/10:  68%|▋| 675/991 [3:08:31<1:17:59, 14.81s/batch, batch_loss=3.98, bat

Epoch 7/10:  68%|▋| 675/991 [3:08:45<1:17:59, 14.81s/batch, batch_loss=11.2, bat

Epoch 7/10:  68%|▋| 676/991 [3:08:45<1:16:18, 14.54s/batch, batch_loss=11.2, bat

Epoch 7/10:  68%|▋| 676/991 [3:08:58<1:16:18, 14.54s/batch, batch_loss=21.2, bat

Epoch 7/10:  68%|▋| 677/991 [3:08:58<1:14:07, 14.16s/batch, batch_loss=21.2, bat

Epoch 7/10:  68%|▋| 677/991 [3:09:14<1:14:07, 14.16s/batch, batch_loss=5.78, bat

Epoch 7/10:  68%|▋| 678/991 [3:09:14<1:16:03, 14.58s/batch, batch_loss=5.78, bat

Epoch 7/10:  68%|▋| 678/991 [3:09:31<1:16:03, 14.58s/batch, batch_loss=3.82e+3, 

Epoch 7/10:  69%|▋| 679/991 [3:09:31<1:20:00, 15.39s/batch, batch_loss=3.82e+3, 

Epoch 7/10:  69%|▋| 679/991 [3:09:46<1:20:00, 15.39s/batch, batch_loss=6.13e+3, 

Epoch 7/10:  69%|▋| 680/991 [3:09:46<1:19:16, 15.29s/batch, batch_loss=6.13e+3, 

Epoch 7/10:  69%|▋| 680/991 [3:10:02<1:19:16, 15.29s/batch, batch_loss=7.23e+4, 

Epoch 7/10:  69%|▋| 681/991 [3:10:02<1:19:44, 15.43s/batch, batch_loss=7.23e+4, 

Epoch 7/10:  69%|▋| 681/991 [3:10:18<1:19:44, 15.43s/batch, batch_loss=13.3, bat

Epoch 7/10:  69%|▋| 682/991 [3:10:18<1:20:36, 15.65s/batch, batch_loss=13.3, bat

Epoch 7/10:  69%|▋| 682/991 [3:10:34<1:20:36, 15.65s/batch, batch_loss=378, batc

Epoch 7/10:  69%|▋| 683/991 [3:10:34<1:20:28, 15.68s/batch, batch_loss=378, batc

Epoch 7/10:  69%|▋| 683/991 [3:10:50<1:20:28, 15.68s/batch, batch_loss=4.84, bat

Epoch 7/10:  69%|▋| 684/991 [3:10:50<1:20:01, 15.64s/batch, batch_loss=4.84, bat

Epoch 7/10:  69%|▋| 684/991 [3:11:06<1:20:01, 15.64s/batch, batch_loss=11.9, bat

Epoch 7/10:  69%|▋| 685/991 [3:11:06<1:20:48, 15.84s/batch, batch_loss=11.9, bat

Epoch 7/10:  69%|▋| 685/991 [3:11:24<1:20:48, 15.84s/batch, batch_loss=12.8, bat

Epoch 7/10:  69%|▋| 686/991 [3:11:24<1:23:44, 16.47s/batch, batch_loss=12.8, bat

Epoch 7/10:  69%|▋| 686/991 [3:11:39<1:23:44, 16.47s/batch, batch_loss=533, batc

Epoch 7/10:  69%|▋| 687/991 [3:11:39<1:21:55, 16.17s/batch, batch_loss=533, batc

Epoch 7/10:  69%|▋| 687/991 [3:11:55<1:21:55, 16.17s/batch, batch_loss=4.3, batc

Epoch 7/10:  69%|▋| 688/991 [3:11:55<1:20:25, 15.93s/batch, batch_loss=4.3, batc

Epoch 7/10:  69%|▋| 688/991 [3:12:10<1:20:25, 15.93s/batch, batch_loss=8.11, bat

Epoch 7/10:  70%|▋| 689/991 [3:12:10<1:19:12, 15.74s/batch, batch_loss=8.11, bat

Epoch 7/10:  70%|▋| 689/991 [3:12:25<1:19:12, 15.74s/batch, batch_loss=12.2, bat

Epoch 7/10:  70%|▋| 690/991 [3:12:25<1:18:10, 15.58s/batch, batch_loss=12.2, bat

Epoch 7/10:  70%|▋| 690/991 [3:12:40<1:18:10, 15.58s/batch, batch_loss=15.9, bat

Epoch 7/10:  70%|▋| 691/991 [3:12:40<1:17:24, 15.48s/batch, batch_loss=15.9, bat

Epoch 7/10:  70%|▋| 691/991 [3:12:56<1:17:24, 15.48s/batch, batch_loss=4.43, bat

Epoch 7/10:  70%|▋| 692/991 [3:12:56<1:17:00, 15.45s/batch, batch_loss=4.43, bat

Epoch 7/10:  70%|▋| 692/991 [3:13:12<1:17:00, 15.45s/batch, batch_loss=4.65e+3, 

Epoch 7/10:  70%|▋| 693/991 [3:13:12<1:17:11, 15.54s/batch, batch_loss=4.65e+3, 

Epoch 7/10:  70%|▋| 693/991 [3:13:30<1:17:11, 15.54s/batch, batch_loss=461, batc

Epoch 7/10:  70%|▋| 694/991 [3:13:30<1:21:24, 16.45s/batch, batch_loss=461, batc

Epoch 7/10:  70%|▋| 694/991 [3:13:46<1:21:24, 16.45s/batch, batch_loss=781, batc

Epoch 7/10:  70%|▋| 695/991 [3:13:46<1:20:47, 16.38s/batch, batch_loss=781, batc

Epoch 7/10:  70%|▋| 695/991 [3:14:01<1:20:47, 16.38s/batch, batch_loss=9.42, bat

Epoch 7/10:  70%|▋| 696/991 [3:14:01<1:18:41, 16.01s/batch, batch_loss=9.42, bat

Epoch 7/10:  70%|▋| 696/991 [3:14:16<1:18:41, 16.01s/batch, batch_loss=6.81e+3, 

Epoch 7/10:  70%|▋| 697/991 [3:14:16<1:16:33, 15.62s/batch, batch_loss=6.81e+3, 

Epoch 7/10:  70%|▋| 697/991 [3:14:31<1:16:33, 15.62s/batch, batch_loss=13.4, bat

Epoch 7/10:  70%|▋| 698/991 [3:14:31<1:14:55, 15.34s/batch, batch_loss=13.4, bat

Epoch 7/10:  70%|▋| 698/991 [3:14:46<1:14:55, 15.34s/batch, batch_loss=8.09, bat

Epoch 7/10:  71%|▋| 699/991 [3:14:46<1:14:37, 15.34s/batch, batch_loss=8.09, bat

Epoch 7/10:  71%|▋| 699/991 [3:15:01<1:14:37, 15.34s/batch, batch_loss=7.74, bat

Epoch 7/10:  71%|▋| 700/991 [3:15:01<1:14:22, 15.33s/batch, batch_loss=7.74, bat

Epoch 7/10:  71%|▋| 700/991 [3:15:17<1:14:22, 15.33s/batch, batch_loss=212, batc

Epoch 7/10:  71%|▋| 701/991 [3:15:17<1:14:56, 15.50s/batch, batch_loss=212, batc

Epoch 7/10:  71%|▋| 701/991 [3:15:36<1:14:56, 15.50s/batch, batch_loss=20.9, bat

Epoch 7/10:  71%|▋| 702/991 [3:15:36<1:18:46, 16.36s/batch, batch_loss=20.9, bat

Epoch 7/10:  71%|▋| 702/991 [3:15:51<1:18:46, 16.36s/batch, batch_loss=276, batc

Epoch 7/10:  71%|▋| 703/991 [3:15:51<1:16:43, 15.98s/batch, batch_loss=276, batc

Epoch 7/10:  71%|▋| 703/991 [3:16:06<1:16:43, 15.98s/batch, batch_loss=9.07, bat

Epoch 7/10:  71%|▋| 704/991 [3:16:06<1:15:17, 15.74s/batch, batch_loss=9.07, bat

Epoch 7/10:  71%|▋| 704/991 [3:16:21<1:15:17, 15.74s/batch, batch_loss=11.4, bat

Epoch 7/10:  71%|▋| 705/991 [3:16:21<1:13:23, 15.40s/batch, batch_loss=11.4, bat

Epoch 7/10:  71%|▋| 705/991 [3:16:37<1:13:23, 15.40s/batch, batch_loss=21.3, bat

Epoch 7/10:  71%|▋| 706/991 [3:16:37<1:14:06, 15.60s/batch, batch_loss=21.3, bat

Epoch 7/10:  71%|▋| 706/991 [3:16:52<1:14:06, 15.60s/batch, batch_loss=17.9, bat

Epoch 7/10:  71%|▋| 707/991 [3:16:52<1:13:10, 15.46s/batch, batch_loss=17.9, bat

Epoch 7/10:  71%|▋| 707/991 [3:17:07<1:13:10, 15.46s/batch, batch_loss=7.65, bat

Epoch 7/10:  71%|▋| 708/991 [3:17:07<1:12:26, 15.36s/batch, batch_loss=7.65, bat

Epoch 7/10:  71%|▋| 708/991 [3:17:22<1:12:26, 15.36s/batch, batch_loss=6.58, bat

Epoch 7/10:  72%|▋| 709/991 [3:17:22<1:11:25, 15.20s/batch, batch_loss=6.58, bat

Epoch 7/10:  72%|▋| 709/991 [3:17:37<1:11:25, 15.20s/batch, batch_loss=27.4, bat

Epoch 7/10:  72%|▋| 710/991 [3:17:37<1:11:33, 15.28s/batch, batch_loss=27.4, bat

Epoch 7/10:  72%|▋| 710/991 [3:17:52<1:11:33, 15.28s/batch, batch_loss=97.5, bat

Epoch 7/10:  72%|▋| 711/991 [3:17:52<1:11:01, 15.22s/batch, batch_loss=97.5, bat

Epoch 7/10:  72%|▋| 711/991 [3:18:07<1:11:01, 15.22s/batch, batch_loss=12.3, bat

Epoch 7/10:  72%|▋| 712/991 [3:18:07<1:10:32, 15.17s/batch, batch_loss=12.3, bat

Epoch 7/10:  72%|▋| 712/991 [3:18:23<1:10:32, 15.17s/batch, batch_loss=83, batch

Epoch 7/10:  72%|▋| 713/991 [3:18:23<1:10:39, 15.25s/batch, batch_loss=83, batch

Epoch 7/10:  72%|▋| 713/991 [3:18:41<1:10:39, 15.25s/batch, batch_loss=27, batch

Epoch 7/10:  72%|▋| 714/991 [3:18:41<1:14:45, 16.19s/batch, batch_loss=27, batch

Epoch 7/10:  72%|▋| 714/991 [3:18:56<1:14:45, 16.19s/batch, batch_loss=21, batch

Epoch 7/10:  72%|▋| 715/991 [3:18:56<1:12:54, 15.85s/batch, batch_loss=21, batch

Epoch 7/10:  72%|▋| 715/991 [3:19:11<1:12:54, 15.85s/batch, batch_loss=14.5, bat

Epoch 7/10:  72%|▋| 716/991 [3:19:11<1:11:46, 15.66s/batch, batch_loss=14.5, bat

Epoch 7/10:  72%|▋| 716/991 [3:19:27<1:11:46, 15.66s/batch, batch_loss=22.9, bat

Epoch 7/10:  72%|▋| 717/991 [3:19:27<1:11:32, 15.67s/batch, batch_loss=22.9, bat

Epoch 7/10:  72%|▋| 717/991 [3:19:43<1:11:32, 15.67s/batch, batch_loss=22.7, bat

Epoch 7/10:  72%|▋| 718/991 [3:19:43<1:11:30, 15.72s/batch, batch_loss=22.7, bat

Epoch 7/10:  72%|▋| 718/991 [3:19:59<1:11:30, 15.72s/batch, batch_loss=10.8, bat

Epoch 7/10:  73%|▋| 719/991 [3:19:59<1:11:56, 15.87s/batch, batch_loss=10.8, bat

Epoch 7/10:  73%|▋| 719/991 [3:20:14<1:11:56, 15.87s/batch, batch_loss=11.9, bat

Epoch 7/10:  73%|▋| 720/991 [3:20:14<1:10:38, 15.64s/batch, batch_loss=11.9, bat

Epoch 7/10:  73%|▋| 720/991 [3:20:29<1:10:38, 15.64s/batch, batch_loss=18.6, bat

Epoch 7/10:  73%|▋| 721/991 [3:20:29<1:09:35, 15.46s/batch, batch_loss=18.6, bat

Epoch 7/10:  73%|▋| 721/991 [3:20:44<1:09:35, 15.46s/batch, batch_loss=21.4, bat

Epoch 7/10:  73%|▋| 722/991 [3:20:44<1:08:46, 15.34s/batch, batch_loss=21.4, bat

Epoch 7/10:  73%|▋| 722/991 [3:21:00<1:08:46, 15.34s/batch, batch_loss=7.23e+3, 

Epoch 7/10:  73%|▋| 723/991 [3:21:00<1:08:19, 15.30s/batch, batch_loss=7.23e+3, 

Epoch 7/10:  73%|▋| 723/991 [3:21:16<1:08:19, 15.30s/batch, batch_loss=4.57, bat

Epoch 7/10:  73%|▋| 724/991 [3:21:16<1:08:59, 15.51s/batch, batch_loss=4.57, bat

Epoch 7/10:  73%|▋| 724/991 [3:21:31<1:08:59, 15.51s/batch, batch_loss=15.1, bat

Epoch 7/10:  73%|▋| 725/991 [3:21:31<1:08:45, 15.51s/batch, batch_loss=15.1, bat

Epoch 7/10:  73%|▋| 725/991 [3:21:49<1:08:45, 15.51s/batch, batch_loss=11.6, bat

Epoch 7/10:  73%|▋| 726/991 [3:21:49<1:11:34, 16.21s/batch, batch_loss=11.6, bat

Epoch 7/10:  73%|▋| 726/991 [3:22:04<1:11:34, 16.21s/batch, batch_loss=1.29e+4, 

Epoch 7/10:  73%|▋| 727/991 [3:22:04<1:09:49, 15.87s/batch, batch_loss=1.29e+4, 

Epoch 7/10:  73%|▋| 727/991 [3:22:19<1:09:49, 15.87s/batch, batch_loss=13.6, bat

Epoch 7/10:  73%|▋| 728/991 [3:22:19<1:08:36, 15.65s/batch, batch_loss=13.6, bat

Epoch 7/10:  73%|▋| 728/991 [3:22:34<1:08:36, 15.65s/batch, batch_loss=128, batc

Epoch 7/10:  74%|▋| 729/991 [3:22:34<1:07:27, 15.45s/batch, batch_loss=128, batc

Epoch 7/10:  74%|▋| 729/991 [3:22:50<1:07:27, 15.45s/batch, batch_loss=12.2, bat

Epoch 7/10:  74%|▋| 730/991 [3:22:50<1:07:10, 15.44s/batch, batch_loss=12.2, bat

Epoch 7/10:  74%|▋| 730/991 [3:23:03<1:07:10, 15.44s/batch, batch_loss=110, batc

Epoch 7/10:  74%|▋| 731/991 [3:23:03<1:04:51, 14.97s/batch, batch_loss=110, batc

Epoch 7/10:  74%|▋| 731/991 [3:23:19<1:04:51, 14.97s/batch, batch_loss=1.39e+4, 

Epoch 7/10:  74%|▋| 732/991 [3:23:19<1:04:53, 15.03s/batch, batch_loss=1.39e+4, 

Epoch 7/10:  74%|▋| 732/991 [3:23:34<1:04:53, 15.03s/batch, batch_loss=14.6, bat

Epoch 7/10:  74%|▋| 733/991 [3:23:34<1:05:16, 15.18s/batch, batch_loss=14.6, bat

Epoch 7/10:  74%|▋| 733/991 [3:23:52<1:05:16, 15.18s/batch, batch_loss=6.8e+3, b

Epoch 7/10:  74%|▋| 734/991 [3:23:52<1:08:04, 15.89s/batch, batch_loss=6.8e+3, b

Epoch 7/10:  74%|▋| 734/991 [3:24:08<1:08:04, 15.89s/batch, batch_loss=12.6, bat

Epoch 7/10:  74%|▋| 735/991 [3:24:08<1:07:53, 15.91s/batch, batch_loss=12.6, bat

Epoch 7/10:  74%|▋| 735/991 [3:24:24<1:07:53, 15.91s/batch, batch_loss=15.2, bat

Epoch 7/10:  74%|▋| 736/991 [3:24:24<1:07:32, 15.89s/batch, batch_loss=15.2, bat

Epoch 7/10:  74%|▋| 736/991 [3:24:39<1:07:32, 15.89s/batch, batch_loss=10.7, bat

Epoch 7/10:  74%|▋| 737/991 [3:24:39<1:06:23, 15.68s/batch, batch_loss=10.7, bat

Epoch 7/10:  74%|▋| 737/991 [3:24:54<1:06:23, 15.68s/batch, batch_loss=1.47e+3, 

Epoch 7/10:  74%|▋| 738/991 [3:24:54<1:05:54, 15.63s/batch, batch_loss=1.47e+3, 

Epoch 7/10:  74%|▋| 738/991 [3:25:09<1:05:54, 15.63s/batch, batch_loss=24.7, bat

Epoch 7/10:  75%|▋| 739/991 [3:25:09<1:05:07, 15.51s/batch, batch_loss=24.7, bat

Epoch 7/10:  75%|▋| 739/991 [3:25:25<1:05:07, 15.51s/batch, batch_loss=10.2, bat

Epoch 7/10:  75%|▋| 740/991 [3:25:25<1:04:44, 15.48s/batch, batch_loss=10.2, bat

Epoch 7/10:  75%|▋| 740/991 [3:25:40<1:04:44, 15.48s/batch, batch_loss=1.8e+4, b

Epoch 7/10:  75%|▋| 741/991 [3:25:40<1:04:37, 15.51s/batch, batch_loss=1.8e+4, b

Epoch 7/10:  75%|▋| 741/991 [3:25:56<1:04:37, 15.51s/batch, batch_loss=2.28e+3, 

Epoch 7/10:  75%|▋| 742/991 [3:25:56<1:04:00, 15.42s/batch, batch_loss=2.28e+3, 

Epoch 7/10:  75%|▋| 742/991 [3:26:10<1:04:00, 15.42s/batch, batch_loss=10.2, bat

Epoch 7/10:  75%|▋| 743/991 [3:26:10<1:02:35, 15.14s/batch, batch_loss=10.2, bat

Epoch 7/10:  75%|▋| 743/991 [3:26:25<1:02:35, 15.14s/batch, batch_loss=13.3, bat

Epoch 7/10:  75%|▊| 744/991 [3:26:25<1:01:46, 15.00s/batch, batch_loss=13.3, bat

Epoch 7/10:  75%|▊| 744/991 [3:26:40<1:01:46, 15.00s/batch, batch_loss=15.1, bat

Epoch 7/10:  75%|▊| 745/991 [3:26:40<1:01:52, 15.09s/batch, batch_loss=15.1, bat

Epoch 7/10:  75%|▊| 745/991 [3:26:56<1:01:52, 15.09s/batch, batch_loss=1.15e+3, 

Epoch 7/10:  75%|▊| 746/991 [3:26:56<1:02:01, 15.19s/batch, batch_loss=1.15e+3, 

Epoch 7/10:  75%|▊| 746/991 [3:27:10<1:02:01, 15.19s/batch, batch_loss=3.88e+3, 

Epoch 7/10:  75%|▊| 747/991 [3:27:10<1:00:47, 14.95s/batch, batch_loss=3.88e+3, 

Epoch 7/10:  75%|▊| 747/991 [3:27:25<1:00:47, 14.95s/batch, batch_loss=13.4, bat

Epoch 7/10:  75%|▊| 748/991 [3:27:25<1:00:41, 14.99s/batch, batch_loss=13.4, bat

Epoch 7/10:  75%|▊| 748/991 [3:27:40<1:00:41, 14.99s/batch, batch_loss=12.3, bat

Epoch 7/10:  76%|▊| 749/991 [3:27:40<1:00:30, 15.00s/batch, batch_loss=12.3, bat

Epoch 7/10:  76%|▊| 749/991 [3:27:54<1:00:30, 15.00s/batch, batch_loss=10.1, bat

Epoch 7/10:  76%|▊| 750/991 [3:27:54<59:33, 14.83s/batch, batch_loss=10.1, batch

Epoch 7/10:  76%|▊| 750/991 [3:28:09<59:33, 14.83s/batch, batch_loss=11.2, batch

Epoch 7/10:  76%|▊| 751/991 [3:28:09<58:58, 14.75s/batch, batch_loss=11.2, batch

Epoch 7/10:  76%|▊| 751/991 [3:28:24<58:58, 14.75s/batch, batch_loss=6.13, batch

Epoch 7/10:  76%|▊| 752/991 [3:28:24<58:32, 14.70s/batch, batch_loss=6.13, batch

Epoch 7/10:  76%|▊| 752/991 [3:28:39<58:32, 14.70s/batch, batch_loss=7.26, batch

Epoch 7/10:  76%|▊| 753/991 [3:28:39<58:56, 14.86s/batch, batch_loss=7.26, batch

Epoch 7/10:  76%|▊| 753/991 [3:28:55<58:56, 14.86s/batch, batch_loss=5.32, batch

Epoch 7/10:  76%|▊| 754/991 [3:28:55<59:43, 15.12s/batch, batch_loss=5.32, batch

Epoch 7/10:  76%|▊| 754/991 [3:29:09<59:43, 15.12s/batch, batch_loss=15.2, batch

Epoch 7/10:  76%|▊| 755/991 [3:29:09<59:03, 15.02s/batch, batch_loss=15.2, batch

Epoch 7/10:  76%|▊| 755/991 [3:29:26<59:03, 15.02s/batch, batch_loss=12.4, batch

Epoch 7/10:  76%|▊| 756/991 [3:29:26<1:00:10, 15.36s/batch, batch_loss=12.4, bat

Epoch 7/10:  76%|▊| 756/991 [3:29:40<1:00:10, 15.36s/batch, batch_loss=4.82, bat

Epoch 7/10:  76%|▊| 757/991 [3:29:40<59:16, 15.20s/batch, batch_loss=4.82, batch

Epoch 7/10:  76%|▊| 757/991 [3:29:59<59:16, 15.20s/batch, batch_loss=15, batch_i

Epoch 7/10:  76%|▊| 758/991 [3:29:59<1:02:47, 16.17s/batch, batch_loss=15, batch

Epoch 7/10:  76%|▊| 758/991 [3:30:13<1:02:47, 16.17s/batch, batch_loss=15.2, bat

Epoch 7/10:  77%|▊| 759/991 [3:30:13<1:00:43, 15.71s/batch, batch_loss=15.2, bat

Epoch 7/10:  77%|▊| 759/991 [3:30:28<1:00:43, 15.71s/batch, batch_loss=14.5, bat

Epoch 7/10:  77%|▊| 760/991 [3:30:28<59:35, 15.48s/batch, batch_loss=14.5, batch

Epoch 7/10:  77%|▊| 760/991 [3:30:44<59:35, 15.48s/batch, batch_loss=17.5, batch

Epoch 7/10:  77%|▊| 761/991 [3:30:44<58:59, 15.39s/batch, batch_loss=17.5, batch

Epoch 7/10:  77%|▊| 761/991 [3:30:58<58:59, 15.39s/batch, batch_loss=26.6, batch

Epoch 7/10:  77%|▊| 762/991 [3:30:58<57:46, 15.14s/batch, batch_loss=26.6, batch

Epoch 7/10:  77%|▊| 762/991 [3:31:13<57:46, 15.14s/batch, batch_loss=510, batch_

Epoch 7/10:  77%|▊| 763/991 [3:31:13<57:26, 15.12s/batch, batch_loss=510, batch_

Epoch 7/10:  77%|▊| 763/991 [3:31:28<57:26, 15.12s/batch, batch_loss=11.9, batch

Epoch 7/10:  77%|▊| 764/991 [3:31:28<57:24, 15.17s/batch, batch_loss=11.9, batch

Epoch 7/10:  77%|▊| 764/991 [3:31:46<57:24, 15.17s/batch, batch_loss=3.02, batch

Epoch 7/10:  77%|▊| 765/991 [3:31:46<59:45, 15.87s/batch, batch_loss=3.02, batch

Epoch 7/10:  77%|▊| 765/991 [3:32:01<59:45, 15.87s/batch, batch_loss=12.1, batch

Epoch 7/10:  77%|▊| 766/991 [3:32:01<58:53, 15.71s/batch, batch_loss=12.1, batch

Epoch 7/10:  77%|▊| 766/991 [3:32:16<58:53, 15.71s/batch, batch_loss=14.8, batch

Epoch 7/10:  77%|▊| 767/991 [3:32:16<57:45, 15.47s/batch, batch_loss=14.8, batch

Epoch 7/10:  77%|▊| 767/991 [3:32:31<57:45, 15.47s/batch, batch_loss=4.38, batch

Epoch 7/10:  77%|▊| 768/991 [3:32:31<56:41, 15.25s/batch, batch_loss=4.38, batch

Epoch 7/10:  77%|▊| 768/991 [3:32:46<56:41, 15.25s/batch, batch_loss=2.07, batch

Epoch 7/10:  78%|▊| 769/991 [3:32:46<56:21, 15.23s/batch, batch_loss=2.07, batch

Epoch 7/10:  78%|▊| 769/991 [3:33:00<56:21, 15.23s/batch, batch_loss=13.4, batch

Epoch 7/10:  78%|▊| 770/991 [3:33:00<54:55, 14.91s/batch, batch_loss=13.4, batch

Epoch 7/10:  78%|▊| 770/991 [3:33:16<54:55, 14.91s/batch, batch_loss=2.75e+3, ba

Epoch 7/10:  78%|▊| 771/991 [3:33:16<55:25, 15.12s/batch, batch_loss=2.75e+3, ba

Epoch 7/10:  78%|▊| 771/991 [3:33:31<55:25, 15.12s/batch, batch_loss=5.67, batch

Epoch 7/10:  78%|▊| 772/991 [3:33:31<55:19, 15.16s/batch, batch_loss=5.67, batch

Epoch 7/10:  78%|▊| 772/991 [3:33:48<55:19, 15.16s/batch, batch_loss=1.32, batch

Epoch 7/10:  78%|▊| 773/991 [3:33:48<56:39, 15.60s/batch, batch_loss=1.32, batch

Epoch 7/10:  78%|▊| 773/991 [3:34:03<56:39, 15.60s/batch, batch_loss=10.3, batch

Epoch 7/10:  78%|▊| 774/991 [3:34:03<55:40, 15.40s/batch, batch_loss=10.3, batch

Epoch 7/10:  78%|▊| 774/991 [3:34:18<55:40, 15.40s/batch, batch_loss=6.64, batch

Epoch 7/10:  78%|▊| 775/991 [3:34:18<55:42, 15.47s/batch, batch_loss=6.64, batch

Epoch 7/10:  78%|▊| 775/991 [3:34:34<55:42, 15.47s/batch, batch_loss=257, batch_

Epoch 7/10:  78%|▊| 776/991 [3:34:34<55:57, 15.62s/batch, batch_loss=257, batch_

Epoch 7/10:  78%|▊| 776/991 [3:34:50<55:57, 15.62s/batch, batch_loss=0.51, batch

Epoch 7/10:  78%|▊| 777/991 [3:34:50<55:20, 15.52s/batch, batch_loss=0.51, batch

Epoch 7/10:  78%|▊| 777/991 [3:35:04<55:20, 15.52s/batch, batch_loss=0.674, batc

Epoch 7/10:  79%|▊| 778/991 [3:35:04<54:17, 15.29s/batch, batch_loss=0.674, batc

Epoch 7/10:  79%|▊| 778/991 [3:35:20<54:17, 15.29s/batch, batch_loss=7.22, batch

Epoch 7/10:  79%|▊| 779/991 [3:35:20<54:23, 15.39s/batch, batch_loss=7.22, batch

Epoch 7/10:  79%|▊| 779/991 [3:35:35<54:23, 15.39s/batch, batch_loss=2.81, batch

Epoch 7/10:  79%|▊| 780/991 [3:35:35<53:40, 15.26s/batch, batch_loss=2.81, batch

Epoch 7/10:  79%|▊| 780/991 [3:35:50<53:40, 15.26s/batch, batch_loss=3.52, batch

Epoch 7/10:  79%|▊| 781/991 [3:35:50<53:12, 15.20s/batch, batch_loss=3.52, batch

Epoch 7/10:  79%|▊| 781/991 [3:36:05<53:12, 15.20s/batch, batch_loss=2.51e+4, ba

Epoch 7/10:  79%|▊| 782/991 [3:36:05<52:52, 15.18s/batch, batch_loss=2.51e+4, ba

Epoch 7/10:  79%|▊| 782/991 [3:36:21<52:52, 15.18s/batch, batch_loss=19.8, batch

Epoch 7/10:  79%|▊| 783/991 [3:36:21<53:08, 15.33s/batch, batch_loss=19.8, batch

Epoch 7/10:  79%|▊| 783/991 [3:36:36<53:08, 15.33s/batch, batch_loss=13.2, batch

Epoch 7/10:  79%|▊| 784/991 [3:36:36<52:51, 15.32s/batch, batch_loss=13.2, batch

Epoch 7/10:  79%|▊| 784/991 [3:36:52<52:51, 15.32s/batch, batch_loss=12.9, batch

Epoch 7/10:  79%|▊| 785/991 [3:36:52<53:07, 15.47s/batch, batch_loss=12.9, batch

Epoch 7/10:  79%|▊| 785/991 [3:37:06<53:07, 15.47s/batch, batch_loss=8.44, batch

Epoch 7/10:  79%|▊| 786/991 [3:37:06<51:47, 15.16s/batch, batch_loss=8.44, batch

Epoch 7/10:  79%|▊| 786/991 [3:37:24<51:47, 15.16s/batch, batch_loss=2.48e+4, ba

Epoch 7/10:  79%|▊| 787/991 [3:37:24<54:33, 16.05s/batch, batch_loss=2.48e+4, ba

Epoch 7/10:  79%|▊| 787/991 [3:37:40<54:33, 16.05s/batch, batch_loss=680, batch_

Epoch 7/10:  80%|▊| 788/991 [3:37:40<53:39, 15.86s/batch, batch_loss=680, batch_

Epoch 7/10:  80%|▊| 788/991 [3:37:55<53:39, 15.86s/batch, batch_loss=20.9, batch

Epoch 7/10:  80%|▊| 789/991 [3:37:55<52:41, 15.65s/batch, batch_loss=20.9, batch

Epoch 7/10:  80%|▊| 789/991 [3:38:11<52:41, 15.65s/batch, batch_loss=20.3, batch

Epoch 7/10:  80%|▊| 790/991 [3:38:11<52:18, 15.62s/batch, batch_loss=20.3, batch

Epoch 7/10:  80%|▊| 790/991 [3:38:25<52:18, 15.62s/batch, batch_loss=13.5, batch

Epoch 7/10:  80%|▊| 791/991 [3:38:25<50:55, 15.28s/batch, batch_loss=13.5, batch

Epoch 7/10:  80%|▊| 791/991 [3:38:39<50:55, 15.28s/batch, batch_loss=1.04e+4, ba

Epoch 7/10:  80%|▊| 792/991 [3:38:39<49:17, 14.86s/batch, batch_loss=1.04e+4, ba

Epoch 7/10:  80%|▊| 792/991 [3:38:53<49:17, 14.86s/batch, batch_loss=8.36, batch

Epoch 7/10:  80%|▊| 793/991 [3:38:53<48:12, 14.61s/batch, batch_loss=8.36, batch

Epoch 7/10:  80%|▊| 793/991 [3:39:08<48:12, 14.61s/batch, batch_loss=1.9, batch_

Epoch 7/10:  80%|▊| 794/991 [3:39:08<48:27, 14.76s/batch, batch_loss=1.9, batch_

Epoch 7/10:  80%|▊| 794/991 [3:39:24<48:27, 14.76s/batch, batch_loss=8.42, batch

Epoch 7/10:  80%|▊| 795/991 [3:39:24<49:36, 15.18s/batch, batch_loss=8.42, batch

Epoch 7/10:  80%|▊| 795/991 [3:39:42<49:36, 15.18s/batch, batch_loss=14.1, batch

Epoch 7/10:  80%|▊| 796/991 [3:39:42<52:00, 16.00s/batch, batch_loss=14.1, batch

Epoch 7/10:  80%|▊| 796/991 [3:39:58<52:00, 16.00s/batch, batch_loss=24.9, batch

Epoch 7/10:  80%|▊| 797/991 [3:39:58<51:34, 15.95s/batch, batch_loss=24.9, batch

Epoch 7/10:  80%|▊| 797/991 [3:40:13<51:34, 15.95s/batch, batch_loss=342, batch_

Epoch 7/10:  81%|▊| 798/991 [3:40:13<50:24, 15.67s/batch, batch_loss=342, batch_

Epoch 7/10:  81%|▊| 798/991 [3:40:28<50:24, 15.67s/batch, batch_loss=9.26, batch

Epoch 7/10:  81%|▊| 799/991 [3:40:28<49:17, 15.40s/batch, batch_loss=9.26, batch

Epoch 7/10:  81%|▊| 799/991 [3:40:43<49:17, 15.40s/batch, batch_loss=17, batch_i

Epoch 7/10:  81%|▊| 800/991 [3:40:43<48:52, 15.35s/batch, batch_loss=17, batch_i

Epoch 7/10:  81%|▊| 800/991 [3:40:58<48:52, 15.35s/batch, batch_loss=11.8, batch

Epoch 7/10:  81%|▊| 801/991 [3:40:58<48:38, 15.36s/batch, batch_loss=11.8, batch

Epoch 7/10:  81%|▊| 801/991 [3:41:13<48:38, 15.36s/batch, batch_loss=16.3, batch

Epoch 7/10:  81%|▊| 802/991 [3:41:13<48:02, 15.25s/batch, batch_loss=16.3, batch

Epoch 7/10:  81%|▊| 802/991 [3:41:28<48:02, 15.25s/batch, batch_loss=6.11, batch

Epoch 7/10:  81%|▊| 803/991 [3:41:28<47:29, 15.16s/batch, batch_loss=6.11, batch

Epoch 7/10:  81%|▊| 803/991 [3:41:44<47:29, 15.16s/batch, batch_loss=16.3, batch

Epoch 7/10:  81%|▊| 804/991 [3:41:44<47:17, 15.17s/batch, batch_loss=16.3, batch

Epoch 7/10:  81%|▊| 804/991 [3:42:01<47:17, 15.17s/batch, batch_loss=5.8, batch_

Epoch 7/10:  81%|▊| 805/991 [3:42:01<49:29, 15.97s/batch, batch_loss=5.8, batch_

Epoch 7/10:  81%|▊| 805/991 [3:42:17<49:29, 15.97s/batch, batch_loss=11.3, batch

Epoch 7/10:  81%|▊| 806/991 [3:42:17<48:41, 15.79s/batch, batch_loss=11.3, batch

Epoch 7/10:  81%|▊| 806/991 [3:42:33<48:41, 15.79s/batch, batch_loss=9.82, batch

Epoch 7/10:  81%|▊| 807/991 [3:42:33<48:30, 15.82s/batch, batch_loss=9.82, batch

Epoch 7/10:  81%|▊| 807/991 [3:42:48<48:30, 15.82s/batch, batch_loss=16.2, batch

Epoch 7/10:  82%|▊| 808/991 [3:42:48<47:40, 15.63s/batch, batch_loss=16.2, batch

Epoch 7/10:  82%|▊| 808/991 [3:43:03<47:40, 15.63s/batch, batch_loss=1.21e+4, ba

Epoch 7/10:  82%|▊| 809/991 [3:43:03<46:46, 15.42s/batch, batch_loss=1.21e+4, ba

Epoch 7/10:  82%|▊| 809/991 [3:43:18<46:46, 15.42s/batch, batch_loss=16.1, batch

Epoch 7/10:  82%|▊| 810/991 [3:43:18<46:46, 15.51s/batch, batch_loss=16.1, batch

Epoch 7/10:  82%|▊| 810/991 [3:43:33<46:46, 15.51s/batch, batch_loss=7.82, batch

Epoch 7/10:  82%|▊| 811/991 [3:43:33<45:52, 15.29s/batch, batch_loss=7.82, batch

Epoch 7/10:  82%|▊| 811/991 [3:43:51<45:52, 15.29s/batch, batch_loss=7.12, batch

Epoch 7/10:  82%|▊| 812/991 [3:43:51<47:46, 16.01s/batch, batch_loss=7.12, batch

Epoch 7/10:  82%|▊| 812/991 [3:44:06<47:46, 16.01s/batch, batch_loss=8.75, batch

Epoch 7/10:  82%|▊| 813/991 [3:44:06<46:41, 15.74s/batch, batch_loss=8.75, batch

Epoch 7/10:  82%|▊| 813/991 [3:44:21<46:41, 15.74s/batch, batch_loss=13.5, batch

Epoch 7/10:  82%|▊| 814/991 [3:44:21<45:54, 15.56s/batch, batch_loss=13.5, batch

Epoch 7/10:  82%|▊| 814/991 [3:44:36<45:54, 15.56s/batch, batch_loss=8, batch_in

Epoch 7/10:  82%|▊| 815/991 [3:44:36<44:57, 15.33s/batch, batch_loss=8, batch_in

Epoch 7/10:  82%|▊| 815/991 [3:44:50<44:57, 15.33s/batch, batch_loss=92.2, batch

Epoch 7/10:  82%|▊| 816/991 [3:44:50<43:35, 14.95s/batch, batch_loss=92.2, batch

Epoch 7/10:  82%|▊| 816/991 [3:45:05<43:35, 14.95s/batch, batch_loss=366, batch_

Epoch 7/10:  82%|▊| 817/991 [3:45:05<43:33, 15.02s/batch, batch_loss=366, batch_

Epoch 7/10:  82%|▊| 817/991 [3:45:21<43:33, 15.02s/batch, batch_loss=374, batch_

Epoch 7/10:  83%|▊| 818/991 [3:45:21<44:14, 15.35s/batch, batch_loss=374, batch_

Epoch 7/10:  83%|▊| 818/991 [3:45:37<44:14, 15.35s/batch, batch_loss=13.3, batch

Epoch 7/10:  83%|▊| 819/991 [3:45:37<44:11, 15.42s/batch, batch_loss=13.3, batch

Epoch 7/10:  83%|▊| 819/991 [3:45:56<44:11, 15.42s/batch, batch_loss=8.03, batch

Epoch 7/10:  83%|▊| 820/991 [3:45:56<46:47, 16.42s/batch, batch_loss=8.03, batch

Epoch 7/10:  83%|▊| 820/991 [3:46:13<46:47, 16.42s/batch, batch_loss=6.88, batch

Epoch 7/10:  83%|▊| 821/991 [3:46:13<47:32, 16.78s/batch, batch_loss=6.88, batch

Epoch 7/10:  83%|▊| 821/991 [3:46:32<47:32, 16.78s/batch, batch_loss=9.45, batch

Epoch 7/10:  83%|▊| 822/991 [3:46:32<48:33, 17.24s/batch, batch_loss=9.45, batch

Epoch 7/10:  83%|▊| 822/991 [3:46:49<48:33, 17.24s/batch, batch_loss=153, batch_

Epoch 7/10:  83%|▊| 823/991 [3:46:49<48:34, 17.35s/batch, batch_loss=153, batch_

Epoch 7/10:  83%|▊| 823/991 [3:47:06<48:34, 17.35s/batch, batch_loss=7.18, batch

Epoch 7/10:  83%|▊| 824/991 [3:47:06<47:50, 17.19s/batch, batch_loss=7.18, batch

Epoch 7/10:  83%|▊| 824/991 [3:47:23<47:50, 17.19s/batch, batch_loss=14.5, batch

Epoch 7/10:  83%|▊| 825/991 [3:47:23<47:30, 17.17s/batch, batch_loss=14.5, batch

Epoch 7/10:  83%|▊| 825/991 [3:47:40<47:30, 17.17s/batch, batch_loss=2.59e+3, ba

Epoch 7/10:  83%|▊| 826/991 [3:47:40<46:54, 17.06s/batch, batch_loss=2.59e+3, ba

Epoch 7/10:  83%|▊| 826/991 [3:47:57<46:54, 17.06s/batch, batch_loss=23.3, batch

Epoch 7/10:  83%|▊| 827/991 [3:47:57<46:53, 17.16s/batch, batch_loss=23.3, batch

Epoch 7/10:  83%|▊| 827/991 [3:48:16<46:53, 17.16s/batch, batch_loss=33.2, batch

Epoch 7/10:  84%|▊| 828/991 [3:48:16<47:34, 17.51s/batch, batch_loss=33.2, batch

Epoch 7/10:  84%|▊| 828/991 [3:48:33<47:34, 17.51s/batch, batch_loss=8.75, batch

Epoch 7/10:  84%|▊| 829/991 [3:48:33<47:10, 17.47s/batch, batch_loss=8.75, batch

Epoch 7/10:  84%|▊| 829/991 [3:48:51<47:10, 17.47s/batch, batch_loss=14.7, batch

Epoch 7/10:  84%|▊| 830/991 [3:48:51<47:38, 17.75s/batch, batch_loss=14.7, batch

Epoch 7/10:  84%|▊| 830/991 [3:49:10<47:38, 17.75s/batch, batch_loss=12, batch_i

Epoch 7/10:  84%|▊| 831/991 [3:49:10<47:46, 17.92s/batch, batch_loss=12, batch_i

Epoch 7/10:  84%|▊| 831/991 [3:49:27<47:46, 17.92s/batch, batch_loss=18.2, batch

Epoch 7/10:  84%|▊| 832/991 [3:49:27<46:49, 17.67s/batch, batch_loss=18.2, batch

Epoch 7/10:  84%|▊| 832/991 [3:49:47<46:49, 17.67s/batch, batch_loss=227, batch_

Epoch 7/10:  84%|▊| 833/991 [3:49:47<48:13, 18.31s/batch, batch_loss=227, batch_

Epoch 7/10:  84%|▊| 833/991 [3:50:03<48:13, 18.31s/batch, batch_loss=26.4, batch

Epoch 7/10:  84%|▊| 834/991 [3:50:03<46:32, 17.78s/batch, batch_loss=26.4, batch

Epoch 7/10:  84%|▊| 834/991 [3:50:21<46:32, 17.78s/batch, batch_loss=17.3, batch

Epoch 7/10:  84%|▊| 835/991 [3:50:21<45:56, 17.67s/batch, batch_loss=17.3, batch

Epoch 7/10:  84%|▊| 835/991 [3:50:39<45:56, 17.67s/batch, batch_loss=3.27e+3, ba

Epoch 7/10:  84%|▊| 836/991 [3:50:39<45:58, 17.80s/batch, batch_loss=3.27e+3, ba

Epoch 7/10:  84%|▊| 836/991 [3:50:56<45:58, 17.80s/batch, batch_loss=4.9e+3, bat

Epoch 7/10:  84%|▊| 837/991 [3:50:56<45:19, 17.66s/batch, batch_loss=4.9e+3, bat

Epoch 7/10:  84%|▊| 837/991 [3:51:12<45:19, 17.66s/batch, batch_loss=20.7, batch

Epoch 7/10:  85%|▊| 838/991 [3:51:12<43:58, 17.25s/batch, batch_loss=20.7, batch

Epoch 7/10:  85%|▊| 838/991 [3:51:29<43:58, 17.25s/batch, batch_loss=4.54, batch

Epoch 7/10:  85%|▊| 839/991 [3:51:29<43:34, 17.20s/batch, batch_loss=4.54, batch

Epoch 7/10:  85%|▊| 839/991 [3:51:46<43:34, 17.20s/batch, batch_loss=4.06, batch

Epoch 7/10:  85%|▊| 840/991 [3:51:46<42:37, 16.94s/batch, batch_loss=4.06, batch

Epoch 7/10:  85%|▊| 840/991 [3:52:04<42:37, 16.94s/batch, batch_loss=23.9, batch

Epoch 7/10:  85%|▊| 841/991 [3:52:04<42:56, 17.18s/batch, batch_loss=23.9, batch

Epoch 7/10:  85%|▊| 841/991 [3:52:22<42:56, 17.18s/batch, batch_loss=18.7, batch

Epoch 7/10:  85%|▊| 842/991 [3:52:22<43:21, 17.46s/batch, batch_loss=18.7, batch

Epoch 7/10:  85%|▊| 842/991 [3:52:38<43:21, 17.46s/batch, batch_loss=8.59, batch

Epoch 7/10:  85%|▊| 843/991 [3:52:38<42:15, 17.13s/batch, batch_loss=8.59, batch

Epoch 7/10:  85%|▊| 843/991 [3:52:54<42:15, 17.13s/batch, batch_loss=1.69e+3, ba

Epoch 7/10:  85%|▊| 844/991 [3:52:54<41:04, 16.77s/batch, batch_loss=1.69e+3, ba

Epoch 7/10:  85%|▊| 844/991 [3:53:11<41:04, 16.77s/batch, batch_loss=23.6, batch

Epoch 7/10:  85%|▊| 845/991 [3:53:11<40:54, 16.81s/batch, batch_loss=23.6, batch

Epoch 7/10:  85%|▊| 845/991 [3:53:27<40:54, 16.81s/batch, batch_loss=1.18e+4, ba

Epoch 7/10:  85%|▊| 846/991 [3:53:27<40:30, 16.76s/batch, batch_loss=1.18e+4, ba

Epoch 7/10:  85%|▊| 846/991 [3:53:48<40:30, 16.76s/batch, batch_loss=43.2, batch

Epoch 7/10:  85%|▊| 847/991 [3:53:48<42:43, 17.80s/batch, batch_loss=43.2, batch

Epoch 7/10:  85%|▊| 847/991 [3:54:04<42:43, 17.80s/batch, batch_loss=35.2, batch

Epoch 7/10:  86%|▊| 848/991 [3:54:04<41:36, 17.46s/batch, batch_loss=35.2, batch

Epoch 7/10:  86%|▊| 848/991 [3:54:22<41:36, 17.46s/batch, batch_loss=1.01e+3, ba

Epoch 7/10:  86%|▊| 849/991 [3:54:22<41:40, 17.61s/batch, batch_loss=1.01e+3, ba

Epoch 7/10:  86%|▊| 849/991 [3:54:39<41:40, 17.61s/batch, batch_loss=7.46, batch

Epoch 7/10:  86%|▊| 850/991 [3:54:39<40:57, 17.43s/batch, batch_loss=7.46, batch

Epoch 7/10:  86%|▊| 850/991 [3:54:56<40:57, 17.43s/batch, batch_loss=23.1, batch

Epoch 7/10:  86%|▊| 851/991 [3:54:56<40:10, 17.22s/batch, batch_loss=23.1, batch

Epoch 7/10:  86%|▊| 851/991 [3:55:13<40:10, 17.22s/batch, batch_loss=15, batch_i

Epoch 7/10:  86%|▊| 852/991 [3:55:13<39:47, 17.17s/batch, batch_loss=15, batch_i

Epoch 7/10:  86%|▊| 852/991 [3:55:33<39:47, 17.17s/batch, batch_loss=7.63e+3, ba

Epoch 7/10:  86%|▊| 853/991 [3:55:33<41:12, 17.92s/batch, batch_loss=7.63e+3, ba

Epoch 7/10:  86%|▊| 853/991 [3:55:49<41:12, 17.92s/batch, batch_loss=24.7, batch

Epoch 7/10:  86%|▊| 854/991 [3:55:49<39:49, 17.44s/batch, batch_loss=24.7, batch

Epoch 7/10:  86%|▊| 854/991 [3:56:06<39:49, 17.44s/batch, batch_loss=9.08, batch

Epoch 7/10:  86%|▊| 855/991 [3:56:06<38:52, 17.15s/batch, batch_loss=9.08, batch

Epoch 7/10:  86%|▊| 855/991 [3:56:21<38:52, 17.15s/batch, batch_loss=8.16, batch

Epoch 7/10:  86%|▊| 856/991 [3:56:21<37:23, 16.62s/batch, batch_loss=8.16, batch

Epoch 7/10:  86%|▊| 856/991 [3:56:37<37:23, 16.62s/batch, batch_loss=9.08, batch

Epoch 7/10:  86%|▊| 857/991 [3:56:37<36:41, 16.43s/batch, batch_loss=9.08, batch

Epoch 7/10:  86%|▊| 857/991 [3:56:52<36:41, 16.43s/batch, batch_loss=35.7, batch

Epoch 7/10:  87%|▊| 858/991 [3:56:52<35:33, 16.04s/batch, batch_loss=35.7, batch

Epoch 7/10:  87%|▊| 858/991 [3:57:11<35:33, 16.04s/batch, batch_loss=17.3, batch

Epoch 7/10:  87%|▊| 859/991 [3:57:11<37:04, 16.85s/batch, batch_loss=17.3, batch

Epoch 7/10:  87%|▊| 859/991 [3:57:26<37:04, 16.85s/batch, batch_loss=24.5, batch

Epoch 7/10:  87%|▊| 860/991 [3:57:26<35:53, 16.44s/batch, batch_loss=24.5, batch

Epoch 7/10:  87%|▊| 860/991 [3:57:42<35:53, 16.44s/batch, batch_loss=8.73, batch

Epoch 7/10:  87%|▊| 861/991 [3:57:42<35:20, 16.31s/batch, batch_loss=8.73, batch

Epoch 7/10:  87%|▊| 861/991 [3:57:59<35:20, 16.31s/batch, batch_loss=19.8, batch

Epoch 7/10:  87%|▊| 862/991 [3:57:59<35:17, 16.41s/batch, batch_loss=19.8, batch

Epoch 7/10:  87%|▊| 862/991 [3:58:16<35:17, 16.41s/batch, batch_loss=29.6, batch

Epoch 7/10:  87%|▊| 863/991 [3:58:16<35:14, 16.52s/batch, batch_loss=29.6, batch

Epoch 7/10:  87%|▊| 863/991 [3:58:32<35:14, 16.52s/batch, batch_loss=9.76, batch

Epoch 7/10:  87%|▊| 864/991 [3:58:32<34:38, 16.36s/batch, batch_loss=9.76, batch

Epoch 7/10:  87%|▊| 864/991 [3:58:47<34:38, 16.36s/batch, batch_loss=20.1, batch

Epoch 7/10:  87%|▊| 865/991 [3:58:47<33:49, 16.10s/batch, batch_loss=20.1, batch

Epoch 7/10:  87%|▊| 865/991 [3:59:03<33:49, 16.10s/batch, batch_loss=29.9, batch

Epoch 7/10:  87%|▊| 866/991 [3:59:03<33:03, 15.86s/batch, batch_loss=29.9, batch

Epoch 7/10:  87%|▊| 866/991 [3:59:18<33:03, 15.86s/batch, batch_loss=25.1, batch

Epoch 7/10:  87%|▊| 867/991 [3:59:18<32:25, 15.69s/batch, batch_loss=25.1, batch

Epoch 7/10:  87%|▊| 867/991 [3:59:34<32:25, 15.69s/batch, batch_loss=19.9, batch

Epoch 7/10:  88%|▉| 868/991 [3:59:34<32:28, 15.84s/batch, batch_loss=19.9, batch

Epoch 7/10:  88%|▉| 868/991 [3:59:50<32:28, 15.84s/batch, batch_loss=11.6, batch

Epoch 7/10:  88%|▉| 869/991 [3:59:50<32:20, 15.91s/batch, batch_loss=11.6, batch

Epoch 7/10:  88%|▉| 869/991 [4:00:06<32:20, 15.91s/batch, batch_loss=13, batch_i

Epoch 7/10:  88%|▉| 870/991 [4:00:06<32:20, 16.04s/batch, batch_loss=13, batch_i

Epoch 7/10:  88%|▉| 870/991 [4:00:22<32:20, 16.04s/batch, batch_loss=7.9, batch_

Epoch 7/10:  88%|▉| 871/991 [4:00:22<31:56, 15.97s/batch, batch_loss=7.9, batch_

Epoch 7/10:  88%|▉| 871/991 [4:00:37<31:56, 15.97s/batch, batch_loss=19.6, batch

Epoch 7/10:  88%|▉| 872/991 [4:00:37<30:58, 15.62s/batch, batch_loss=19.6, batch

Epoch 7/10:  88%|▉| 872/991 [4:00:53<30:58, 15.62s/batch, batch_loss=14.2, batch

Epoch 7/10:  88%|▉| 873/991 [4:00:53<30:56, 15.74s/batch, batch_loss=14.2, batch

Epoch 7/10:  88%|▉| 873/991 [4:01:09<30:56, 15.74s/batch, batch_loss=7.42, batch

Epoch 7/10:  88%|▉| 874/991 [4:01:09<30:33, 15.67s/batch, batch_loss=7.42, batch

Epoch 7/10:  88%|▉| 874/991 [4:01:24<30:33, 15.67s/batch, batch_loss=12.8, batch

Epoch 7/10:  88%|▉| 875/991 [4:01:24<30:21, 15.70s/batch, batch_loss=12.8, batch

Epoch 7/10:  88%|▉| 875/991 [4:01:40<30:21, 15.70s/batch, batch_loss=32.7, batch

Epoch 7/10:  88%|▉| 876/991 [4:01:40<29:57, 15.63s/batch, batch_loss=32.7, batch

Epoch 7/10:  88%|▉| 876/991 [4:01:55<29:57, 15.63s/batch, batch_loss=21.4, batch

Epoch 7/10:  88%|▉| 877/991 [4:01:55<29:18, 15.43s/batch, batch_loss=21.4, batch

Epoch 7/10:  88%|▉| 877/991 [4:02:10<29:18, 15.43s/batch, batch_loss=33.2, batch

Epoch 7/10:  89%|▉| 878/991 [4:02:10<29:13, 15.52s/batch, batch_loss=33.2, batch

Epoch 7/10:  89%|▉| 878/991 [4:02:25<29:13, 15.52s/batch, batch_loss=18.9, batch

Epoch 7/10:  89%|▉| 879/991 [4:02:25<28:33, 15.30s/batch, batch_loss=18.9, batch

Epoch 7/10:  89%|▉| 879/991 [4:02:41<28:33, 15.30s/batch, batch_loss=13, batch_i

Epoch 7/10:  89%|▉| 880/991 [4:02:41<28:34, 15.44s/batch, batch_loss=13, batch_i

Epoch 7/10:  89%|▉| 880/991 [4:02:57<28:34, 15.44s/batch, batch_loss=5.14e+3, ba

Epoch 7/10:  89%|▉| 881/991 [4:02:57<28:47, 15.71s/batch, batch_loss=5.14e+3, ba

Epoch 7/10:  89%|▉| 881/991 [4:03:13<28:47, 15.71s/batch, batch_loss=16.5, batch

Epoch 7/10:  89%|▉| 882/991 [4:03:13<28:20, 15.60s/batch, batch_loss=16.5, batch

Epoch 7/10:  89%|▉| 882/991 [4:03:29<28:20, 15.60s/batch, batch_loss=20.4, batch

Epoch 7/10:  89%|▉| 883/991 [4:03:29<28:32, 15.86s/batch, batch_loss=20.4, batch

Epoch 7/10:  89%|▉| 883/991 [4:03:45<28:32, 15.86s/batch, batch_loss=13.7, batch

Epoch 7/10:  89%|▉| 884/991 [4:03:45<27:59, 15.70s/batch, batch_loss=13.7, batch

Epoch 7/10:  89%|▉| 884/991 [4:04:00<27:59, 15.70s/batch, batch_loss=15, batch_i

Epoch 7/10:  89%|▉| 885/991 [4:04:00<27:47, 15.73s/batch, batch_loss=15, batch_i

Epoch 7/10:  89%|▉| 885/991 [4:04:16<27:47, 15.73s/batch, batch_loss=17.4, batch

Epoch 7/10:  89%|▉| 886/991 [4:04:16<27:19, 15.62s/batch, batch_loss=17.4, batch

Epoch 7/10:  89%|▉| 886/991 [4:04:31<27:19, 15.62s/batch, batch_loss=1.93e+4, ba

Epoch 7/10:  90%|▉| 887/991 [4:04:31<27:07, 15.65s/batch, batch_loss=1.93e+4, ba

Epoch 7/10:  90%|▉| 887/991 [4:04:46<27:07, 15.65s/batch, batch_loss=19.3, batch

Epoch 7/10:  90%|▉| 888/991 [4:04:46<26:06, 15.21s/batch, batch_loss=19.3, batch

Epoch 7/10:  90%|▉| 888/991 [4:05:00<26:06, 15.21s/batch, batch_loss=20.3, batch

Epoch 7/10:  90%|▉| 889/991 [4:05:00<25:39, 15.09s/batch, batch_loss=20.3, batch

Epoch 7/10:  90%|▉| 889/991 [4:05:15<25:39, 15.09s/batch, batch_loss=14.9, batch

Epoch 7/10:  90%|▉| 890/991 [4:05:15<25:05, 14.91s/batch, batch_loss=14.9, batch

Epoch 7/10:  90%|▉| 890/991 [4:05:30<25:05, 14.91s/batch, batch_loss=16.7, batch

Epoch 7/10:  90%|▉| 891/991 [4:05:30<24:57, 14.97s/batch, batch_loss=16.7, batch

Epoch 7/10:  90%|▉| 891/991 [4:05:46<24:57, 14.97s/batch, batch_loss=23.7, batch

Epoch 7/10:  90%|▉| 892/991 [4:05:46<24:59, 15.15s/batch, batch_loss=23.7, batch

Epoch 7/10:  90%|▉| 892/991 [4:06:02<24:59, 15.15s/batch, batch_loss=3.69e+3, ba

Epoch 7/10:  90%|▉| 893/991 [4:06:02<25:15, 15.47s/batch, batch_loss=3.69e+3, ba

Epoch 7/10:  90%|▉| 893/991 [4:06:18<25:15, 15.47s/batch, batch_loss=11, batch_i

Epoch 7/10:  90%|▉| 894/991 [4:06:18<25:33, 15.81s/batch, batch_loss=11, batch_i

Epoch 7/10:  90%|▉| 894/991 [4:06:34<25:33, 15.81s/batch, batch_loss=15.7, batch

Epoch 7/10:  90%|▉| 895/991 [4:06:34<25:24, 15.88s/batch, batch_loss=15.7, batch

Epoch 7/10:  90%|▉| 895/991 [4:06:50<25:24, 15.88s/batch, batch_loss=11.2, batch

Epoch 7/10:  90%|▉| 896/991 [4:06:50<24:55, 15.74s/batch, batch_loss=11.2, batch

Epoch 7/10:  90%|▉| 896/991 [4:07:06<24:55, 15.74s/batch, batch_loss=19.2, batch

Epoch 7/10:  91%|▉| 897/991 [4:07:06<24:38, 15.73s/batch, batch_loss=19.2, batch

Epoch 7/10:  91%|▉| 897/991 [4:07:21<24:38, 15.73s/batch, batch_loss=20.3, batch

Epoch 7/10:  91%|▉| 898/991 [4:07:21<24:03, 15.52s/batch, batch_loss=20.3, batch

Epoch 7/10:  91%|▉| 898/991 [4:07:36<24:03, 15.52s/batch, batch_loss=16.3, batch

Epoch 7/10:  91%|▉| 899/991 [4:07:36<23:54, 15.59s/batch, batch_loss=16.3, batch

Epoch 7/10:  91%|▉| 899/991 [4:07:52<23:54, 15.59s/batch, batch_loss=18.7, batch

Epoch 7/10:  91%|▉| 900/991 [4:07:52<23:40, 15.61s/batch, batch_loss=18.7, batch

Epoch 7/10:  91%|▉| 900/991 [4:08:08<23:40, 15.61s/batch, batch_loss=14, batch_i

Epoch 7/10:  91%|▉| 901/991 [4:08:08<23:22, 15.58s/batch, batch_loss=14, batch_i

Epoch 7/10:  91%|▉| 901/991 [4:08:25<23:22, 15.58s/batch, batch_loss=12.7, batch

Epoch 7/10:  91%|▉| 902/991 [4:08:25<23:53, 16.11s/batch, batch_loss=12.7, batch

Epoch 7/10:  91%|▉| 902/991 [4:08:39<23:53, 16.11s/batch, batch_loss=7.34, batch

Epoch 7/10:  91%|▉| 903/991 [4:08:39<22:37, 15.43s/batch, batch_loss=7.34, batch

Epoch 7/10:  91%|▉| 903/991 [4:08:53<22:37, 15.43s/batch, batch_loss=8.8, batch_

Epoch 7/10:  91%|▉| 904/991 [4:08:53<21:48, 15.04s/batch, batch_loss=8.8, batch_

Epoch 7/10:  91%|▉| 904/991 [4:09:07<21:48, 15.04s/batch, batch_loss=34.4, batch

Epoch 7/10:  91%|▉| 905/991 [4:09:07<21:22, 14.91s/batch, batch_loss=34.4, batch

Epoch 7/10:  91%|▉| 905/991 [4:09:25<21:22, 14.91s/batch, batch_loss=21.8, batch

Epoch 7/10:  91%|▉| 906/991 [4:09:25<22:09, 15.65s/batch, batch_loss=21.8, batch

Epoch 7/10:  91%|▉| 906/991 [4:09:40<22:09, 15.65s/batch, batch_loss=21.7, batch

Epoch 7/10:  92%|▉| 907/991 [4:09:40<21:45, 15.55s/batch, batch_loss=21.7, batch

Epoch 7/10:  92%|▉| 907/991 [4:09:55<21:45, 15.55s/batch, batch_loss=14.9, batch

Epoch 7/10:  92%|▉| 908/991 [4:09:55<21:03, 15.22s/batch, batch_loss=14.9, batch

Epoch 7/10:  92%|▉| 908/991 [4:10:09<21:03, 15.22s/batch, batch_loss=6.19, batch

Epoch 7/10:  92%|▉| 909/991 [4:10:09<20:37, 15.10s/batch, batch_loss=6.19, batch

Epoch 7/10:  92%|▉| 909/991 [4:10:24<20:37, 15.10s/batch, batch_loss=684, batch_

Epoch 7/10:  92%|▉| 910/991 [4:10:24<20:11, 14.95s/batch, batch_loss=684, batch_

Epoch 7/10:  92%|▉| 910/991 [4:10:40<20:11, 14.95s/batch, batch_loss=1.02e+3, ba

Epoch 7/10:  92%|▉| 911/991 [4:10:40<20:10, 15.13s/batch, batch_loss=1.02e+3, ba

Epoch 7/10:  92%|▉| 911/991 [4:10:55<20:10, 15.13s/batch, batch_loss=25.2, batch

Epoch 7/10:  92%|▉| 912/991 [4:10:55<20:00, 15.20s/batch, batch_loss=25.2, batch

Epoch 7/10:  92%|▉| 912/991 [4:11:11<20:00, 15.20s/batch, batch_loss=26.3, batch

Epoch 7/10:  92%|▉| 913/991 [4:11:11<19:59, 15.37s/batch, batch_loss=26.3, batch

Epoch 7/10:  92%|▉| 913/991 [4:11:29<19:59, 15.37s/batch, batch_loss=19.6, batch

Epoch 7/10:  92%|▉| 914/991 [4:11:29<20:46, 16.19s/batch, batch_loss=19.6, batch

Epoch 7/10:  92%|▉| 914/991 [4:11:44<20:46, 16.19s/batch, batch_loss=20.8, batch

Epoch 7/10:  92%|▉| 915/991 [4:11:44<20:06, 15.88s/batch, batch_loss=20.8, batch

Epoch 7/10:  92%|▉| 915/991 [4:11:59<20:06, 15.88s/batch, batch_loss=16.1, batch

Epoch 7/10:  92%|▉| 916/991 [4:11:59<19:37, 15.70s/batch, batch_loss=16.1, batch

Epoch 7/10:  92%|▉| 916/991 [4:12:15<19:37, 15.70s/batch, batch_loss=8.74, batch

Epoch 7/10:  93%|▉| 917/991 [4:12:15<19:13, 15.59s/batch, batch_loss=8.74, batch

Epoch 7/10:  93%|▉| 917/991 [4:12:30<19:13, 15.59s/batch, batch_loss=16, batch_i

Epoch 7/10:  93%|▉| 918/991 [4:12:30<18:58, 15.60s/batch, batch_loss=16, batch_i

Epoch 7/10:  93%|▉| 918/991 [4:12:45<18:58, 15.60s/batch, batch_loss=12.7, batch

Epoch 7/10:  93%|▉| 919/991 [4:12:45<18:21, 15.30s/batch, batch_loss=12.7, batch

Epoch 7/10:  93%|▉| 919/991 [4:13:00<18:21, 15.30s/batch, batch_loss=16.2, batch

Epoch 7/10:  93%|▉| 920/991 [4:13:00<18:10, 15.36s/batch, batch_loss=16.2, batch

Epoch 7/10:  93%|▉| 920/991 [4:13:15<18:10, 15.36s/batch, batch_loss=19.5, batch

Epoch 7/10:  93%|▉| 921/991 [4:13:15<17:51, 15.31s/batch, batch_loss=19.5, batch

Epoch 7/10:  93%|▉| 921/991 [4:13:32<17:51, 15.31s/batch, batch_loss=30.4, batch

Epoch 7/10:  93%|▉| 922/991 [4:13:32<18:06, 15.75s/batch, batch_loss=30.4, batch

Epoch 7/10:  93%|▉| 922/991 [4:13:49<18:06, 15.75s/batch, batch_loss=6.81, batch

Epoch 7/10:  93%|▉| 923/991 [4:13:49<18:04, 15.94s/batch, batch_loss=6.81, batch

Epoch 7/10:  93%|▉| 923/991 [4:14:04<18:04, 15.94s/batch, batch_loss=11, batch_i

Epoch 7/10:  93%|▉| 924/991 [4:14:04<17:46, 15.91s/batch, batch_loss=11, batch_i

Epoch 7/10:  93%|▉| 924/991 [4:14:20<17:46, 15.91s/batch, batch_loss=11, batch_i

Epoch 7/10:  93%|▉| 925/991 [4:14:20<17:29, 15.91s/batch, batch_loss=11, batch_i

Epoch 7/10:  93%|▉| 925/991 [4:14:35<17:29, 15.91s/batch, batch_loss=3e+4, batch

Epoch 7/10:  93%|▉| 926/991 [4:14:35<16:53, 15.60s/batch, batch_loss=3e+4, batch

Epoch 7/10:  93%|▉| 926/991 [4:14:50<16:53, 15.60s/batch, batch_loss=6.32, batch

Epoch 7/10:  94%|▉| 927/991 [4:14:50<16:24, 15.38s/batch, batch_loss=6.32, batch

Epoch 7/10:  94%|▉| 927/991 [4:15:05<16:24, 15.38s/batch, batch_loss=861, batch_

Epoch 7/10:  94%|▉| 928/991 [4:15:05<15:52, 15.12s/batch, batch_loss=861, batch_

Epoch 7/10:  94%|▉| 928/991 [4:15:20<15:52, 15.12s/batch, batch_loss=10.2, batch

Epoch 7/10:  94%|▉| 929/991 [4:15:20<15:41, 15.18s/batch, batch_loss=10.2, batch

Epoch 7/10:  94%|▉| 929/991 [4:15:35<15:41, 15.18s/batch, batch_loss=8.13, batch

Epoch 7/10:  94%|▉| 930/991 [4:15:35<15:18, 15.05s/batch, batch_loss=8.13, batch

Epoch 7/10:  94%|▉| 930/991 [4:15:52<15:18, 15.05s/batch, batch_loss=13.7, batch

Epoch 7/10:  94%|▉| 931/991 [4:15:52<15:37, 15.63s/batch, batch_loss=13.7, batch

Epoch 7/10:  94%|▉| 931/991 [4:16:06<15:37, 15.63s/batch, batch_loss=12, batch_i

Epoch 7/10:  94%|▉| 932/991 [4:16:06<15:00, 15.27s/batch, batch_loss=12, batch_i

Epoch 7/10:  94%|▉| 932/991 [4:16:21<15:00, 15.27s/batch, batch_loss=12.1, batch

Epoch 7/10:  94%|▉| 933/991 [4:16:21<14:43, 15.23s/batch, batch_loss=12.1, batch

Epoch 7/10:  94%|▉| 933/991 [4:16:37<14:43, 15.23s/batch, batch_loss=1.75, batch

Epoch 7/10:  94%|▉| 934/991 [4:16:37<14:33, 15.33s/batch, batch_loss=1.75, batch

Epoch 7/10:  94%|▉| 934/991 [4:16:51<14:33, 15.33s/batch, batch_loss=1.68, batch

Epoch 7/10:  94%|▉| 935/991 [4:16:51<14:07, 15.13s/batch, batch_loss=1.68, batch

Epoch 7/10:  94%|▉| 935/991 [4:17:07<14:07, 15.13s/batch, batch_loss=167, batch_

Epoch 7/10:  94%|▉| 936/991 [4:17:07<13:58, 15.25s/batch, batch_loss=167, batch_

Epoch 7/10:  94%|▉| 936/991 [4:17:22<13:58, 15.25s/batch, batch_loss=36.1, batch

Epoch 7/10:  95%|▉| 937/991 [4:17:22<13:40, 15.20s/batch, batch_loss=36.1, batch

Epoch 7/10:  95%|▉| 937/991 [4:17:40<13:40, 15.20s/batch, batch_loss=10, batch_i

Epoch 7/10:  95%|▉| 938/991 [4:17:40<14:06, 15.98s/batch, batch_loss=10, batch_i

Epoch 7/10:  95%|▉| 938/991 [4:17:55<14:06, 15.98s/batch, batch_loss=8.72, batch

Epoch 7/10:  95%|▉| 939/991 [4:17:55<13:44, 15.86s/batch, batch_loss=8.72, batch

Epoch 7/10:  95%|▉| 939/991 [4:18:10<13:44, 15.86s/batch, batch_loss=424, batch_

Epoch 7/10:  95%|▉| 940/991 [4:18:10<13:16, 15.61s/batch, batch_loss=424, batch_

Epoch 7/10:  95%|▉| 940/991 [4:18:25<13:16, 15.61s/batch, batch_loss=18.4, batch

Epoch 7/10:  95%|▉| 941/991 [4:18:25<12:46, 15.32s/batch, batch_loss=18.4, batch

Epoch 7/10:  95%|▉| 941/991 [4:18:41<12:46, 15.32s/batch, batch_loss=16.3, batch

Epoch 7/10:  95%|▉| 942/991 [4:18:41<12:31, 15.34s/batch, batch_loss=16.3, batch

Epoch 7/10:  95%|▉| 942/991 [4:18:55<12:31, 15.34s/batch, batch_loss=10.2, batch

Epoch 7/10:  95%|▉| 943/991 [4:18:55<12:05, 15.12s/batch, batch_loss=10.2, batch

Epoch 7/10:  95%|▉| 943/991 [4:19:11<12:05, 15.12s/batch, batch_loss=15.8, batch

Epoch 7/10:  95%|▉| 944/991 [4:19:11<11:58, 15.28s/batch, batch_loss=15.8, batch

Epoch 7/10:  95%|▉| 944/991 [4:19:26<11:58, 15.28s/batch, batch_loss=1.86, batch

Epoch 7/10:  95%|▉| 945/991 [4:19:26<11:41, 15.25s/batch, batch_loss=1.86, batch

Epoch 7/10:  95%|▉| 945/991 [4:19:41<11:41, 15.25s/batch, batch_loss=15.8, batch

Epoch 7/10:  95%|▉| 946/991 [4:19:41<11:25, 15.23s/batch, batch_loss=15.8, batch

Epoch 7/10:  95%|▉| 946/991 [4:19:56<11:25, 15.23s/batch, batch_loss=14.6, batch

Epoch 7/10:  96%|▉| 947/991 [4:19:56<11:08, 15.20s/batch, batch_loss=14.6, batch

Epoch 7/10:  96%|▉| 947/991 [4:20:11<11:08, 15.20s/batch, batch_loss=10.2, batch

Epoch 7/10:  96%|▉| 948/991 [4:20:11<10:52, 15.18s/batch, batch_loss=10.2, batch

Epoch 7/10:  96%|▉| 948/991 [4:20:27<10:52, 15.18s/batch, batch_loss=5.19, batch

Epoch 7/10:  96%|▉| 949/991 [4:20:27<10:44, 15.35s/batch, batch_loss=5.19, batch

Epoch 7/10:  96%|▉| 949/991 [4:20:42<10:44, 15.35s/batch, batch_loss=8.42, batch

Epoch 7/10:  96%|▉| 950/991 [4:20:42<10:25, 15.26s/batch, batch_loss=8.42, batch

Epoch 7/10:  96%|▉| 950/991 [4:20:57<10:25, 15.26s/batch, batch_loss=15.7, batch

Epoch 7/10:  96%|▉| 951/991 [4:20:57<10:02, 15.07s/batch, batch_loss=15.7, batch

Epoch 7/10:  96%|▉| 951/991 [4:21:13<10:02, 15.07s/batch, batch_loss=17.2, batch

Epoch 7/10:  96%|▉| 952/991 [4:21:13<09:55, 15.27s/batch, batch_loss=17.2, batch

Epoch 7/10:  96%|▉| 952/991 [4:21:28<09:55, 15.27s/batch, batch_loss=6.61, batch

Epoch 7/10:  96%|▉| 953/991 [4:21:28<09:46, 15.42s/batch, batch_loss=6.61, batch

Epoch 7/10:  96%|▉| 953/991 [4:21:46<09:46, 15.42s/batch, batch_loss=333, batch_

Epoch 7/10:  96%|▉| 954/991 [4:21:46<09:53, 16.04s/batch, batch_loss=333, batch_

Epoch 7/10:  96%|▉| 954/991 [4:22:01<09:53, 16.04s/batch, batch_loss=16.3, batch

Epoch 7/10:  96%|▉| 955/991 [4:22:01<09:26, 15.73s/batch, batch_loss=16.3, batch

Epoch 7/10:  96%|▉| 955/991 [4:22:16<09:26, 15.73s/batch, batch_loss=21.2, batch

Epoch 7/10:  96%|▉| 956/991 [4:22:16<09:06, 15.60s/batch, batch_loss=21.2, batch

Epoch 7/10:  96%|▉| 956/991 [4:22:31<09:06, 15.60s/batch, batch_loss=18.2, batch

Epoch 7/10:  97%|▉| 957/991 [4:22:31<08:45, 15.46s/batch, batch_loss=18.2, batch

Epoch 7/10:  97%|▉| 957/991 [4:22:46<08:45, 15.46s/batch, batch_loss=19.6, batch

Epoch 7/10:  97%|▉| 958/991 [4:22:46<08:20, 15.17s/batch, batch_loss=19.6, batch

Epoch 7/10:  97%|▉| 958/991 [4:23:01<08:20, 15.17s/batch, batch_loss=7.54, batch

Epoch 7/10:  97%|▉| 959/991 [4:23:01<08:03, 15.12s/batch, batch_loss=7.54, batch

Epoch 7/10:  97%|▉| 959/991 [4:23:16<08:03, 15.12s/batch, batch_loss=13.8, batch

Epoch 7/10:  97%|▉| 960/991 [4:23:16<07:51, 15.22s/batch, batch_loss=13.8, batch

Epoch 7/10:  97%|▉| 960/991 [4:23:32<07:51, 15.22s/batch, batch_loss=17.6, batch

Epoch 7/10:  97%|▉| 961/991 [4:23:32<07:37, 15.26s/batch, batch_loss=17.6, batch

Epoch 7/10:  97%|▉| 961/991 [4:23:47<07:37, 15.26s/batch, batch_loss=5.59, batch

Epoch 7/10:  97%|▉| 962/991 [4:23:47<07:21, 15.21s/batch, batch_loss=5.59, batch

Epoch 7/10:  97%|▉| 962/991 [4:24:03<07:21, 15.21s/batch, batch_loss=8.18, batch

Epoch 7/10:  97%|▉| 963/991 [4:24:03<07:19, 15.70s/batch, batch_loss=8.18, batch

Epoch 7/10:  97%|▉| 963/991 [4:24:19<07:19, 15.70s/batch, batch_loss=9.45e+3, ba

Epoch 7/10:  97%|▉| 964/991 [4:24:19<06:59, 15.52s/batch, batch_loss=9.45e+3, ba

Epoch 7/10:  97%|▉| 964/991 [4:24:33<06:59, 15.52s/batch, batch_loss=21, batch_i

Epoch 7/10:  97%|▉| 965/991 [4:24:33<06:36, 15.24s/batch, batch_loss=21, batch_i

Epoch 7/10:  97%|▉| 965/991 [4:24:48<06:36, 15.24s/batch, batch_loss=15.5, batch

Epoch 7/10:  97%|▉| 966/991 [4:24:48<06:21, 15.25s/batch, batch_loss=15.5, batch

Epoch 7/10:  97%|▉| 966/991 [4:25:04<06:21, 15.25s/batch, batch_loss=2.4e+4, bat

Epoch 7/10:  98%|▉| 967/991 [4:25:04<06:05, 15.22s/batch, batch_loss=2.4e+4, bat

Epoch 7/10:  98%|▉| 967/991 [4:25:19<06:05, 15.22s/batch, batch_loss=419, batch_

Epoch 7/10:  98%|▉| 968/991 [4:25:19<05:53, 15.37s/batch, batch_loss=419, batch_

Epoch 7/10:  98%|▉| 968/991 [4:25:35<05:53, 15.37s/batch, batch_loss=25.6, batch

Epoch 7/10:  98%|▉| 969/991 [4:25:35<05:38, 15.38s/batch, batch_loss=25.6, batch

Epoch 7/10:  98%|▉| 969/991 [4:25:51<05:38, 15.38s/batch, batch_loss=1.26, batch

Epoch 7/10:  98%|▉| 970/991 [4:25:51<05:27, 15.60s/batch, batch_loss=1.26, batch

Epoch 7/10:  98%|▉| 970/991 [4:26:06<05:27, 15.60s/batch, batch_loss=11.8, batch

Epoch 7/10:  98%|▉| 971/991 [4:26:06<05:10, 15.51s/batch, batch_loss=11.8, batch

Epoch 7/10:  98%|▉| 971/991 [4:26:22<05:10, 15.51s/batch, batch_loss=27.8, batch

Epoch 7/10:  98%|▉| 972/991 [4:26:22<04:56, 15.61s/batch, batch_loss=27.8, batch

Epoch 7/10:  98%|▉| 972/991 [4:26:37<04:56, 15.61s/batch, batch_loss=25.8, batch

Epoch 7/10:  98%|▉| 973/991 [4:26:37<04:38, 15.47s/batch, batch_loss=25.8, batch

Epoch 7/10:  98%|▉| 973/991 [4:26:52<04:38, 15.47s/batch, batch_loss=17.3, batch

Epoch 7/10:  98%|▉| 974/991 [4:26:52<04:20, 15.35s/batch, batch_loss=17.3, batch

Epoch 7/10:  98%|▉| 974/991 [4:27:06<04:20, 15.35s/batch, batch_loss=10.1, batch

Epoch 7/10:  98%|▉| 975/991 [4:27:06<03:59, 15.00s/batch, batch_loss=10.1, batch

Epoch 7/10:  98%|▉| 975/991 [4:27:21<03:59, 15.00s/batch, batch_loss=27.4, batch

Epoch 7/10:  98%|▉| 976/991 [4:27:21<03:42, 14.85s/batch, batch_loss=27.4, batch

Epoch 7/10:  98%|▉| 976/991 [4:27:34<03:42, 14.85s/batch, batch_loss=0.598, batc

Epoch 7/10:  99%|▉| 977/991 [4:27:34<03:18, 14.20s/batch, batch_loss=0.598, batc

Epoch 7/10:  99%|▉| 977/991 [4:27:46<03:18, 14.20s/batch, batch_loss=0.425, batc

Epoch 7/10:  99%|▉| 978/991 [4:27:46<02:59, 13.78s/batch, batch_loss=0.425, batc

Epoch 7/10:  99%|▉| 978/991 [4:28:00<02:59, 13.78s/batch, batch_loss=0.335, batc

Epoch 7/10:  99%|▉| 979/991 [4:28:00<02:44, 13.72s/batch, batch_loss=0.335, batc

Epoch 7/10:  99%|▉| 979/991 [4:28:13<02:44, 13.72s/batch, batch_loss=0.264, batc

Epoch 7/10:  99%|▉| 980/991 [4:28:13<02:27, 13.41s/batch, batch_loss=0.264, batc

Epoch 7/10:  99%|▉| 980/991 [4:28:26<02:27, 13.41s/batch, batch_loss=0.203, batc

Epoch 7/10:  99%|▉| 981/991 [4:28:26<02:12, 13.29s/batch, batch_loss=0.203, batc

Epoch 7/10:  99%|▉| 981/991 [4:28:39<02:12, 13.29s/batch, batch_loss=0.155, batc

Epoch 7/10:  99%|▉| 982/991 [4:28:39<01:59, 13.29s/batch, batch_loss=0.155, batc

Epoch 7/10:  99%|▉| 982/991 [4:28:52<01:59, 13.29s/batch, batch_loss=0.119, batc

Epoch 7/10:  99%|▉| 983/991 [4:28:52<01:46, 13.28s/batch, batch_loss=0.119, batc

Epoch 7/10:  99%|▉| 983/991 [4:29:05<01:46, 13.28s/batch, batch_loss=0.0937, bat

Epoch 7/10:  99%|▉| 984/991 [4:29:05<01:31, 13.11s/batch, batch_loss=0.0937, bat

Epoch 7/10:  99%|▉| 984/991 [4:29:19<01:31, 13.11s/batch, batch_loss=0.077, batc

Epoch 7/10:  99%|▉| 985/991 [4:29:19<01:19, 13.33s/batch, batch_loss=0.077, batc

Epoch 7/10:  99%|▉| 985/991 [4:29:31<01:19, 13.33s/batch, batch_loss=0.067, batc

Epoch 7/10:  99%|▉| 986/991 [4:29:31<01:05, 13.11s/batch, batch_loss=0.067, batc

Epoch 7/10:  99%|▉| 986/991 [4:29:45<01:05, 13.11s/batch, batch_loss=0.0629, bat

Epoch 7/10: 100%|▉| 987/991 [4:29:45<00:52, 13.18s/batch, batch_loss=0.0629, bat

Epoch 7/10: 100%|▉| 987/991 [4:29:58<00:52, 13.18s/batch, batch_loss=0.0626, bat

Epoch 7/10: 100%|▉| 988/991 [4:29:58<00:39, 13.13s/batch, batch_loss=0.0626, bat

Epoch 7/10: 100%|▉| 988/991 [4:30:12<00:39, 13.13s/batch, batch_loss=0.0624, bat

Epoch 7/10: 100%|▉| 989/991 [4:30:12<00:26, 13.36s/batch, batch_loss=0.0624, bat

Epoch 7/10: 100%|▉| 989/991 [4:30:25<00:26, 13.36s/batch, batch_loss=0.0603, bat

Epoch 7/10: 100%|▉| 990/991 [4:30:25<00:13, 13.37s/batch, batch_loss=0.0603, bat

Epoch 7/10: 100%|▉| 990/991 [4:30:36<00:13, 13.37s/batch, batch_loss=0.0563, bat

Epoch 7/10: 100%|█| 991/991 [4:30:36<00:00, 12.72s/batch, batch_loss=0.0563, bat

Epoch 7/10: 100%|█| 991/991 [4:30:36<00:00, 16.38s/batch, batch_loss=0.0563, bat




Epoch 7, Loss: 987.5266


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:15<?, ?batch/s, batch_loss=19.9, batch_index=1, ba

Validation:   0%| | 1/743 [00:15<3:14:00, 15.69s/batch, batch_loss=19.9, batch_i

Validation:   0%| | 1/743 [00:29<3:14:00, 15.69s/batch, batch_loss=21.5, batch_i

Validation:   0%| | 2/743 [00:29<3:01:43, 14.72s/batch, batch_loss=21.5, batch_i

Validation:   0%| | 2/743 [00:44<3:01:43, 14.72s/batch, batch_loss=17.3, batch_i

Validation:   0%| | 3/743 [00:44<3:01:35, 14.72s/batch, batch_loss=17.3, batch_i

Validation:   0%| | 3/743 [00:59<3:01:35, 14.72s/batch, batch_loss=11, batch_ind

Validation:   1%| | 4/743 [00:59<3:02:11, 14.79s/batch, batch_loss=11, batch_ind

Validation:   1%| | 4/743 [01:16<3:02:11, 14.79s/batch, batch_loss=25.9, batch_i

Validation:   1%| | 5/743 [01:16<3:13:31, 15.73s/batch, batch_loss=25.9, batch_i

Validation:   1%| | 5/743 [01:31<3:13:31, 15.73s/batch, batch_loss=25.3, batch_i

Validation:   1%| | 6/743 [01:31<3:09:26, 15.42s/batch, batch_loss=25.3, batch_i

Validation:   1%| | 6/743 [01:46<3:09:26, 15.42s/batch, batch_loss=559, batch_in

Validation:   1%| | 7/743 [01:46<3:09:00, 15.41s/batch, batch_loss=559, batch_in

Validation:   1%| | 7/743 [02:02<3:09:00, 15.41s/batch, batch_loss=17.8, batch_i

Validation:   1%| | 8/743 [02:02<3:08:27, 15.38s/batch, batch_loss=17.8, batch_i

Validation:   1%| | 8/743 [02:17<3:08:27, 15.38s/batch, batch_loss=14.4, batch_i

Validation:   1%| | 9/743 [02:17<3:07:47, 15.35s/batch, batch_loss=14.4, batch_i

Validation:   1%| | 9/743 [02:32<3:07:47, 15.35s/batch, batch_loss=14.1, batch_i

Validation:   1%| | 10/743 [02:32<3:06:11, 15.24s/batch, batch_loss=14.1, batch_

Validation:   1%| | 10/743 [02:47<3:06:11, 15.24s/batch, batch_loss=11.8, batch_

Validation:   1%| | 11/743 [02:47<3:06:12, 15.26s/batch, batch_loss=11.8, batch_

Validation:   1%| | 11/743 [03:02<3:06:12, 15.26s/batch, batch_loss=2.18e+3, bat

Validation:   2%| | 12/743 [03:02<3:04:50, 15.17s/batch, batch_loss=2.18e+3, bat

Validation:   2%| | 12/743 [03:21<3:04:50, 15.17s/batch, batch_loss=17.4, batch_

Validation:   2%| | 13/743 [03:21<3:16:59, 16.19s/batch, batch_loss=17.4, batch_

Validation:   2%| | 13/743 [03:36<3:16:59, 16.19s/batch, batch_loss=12.1, batch_

Validation:   2%| | 14/743 [03:36<3:11:39, 15.78s/batch, batch_loss=12.1, batch_

Validation:   2%| | 14/743 [03:50<3:11:39, 15.78s/batch, batch_loss=18.9, batch_

Validation:   2%| | 15/743 [03:50<3:06:37, 15.38s/batch, batch_loss=18.9, batch_

Validation:   2%| | 15/743 [04:04<3:06:37, 15.38s/batch, batch_loss=17.2, batch_

Validation:   2%| | 16/743 [04:04<2:59:53, 14.85s/batch, batch_loss=17.2, batch_

Validation:   2%| | 16/743 [04:19<2:59:53, 14.85s/batch, batch_loss=12, batch_in

Validation:   2%| | 17/743 [04:19<3:01:11, 14.97s/batch, batch_loss=12, batch_in

Validation:   2%| | 17/743 [04:34<3:01:11, 14.97s/batch, batch_loss=4.55e+3, bat

Validation:   2%| | 18/743 [04:34<2:59:21, 14.84s/batch, batch_loss=4.55e+3, bat

Validation:   2%| | 18/743 [04:48<2:59:21, 14.84s/batch, batch_loss=13, batch_in

Validation:   3%| | 19/743 [04:48<2:56:18, 14.61s/batch, batch_loss=13, batch_in

Validation:   3%| | 19/743 [05:02<2:56:18, 14.61s/batch, batch_loss=16.3, batch_

Validation:   3%| | 20/743 [05:02<2:55:30, 14.56s/batch, batch_loss=16.3, batch_

Validation:   3%| | 20/743 [05:16<2:55:30, 14.56s/batch, batch_loss=967, batch_i

Validation:   3%| | 21/743 [05:16<2:53:53, 14.45s/batch, batch_loss=967, batch_i

Validation:   3%| | 21/743 [05:30<2:53:53, 14.45s/batch, batch_loss=14.1, batch_

Validation:   3%| | 22/743 [05:30<2:52:23, 14.35s/batch, batch_loss=14.1, batch_

Validation:   3%| | 22/743 [05:48<2:52:23, 14.35s/batch, batch_loss=7.51, batch_

Validation:   3%| | 23/743 [05:48<3:04:08, 15.34s/batch, batch_loss=7.51, batch_

Validation:   3%| | 23/743 [06:02<3:04:08, 15.34s/batch, batch_loss=16.8, batch_

Validation:   3%| | 24/743 [06:02<2:59:36, 14.99s/batch, batch_loss=16.8, batch_

Validation:   3%| | 24/743 [06:16<2:59:36, 14.99s/batch, batch_loss=14.3, batch_

Validation:   3%| | 25/743 [06:16<2:54:42, 14.60s/batch, batch_loss=14.3, batch_

Validation:   3%| | 25/743 [06:30<2:54:42, 14.60s/batch, batch_loss=23, batch_in

Validation:   3%| | 26/743 [06:30<2:53:43, 14.54s/batch, batch_loss=23, batch_in

Validation:   3%| | 26/743 [06:47<2:53:43, 14.54s/batch, batch_loss=1.63e+3, bat

Validation:   4%| | 27/743 [06:47<3:00:27, 15.12s/batch, batch_loss=1.63e+3, bat

Validation:   4%| | 27/743 [07:01<3:00:27, 15.12s/batch, batch_loss=15, batch_in

Validation:   4%| | 28/743 [07:01<2:56:22, 14.80s/batch, batch_loss=15, batch_in

Validation:   4%| | 28/743 [07:15<2:56:22, 14.80s/batch, batch_loss=14.4, batch_

Validation:   4%| | 29/743 [07:15<2:55:10, 14.72s/batch, batch_loss=14.4, batch_

Validation:   4%| | 29/743 [07:29<2:55:10, 14.72s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:29<2:49:52, 14.29s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:43<2:49:52, 14.29s/batch, batch_loss=19.5, batch_

Validation:   4%| | 31/743 [07:43<2:48:17, 14.18s/batch, batch_loss=19.5, batch_

Validation:   4%| | 31/743 [07:56<2:48:17, 14.18s/batch, batch_loss=17.4, batch_

Validation:   4%| | 32/743 [07:56<2:44:23, 13.87s/batch, batch_loss=17.4, batch_

Validation:   4%| | 32/743 [08:08<2:44:23, 13.87s/batch, batch_loss=16.6, batch_

Validation:   4%| | 33/743 [08:08<2:38:57, 13.43s/batch, batch_loss=16.6, batch_

Validation:   4%| | 33/743 [08:21<2:38:57, 13.43s/batch, batch_loss=17.6, batch_

Validation:   5%| | 34/743 [08:21<2:35:20, 13.15s/batch, batch_loss=17.6, batch_

Validation:   5%| | 34/743 [08:34<2:35:20, 13.15s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [08:34<2:37:34, 13.35s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [08:49<2:37:34, 13.35s/batch, batch_loss=15.3, batch_

Validation:   5%| | 36/743 [08:49<2:39:56, 13.57s/batch, batch_loss=15.3, batch_

Validation:   5%| | 36/743 [09:03<2:39:56, 13.57s/batch, batch_loss=164, batch_i

Validation:   5%| | 37/743 [09:03<2:41:31, 13.73s/batch, batch_loss=164, batch_i

Validation:   5%| | 37/743 [09:17<2:41:31, 13.73s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:17<2:43:12, 13.89s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:31<2:43:12, 13.89s/batch, batch_loss=14.4, batch_

Validation:   5%| | 39/743 [09:31<2:44:02, 13.98s/batch, batch_loss=14.4, batch_

Validation:   5%| | 39/743 [09:48<2:44:02, 13.98s/batch, batch_loss=20.9, batch_

Validation:   5%| | 40/743 [09:48<2:53:12, 14.78s/batch, batch_loss=20.9, batch_

Validation:   5%| | 40/743 [10:02<2:53:12, 14.78s/batch, batch_loss=15, batch_in

Validation:   6%| | 41/743 [10:02<2:51:34, 14.67s/batch, batch_loss=15, batch_in

Validation:   6%| | 41/743 [10:16<2:51:34, 14.67s/batch, batch_loss=15.5, batch_

Validation:   6%| | 42/743 [10:16<2:49:40, 14.52s/batch, batch_loss=15.5, batch_

Validation:   6%| | 42/743 [10:30<2:49:40, 14.52s/batch, batch_loss=10.9, batch_

Validation:   6%| | 43/743 [10:30<2:45:52, 14.22s/batch, batch_loss=10.9, batch_

Validation:   6%| | 43/743 [10:44<2:45:52, 14.22s/batch, batch_loss=14, batch_in

Validation:   6%| | 44/743 [10:44<2:44:04, 14.08s/batch, batch_loss=14, batch_in

Validation:   6%| | 44/743 [10:57<2:44:04, 14.08s/batch, batch_loss=21.6, batch_

Validation:   6%| | 45/743 [10:57<2:42:56, 14.01s/batch, batch_loss=21.6, batch_

Validation:   6%| | 45/743 [11:13<2:42:56, 14.01s/batch, batch_loss=8.93, batch_

Validation:   6%| | 46/743 [11:13<2:49:30, 14.59s/batch, batch_loss=8.93, batch_

Validation:   6%| | 46/743 [11:27<2:49:30, 14.59s/batch, batch_loss=17.1, batch_

Validation:   6%| | 47/743 [11:27<2:45:52, 14.30s/batch, batch_loss=17.1, batch_

Validation:   6%| | 47/743 [11:41<2:45:52, 14.30s/batch, batch_loss=19.7, batch_

Validation:   6%| | 48/743 [11:41<2:44:45, 14.22s/batch, batch_loss=19.7, batch_

Validation:   6%| | 48/743 [11:55<2:44:45, 14.22s/batch, batch_loss=17.5, batch_

Validation:   7%| | 49/743 [11:55<2:43:07, 14.10s/batch, batch_loss=17.5, batch_

Validation:   7%| | 49/743 [12:09<2:43:07, 14.10s/batch, batch_loss=14.5, batch_

Validation:   7%| | 50/743 [12:09<2:44:00, 14.20s/batch, batch_loss=14.5, batch_

Validation:   7%| | 50/743 [12:24<2:44:00, 14.20s/batch, batch_loss=16.1, batch_

Validation:   7%| | 51/743 [12:24<2:43:50, 14.21s/batch, batch_loss=16.1, batch_

Validation:   7%| | 51/743 [12:38<2:43:50, 14.21s/batch, batch_loss=19.4, batch_

Validation:   7%| | 52/743 [12:38<2:44:37, 14.29s/batch, batch_loss=19.4, batch_

Validation:   7%| | 52/743 [12:52<2:44:37, 14.29s/batch, batch_loss=23.9, batch_

Validation:   7%| | 53/743 [12:52<2:44:30, 14.30s/batch, batch_loss=23.9, batch_

Validation:   7%| | 53/743 [13:09<2:44:30, 14.30s/batch, batch_loss=13.4, batch_

Validation:   7%| | 54/743 [13:09<2:53:19, 15.09s/batch, batch_loss=13.4, batch_

Validation:   7%| | 54/743 [13:24<2:53:19, 15.09s/batch, batch_loss=24, batch_in

Validation:   7%| | 55/743 [13:24<2:52:35, 15.05s/batch, batch_loss=24, batch_in

Validation:   7%| | 55/743 [13:39<2:52:35, 15.05s/batch, batch_loss=19.3, batch_

Validation:   8%| | 56/743 [13:39<2:51:10, 14.95s/batch, batch_loss=19.3, batch_

Validation:   8%| | 56/743 [13:53<2:51:10, 14.95s/batch, batch_loss=12.4, batch_

Validation:   8%| | 57/743 [13:53<2:47:51, 14.68s/batch, batch_loss=12.4, batch_

Validation:   8%| | 57/743 [14:07<2:47:51, 14.68s/batch, batch_loss=19.9, batch_

Validation:   8%| | 58/743 [14:07<2:46:47, 14.61s/batch, batch_loss=19.9, batch_

Validation:   8%| | 58/743 [14:22<2:46:47, 14.61s/batch, batch_loss=108, batch_i

Validation:   8%| | 59/743 [14:22<2:44:45, 14.45s/batch, batch_loss=108, batch_i

Validation:   8%| | 59/743 [14:35<2:44:45, 14.45s/batch, batch_loss=6.13e+3, bat

Validation:   8%| | 60/743 [14:35<2:41:37, 14.20s/batch, batch_loss=6.13e+3, bat

Validation:   8%| | 60/743 [14:49<2:41:37, 14.20s/batch, batch_loss=8.38, batch_

Validation:   8%| | 61/743 [14:49<2:39:45, 14.05s/batch, batch_loss=8.38, batch_

Validation:   8%| | 61/743 [15:05<2:39:45, 14.05s/batch, batch_loss=10.1, batch_

Validation:   8%| | 62/743 [15:05<2:47:08, 14.73s/batch, batch_loss=10.1, batch_

Validation:   8%| | 62/743 [15:18<2:47:08, 14.73s/batch, batch_loss=23.7, batch_

Validation:   8%| | 63/743 [15:18<2:41:23, 14.24s/batch, batch_loss=23.7, batch_

Validation:   8%| | 63/743 [15:32<2:41:23, 14.24s/batch, batch_loss=12.5, batch_

Validation:   9%| | 64/743 [15:32<2:40:10, 14.15s/batch, batch_loss=12.5, batch_

Validation:   9%| | 64/743 [15:46<2:40:10, 14.15s/batch, batch_loss=17.1, batch_

Validation:   9%| | 65/743 [15:46<2:38:14, 14.00s/batch, batch_loss=17.1, batch_

Validation:   9%| | 65/743 [16:00<2:38:14, 14.00s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:00<2:39:06, 14.10s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:14<2:39:06, 14.10s/batch, batch_loss=16, batch_in

Validation:   9%| | 67/743 [16:14<2:38:21, 14.06s/batch, batch_loss=16, batch_in

Validation:   9%| | 67/743 [16:28<2:38:21, 14.06s/batch, batch_loss=16.3, batch_

Validation:   9%| | 68/743 [16:28<2:37:04, 13.96s/batch, batch_loss=16.3, batch_

Validation:   9%| | 68/743 [16:42<2:37:04, 13.96s/batch, batch_loss=10.8, batch_

Validation:   9%| | 69/743 [16:42<2:38:16, 14.09s/batch, batch_loss=10.8, batch_

Validation:   9%| | 69/743 [16:56<2:38:16, 14.09s/batch, batch_loss=14.5, batch_

Validation:   9%| | 70/743 [16:56<2:38:18, 14.11s/batch, batch_loss=14.5, batch_

Validation:   9%| | 70/743 [17:11<2:38:18, 14.11s/batch, batch_loss=8.89, batch_

Validation:  10%| | 71/743 [17:11<2:39:01, 14.20s/batch, batch_loss=8.89, batch_

Validation:  10%| | 71/743 [17:25<2:39:01, 14.20s/batch, batch_loss=15.3, batch_

Validation:  10%| | 72/743 [17:25<2:39:52, 14.30s/batch, batch_loss=15.3, batch_

Validation:  10%| | 72/743 [17:40<2:39:52, 14.30s/batch, batch_loss=17, batch_in

Validation:  10%| | 73/743 [17:40<2:40:46, 14.40s/batch, batch_loss=17, batch_in

Validation:  10%| | 73/743 [17:54<2:40:46, 14.40s/batch, batch_loss=20.8, batch_

Validation:  10%| | 74/743 [17:54<2:40:10, 14.37s/batch, batch_loss=20.8, batch_

Validation:  10%| | 74/743 [18:08<2:40:10, 14.37s/batch, batch_loss=12.3, batch_

Validation:  10%| | 75/743 [18:08<2:38:18, 14.22s/batch, batch_loss=12.3, batch_

Validation:  10%| | 75/743 [18:22<2:38:18, 14.22s/batch, batch_loss=15.4, batch_

Validation:  10%| | 76/743 [18:22<2:38:20, 14.24s/batch, batch_loss=15.4, batch_

Validation:  10%| | 76/743 [18:36<2:38:20, 14.24s/batch, batch_loss=12.5, batch_

Validation:  10%| | 77/743 [18:36<2:35:22, 14.00s/batch, batch_loss=12.5, batch_

Validation:  10%| | 77/743 [18:50<2:35:22, 14.00s/batch, batch_loss=17.2, batch_

Validation:  10%| | 78/743 [18:50<2:35:31, 14.03s/batch, batch_loss=17.2, batch_

Validation:  10%| | 78/743 [19:06<2:35:31, 14.03s/batch, batch_loss=11.5, batch_

Validation:  11%| | 79/743 [19:06<2:41:59, 14.64s/batch, batch_loss=11.5, batch_

Validation:  11%| | 79/743 [19:21<2:41:59, 14.64s/batch, batch_loss=7, batch_ind

Validation:  11%| | 80/743 [19:21<2:42:24, 14.70s/batch, batch_loss=7, batch_ind

Validation:  11%| | 80/743 [19:35<2:42:24, 14.70s/batch, batch_loss=149, batch_i

Validation:  11%| | 81/743 [19:35<2:41:44, 14.66s/batch, batch_loss=149, batch_i

Validation:  11%| | 81/743 [19:50<2:41:44, 14.66s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [19:50<2:39:42, 14.50s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [20:04<2:39:42, 14.50s/batch, batch_loss=33.8, batch_

Validation:  11%| | 83/743 [20:04<2:40:35, 14.60s/batch, batch_loss=33.8, batch_

Validation:  11%| | 83/743 [20:19<2:40:35, 14.60s/batch, batch_loss=18.7, batch_

Validation:  11%| | 84/743 [20:19<2:38:51, 14.46s/batch, batch_loss=18.7, batch_

Validation:  11%| | 84/743 [20:33<2:38:51, 14.46s/batch, batch_loss=23.9, batch_

Validation:  11%| | 85/743 [20:33<2:38:11, 14.42s/batch, batch_loss=23.9, batch_

Validation:  11%| | 85/743 [20:47<2:38:11, 14.42s/batch, batch_loss=26.7, batch_

Validation:  12%| | 86/743 [20:47<2:38:02, 14.43s/batch, batch_loss=26.7, batch_

Validation:  12%| | 86/743 [21:02<2:38:02, 14.43s/batch, batch_loss=35.2, batch_

Validation:  12%| | 87/743 [21:02<2:38:22, 14.49s/batch, batch_loss=35.2, batch_

Validation:  12%| | 87/743 [21:17<2:38:22, 14.49s/batch, batch_loss=26.9, batch_

Validation:  12%| | 88/743 [21:17<2:38:30, 14.52s/batch, batch_loss=26.9, batch_

Validation:  12%| | 88/743 [21:31<2:38:30, 14.52s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [21:31<2:38:53, 14.58s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [21:45<2:38:53, 14.58s/batch, batch_loss=5.69, batch_

Validation:  12%| | 90/743 [21:45<2:37:08, 14.44s/batch, batch_loss=5.69, batch_

Validation:  12%| | 90/743 [22:00<2:37:08, 14.44s/batch, batch_loss=34.4, batch_

Validation:  12%| | 91/743 [22:00<2:37:49, 14.52s/batch, batch_loss=34.4, batch_

Validation:  12%| | 91/743 [22:14<2:37:49, 14.52s/batch, batch_loss=35.2, batch_

Validation:  12%| | 92/743 [22:14<2:36:45, 14.45s/batch, batch_loss=35.2, batch_

Validation:  12%| | 92/743 [22:29<2:36:45, 14.45s/batch, batch_loss=28, batch_in

Validation:  13%|▏| 93/743 [22:29<2:35:47, 14.38s/batch, batch_loss=28, batch_in

Validation:  13%|▏| 93/743 [22:43<2:35:47, 14.38s/batch, batch_loss=31.2, batch_

Validation:  13%|▏| 94/743 [22:43<2:35:19, 14.36s/batch, batch_loss=31.2, batch_

Validation:  13%|▏| 94/743 [22:57<2:35:19, 14.36s/batch, batch_loss=14.5, batch_

Validation:  13%|▏| 95/743 [22:57<2:32:43, 14.14s/batch, batch_loss=14.5, batch_

Validation:  13%|▏| 95/743 [23:13<2:32:43, 14.14s/batch, batch_loss=21.4, batch_

Validation:  13%|▏| 96/743 [23:13<2:39:40, 14.81s/batch, batch_loss=21.4, batch_

Validation:  13%|▏| 96/743 [23:27<2:39:40, 14.81s/batch, batch_loss=32, batch_in

Validation:  13%|▏| 97/743 [23:27<2:37:39, 14.64s/batch, batch_loss=32, batch_in

Validation:  13%|▏| 97/743 [23:42<2:37:39, 14.64s/batch, batch_loss=18.8, batch_

Validation:  13%|▏| 98/743 [23:42<2:38:59, 14.79s/batch, batch_loss=18.8, batch_

Validation:  13%|▏| 98/743 [23:57<2:38:59, 14.79s/batch, batch_loss=22.9, batch_

Validation:  13%|▏| 99/743 [23:57<2:38:21, 14.75s/batch, batch_loss=22.9, batch_

Validation:  13%|▏| 99/743 [24:12<2:38:21, 14.75s/batch, batch_loss=13.2, batch_

Validation:  13%|▏| 100/743 [24:12<2:37:25, 14.69s/batch, batch_loss=13.2, batch

Validation:  13%|▏| 100/743 [24:27<2:37:25, 14.69s/batch, batch_loss=17.9, batch

Validation:  14%|▏| 101/743 [24:27<2:38:11, 14.78s/batch, batch_loss=17.9, batch

Validation:  14%|▏| 101/743 [24:41<2:38:11, 14.78s/batch, batch_loss=14.8, batch

Validation:  14%|▏| 102/743 [24:41<2:38:06, 14.80s/batch, batch_loss=14.8, batch

Validation:  14%|▏| 102/743 [24:55<2:38:06, 14.80s/batch, batch_loss=3.4e+3, bat

Validation:  14%|▏| 103/743 [24:55<2:34:13, 14.46s/batch, batch_loss=3.4e+3, bat

Validation:  14%|▏| 103/743 [25:10<2:34:13, 14.46s/batch, batch_loss=14.9, batch

Validation:  14%|▏| 104/743 [25:10<2:34:19, 14.49s/batch, batch_loss=14.9, batch

Validation:  14%|▏| 104/743 [25:23<2:34:19, 14.49s/batch, batch_loss=7.8, batch_

Validation:  14%|▏| 105/743 [25:23<2:30:07, 14.12s/batch, batch_loss=7.8, batch_

Validation:  14%|▏| 105/743 [25:40<2:30:07, 14.12s/batch, batch_loss=15.7, batch

Validation:  14%|▏| 106/743 [25:40<2:41:01, 15.17s/batch, batch_loss=15.7, batch

Validation:  14%|▏| 106/743 [25:55<2:41:01, 15.17s/batch, batch_loss=714, batch_

Validation:  14%|▏| 107/743 [25:55<2:38:27, 14.95s/batch, batch_loss=714, batch_

Validation:  14%|▏| 107/743 [26:09<2:38:27, 14.95s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [26:09<2:34:07, 14.56s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [26:23<2:34:07, 14.56s/batch, batch_loss=204, batch_

Validation:  15%|▏| 109/743 [26:23<2:32:20, 14.42s/batch, batch_loss=204, batch_

Validation:  15%|▏| 109/743 [26:37<2:32:20, 14.42s/batch, batch_loss=31.8, batch

Validation:  15%|▏| 110/743 [26:37<2:33:00, 14.50s/batch, batch_loss=31.8, batch

Validation:  15%|▏| 110/743 [26:52<2:33:00, 14.50s/batch, batch_loss=13.4, batch

Validation:  15%|▏| 111/743 [26:52<2:33:42, 14.59s/batch, batch_loss=13.4, batch

Validation:  15%|▏| 111/743 [27:08<2:33:42, 14.59s/batch, batch_loss=30.2, batch

Validation:  15%|▏| 112/743 [27:08<2:36:06, 14.84s/batch, batch_loss=30.2, batch

Validation:  15%|▏| 112/743 [27:22<2:36:06, 14.84s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [27:22<2:35:19, 14.79s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [27:36<2:35:19, 14.79s/batch, batch_loss=19.5, batch

Validation:  15%|▏| 114/743 [27:36<2:31:05, 14.41s/batch, batch_loss=19.5, batch

Validation:  15%|▏| 114/743 [27:50<2:31:05, 14.41s/batch, batch_loss=21.7, batch

Validation:  15%|▏| 115/743 [27:50<2:31:35, 14.48s/batch, batch_loss=21.7, batch

Validation:  15%|▏| 115/743 [28:05<2:31:35, 14.48s/batch, batch_loss=17.3, batch

Validation:  16%|▏| 116/743 [28:05<2:31:12, 14.47s/batch, batch_loss=17.3, batch

Validation:  16%|▏| 116/743 [28:19<2:31:12, 14.47s/batch, batch_loss=30, batch_i

Validation:  16%|▏| 117/743 [28:19<2:28:54, 14.27s/batch, batch_loss=30, batch_i

Validation:  16%|▏| 117/743 [28:33<2:28:54, 14.27s/batch, batch_loss=24.8, batch

Validation:  16%|▏| 118/743 [28:33<2:29:25, 14.34s/batch, batch_loss=24.8, batch

Validation:  16%|▏| 118/743 [28:47<2:29:25, 14.34s/batch, batch_loss=16.7, batch

Validation:  16%|▏| 119/743 [28:47<2:27:45, 14.21s/batch, batch_loss=16.7, batch

Validation:  16%|▏| 119/743 [29:02<2:27:45, 14.21s/batch, batch_loss=22.6, batch

Validation:  16%|▏| 120/743 [29:02<2:29:05, 14.36s/batch, batch_loss=22.6, batch

Validation:  16%|▏| 120/743 [29:16<2:29:05, 14.36s/batch, batch_loss=13.3, batch

Validation:  16%|▏| 121/743 [29:16<2:29:14, 14.40s/batch, batch_loss=13.3, batch

Validation:  16%|▏| 121/743 [29:31<2:29:14, 14.40s/batch, batch_loss=5.55, batch

Validation:  16%|▏| 122/743 [29:31<2:29:30, 14.45s/batch, batch_loss=5.55, batch

Validation:  16%|▏| 122/743 [29:45<2:29:30, 14.45s/batch, batch_loss=9.11, batch

Validation:  17%|▏| 123/743 [29:45<2:28:45, 14.40s/batch, batch_loss=9.11, batch

Validation:  17%|▏| 123/743 [29:59<2:28:45, 14.40s/batch, batch_loss=10.8, batch

Validation:  17%|▏| 124/743 [29:59<2:27:41, 14.32s/batch, batch_loss=10.8, batch

Validation:  17%|▏| 124/743 [30:13<2:27:41, 14.32s/batch, batch_loss=33.1, batch

Validation:  17%|▏| 125/743 [30:13<2:25:27, 14.12s/batch, batch_loss=33.1, batch

Validation:  17%|▏| 125/743 [30:26<2:25:27, 14.12s/batch, batch_loss=14.9, batch

Validation:  17%|▏| 126/743 [30:26<2:23:31, 13.96s/batch, batch_loss=14.9, batch

Validation:  17%|▏| 126/743 [30:40<2:23:31, 13.96s/batch, batch_loss=13, batch_i

Validation:  17%|▏| 127/743 [30:40<2:23:27, 13.97s/batch, batch_loss=13, batch_i

Validation:  17%|▏| 127/743 [30:55<2:23:27, 13.97s/batch, batch_loss=22.2, batch

Validation:  17%|▏| 128/743 [30:55<2:23:51, 14.04s/batch, batch_loss=22.2, batch

Validation:  17%|▏| 128/743 [31:09<2:23:51, 14.04s/batch, batch_loss=14.3, batch

Validation:  17%|▏| 129/743 [31:09<2:24:20, 14.11s/batch, batch_loss=14.3, batch

Validation:  17%|▏| 129/743 [31:23<2:24:20, 14.11s/batch, batch_loss=21.7, batch

Validation:  17%|▏| 130/743 [31:23<2:23:24, 14.04s/batch, batch_loss=21.7, batch

Validation:  17%|▏| 130/743 [31:37<2:23:24, 14.04s/batch, batch_loss=27.8, batch

Validation:  18%|▏| 131/743 [31:37<2:24:39, 14.18s/batch, batch_loss=27.8, batch

Validation:  18%|▏| 131/743 [31:52<2:24:39, 14.18s/batch, batch_loss=23.6, batch

Validation:  18%|▏| 132/743 [31:52<2:24:37, 14.20s/batch, batch_loss=23.6, batch

Validation:  18%|▏| 132/743 [32:06<2:24:37, 14.20s/batch, batch_loss=40, batch_i

Validation:  18%|▏| 133/743 [32:06<2:24:50, 14.25s/batch, batch_loss=40, batch_i

Validation:  18%|▏| 133/743 [32:20<2:24:50, 14.25s/batch, batch_loss=25.6, batch

Validation:  18%|▏| 134/743 [32:20<2:24:15, 14.21s/batch, batch_loss=25.6, batch

Validation:  18%|▏| 134/743 [32:34<2:24:15, 14.21s/batch, batch_loss=34.7, batch

Validation:  18%|▏| 135/743 [32:34<2:22:01, 14.02s/batch, batch_loss=34.7, batch

Validation:  18%|▏| 135/743 [32:48<2:22:01, 14.02s/batch, batch_loss=18.5, batch

Validation:  18%|▏| 136/743 [32:48<2:22:10, 14.05s/batch, batch_loss=18.5, batch

Validation:  18%|▏| 136/743 [33:02<2:22:10, 14.05s/batch, batch_loss=25.4, batch

Validation:  18%|▏| 137/743 [33:02<2:23:45, 14.23s/batch, batch_loss=25.4, batch

Validation:  18%|▏| 137/743 [33:16<2:23:45, 14.23s/batch, batch_loss=7.75, batch

Validation:  19%|▏| 138/743 [33:16<2:22:18, 14.11s/batch, batch_loss=7.75, batch

Validation:  19%|▏| 138/743 [33:30<2:22:18, 14.11s/batch, batch_loss=253, batch_

Validation:  19%|▏| 139/743 [33:30<2:21:38, 14.07s/batch, batch_loss=253, batch_

Validation:  19%|▏| 139/743 [33:45<2:21:38, 14.07s/batch, batch_loss=19.9, batch

Validation:  19%|▏| 140/743 [33:45<2:22:04, 14.14s/batch, batch_loss=19.9, batch

Validation:  19%|▏| 140/743 [33:58<2:22:04, 14.14s/batch, batch_loss=13.3, batch

Validation:  19%|▏| 141/743 [33:58<2:21:13, 14.08s/batch, batch_loss=13.3, batch

Validation:  19%|▏| 141/743 [34:12<2:21:13, 14.08s/batch, batch_loss=14.9, batch

Validation:  19%|▏| 142/743 [34:12<2:20:17, 14.01s/batch, batch_loss=14.9, batch

Validation:  19%|▏| 142/743 [34:26<2:20:17, 14.01s/batch, batch_loss=14.7, batch

Validation:  19%|▏| 143/743 [34:26<2:18:58, 13.90s/batch, batch_loss=14.7, batch

Validation:  19%|▏| 143/743 [34:41<2:18:58, 13.90s/batch, batch_loss=22.6, batch

Validation:  19%|▏| 144/743 [34:41<2:20:54, 14.11s/batch, batch_loss=22.6, batch

Validation:  19%|▏| 144/743 [34:55<2:20:54, 14.11s/batch, batch_loss=14.2, batch

Validation:  20%|▏| 145/743 [34:55<2:20:58, 14.14s/batch, batch_loss=14.2, batch

Validation:  20%|▏| 145/743 [35:08<2:20:58, 14.14s/batch, batch_loss=16.5, batch

Validation:  20%|▏| 146/743 [35:08<2:19:25, 14.01s/batch, batch_loss=16.5, batch

Validation:  20%|▏| 146/743 [35:23<2:19:25, 14.01s/batch, batch_loss=17.8, batch

Validation:  20%|▏| 147/743 [35:23<2:19:20, 14.03s/batch, batch_loss=17.8, batch

Validation:  20%|▏| 147/743 [35:37<2:19:20, 14.03s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [35:37<2:19:42, 14.09s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [35:51<2:19:42, 14.09s/batch, batch_loss=27, batch_i

Validation:  20%|▏| 149/743 [35:51<2:19:15, 14.07s/batch, batch_loss=27, batch_i

Validation:  20%|▏| 149/743 [36:04<2:19:15, 14.07s/batch, batch_loss=29.4, batch

Validation:  20%|▏| 150/743 [36:04<2:17:43, 13.94s/batch, batch_loss=29.4, batch

Validation:  20%|▏| 150/743 [36:18<2:17:43, 13.94s/batch, batch_loss=15.3, batch

Validation:  20%|▏| 151/743 [36:18<2:17:58, 13.98s/batch, batch_loss=15.3, batch

Validation:  20%|▏| 151/743 [36:33<2:17:58, 13.98s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [36:33<2:18:26, 14.05s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [36:47<2:18:26, 14.05s/batch, batch_loss=18.1, batch

Validation:  21%|▏| 153/743 [36:47<2:19:29, 14.19s/batch, batch_loss=18.1, batch

Validation:  21%|▏| 153/743 [37:01<2:19:29, 14.19s/batch, batch_loss=14.9, batch

Validation:  21%|▏| 154/743 [37:01<2:18:57, 14.16s/batch, batch_loss=14.9, batch

Validation:  21%|▏| 154/743 [37:18<2:18:57, 14.16s/batch, batch_loss=21.2, batch

Validation:  21%|▏| 155/743 [37:18<2:25:12, 14.82s/batch, batch_loss=21.2, batch

Validation:  21%|▏| 155/743 [37:31<2:25:12, 14.82s/batch, batch_loss=17.7, batch

Validation:  21%|▏| 156/743 [37:31<2:21:59, 14.51s/batch, batch_loss=17.7, batch

Validation:  21%|▏| 156/743 [37:46<2:21:59, 14.51s/batch, batch_loss=18.8, batch

Validation:  21%|▏| 157/743 [37:46<2:21:20, 14.47s/batch, batch_loss=18.8, batch

Validation:  21%|▏| 157/743 [37:59<2:21:20, 14.47s/batch, batch_loss=21.4, batch

Validation:  21%|▏| 158/743 [37:59<2:16:05, 13.96s/batch, batch_loss=21.4, batch

Validation:  21%|▏| 158/743 [38:11<2:16:05, 13.96s/batch, batch_loss=25.3, batch

Validation:  21%|▏| 159/743 [38:11<2:10:55, 13.45s/batch, batch_loss=25.3, batch

Validation:  21%|▏| 159/743 [38:24<2:10:55, 13.45s/batch, batch_loss=15.1, batch

Validation:  22%|▏| 160/743 [38:24<2:08:52, 13.26s/batch, batch_loss=15.1, batch

Validation:  22%|▏| 160/743 [38:38<2:08:52, 13.26s/batch, batch_loss=19.1, batch

Validation:  22%|▏| 161/743 [38:38<2:10:53, 13.49s/batch, batch_loss=19.1, batch

Validation:  22%|▏| 161/743 [38:55<2:10:53, 13.49s/batch, batch_loss=22, batch_i

Validation:  22%|▏| 162/743 [38:55<2:21:01, 14.56s/batch, batch_loss=22, batch_i

Validation:  22%|▏| 162/743 [39:08<2:21:01, 14.56s/batch, batch_loss=13.1, batch

Validation:  22%|▏| 163/743 [39:08<2:17:57, 14.27s/batch, batch_loss=13.1, batch

Validation:  22%|▏| 163/743 [39:22<2:17:57, 14.27s/batch, batch_loss=10.7, batch

Validation:  22%|▏| 164/743 [39:22<2:16:43, 14.17s/batch, batch_loss=10.7, batch

Validation:  22%|▏| 164/743 [39:36<2:16:43, 14.17s/batch, batch_loss=14.7, batch

Validation:  22%|▏| 165/743 [39:36<2:14:48, 13.99s/batch, batch_loss=14.7, batch

Validation:  22%|▏| 165/743 [39:51<2:14:48, 13.99s/batch, batch_loss=11.5, batch

Validation:  22%|▏| 166/743 [39:51<2:16:47, 14.22s/batch, batch_loss=11.5, batch

Validation:  22%|▏| 166/743 [40:05<2:16:47, 14.22s/batch, batch_loss=12.8, batch

Validation:  22%|▏| 167/743 [40:05<2:17:35, 14.33s/batch, batch_loss=12.8, batch

Validation:  22%|▏| 167/743 [40:20<2:17:35, 14.33s/batch, batch_loss=21.9, batch

Validation:  23%|▏| 168/743 [40:20<2:17:56, 14.39s/batch, batch_loss=21.9, batch

Validation:  23%|▏| 168/743 [40:34<2:17:56, 14.39s/batch, batch_loss=21.8, batch

Validation:  23%|▏| 169/743 [40:34<2:17:47, 14.40s/batch, batch_loss=21.8, batch

Validation:  23%|▏| 169/743 [40:50<2:17:47, 14.40s/batch, batch_loss=22.3, batch

Validation:  23%|▏| 170/743 [40:50<2:22:23, 14.91s/batch, batch_loss=22.3, batch

Validation:  23%|▏| 170/743 [41:05<2:22:23, 14.91s/batch, batch_loss=22, batch_i

Validation:  23%|▏| 171/743 [41:05<2:20:52, 14.78s/batch, batch_loss=22, batch_i

Validation:  23%|▏| 171/743 [41:20<2:20:52, 14.78s/batch, batch_loss=20, batch_i

Validation:  23%|▏| 172/743 [41:20<2:20:46, 14.79s/batch, batch_loss=20, batch_i

Validation:  23%|▏| 172/743 [41:34<2:20:46, 14.79s/batch, batch_loss=20.8, batch

Validation:  23%|▏| 173/743 [41:34<2:20:39, 14.81s/batch, batch_loss=20.8, batch

Validation:  23%|▏| 173/743 [41:49<2:20:39, 14.81s/batch, batch_loss=18.3, batch

Validation:  23%|▏| 174/743 [41:49<2:19:46, 14.74s/batch, batch_loss=18.3, batch

Validation:  23%|▏| 174/743 [42:04<2:19:46, 14.74s/batch, batch_loss=23.5, batch

Validation:  24%|▏| 175/743 [42:04<2:19:33, 14.74s/batch, batch_loss=23.5, batch

Validation:  24%|▏| 175/743 [42:18<2:19:33, 14.74s/batch, batch_loss=15.9, batch

Validation:  24%|▏| 176/743 [42:18<2:17:51, 14.59s/batch, batch_loss=15.9, batch

Validation:  24%|▏| 176/743 [42:32<2:17:51, 14.59s/batch, batch_loss=19.3, batch

Validation:  24%|▏| 177/743 [42:32<2:14:55, 14.30s/batch, batch_loss=19.3, batch

Validation:  24%|▏| 177/743 [42:48<2:14:55, 14.30s/batch, batch_loss=27.1, batch

Validation:  24%|▏| 178/743 [42:48<2:20:35, 14.93s/batch, batch_loss=27.1, batch

Validation:  24%|▏| 178/743 [43:03<2:20:35, 14.93s/batch, batch_loss=22.2, batch

Validation:  24%|▏| 179/743 [43:03<2:20:57, 14.99s/batch, batch_loss=22.2, batch

Validation:  24%|▏| 179/743 [43:18<2:20:57, 14.99s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [43:18<2:19:27, 14.86s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [43:32<2:19:27, 14.86s/batch, batch_loss=21.3, batch

Validation:  24%|▏| 181/743 [43:32<2:17:44, 14.71s/batch, batch_loss=21.3, batch

Validation:  24%|▏| 181/743 [43:45<2:17:44, 14.71s/batch, batch_loss=21.1, batch

Validation:  24%|▏| 182/743 [43:45<2:13:30, 14.28s/batch, batch_loss=21.1, batch

Validation:  24%|▏| 182/743 [44:00<2:13:30, 14.28s/batch, batch_loss=19.9, batch

Validation:  25%|▏| 183/743 [44:00<2:13:15, 14.28s/batch, batch_loss=19.9, batch

Validation:  25%|▏| 183/743 [44:14<2:13:15, 14.28s/batch, batch_loss=11.4, batch

Validation:  25%|▏| 184/743 [44:14<2:13:57, 14.38s/batch, batch_loss=11.4, batch

Validation:  25%|▏| 184/743 [44:28<2:13:57, 14.38s/batch, batch_loss=20.2, batch

Validation:  25%|▏| 185/743 [44:28<2:13:18, 14.33s/batch, batch_loss=20.2, batch

Validation:  25%|▏| 185/743 [44:43<2:13:18, 14.33s/batch, batch_loss=26.6, batch

Validation:  25%|▎| 186/743 [44:43<2:14:15, 14.46s/batch, batch_loss=26.6, batch

Validation:  25%|▎| 186/743 [45:00<2:14:15, 14.46s/batch, batch_loss=33.1, batch

Validation:  25%|▎| 187/743 [45:00<2:19:11, 15.02s/batch, batch_loss=33.1, batch

Validation:  25%|▎| 187/743 [45:13<2:19:11, 15.02s/batch, batch_loss=15.6, batch

Validation:  25%|▎| 188/743 [45:13<2:14:43, 14.56s/batch, batch_loss=15.6, batch

Validation:  25%|▎| 188/743 [45:27<2:14:43, 14.56s/batch, batch_loss=17.7, batch

Validation:  25%|▎| 189/743 [45:27<2:12:01, 14.30s/batch, batch_loss=17.7, batch

Validation:  25%|▎| 189/743 [45:41<2:12:01, 14.30s/batch, batch_loss=976, batch_

Validation:  26%|▎| 190/743 [45:41<2:12:27, 14.37s/batch, batch_loss=976, batch_

Validation:  26%|▎| 190/743 [45:56<2:12:27, 14.37s/batch, batch_loss=25.1, batch

Validation:  26%|▎| 191/743 [45:56<2:12:46, 14.43s/batch, batch_loss=25.1, batch

Validation:  26%|▎| 191/743 [46:10<2:12:46, 14.43s/batch, batch_loss=13.9, batch

Validation:  26%|▎| 192/743 [46:10<2:11:29, 14.32s/batch, batch_loss=13.9, batch

Validation:  26%|▎| 192/743 [46:24<2:11:29, 14.32s/batch, batch_loss=18.8, batch

Validation:  26%|▎| 193/743 [46:24<2:11:20, 14.33s/batch, batch_loss=18.8, batch

Validation:  26%|▎| 193/743 [46:39<2:11:20, 14.33s/batch, batch_loss=19.1, batch

Validation:  26%|▎| 194/743 [46:39<2:13:30, 14.59s/batch, batch_loss=19.1, batch

Validation:  26%|▎| 194/743 [46:55<2:13:30, 14.59s/batch, batch_loss=11.7, batch

Validation:  26%|▎| 195/743 [46:55<2:15:12, 14.80s/batch, batch_loss=11.7, batch

Validation:  26%|▎| 195/743 [47:09<2:15:12, 14.80s/batch, batch_loss=17.4, batch

Validation:  26%|▎| 196/743 [47:09<2:12:49, 14.57s/batch, batch_loss=17.4, batch

Validation:  26%|▎| 196/743 [47:23<2:12:49, 14.57s/batch, batch_loss=8.85, batch

Validation:  27%|▎| 197/743 [47:23<2:11:28, 14.45s/batch, batch_loss=8.85, batch

Validation:  27%|▎| 197/743 [47:37<2:11:28, 14.45s/batch, batch_loss=19.5, batch

Validation:  27%|▎| 198/743 [47:37<2:10:49, 14.40s/batch, batch_loss=19.5, batch

Validation:  27%|▎| 198/743 [47:51<2:10:49, 14.40s/batch, batch_loss=18.6, batch

Validation:  27%|▎| 199/743 [47:51<2:09:54, 14.33s/batch, batch_loss=18.6, batch

Validation:  27%|▎| 199/743 [48:06<2:09:54, 14.33s/batch, batch_loss=279, batch_

Validation:  27%|▎| 200/743 [48:06<2:09:27, 14.30s/batch, batch_loss=279, batch_

Validation:  27%|▎| 200/743 [48:19<2:09:27, 14.30s/batch, batch_loss=40.3, batch

Validation:  27%|▎| 201/743 [48:19<2:06:15, 13.98s/batch, batch_loss=40.3, batch

Validation:  27%|▎| 201/743 [48:33<2:06:15, 13.98s/batch, batch_loss=26.9, batch

Validation:  27%|▎| 202/743 [48:33<2:06:04, 13.98s/batch, batch_loss=26.9, batch

Validation:  27%|▎| 202/743 [48:47<2:06:04, 13.98s/batch, batch_loss=20.1, batch

Validation:  27%|▎| 203/743 [48:47<2:06:02, 14.00s/batch, batch_loss=20.1, batch

Validation:  27%|▎| 203/743 [49:01<2:06:02, 14.00s/batch, batch_loss=18.4, batch

Validation:  27%|▎| 204/743 [49:01<2:06:48, 14.12s/batch, batch_loss=18.4, batch

Validation:  27%|▎| 204/743 [49:15<2:06:48, 14.12s/batch, batch_loss=21.4, batch

Validation:  28%|▎| 205/743 [49:15<2:06:22, 14.09s/batch, batch_loss=21.4, batch

Validation:  28%|▎| 205/743 [49:30<2:06:22, 14.09s/batch, batch_loss=14.4, batch

Validation:  28%|▎| 206/743 [49:30<2:07:02, 14.20s/batch, batch_loss=14.4, batch

Validation:  28%|▎| 206/743 [49:45<2:07:02, 14.20s/batch, batch_loss=19.4, batch

Validation:  28%|▎| 207/743 [49:45<2:09:41, 14.52s/batch, batch_loss=19.4, batch

Validation:  28%|▎| 207/743 [49:59<2:09:41, 14.52s/batch, batch_loss=19.7, batch

Validation:  28%|▎| 208/743 [49:59<2:09:03, 14.47s/batch, batch_loss=19.7, batch

Validation:  28%|▎| 208/743 [50:13<2:09:03, 14.47s/batch, batch_loss=8.82, batch

Validation:  28%|▎| 209/743 [50:13<2:07:28, 14.32s/batch, batch_loss=8.82, batch

Validation:  28%|▎| 209/743 [50:28<2:07:28, 14.32s/batch, batch_loss=10.7, batch

Validation:  28%|▎| 210/743 [50:28<2:07:14, 14.32s/batch, batch_loss=10.7, batch

Validation:  28%|▎| 210/743 [50:42<2:07:14, 14.32s/batch, batch_loss=15.4, batch

Validation:  28%|▎| 211/743 [50:42<2:07:01, 14.33s/batch, batch_loss=15.4, batch

Validation:  28%|▎| 211/743 [50:58<2:07:01, 14.33s/batch, batch_loss=16.2, batch

Validation:  29%|▎| 212/743 [50:58<2:12:04, 14.92s/batch, batch_loss=16.2, batch

Validation:  29%|▎| 212/743 [51:13<2:12:04, 14.92s/batch, batch_loss=538, batch_

Validation:  29%|▎| 213/743 [51:13<2:10:28, 14.77s/batch, batch_loss=538, batch_

Validation:  29%|▎| 213/743 [51:27<2:10:28, 14.77s/batch, batch_loss=11.9, batch

Validation:  29%|▎| 214/743 [51:27<2:08:23, 14.56s/batch, batch_loss=11.9, batch

Validation:  29%|▎| 214/743 [51:41<2:08:23, 14.56s/batch, batch_loss=19.3, batch

Validation:  29%|▎| 215/743 [51:41<2:06:24, 14.36s/batch, batch_loss=19.3, batch

Validation:  29%|▎| 215/743 [51:54<2:06:24, 14.36s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [51:54<2:04:30, 14.18s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [52:09<2:04:30, 14.18s/batch, batch_loss=24.1, batch

Validation:  29%|▎| 217/743 [52:09<2:04:23, 14.19s/batch, batch_loss=24.1, batch

Validation:  29%|▎| 217/743 [52:22<2:04:23, 14.19s/batch, batch_loss=13.4, batch

Validation:  29%|▎| 218/743 [52:22<2:03:09, 14.08s/batch, batch_loss=13.4, batch

Validation:  29%|▎| 218/743 [52:37<2:03:09, 14.08s/batch, batch_loss=28.6, batch

Validation:  29%|▎| 219/743 [52:37<2:03:49, 14.18s/batch, batch_loss=28.6, batch

Validation:  29%|▎| 219/743 [52:52<2:03:49, 14.18s/batch, batch_loss=34.1, batch

Validation:  30%|▎| 220/743 [52:52<2:04:50, 14.32s/batch, batch_loss=34.1, batch

Validation:  30%|▎| 220/743 [53:06<2:04:50, 14.32s/batch, batch_loss=18.1, batch

Validation:  30%|▎| 221/743 [53:06<2:05:28, 14.42s/batch, batch_loss=18.1, batch

Validation:  30%|▎| 221/743 [53:22<2:05:28, 14.42s/batch, batch_loss=12, batch_i

Validation:  30%|▎| 222/743 [53:22<2:09:45, 14.94s/batch, batch_loss=12, batch_i

Validation:  30%|▎| 222/743 [53:37<2:09:45, 14.94s/batch, batch_loss=11.5, batch

Validation:  30%|▎| 223/743 [53:37<2:08:26, 14.82s/batch, batch_loss=11.5, batch

Validation:  30%|▎| 223/743 [53:51<2:08:26, 14.82s/batch, batch_loss=11.8, batch

Validation:  30%|▎| 224/743 [53:51<2:06:58, 14.68s/batch, batch_loss=11.8, batch

Validation:  30%|▎| 224/743 [54:05<2:06:58, 14.68s/batch, batch_loss=4.93e+3, ba

Validation:  30%|▎| 225/743 [54:05<2:05:32, 14.54s/batch, batch_loss=4.93e+3, ba

Validation:  30%|▎| 225/743 [54:20<2:05:32, 14.54s/batch, batch_loss=18.7, batch

Validation:  30%|▎| 226/743 [54:20<2:04:02, 14.40s/batch, batch_loss=18.7, batch

Validation:  30%|▎| 226/743 [54:34<2:04:02, 14.40s/batch, batch_loss=18.4, batch

Validation:  31%|▎| 227/743 [54:34<2:03:03, 14.31s/batch, batch_loss=18.4, batch

Validation:  31%|▎| 227/743 [54:47<2:03:03, 14.31s/batch, batch_loss=19.1, batch

Validation:  31%|▎| 228/743 [54:47<2:01:29, 14.15s/batch, batch_loss=19.1, batch

Validation:  31%|▎| 228/743 [55:02<2:01:29, 14.15s/batch, batch_loss=19.8, batch

Validation:  31%|▎| 229/743 [55:02<2:02:01, 14.24s/batch, batch_loss=19.8, batch

Validation:  31%|▎| 229/743 [55:16<2:02:01, 14.24s/batch, batch_loss=22.4, batch

Validation:  31%|▎| 230/743 [55:16<2:01:12, 14.18s/batch, batch_loss=22.4, batch

Validation:  31%|▎| 230/743 [55:30<2:01:12, 14.18s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [55:30<2:01:46, 14.27s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [55:44<2:01:46, 14.27s/batch, batch_loss=18.7, batch

Validation:  31%|▎| 232/743 [55:44<2:00:53, 14.20s/batch, batch_loss=18.7, batch

Validation:  31%|▎| 232/743 [55:59<2:00:53, 14.20s/batch, batch_loss=9.8, batch_

Validation:  31%|▎| 233/743 [55:59<2:00:56, 14.23s/batch, batch_loss=9.8, batch_

Validation:  31%|▎| 233/743 [56:13<2:00:56, 14.23s/batch, batch_loss=14.3, batch

Validation:  31%|▎| 234/743 [56:13<2:00:18, 14.18s/batch, batch_loss=14.3, batch

Validation:  31%|▎| 234/743 [56:27<2:00:18, 14.18s/batch, batch_loss=16.2, batch

Validation:  32%|▎| 235/743 [56:27<1:59:30, 14.12s/batch, batch_loss=16.2, batch

Validation:  32%|▎| 235/743 [56:41<1:59:30, 14.12s/batch, batch_loss=2.75, batch

Validation:  32%|▎| 236/743 [56:41<2:00:05, 14.21s/batch, batch_loss=2.75, batch

Validation:  32%|▎| 236/743 [56:56<2:00:05, 14.21s/batch, batch_loss=20.7, batch

Validation:  32%|▎| 237/743 [56:56<2:00:22, 14.27s/batch, batch_loss=20.7, batch

Validation:  32%|▎| 237/743 [57:09<2:00:22, 14.27s/batch, batch_loss=15, batch_i

Validation:  32%|▎| 238/743 [57:09<1:58:55, 14.13s/batch, batch_loss=15, batch_i

Validation:  32%|▎| 238/743 [57:23<1:58:55, 14.13s/batch, batch_loss=4.49e+3, ba

Validation:  32%|▎| 239/743 [57:23<1:57:13, 13.96s/batch, batch_loss=4.49e+3, ba

Validation:  32%|▎| 239/743 [57:37<1:57:13, 13.96s/batch, batch_loss=20.5, batch

Validation:  32%|▎| 240/743 [57:37<1:57:18, 13.99s/batch, batch_loss=20.5, batch

Validation:  32%|▎| 240/743 [57:52<1:57:18, 13.99s/batch, batch_loss=18.6, batch

Validation:  32%|▎| 241/743 [57:52<1:58:42, 14.19s/batch, batch_loss=18.6, batch

Validation:  32%|▎| 241/743 [58:06<1:58:42, 14.19s/batch, batch_loss=237, batch_

Validation:  33%|▎| 242/743 [58:06<1:58:26, 14.19s/batch, batch_loss=237, batch_

Validation:  33%|▎| 242/743 [58:20<1:58:26, 14.19s/batch, batch_loss=9.92, batch

Validation:  33%|▎| 243/743 [58:20<1:57:54, 14.15s/batch, batch_loss=9.92, batch

Validation:  33%|▎| 243/743 [58:34<1:57:54, 14.15s/batch, batch_loss=14.6, batch

Validation:  33%|▎| 244/743 [58:34<1:57:34, 14.14s/batch, batch_loss=14.6, batch

Validation:  33%|▎| 244/743 [58:48<1:57:34, 14.14s/batch, batch_loss=22.8, batch

Validation:  33%|▎| 245/743 [58:48<1:57:02, 14.10s/batch, batch_loss=22.8, batch

Validation:  33%|▎| 245/743 [59:01<1:57:02, 14.10s/batch, batch_loss=6.64, batch

Validation:  33%|▎| 246/743 [59:01<1:54:47, 13.86s/batch, batch_loss=6.64, batch

Validation:  33%|▎| 246/743 [59:16<1:54:47, 13.86s/batch, batch_loss=17.5, batch

Validation:  33%|▎| 247/743 [59:16<1:56:09, 14.05s/batch, batch_loss=17.5, batch

Validation:  33%|▎| 247/743 [59:32<1:56:09, 14.05s/batch, batch_loss=42.6, batch

Validation:  33%|▎| 248/743 [59:32<2:02:04, 14.80s/batch, batch_loss=42.6, batch

Validation:  33%|▎| 248/743 [59:47<2:02:04, 14.80s/batch, batch_loss=13.3, batch

Validation:  34%|▎| 249/743 [59:47<2:00:27, 14.63s/batch, batch_loss=13.3, batch

Validation:  34%|▎| 249/743 [1:00:00<2:00:27, 14.63s/batch, batch_loss=23.8, bat

Validation:  34%|▎| 250/743 [1:00:00<1:57:20, 14.28s/batch, batch_loss=23.8, bat

Validation:  34%|▎| 250/743 [1:00:15<1:57:20, 14.28s/batch, batch_loss=20.7, bat

Validation:  34%|▎| 251/743 [1:00:15<1:57:37, 14.34s/batch, batch_loss=20.7, bat

Validation:  34%|▎| 251/743 [1:00:29<1:57:37, 14.34s/batch, batch_loss=21.1, bat

Validation:  34%|▎| 252/743 [1:00:29<1:56:33, 14.24s/batch, batch_loss=21.1, bat

Validation:  34%|▎| 252/743 [1:00:42<1:56:33, 14.24s/batch, batch_loss=20.3, bat

Validation:  34%|▎| 253/743 [1:00:42<1:53:59, 13.96s/batch, batch_loss=20.3, bat

Validation:  34%|▎| 253/743 [1:00:56<1:53:59, 13.96s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:00:56<1:53:02, 13.87s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:01:12<1:53:02, 13.87s/batch, batch_loss=2.45e+3, 

Validation:  34%|▎| 255/743 [1:01:12<1:58:47, 14.60s/batch, batch_loss=2.45e+3, 

Validation:  34%|▎| 255/743 [1:01:26<1:58:47, 14.60s/batch, batch_loss=19, batch

Validation:  34%|▎| 256/743 [1:01:26<1:57:07, 14.43s/batch, batch_loss=19, batch

Validation:  34%|▎| 256/743 [1:01:41<1:57:07, 14.43s/batch, batch_loss=28.3, bat

Validation:  35%|▎| 257/743 [1:01:41<1:57:45, 14.54s/batch, batch_loss=28.3, bat

Validation:  35%|▎| 257/743 [1:01:54<1:57:45, 14.54s/batch, batch_loss=14.3, bat

Validation:  35%|▎| 258/743 [1:01:54<1:55:46, 14.32s/batch, batch_loss=14.3, bat

Validation:  35%|▎| 258/743 [1:02:09<1:55:46, 14.32s/batch, batch_loss=3.06, bat

Validation:  35%|▎| 259/743 [1:02:09<1:55:20, 14.30s/batch, batch_loss=3.06, bat

Validation:  35%|▎| 259/743 [1:02:23<1:55:20, 14.30s/batch, batch_loss=1.75, bat

Validation:  35%|▎| 260/743 [1:02:23<1:54:24, 14.21s/batch, batch_loss=1.75, bat

Validation:  35%|▎| 260/743 [1:02:36<1:54:24, 14.21s/batch, batch_loss=8.55, bat

Validation:  35%|▎| 261/743 [1:02:36<1:52:36, 14.02s/batch, batch_loss=8.55, bat

Validation:  35%|▎| 261/743 [1:02:51<1:52:36, 14.02s/batch, batch_loss=32.7, bat

Validation:  35%|▎| 262/743 [1:02:51<1:53:51, 14.20s/batch, batch_loss=32.7, bat

Validation:  35%|▎| 262/743 [1:03:07<1:53:51, 14.20s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:03:07<1:58:16, 14.78s/batch, batch_loss=2.71e+3, 

Validation:  35%|▎| 263/743 [1:03:21<1:58:16, 14.78s/batch, batch_loss=12.8, bat

Validation:  36%|▎| 264/743 [1:03:21<1:57:03, 14.66s/batch, batch_loss=12.8, bat

Validation:  36%|▎| 264/743 [1:03:36<1:57:03, 14.66s/batch, batch_loss=22.3, bat

Validation:  36%|▎| 265/743 [1:03:36<1:56:14, 14.59s/batch, batch_loss=22.3, bat

Validation:  36%|▎| 265/743 [1:03:51<1:56:14, 14.59s/batch, batch_loss=31.8, bat

Validation:  36%|▎| 266/743 [1:03:51<1:56:11, 14.62s/batch, batch_loss=31.8, bat

Validation:  36%|▎| 266/743 [1:04:05<1:56:11, 14.62s/batch, batch_loss=22.7, bat

Validation:  36%|▎| 267/743 [1:04:05<1:55:59, 14.62s/batch, batch_loss=22.7, bat

Validation:  36%|▎| 267/743 [1:04:20<1:55:59, 14.62s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:04:20<1:55:05, 14.54s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:04:34<1:55:05, 14.54s/batch, batch_loss=42.6, bat

Validation:  36%|▎| 269/743 [1:04:34<1:54:42, 14.52s/batch, batch_loss=42.6, bat

Validation:  36%|▎| 269/743 [1:04:49<1:54:42, 14.52s/batch, batch_loss=36.8, bat

Validation:  36%|▎| 270/743 [1:04:49<1:55:53, 14.70s/batch, batch_loss=36.8, bat

Validation:  36%|▎| 270/743 [1:05:04<1:55:53, 14.70s/batch, batch_loss=27.4, bat

Validation:  36%|▎| 271/743 [1:05:04<1:55:51, 14.73s/batch, batch_loss=27.4, bat

Validation:  36%|▎| 271/743 [1:05:18<1:55:51, 14.73s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:05:18<1:53:33, 14.47s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:05:32<1:53:33, 14.47s/batch, batch_loss=18.1, bat

Validation:  37%|▎| 273/743 [1:05:32<1:53:00, 14.43s/batch, batch_loss=18.1, bat

Validation:  37%|▎| 273/743 [1:05:47<1:53:00, 14.43s/batch, batch_loss=23.3, bat

Validation:  37%|▎| 274/743 [1:05:47<1:52:41, 14.42s/batch, batch_loss=23.3, bat

Validation:  37%|▎| 274/743 [1:06:02<1:52:41, 14.42s/batch, batch_loss=20.6, bat

Validation:  37%|▎| 275/743 [1:06:02<1:55:30, 14.81s/batch, batch_loss=20.6, bat

Validation:  37%|▎| 275/743 [1:06:17<1:55:30, 14.81s/batch, batch_loss=15.5, bat

Validation:  37%|▎| 276/743 [1:06:17<1:55:21, 14.82s/batch, batch_loss=15.5, bat

Validation:  37%|▎| 276/743 [1:06:31<1:55:21, 14.82s/batch, batch_loss=26, batch

Validation:  37%|▎| 277/743 [1:06:31<1:53:24, 14.60s/batch, batch_loss=26, batch

Validation:  37%|▎| 277/743 [1:06:48<1:53:24, 14.60s/batch, batch_loss=21.3, bat

Validation:  37%|▎| 278/743 [1:06:48<1:59:02, 15.36s/batch, batch_loss=21.3, bat

Validation:  37%|▎| 278/743 [1:07:03<1:59:02, 15.36s/batch, batch_loss=11.9, bat

Validation:  38%|▍| 279/743 [1:07:03<1:56:57, 15.12s/batch, batch_loss=11.9, bat

Validation:  38%|▍| 279/743 [1:07:17<1:56:57, 15.12s/batch, batch_loss=14.6, bat

Validation:  38%|▍| 280/743 [1:07:17<1:55:16, 14.94s/batch, batch_loss=14.6, bat

Validation:  38%|▍| 280/743 [1:07:31<1:55:16, 14.94s/batch, batch_loss=19.4, bat

Validation:  38%|▍| 281/743 [1:07:31<1:53:01, 14.68s/batch, batch_loss=19.4, bat

Validation:  38%|▍| 281/743 [1:07:45<1:53:01, 14.68s/batch, batch_loss=23.4, bat

Validation:  38%|▍| 282/743 [1:07:45<1:49:26, 14.24s/batch, batch_loss=23.4, bat

Validation:  38%|▍| 282/743 [1:07:58<1:49:26, 14.24s/batch, batch_loss=18.2, bat

Validation:  38%|▍| 283/743 [1:07:58<1:46:10, 13.85s/batch, batch_loss=18.2, bat

Validation:  38%|▍| 283/743 [1:08:10<1:46:10, 13.85s/batch, batch_loss=20.6, bat

Validation:  38%|▍| 284/743 [1:08:10<1:43:07, 13.48s/batch, batch_loss=20.6, bat

Validation:  38%|▍| 284/743 [1:08:24<1:43:07, 13.48s/batch, batch_loss=14.3, bat

Validation:  38%|▍| 285/743 [1:08:24<1:42:46, 13.46s/batch, batch_loss=14.3, bat

Validation:  38%|▍| 285/743 [1:08:38<1:42:46, 13.46s/batch, batch_loss=15.8, bat

Validation:  38%|▍| 286/743 [1:08:38<1:44:13, 13.68s/batch, batch_loss=15.8, bat

Validation:  38%|▍| 286/743 [1:08:53<1:44:13, 13.68s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:08:53<1:46:24, 14.00s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:09:06<1:46:24, 14.00s/batch, batch_loss=26.5, bat

Validation:  39%|▍| 288/743 [1:09:06<1:45:44, 13.94s/batch, batch_loss=26.5, bat

Validation:  39%|▍| 288/743 [1:09:21<1:45:44, 13.94s/batch, batch_loss=24.7, bat

Validation:  39%|▍| 289/743 [1:09:21<1:46:11, 14.03s/batch, batch_loss=24.7, bat

Validation:  39%|▍| 289/743 [1:09:35<1:46:11, 14.03s/batch, batch_loss=486, batc

Validation:  39%|▍| 290/743 [1:09:35<1:47:18, 14.21s/batch, batch_loss=486, batc

Validation:  39%|▍| 290/743 [1:09:50<1:47:18, 14.21s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:09:50<1:47:22, 14.25s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:10:04<1:47:22, 14.25s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:10:04<1:47:16, 14.27s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:10:17<1:47:16, 14.27s/batch, batch_loss=29.4, bat

Validation:  39%|▍| 293/743 [1:10:17<1:44:15, 13.90s/batch, batch_loss=29.4, bat

Validation:  39%|▍| 293/743 [1:10:31<1:44:15, 13.90s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:10:31<1:43:54, 13.89s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:10:45<1:43:54, 13.89s/batch, batch_loss=23.6, bat

Validation:  40%|▍| 295/743 [1:10:45<1:44:04, 13.94s/batch, batch_loss=23.6, bat

Validation:  40%|▍| 295/743 [1:11:02<1:44:04, 13.94s/batch, batch_loss=18.4, bat

Validation:  40%|▍| 296/743 [1:11:02<1:49:57, 14.76s/batch, batch_loss=18.4, bat

Validation:  40%|▍| 296/743 [1:11:15<1:49:57, 14.76s/batch, batch_loss=11.3, bat

Validation:  40%|▍| 297/743 [1:11:15<1:46:59, 14.39s/batch, batch_loss=11.3, bat

Validation:  40%|▍| 297/743 [1:11:29<1:46:59, 14.39s/batch, batch_loss=27.3, bat

Validation:  40%|▍| 298/743 [1:11:29<1:45:51, 14.27s/batch, batch_loss=27.3, bat

Validation:  40%|▍| 298/743 [1:11:43<1:45:51, 14.27s/batch, batch_loss=35, batch

Validation:  40%|▍| 299/743 [1:11:43<1:44:57, 14.18s/batch, batch_loss=35, batch

Validation:  40%|▍| 299/743 [1:11:56<1:44:57, 14.18s/batch, batch_loss=38.5, bat

Validation:  40%|▍| 300/743 [1:11:56<1:42:55, 13.94s/batch, batch_loss=38.5, bat

Validation:  40%|▍| 300/743 [1:12:10<1:42:55, 13.94s/batch, batch_loss=843, batc

Validation:  41%|▍| 301/743 [1:12:10<1:42:26, 13.91s/batch, batch_loss=843, batc

Validation:  41%|▍| 301/743 [1:12:25<1:42:26, 13.91s/batch, batch_loss=12, batch

Validation:  41%|▍| 302/743 [1:12:25<1:43:08, 14.03s/batch, batch_loss=12, batch

Validation:  41%|▍| 302/743 [1:12:39<1:43:08, 14.03s/batch, batch_loss=16.2, bat

Validation:  41%|▍| 303/743 [1:12:39<1:42:39, 14.00s/batch, batch_loss=16.2, bat

Validation:  41%|▍| 303/743 [1:12:52<1:42:39, 14.00s/batch, batch_loss=17.7, bat

Validation:  41%|▍| 304/743 [1:12:52<1:40:32, 13.74s/batch, batch_loss=17.7, bat

Validation:  41%|▍| 304/743 [1:13:07<1:40:32, 13.74s/batch, batch_loss=12.3, bat

Validation:  41%|▍| 305/743 [1:13:07<1:44:38, 14.33s/batch, batch_loss=12.3, bat

Validation:  41%|▍| 305/743 [1:13:22<1:44:38, 14.33s/batch, batch_loss=22, batch

Validation:  41%|▍| 306/743 [1:13:22<1:44:01, 14.28s/batch, batch_loss=22, batch

Validation:  41%|▍| 306/743 [1:13:36<1:44:01, 14.28s/batch, batch_loss=26.4, bat

Validation:  41%|▍| 307/743 [1:13:36<1:43:29, 14.24s/batch, batch_loss=26.4, bat

Validation:  41%|▍| 307/743 [1:13:50<1:43:29, 14.24s/batch, batch_loss=886, batc

Validation:  41%|▍| 308/743 [1:13:50<1:42:36, 14.15s/batch, batch_loss=886, batc

Validation:  41%|▍| 308/743 [1:14:03<1:42:36, 14.15s/batch, batch_loss=25.6, bat

Validation:  42%|▍| 309/743 [1:14:03<1:41:30, 14.03s/batch, batch_loss=25.6, bat

Validation:  42%|▍| 309/743 [1:14:18<1:41:30, 14.03s/batch, batch_loss=20.7, bat

Validation:  42%|▍| 310/743 [1:14:18<1:43:14, 14.30s/batch, batch_loss=20.7, bat

Validation:  42%|▍| 310/743 [1:14:32<1:43:14, 14.30s/batch, batch_loss=19.4, bat

Validation:  42%|▍| 311/743 [1:14:32<1:42:07, 14.18s/batch, batch_loss=19.4, bat

Validation:  42%|▍| 311/743 [1:14:46<1:42:07, 14.18s/batch, batch_loss=17, batch

Validation:  42%|▍| 312/743 [1:14:46<1:41:37, 14.15s/batch, batch_loss=17, batch

Validation:  42%|▍| 312/743 [1:15:00<1:41:37, 14.15s/batch, batch_loss=8.4, batc

Validation:  42%|▍| 313/743 [1:15:00<1:40:27, 14.02s/batch, batch_loss=8.4, batc

Validation:  42%|▍| 313/743 [1:15:16<1:40:27, 14.02s/batch, batch_loss=14.6, bat

Validation:  42%|▍| 314/743 [1:15:16<1:44:28, 14.61s/batch, batch_loss=14.6, bat

Validation:  42%|▍| 314/743 [1:15:30<1:44:28, 14.61s/batch, batch_loss=21.9, bat

Validation:  42%|▍| 315/743 [1:15:30<1:41:55, 14.29s/batch, batch_loss=21.9, bat

Validation:  42%|▍| 315/743 [1:15:44<1:41:55, 14.29s/batch, batch_loss=19.5, bat

Validation:  43%|▍| 316/743 [1:15:44<1:41:35, 14.28s/batch, batch_loss=19.5, bat

Validation:  43%|▍| 316/743 [1:15:58<1:41:35, 14.28s/batch, batch_loss=26, batch

Validation:  43%|▍| 317/743 [1:15:58<1:41:11, 14.25s/batch, batch_loss=26, batch

Validation:  43%|▍| 317/743 [1:16:12<1:41:11, 14.25s/batch, batch_loss=16.3, bat

Validation:  43%|▍| 318/743 [1:16:12<1:39:55, 14.11s/batch, batch_loss=16.3, bat

Validation:  43%|▍| 318/743 [1:16:26<1:39:55, 14.11s/batch, batch_loss=21.3, bat

Validation:  43%|▍| 319/743 [1:16:26<1:38:55, 14.00s/batch, batch_loss=21.3, bat

Validation:  43%|▍| 319/743 [1:16:40<1:38:55, 14.00s/batch, batch_loss=20.4, bat

Validation:  43%|▍| 320/743 [1:16:40<1:38:45, 14.01s/batch, batch_loss=20.4, bat

Validation:  43%|▍| 320/743 [1:16:53<1:38:45, 14.01s/batch, batch_loss=17.4, bat

Validation:  43%|▍| 321/743 [1:16:53<1:37:53, 13.92s/batch, batch_loss=17.4, bat

Validation:  43%|▍| 321/743 [1:17:07<1:37:53, 13.92s/batch, batch_loss=18.3, bat

Validation:  43%|▍| 322/743 [1:17:07<1:37:40, 13.92s/batch, batch_loss=18.3, bat

Validation:  43%|▍| 322/743 [1:17:21<1:37:40, 13.92s/batch, batch_loss=21.8, bat

Validation:  43%|▍| 323/743 [1:17:21<1:37:32, 13.93s/batch, batch_loss=21.8, bat

Validation:  43%|▍| 323/743 [1:17:35<1:37:32, 13.93s/batch, batch_loss=297, batc

Validation:  44%|▍| 324/743 [1:17:35<1:37:43, 13.99s/batch, batch_loss=297, batc

Validation:  44%|▍| 324/743 [1:17:49<1:37:43, 13.99s/batch, batch_loss=24.9, bat

Validation:  44%|▍| 325/743 [1:17:49<1:37:06, 13.94s/batch, batch_loss=24.9, bat

Validation:  44%|▍| 325/743 [1:18:04<1:37:06, 13.94s/batch, batch_loss=21.7, bat

Validation:  44%|▍| 326/743 [1:18:04<1:38:56, 14.24s/batch, batch_loss=21.7, bat

Validation:  44%|▍| 326/743 [1:18:18<1:38:56, 14.24s/batch, batch_loss=21, batch

Validation:  44%|▍| 327/743 [1:18:18<1:37:39, 14.09s/batch, batch_loss=21, batch

Validation:  44%|▍| 327/743 [1:18:32<1:37:39, 14.09s/batch, batch_loss=18.8, bat

Validation:  44%|▍| 328/743 [1:18:32<1:38:22, 14.22s/batch, batch_loss=18.8, bat

Validation:  44%|▍| 328/743 [1:18:47<1:38:22, 14.22s/batch, batch_loss=6.18, bat

Validation:  44%|▍| 329/743 [1:18:47<1:39:03, 14.36s/batch, batch_loss=6.18, bat

Validation:  44%|▍| 329/743 [1:19:02<1:39:03, 14.36s/batch, batch_loss=16.3, bat

Validation:  44%|▍| 330/743 [1:19:02<1:39:28, 14.45s/batch, batch_loss=16.3, bat

Validation:  44%|▍| 330/743 [1:19:18<1:39:28, 14.45s/batch, batch_loss=29.8, bat

Validation:  45%|▍| 331/743 [1:19:18<1:44:03, 15.16s/batch, batch_loss=29.8, bat

Validation:  45%|▍| 331/743 [1:19:33<1:44:03, 15.16s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:19:33<1:42:19, 14.94s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:19:47<1:42:19, 14.94s/batch, batch_loss=31.1, bat

Validation:  45%|▍| 333/743 [1:19:47<1:39:47, 14.60s/batch, batch_loss=31.1, bat

Validation:  45%|▍| 333/743 [1:20:01<1:39:47, 14.60s/batch, batch_loss=27.4, bat

Validation:  45%|▍| 334/743 [1:20:01<1:38:56, 14.51s/batch, batch_loss=27.4, bat

Validation:  45%|▍| 334/743 [1:20:15<1:38:56, 14.51s/batch, batch_loss=35.4, bat

Validation:  45%|▍| 335/743 [1:20:15<1:37:13, 14.30s/batch, batch_loss=35.4, bat

Validation:  45%|▍| 335/743 [1:20:29<1:37:13, 14.30s/batch, batch_loss=12.4, bat

Validation:  45%|▍| 336/743 [1:20:29<1:36:47, 14.27s/batch, batch_loss=12.4, bat

Validation:  45%|▍| 336/743 [1:20:43<1:36:47, 14.27s/batch, batch_loss=29, batch

Validation:  45%|▍| 337/743 [1:20:43<1:36:54, 14.32s/batch, batch_loss=29, batch

Validation:  45%|▍| 337/743 [1:20:58<1:36:54, 14.32s/batch, batch_loss=46.3, bat

Validation:  45%|▍| 338/743 [1:20:58<1:36:58, 14.37s/batch, batch_loss=46.3, bat

Validation:  45%|▍| 338/743 [1:21:11<1:36:58, 14.37s/batch, batch_loss=33.9, bat

Validation:  46%|▍| 339/743 [1:21:11<1:34:42, 14.07s/batch, batch_loss=33.9, bat

Validation:  46%|▍| 339/743 [1:21:25<1:34:42, 14.07s/batch, batch_loss=39.1, bat

Validation:  46%|▍| 340/743 [1:21:25<1:34:15, 14.03s/batch, batch_loss=39.1, bat

Validation:  46%|▍| 340/743 [1:21:40<1:34:15, 14.03s/batch, batch_loss=18.7, bat

Validation:  46%|▍| 341/743 [1:21:40<1:35:20, 14.23s/batch, batch_loss=18.7, bat

Validation:  46%|▍| 341/743 [1:21:54<1:35:20, 14.23s/batch, batch_loss=25.4, bat

Validation:  46%|▍| 342/743 [1:21:54<1:35:19, 14.26s/batch, batch_loss=25.4, bat

Validation:  46%|▍| 342/743 [1:22:09<1:35:19, 14.26s/batch, batch_loss=23.5, bat

Validation:  46%|▍| 343/743 [1:22:09<1:35:33, 14.33s/batch, batch_loss=23.5, bat

Validation:  46%|▍| 343/743 [1:22:23<1:35:33, 14.33s/batch, batch_loss=25.1, bat

Validation:  46%|▍| 344/743 [1:22:23<1:34:11, 14.16s/batch, batch_loss=25.1, bat

Validation:  46%|▍| 344/743 [1:22:37<1:34:11, 14.16s/batch, batch_loss=19.9, bat

Validation:  46%|▍| 345/743 [1:22:37<1:34:34, 14.26s/batch, batch_loss=19.9, bat

Validation:  46%|▍| 345/743 [1:22:51<1:34:34, 14.26s/batch, batch_loss=32.9, bat

Validation:  47%|▍| 346/743 [1:22:51<1:33:15, 14.10s/batch, batch_loss=32.9, bat

Validation:  47%|▍| 346/743 [1:23:07<1:33:15, 14.10s/batch, batch_loss=28.5, bat

Validation:  47%|▍| 347/743 [1:23:07<1:37:26, 14.76s/batch, batch_loss=28.5, bat

Validation:  47%|▍| 347/743 [1:23:21<1:37:26, 14.76s/batch, batch_loss=36.1, bat

Validation:  47%|▍| 348/743 [1:23:21<1:35:46, 14.55s/batch, batch_loss=36.1, bat

Validation:  47%|▍| 348/743 [1:23:35<1:35:46, 14.55s/batch, batch_loss=28.6, bat

Validation:  47%|▍| 349/743 [1:23:35<1:35:14, 14.50s/batch, batch_loss=28.6, bat

Validation:  47%|▍| 349/743 [1:23:50<1:35:14, 14.50s/batch, batch_loss=21.9, bat

Validation:  47%|▍| 350/743 [1:23:50<1:35:29, 14.58s/batch, batch_loss=21.9, bat

Validation:  47%|▍| 350/743 [1:24:04<1:35:29, 14.58s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:24:04<1:34:07, 14.41s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:24:18<1:34:07, 14.41s/batch, batch_loss=35.4, bat

Validation:  47%|▍| 352/743 [1:24:18<1:32:54, 14.26s/batch, batch_loss=35.4, bat

Validation:  47%|▍| 352/743 [1:24:32<1:32:54, 14.26s/batch, batch_loss=22.5, bat

Validation:  48%|▍| 353/743 [1:24:32<1:32:18, 14.20s/batch, batch_loss=22.5, bat

Validation:  48%|▍| 353/743 [1:24:49<1:32:18, 14.20s/batch, batch_loss=28.2, bat

Validation:  48%|▍| 354/743 [1:24:49<1:37:20, 15.01s/batch, batch_loss=28.2, bat

Validation:  48%|▍| 354/743 [1:25:03<1:37:20, 15.01s/batch, batch_loss=34.6, bat

Validation:  48%|▍| 355/743 [1:25:03<1:35:50, 14.82s/batch, batch_loss=34.6, bat

Validation:  48%|▍| 355/743 [1:25:17<1:35:50, 14.82s/batch, batch_loss=46.2, bat

Validation:  48%|▍| 356/743 [1:25:17<1:33:26, 14.49s/batch, batch_loss=46.2, bat

Validation:  48%|▍| 356/743 [1:25:31<1:33:26, 14.49s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:25:31<1:32:41, 14.41s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:25:45<1:32:41, 14.41s/batch, batch_loss=16.5, bat

Validation:  48%|▍| 358/743 [1:25:45<1:29:57, 14.02s/batch, batch_loss=16.5, bat

Validation:  48%|▍| 358/743 [1:25:58<1:29:57, 14.02s/batch, batch_loss=11.7, bat

Validation:  48%|▍| 359/743 [1:25:58<1:28:47, 13.87s/batch, batch_loss=11.7, bat

Validation:  48%|▍| 359/743 [1:26:12<1:28:47, 13.87s/batch, batch_loss=25.4, bat

Validation:  48%|▍| 360/743 [1:26:12<1:29:10, 13.97s/batch, batch_loss=25.4, bat

Validation:  48%|▍| 360/743 [1:26:26<1:29:10, 13.97s/batch, batch_loss=17.8, bat

Validation:  49%|▍| 361/743 [1:26:26<1:28:51, 13.96s/batch, batch_loss=17.8, bat

Validation:  49%|▍| 361/743 [1:26:40<1:28:51, 13.96s/batch, batch_loss=32.3, bat

Validation:  49%|▍| 362/743 [1:26:40<1:28:55, 14.00s/batch, batch_loss=32.3, bat

Validation:  49%|▍| 362/743 [1:26:54<1:28:55, 14.00s/batch, batch_loss=31.6, bat

Validation:  49%|▍| 363/743 [1:26:54<1:27:45, 13.86s/batch, batch_loss=31.6, bat

Validation:  49%|▍| 363/743 [1:27:08<1:27:45, 13.86s/batch, batch_loss=22.9, bat

Validation:  49%|▍| 364/743 [1:27:08<1:27:27, 13.85s/batch, batch_loss=22.9, bat

Validation:  49%|▍| 364/743 [1:27:22<1:27:27, 13.85s/batch, batch_loss=18.5, bat

Validation:  49%|▍| 365/743 [1:27:22<1:27:19, 13.86s/batch, batch_loss=18.5, bat

Validation:  49%|▍| 365/743 [1:27:38<1:27:19, 13.86s/batch, batch_loss=17.6, bat

Validation:  49%|▍| 366/743 [1:27:38<1:31:46, 14.61s/batch, batch_loss=17.6, bat

Validation:  49%|▍| 366/743 [1:27:52<1:31:46, 14.61s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 367/743 [1:27:52<1:30:47, 14.49s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 367/743 [1:28:06<1:30:47, 14.49s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:28:06<1:30:20, 14.45s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:28:21<1:30:20, 14.45s/batch, batch_loss=19.7, bat

Validation:  50%|▍| 369/743 [1:28:21<1:30:19, 14.49s/batch, batch_loss=19.7, bat

Validation:  50%|▍| 369/743 [1:28:35<1:30:19, 14.49s/batch, batch_loss=28, batch

Validation:  50%|▍| 370/743 [1:28:35<1:29:29, 14.39s/batch, batch_loss=28, batch

Validation:  50%|▍| 370/743 [1:28:50<1:29:29, 14.39s/batch, batch_loss=23.1, bat

Validation:  50%|▍| 371/743 [1:28:50<1:29:55, 14.50s/batch, batch_loss=23.1, bat

Validation:  50%|▍| 371/743 [1:29:04<1:29:55, 14.50s/batch, batch_loss=20.6, bat

Validation:  50%|▌| 372/743 [1:29:04<1:28:47, 14.36s/batch, batch_loss=20.6, bat

Validation:  50%|▌| 372/743 [1:29:18<1:28:47, 14.36s/batch, batch_loss=28.8, bat

Validation:  50%|▌| 373/743 [1:29:18<1:27:27, 14.18s/batch, batch_loss=28.8, bat

Validation:  50%|▌| 373/743 [1:29:32<1:27:27, 14.18s/batch, batch_loss=16.5, bat

Validation:  50%|▌| 374/743 [1:29:32<1:26:30, 14.07s/batch, batch_loss=16.5, bat

Validation:  50%|▌| 374/743 [1:29:46<1:26:30, 14.07s/batch, batch_loss=9.03, bat

Validation:  50%|▌| 375/743 [1:29:46<1:27:26, 14.26s/batch, batch_loss=9.03, bat

Validation:  50%|▌| 375/743 [1:30:01<1:27:26, 14.26s/batch, batch_loss=38, batch

Validation:  51%|▌| 376/743 [1:30:01<1:27:59, 14.38s/batch, batch_loss=38, batch

Validation:  51%|▌| 376/743 [1:30:15<1:27:59, 14.38s/batch, batch_loss=10.7, bat

Validation:  51%|▌| 377/743 [1:30:15<1:27:06, 14.28s/batch, batch_loss=10.7, bat

Validation:  51%|▌| 377/743 [1:30:29<1:27:06, 14.28s/batch, batch_loss=21.8, bat

Validation:  51%|▌| 378/743 [1:30:29<1:26:26, 14.21s/batch, batch_loss=21.8, bat

Validation:  51%|▌| 378/743 [1:30:44<1:26:26, 14.21s/batch, batch_loss=10.2, bat

Validation:  51%|▌| 379/743 [1:30:44<1:26:54, 14.33s/batch, batch_loss=10.2, bat

Validation:  51%|▌| 379/743 [1:30:58<1:26:54, 14.33s/batch, batch_loss=8.09, bat

Validation:  51%|▌| 380/743 [1:30:58<1:26:38, 14.32s/batch, batch_loss=8.09, bat

Validation:  51%|▌| 380/743 [1:31:12<1:26:38, 14.32s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:31:12<1:25:24, 14.16s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:31:26<1:25:24, 14.16s/batch, batch_loss=913, batc

Validation:  51%|▌| 382/743 [1:31:26<1:25:03, 14.14s/batch, batch_loss=913, batc

Validation:  51%|▌| 382/743 [1:31:40<1:25:03, 14.14s/batch, batch_loss=211, batc

Validation:  52%|▌| 383/743 [1:31:40<1:24:41, 14.12s/batch, batch_loss=211, batc

Validation:  52%|▌| 383/743 [1:31:53<1:24:41, 14.12s/batch, batch_loss=284, batc

Validation:  52%|▌| 384/743 [1:31:53<1:23:31, 13.96s/batch, batch_loss=284, batc

Validation:  52%|▌| 384/743 [1:32:08<1:23:31, 13.96s/batch, batch_loss=21.5, bat

Validation:  52%|▌| 385/743 [1:32:08<1:23:54, 14.06s/batch, batch_loss=21.5, bat

Validation:  52%|▌| 385/743 [1:32:22<1:23:54, 14.06s/batch, batch_loss=10.5, bat

Validation:  52%|▌| 386/743 [1:32:22<1:23:24, 14.02s/batch, batch_loss=10.5, bat

Validation:  52%|▌| 386/743 [1:32:36<1:23:24, 14.02s/batch, batch_loss=8.04, bat

Validation:  52%|▌| 387/743 [1:32:36<1:23:00, 13.99s/batch, batch_loss=8.04, bat

Validation:  52%|▌| 387/743 [1:32:48<1:23:00, 13.99s/batch, batch_loss=19.2, bat

Validation:  52%|▌| 388/743 [1:32:48<1:20:44, 13.65s/batch, batch_loss=19.2, bat

Validation:  52%|▌| 388/743 [1:33:03<1:20:44, 13.65s/batch, batch_loss=14.2, bat

Validation:  52%|▌| 389/743 [1:33:03<1:21:52, 13.88s/batch, batch_loss=14.2, bat

Validation:  52%|▌| 389/743 [1:33:17<1:21:52, 13.88s/batch, batch_loss=19.7, bat

Validation:  52%|▌| 390/743 [1:33:17<1:22:19, 13.99s/batch, batch_loss=19.7, bat

Validation:  52%|▌| 390/743 [1:33:30<1:22:19, 13.99s/batch, batch_loss=18, batch

Validation:  53%|▌| 391/743 [1:33:30<1:20:54, 13.79s/batch, batch_loss=18, batch

Validation:  53%|▌| 391/743 [1:33:45<1:20:54, 13.79s/batch, batch_loss=17, batch

Validation:  53%|▌| 392/743 [1:33:45<1:21:42, 13.97s/batch, batch_loss=17, batch

Validation:  53%|▌| 392/743 [1:33:58<1:21:42, 13.97s/batch, batch_loss=18, batch

Validation:  53%|▌| 393/743 [1:33:58<1:20:45, 13.84s/batch, batch_loss=18, batch

Validation:  53%|▌| 393/743 [1:34:13<1:20:45, 13.84s/batch, batch_loss=18.3, bat

Validation:  53%|▌| 394/743 [1:34:13<1:21:19, 13.98s/batch, batch_loss=18.3, bat

Validation:  53%|▌| 394/743 [1:34:26<1:21:19, 13.98s/batch, batch_loss=13.3, bat

Validation:  53%|▌| 395/743 [1:34:26<1:20:35, 13.90s/batch, batch_loss=13.3, bat

Validation:  53%|▌| 395/743 [1:34:40<1:20:35, 13.90s/batch, batch_loss=15.9, bat

Validation:  53%|▌| 396/743 [1:34:40<1:19:50, 13.81s/batch, batch_loss=15.9, bat

Validation:  53%|▌| 396/743 [1:34:54<1:19:50, 13.81s/batch, batch_loss=11.4, bat

Validation:  53%|▌| 397/743 [1:34:54<1:20:19, 13.93s/batch, batch_loss=11.4, bat

Validation:  53%|▌| 397/743 [1:35:08<1:20:19, 13.93s/batch, batch_loss=22.6, bat

Validation:  54%|▌| 398/743 [1:35:08<1:20:42, 14.04s/batch, batch_loss=22.6, bat

Validation:  54%|▌| 398/743 [1:35:23<1:20:42, 14.04s/batch, batch_loss=18.2, bat

Validation:  54%|▌| 399/743 [1:35:23<1:20:47, 14.09s/batch, batch_loss=18.2, bat

Validation:  54%|▌| 399/743 [1:35:37<1:20:47, 14.09s/batch, batch_loss=23.1, bat

Validation:  54%|▌| 400/743 [1:35:37<1:20:42, 14.12s/batch, batch_loss=23.1, bat

Validation:  54%|▌| 400/743 [1:35:52<1:20:42, 14.12s/batch, batch_loss=18.7, bat

Validation:  54%|▌| 401/743 [1:35:52<1:21:56, 14.38s/batch, batch_loss=18.7, bat

Validation:  54%|▌| 401/743 [1:36:06<1:21:56, 14.38s/batch, batch_loss=6.54, bat

Validation:  54%|▌| 402/743 [1:36:06<1:21:39, 14.37s/batch, batch_loss=6.54, bat

Validation:  54%|▌| 402/743 [1:36:20<1:21:39, 14.37s/batch, batch_loss=22.3, bat

Validation:  54%|▌| 403/743 [1:36:20<1:20:19, 14.18s/batch, batch_loss=22.3, bat

Validation:  54%|▌| 403/743 [1:36:35<1:20:19, 14.18s/batch, batch_loss=15.2, bat

Validation:  54%|▌| 404/743 [1:36:35<1:20:55, 14.32s/batch, batch_loss=15.2, bat

Validation:  54%|▌| 404/743 [1:36:49<1:20:55, 14.32s/batch, batch_loss=10.4, bat

Validation:  55%|▌| 405/743 [1:36:49<1:20:52, 14.36s/batch, batch_loss=10.4, bat

Validation:  55%|▌| 405/743 [1:37:06<1:20:52, 14.36s/batch, batch_loss=16.4, bat

Validation:  55%|▌| 406/743 [1:37:06<1:24:53, 15.12s/batch, batch_loss=16.4, bat

Validation:  55%|▌| 406/743 [1:37:20<1:24:53, 15.12s/batch, batch_loss=17.1, bat

Validation:  55%|▌| 407/743 [1:37:20<1:22:54, 14.81s/batch, batch_loss=17.1, bat

Validation:  55%|▌| 407/743 [1:37:35<1:22:54, 14.81s/batch, batch_loss=25, batch

Validation:  55%|▌| 408/743 [1:37:35<1:22:29, 14.78s/batch, batch_loss=25, batch

Validation:  55%|▌| 408/743 [1:37:48<1:22:29, 14.78s/batch, batch_loss=11.2, bat

Validation:  55%|▌| 409/743 [1:37:48<1:20:01, 14.38s/batch, batch_loss=11.2, bat

Validation:  55%|▌| 409/743 [1:38:00<1:20:01, 14.38s/batch, batch_loss=19.5, bat

Validation:  55%|▌| 410/743 [1:38:00<1:16:21, 13.76s/batch, batch_loss=19.5, bat

Validation:  55%|▌| 410/743 [1:38:13<1:16:21, 13.76s/batch, batch_loss=20.5, bat

Validation:  55%|▌| 411/743 [1:38:13<1:13:19, 13.25s/batch, batch_loss=20.5, bat

Validation:  55%|▌| 411/743 [1:38:25<1:13:19, 13.25s/batch, batch_loss=15.9, bat

Validation:  55%|▌| 412/743 [1:38:25<1:12:05, 13.07s/batch, batch_loss=15.9, bat

Validation:  55%|▌| 412/743 [1:38:39<1:12:05, 13.07s/batch, batch_loss=1.92e+3, 

Validation:  56%|▌| 413/743 [1:38:39<1:13:32, 13.37s/batch, batch_loss=1.92e+3, 

Validation:  56%|▌| 413/743 [1:38:56<1:13:32, 13.37s/batch, batch_loss=28.3, bat

Validation:  56%|▌| 414/743 [1:38:56<1:18:38, 14.34s/batch, batch_loss=28.3, bat

Validation:  56%|▌| 414/743 [1:39:10<1:18:38, 14.34s/batch, batch_loss=28.8, bat

Validation:  56%|▌| 415/743 [1:39:10<1:18:02, 14.28s/batch, batch_loss=28.8, bat

Validation:  56%|▌| 415/743 [1:39:24<1:18:02, 14.28s/batch, batch_loss=6.46e+3, 

Validation:  56%|▌| 416/743 [1:39:24<1:17:23, 14.20s/batch, batch_loss=6.46e+3, 

Validation:  56%|▌| 416/743 [1:39:38<1:17:23, 14.20s/batch, batch_loss=18, batch

Validation:  56%|▌| 417/743 [1:39:38<1:16:17, 14.04s/batch, batch_loss=18, batch

Validation:  56%|▌| 417/743 [1:39:52<1:16:17, 14.04s/batch, batch_loss=16.6, bat

Validation:  56%|▌| 418/743 [1:39:52<1:15:53, 14.01s/batch, batch_loss=16.6, bat

Validation:  56%|▌| 418/743 [1:40:06<1:15:53, 14.01s/batch, batch_loss=18.2, bat

Validation:  56%|▌| 419/743 [1:40:06<1:16:26, 14.16s/batch, batch_loss=18.2, bat

Validation:  56%|▌| 419/743 [1:40:19<1:16:26, 14.16s/batch, batch_loss=17.7, bat

Validation:  57%|▌| 420/743 [1:40:19<1:14:54, 13.92s/batch, batch_loss=17.7, bat

Validation:  57%|▌| 420/743 [1:40:34<1:14:54, 13.92s/batch, batch_loss=33, batch

Validation:  57%|▌| 421/743 [1:40:34<1:15:46, 14.12s/batch, batch_loss=33, batch

Validation:  57%|▌| 421/743 [1:40:51<1:15:46, 14.12s/batch, batch_loss=9.09, bat

Validation:  57%|▌| 422/743 [1:40:51<1:19:35, 14.88s/batch, batch_loss=9.09, bat

Validation:  57%|▌| 422/743 [1:41:05<1:19:35, 14.88s/batch, batch_loss=25, batch

Validation:  57%|▌| 423/743 [1:41:05<1:18:49, 14.78s/batch, batch_loss=25, batch

Validation:  57%|▌| 423/743 [1:41:19<1:18:49, 14.78s/batch, batch_loss=326, batc

Validation:  57%|▌| 424/743 [1:41:19<1:17:13, 14.52s/batch, batch_loss=326, batc

Validation:  57%|▌| 424/743 [1:41:34<1:17:13, 14.52s/batch, batch_loss=27.8, bat

Validation:  57%|▌| 425/743 [1:41:34<1:17:03, 14.54s/batch, batch_loss=27.8, bat

Validation:  57%|▌| 425/743 [1:41:49<1:17:03, 14.54s/batch, batch_loss=25.3, bat

Validation:  57%|▌| 426/743 [1:41:49<1:17:41, 14.71s/batch, batch_loss=25.3, bat

Validation:  57%|▌| 426/743 [1:42:03<1:17:41, 14.71s/batch, batch_loss=24.9, bat

Validation:  57%|▌| 427/743 [1:42:03<1:16:46, 14.58s/batch, batch_loss=24.9, bat

Validation:  57%|▌| 427/743 [1:42:17<1:16:46, 14.58s/batch, batch_loss=5.27e+3, 

Validation:  58%|▌| 428/743 [1:42:17<1:15:13, 14.33s/batch, batch_loss=5.27e+3, 

Validation:  58%|▌| 428/743 [1:42:31<1:15:13, 14.33s/batch, batch_loss=19.7, bat

Validation:  58%|▌| 429/743 [1:42:31<1:14:38, 14.26s/batch, batch_loss=19.7, bat

Validation:  58%|▌| 429/743 [1:42:48<1:14:38, 14.26s/batch, batch_loss=5.36e+3, 

Validation:  58%|▌| 430/743 [1:42:48<1:18:03, 14.96s/batch, batch_loss=5.36e+3, 

Validation:  58%|▌| 430/743 [1:43:02<1:18:03, 14.96s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:43:02<1:17:07, 14.83s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:43:16<1:17:07, 14.83s/batch, batch_loss=969, batc

Validation:  58%|▌| 432/743 [1:43:16<1:15:45, 14.61s/batch, batch_loss=969, batc

Validation:  58%|▌| 432/743 [1:43:31<1:15:45, 14.61s/batch, batch_loss=16.5, bat

Validation:  58%|▌| 433/743 [1:43:31<1:15:35, 14.63s/batch, batch_loss=16.5, bat

Validation:  58%|▌| 433/743 [1:43:46<1:15:35, 14.63s/batch, batch_loss=11.9, bat

Validation:  58%|▌| 434/743 [1:43:46<1:15:33, 14.67s/batch, batch_loss=11.9, bat

Validation:  58%|▌| 434/743 [1:44:00<1:15:33, 14.67s/batch, batch_loss=15.8, bat

Validation:  59%|▌| 435/743 [1:44:00<1:15:24, 14.69s/batch, batch_loss=15.8, bat

Validation:  59%|▌| 435/743 [1:44:16<1:15:24, 14.69s/batch, batch_loss=15.2, bat

Validation:  59%|▌| 436/743 [1:44:16<1:16:19, 14.92s/batch, batch_loss=15.2, bat

Validation:  59%|▌| 436/743 [1:44:30<1:16:19, 14.92s/batch, batch_loss=25.3, bat

Validation:  59%|▌| 437/743 [1:44:30<1:14:11, 14.55s/batch, batch_loss=25.3, bat

Validation:  59%|▌| 437/743 [1:44:44<1:14:11, 14.55s/batch, batch_loss=982, batc

Validation:  59%|▌| 438/743 [1:44:44<1:13:24, 14.44s/batch, batch_loss=982, batc

Validation:  59%|▌| 438/743 [1:45:00<1:13:24, 14.44s/batch, batch_loss=905, batc

Validation:  59%|▌| 439/743 [1:45:00<1:16:06, 15.02s/batch, batch_loss=905, batc

Validation:  59%|▌| 439/743 [1:45:14<1:16:06, 15.02s/batch, batch_loss=20.2, bat

Validation:  59%|▌| 440/743 [1:45:14<1:14:04, 14.67s/batch, batch_loss=20.2, bat

Validation:  59%|▌| 440/743 [1:45:28<1:14:04, 14.67s/batch, batch_loss=16.2, bat

Validation:  59%|▌| 441/743 [1:45:28<1:12:39, 14.44s/batch, batch_loss=16.2, bat

Validation:  59%|▌| 441/743 [1:45:42<1:12:39, 14.44s/batch, batch_loss=19, batch

Validation:  59%|▌| 442/743 [1:45:42<1:12:38, 14.48s/batch, batch_loss=19, batch

Validation:  59%|▌| 442/743 [1:45:57<1:12:38, 14.48s/batch, batch_loss=12.3, bat

Validation:  60%|▌| 443/743 [1:45:57<1:12:28, 14.50s/batch, batch_loss=12.3, bat

Validation:  60%|▌| 443/743 [1:46:11<1:12:28, 14.50s/batch, batch_loss=15.8, bat

Validation:  60%|▌| 444/743 [1:46:11<1:12:08, 14.48s/batch, batch_loss=15.8, bat

Validation:  60%|▌| 444/743 [1:46:25<1:12:08, 14.48s/batch, batch_loss=9.83, bat

Validation:  60%|▌| 445/743 [1:46:25<1:11:21, 14.37s/batch, batch_loss=9.83, bat

Validation:  60%|▌| 445/743 [1:46:40<1:11:21, 14.37s/batch, batch_loss=15.5, bat

Validation:  60%|▌| 446/743 [1:46:40<1:11:13, 14.39s/batch, batch_loss=15.5, bat

Validation:  60%|▌| 446/743 [1:46:54<1:11:13, 14.39s/batch, batch_loss=6.85e+3, 

Validation:  60%|▌| 447/743 [1:46:54<1:10:10, 14.23s/batch, batch_loss=6.85e+3, 

Validation:  60%|▌| 447/743 [1:47:11<1:10:10, 14.23s/batch, batch_loss=5.54, bat

Validation:  60%|▌| 448/743 [1:47:11<1:14:00, 15.05s/batch, batch_loss=5.54, bat

Validation:  60%|▌| 448/743 [1:47:25<1:14:00, 15.05s/batch, batch_loss=13.6, bat

Validation:  60%|▌| 449/743 [1:47:25<1:13:07, 14.92s/batch, batch_loss=13.6, bat

Validation:  60%|▌| 449/743 [1:47:40<1:13:07, 14.92s/batch, batch_loss=17.5, bat

Validation:  61%|▌| 450/743 [1:47:40<1:11:56, 14.73s/batch, batch_loss=17.5, bat

Validation:  61%|▌| 450/743 [1:47:54<1:11:56, 14.73s/batch, batch_loss=12.9, bat

Validation:  61%|▌| 451/743 [1:47:54<1:10:26, 14.48s/batch, batch_loss=12.9, bat

Validation:  61%|▌| 451/743 [1:48:07<1:10:26, 14.48s/batch, batch_loss=22.5, bat

Validation:  61%|▌| 452/743 [1:48:07<1:08:37, 14.15s/batch, batch_loss=22.5, bat

Validation:  61%|▌| 452/743 [1:48:21<1:08:37, 14.15s/batch, batch_loss=13.7, bat

Validation:  61%|▌| 453/743 [1:48:21<1:07:45, 14.02s/batch, batch_loss=13.7, bat

Validation:  61%|▌| 453/743 [1:48:35<1:07:45, 14.02s/batch, batch_loss=7.29, bat

Validation:  61%|▌| 454/743 [1:48:35<1:07:51, 14.09s/batch, batch_loss=7.29, bat

Validation:  61%|▌| 454/743 [1:48:49<1:07:51, 14.09s/batch, batch_loss=11.3, bat

Validation:  61%|▌| 455/743 [1:48:49<1:07:51, 14.14s/batch, batch_loss=11.3, bat

Validation:  61%|▌| 455/743 [1:49:03<1:07:51, 14.14s/batch, batch_loss=11, batch

Validation:  61%|▌| 456/743 [1:49:03<1:07:19, 14.07s/batch, batch_loss=11, batch

Validation:  61%|▌| 456/743 [1:49:17<1:07:19, 14.07s/batch, batch_loss=15.7, bat

Validation:  62%|▌| 457/743 [1:49:17<1:06:28, 13.95s/batch, batch_loss=15.7, bat

Validation:  62%|▌| 457/743 [1:49:31<1:06:28, 13.95s/batch, batch_loss=32, batch

Validation:  62%|▌| 458/743 [1:49:31<1:06:41, 14.04s/batch, batch_loss=32, batch

Validation:  62%|▌| 458/743 [1:49:45<1:06:41, 14.04s/batch, batch_loss=16, batch

Validation:  62%|▌| 459/743 [1:49:45<1:06:50, 14.12s/batch, batch_loss=16, batch

Validation:  62%|▌| 459/743 [1:49:58<1:06:50, 14.12s/batch, batch_loss=20.7, bat

Validation:  62%|▌| 460/743 [1:49:58<1:05:20, 13.85s/batch, batch_loss=20.7, bat

Validation:  62%|▌| 460/743 [1:50:13<1:05:20, 13.85s/batch, batch_loss=17.3, bat

Validation:  62%|▌| 461/743 [1:50:13<1:06:15, 14.10s/batch, batch_loss=17.3, bat

Validation:  62%|▌| 461/743 [1:50:27<1:06:15, 14.10s/batch, batch_loss=14.9, bat

Validation:  62%|▌| 462/743 [1:50:27<1:05:14, 13.93s/batch, batch_loss=14.9, bat

Validation:  62%|▌| 462/743 [1:50:40<1:05:14, 13.93s/batch, batch_loss=11.9, bat

Validation:  62%|▌| 463/743 [1:50:40<1:04:48, 13.89s/batch, batch_loss=11.9, bat

Validation:  62%|▌| 463/743 [1:50:57<1:04:48, 13.89s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:50:57<1:08:22, 14.71s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:51:11<1:08:22, 14.71s/batch, batch_loss=20.7, bat

Validation:  63%|▋| 465/743 [1:51:11<1:07:29, 14.56s/batch, batch_loss=20.7, bat

Validation:  63%|▋| 465/743 [1:51:26<1:07:29, 14.56s/batch, batch_loss=16.5, bat

Validation:  63%|▋| 466/743 [1:51:26<1:06:50, 14.48s/batch, batch_loss=16.5, bat

Validation:  63%|▋| 466/743 [1:51:40<1:06:50, 14.48s/batch, batch_loss=27.9, bat

Validation:  63%|▋| 467/743 [1:51:40<1:06:14, 14.40s/batch, batch_loss=27.9, bat

Validation:  63%|▋| 467/743 [1:51:53<1:06:14, 14.40s/batch, batch_loss=16.6, bat

Validation:  63%|▋| 468/743 [1:51:53<1:04:58, 14.18s/batch, batch_loss=16.6, bat

Validation:  63%|▋| 468/743 [1:52:08<1:04:58, 14.18s/batch, batch_loss=24.5, bat

Validation:  63%|▋| 469/743 [1:52:08<1:05:13, 14.28s/batch, batch_loss=24.5, bat

Validation:  63%|▋| 469/743 [1:52:22<1:05:13, 14.28s/batch, batch_loss=6.36e+4, 

Validation:  63%|▋| 470/743 [1:52:22<1:05:06, 14.31s/batch, batch_loss=6.36e+4, 

Validation:  63%|▋| 470/743 [1:52:37<1:05:06, 14.31s/batch, batch_loss=13.8, bat

Validation:  63%|▋| 471/743 [1:52:37<1:05:28, 14.44s/batch, batch_loss=13.8, bat

Validation:  63%|▋| 471/743 [1:52:52<1:05:28, 14.44s/batch, batch_loss=27.7, bat

Validation:  64%|▋| 472/743 [1:52:52<1:05:31, 14.51s/batch, batch_loss=27.7, bat

Validation:  64%|▋| 472/743 [1:53:06<1:05:31, 14.51s/batch, batch_loss=589, batc

Validation:  64%|▋| 473/743 [1:53:06<1:04:42, 14.38s/batch, batch_loss=589, batc

Validation:  64%|▋| 473/743 [1:53:22<1:04:42, 14.38s/batch, batch_loss=19.9, bat

Validation:  64%|▋| 474/743 [1:53:22<1:06:49, 14.91s/batch, batch_loss=19.9, bat

Validation:  64%|▋| 474/743 [1:53:36<1:06:49, 14.91s/batch, batch_loss=21.5, bat

Validation:  64%|▋| 475/743 [1:53:36<1:05:38, 14.69s/batch, batch_loss=21.5, bat

Validation:  64%|▋| 475/743 [1:53:52<1:05:38, 14.69s/batch, batch_loss=8.77, bat

Validation:  64%|▋| 476/743 [1:53:52<1:06:35, 14.96s/batch, batch_loss=8.77, bat

Validation:  64%|▋| 476/743 [1:54:06<1:06:35, 14.96s/batch, batch_loss=13.9, bat

Validation:  64%|▋| 477/743 [1:54:06<1:04:41, 14.59s/batch, batch_loss=13.9, bat

Validation:  64%|▋| 477/743 [1:54:19<1:04:41, 14.59s/batch, batch_loss=2.46e+3, 

Validation:  64%|▋| 478/743 [1:54:19<1:02:56, 14.25s/batch, batch_loss=2.46e+3, 

Validation:  64%|▋| 478/743 [1:54:33<1:02:56, 14.25s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [1:54:33<1:02:54, 14.30s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [1:54:47<1:02:54, 14.30s/batch, batch_loss=12.2, bat

Validation:  65%|▋| 480/743 [1:54:47<1:02:10, 14.18s/batch, batch_loss=12.2, bat

Validation:  65%|▋| 480/743 [1:55:01<1:02:10, 14.18s/batch, batch_loss=12.2, bat

Validation:  65%|▋| 481/743 [1:55:01<1:00:59, 13.97s/batch, batch_loss=12.2, bat

Validation:  65%|▋| 481/743 [1:55:14<1:00:59, 13.97s/batch, batch_loss=6.96e+3, 

Validation:  65%|▋| 482/743 [1:55:14<59:19, 13.64s/batch, batch_loss=6.96e+3, ba

Validation:  65%|▋| 482/743 [1:55:28<59:19, 13.64s/batch, batch_loss=20.1, batch

Validation:  65%|▋| 483/743 [1:55:28<59:28, 13.73s/batch, batch_loss=20.1, batch

Validation:  65%|▋| 483/743 [1:55:42<59:28, 13.73s/batch, batch_loss=2.31e+4, ba

Validation:  65%|▋| 484/743 [1:55:42<1:00:08, 13.93s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [1:55:57<1:00:08, 13.93s/batch, batch_loss=3.12e+4, 

Validation:  65%|▋| 485/743 [1:55:57<1:00:59, 14.18s/batch, batch_loss=3.12e+4, 

Validation:  65%|▋| 485/743 [1:56:09<1:00:59, 14.18s/batch, batch_loss=17.5, bat

Validation:  65%|▋| 486/743 [1:56:09<58:51, 13.74s/batch, batch_loss=17.5, batch

Validation:  65%|▋| 486/743 [1:56:24<58:51, 13.74s/batch, batch_loss=37.2, batch

Validation:  66%|▋| 487/743 [1:56:24<59:18, 13.90s/batch, batch_loss=37.2, batch

Validation:  66%|▋| 487/743 [1:56:37<59:18, 13.90s/batch, batch_loss=28.5, batch

Validation:  66%|▋| 488/743 [1:56:37<58:52, 13.85s/batch, batch_loss=28.5, batch

Validation:  66%|▋| 488/743 [1:56:51<58:52, 13.85s/batch, batch_loss=13.5, batch

Validation:  66%|▋| 489/743 [1:56:51<58:45, 13.88s/batch, batch_loss=13.5, batch

Validation:  66%|▋| 489/743 [1:57:06<58:45, 13.88s/batch, batch_loss=24.2, batch

Validation:  66%|▋| 490/743 [1:57:06<59:10, 14.03s/batch, batch_loss=24.2, batch

Validation:  66%|▋| 490/743 [1:57:20<59:10, 14.03s/batch, batch_loss=20, batch_i

Validation:  66%|▋| 491/743 [1:57:20<59:04, 14.06s/batch, batch_loss=20, batch_i

Validation:  66%|▋| 491/743 [1:57:34<59:04, 14.06s/batch, batch_loss=1.04e+3, ba

Validation:  66%|▋| 492/743 [1:57:34<58:44, 14.04s/batch, batch_loss=1.04e+3, ba

Validation:  66%|▋| 492/743 [1:57:49<58:44, 14.04s/batch, batch_loss=1.43e+4, ba

Validation:  66%|▋| 493/743 [1:57:49<59:14, 14.22s/batch, batch_loss=1.43e+4, ba

Validation:  66%|▋| 493/743 [1:58:02<59:14, 14.22s/batch, batch_loss=9.6, batch_

Validation:  66%|▋| 494/743 [1:58:02<58:09, 14.01s/batch, batch_loss=9.6, batch_

Validation:  66%|▋| 494/743 [1:58:16<58:09, 14.01s/batch, batch_loss=1.18e+4, ba

Validation:  67%|▋| 495/743 [1:58:16<57:41, 13.96s/batch, batch_loss=1.18e+4, ba

Validation:  67%|▋| 495/743 [1:58:30<57:41, 13.96s/batch, batch_loss=19.8, batch

Validation:  67%|▋| 496/743 [1:58:30<57:09, 13.89s/batch, batch_loss=19.8, batch

Validation:  67%|▋| 496/743 [1:58:44<57:09, 13.89s/batch, batch_loss=12.8, batch

Validation:  67%|▋| 497/743 [1:58:44<57:34, 14.04s/batch, batch_loss=12.8, batch

Validation:  67%|▋| 497/743 [1:58:59<57:34, 14.04s/batch, batch_loss=16.4, batch

Validation:  67%|▋| 498/743 [1:58:59<57:51, 14.17s/batch, batch_loss=16.4, batch

Validation:  67%|▋| 498/743 [1:59:12<57:51, 14.17s/batch, batch_loss=5.09, batch

Validation:  67%|▋| 499/743 [1:59:12<56:37, 13.93s/batch, batch_loss=5.09, batch

Validation:  67%|▋| 499/743 [1:59:28<56:37, 13.93s/batch, batch_loss=2.51e+4, ba

Validation:  67%|▋| 500/743 [1:59:28<59:09, 14.61s/batch, batch_loss=2.51e+4, ba

Validation:  67%|▋| 500/743 [1:59:42<59:09, 14.61s/batch, batch_loss=21.3, batch

Validation:  67%|▋| 501/743 [1:59:42<58:33, 14.52s/batch, batch_loss=21.3, batch

Validation:  67%|▋| 501/743 [1:59:57<58:33, 14.52s/batch, batch_loss=3.15e+3, ba

Validation:  68%|▋| 502/743 [1:59:57<58:09, 14.48s/batch, batch_loss=3.15e+3, ba

Validation:  68%|▋| 502/743 [2:00:11<58:09, 14.48s/batch, batch_loss=17.2, batch

Validation:  68%|▋| 503/743 [2:00:11<58:09, 14.54s/batch, batch_loss=17.2, batch

Validation:  68%|▋| 503/743 [2:00:26<58:09, 14.54s/batch, batch_loss=13.1, batch

Validation:  68%|▋| 504/743 [2:00:26<57:25, 14.41s/batch, batch_loss=13.1, batch

Validation:  68%|▋| 504/743 [2:00:40<57:25, 14.41s/batch, batch_loss=23.5, batch

Validation:  68%|▋| 505/743 [2:00:40<56:52, 14.34s/batch, batch_loss=23.5, batch

Validation:  68%|▋| 505/743 [2:00:54<56:52, 14.34s/batch, batch_loss=2.83e+3, ba

Validation:  68%|▋| 506/743 [2:00:54<56:26, 14.29s/batch, batch_loss=2.83e+3, ba

Validation:  68%|▋| 506/743 [2:01:11<56:26, 14.29s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:01:11<59:07, 15.03s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:01:25<59:07, 15.03s/batch, batch_loss=8.39e+3, ba

Validation:  68%|▋| 508/743 [2:01:25<58:33, 14.95s/batch, batch_loss=8.39e+3, ba

Validation:  68%|▋| 508/743 [2:01:40<58:33, 14.95s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:01:40<57:53, 14.85s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:01:53<57:53, 14.85s/batch, batch_loss=16.4, batch

Validation:  69%|▋| 510/743 [2:01:53<55:42, 14.34s/batch, batch_loss=16.4, batch

Validation:  69%|▋| 510/743 [2:02:07<55:42, 14.34s/batch, batch_loss=23.8, batch

Validation:  69%|▋| 511/743 [2:02:07<54:45, 14.16s/batch, batch_loss=23.8, batch

Validation:  69%|▋| 511/743 [2:02:21<54:45, 14.16s/batch, batch_loss=17.3, batch

Validation:  69%|▋| 512/743 [2:02:21<54:47, 14.23s/batch, batch_loss=17.3, batch

Validation:  69%|▋| 512/743 [2:02:37<54:47, 14.23s/batch, batch_loss=17.9, batch

Validation:  69%|▋| 513/743 [2:02:37<55:45, 14.54s/batch, batch_loss=17.9, batch

Validation:  69%|▋| 513/743 [2:02:51<55:45, 14.54s/batch, batch_loss=17.7, batch

Validation:  69%|▋| 514/743 [2:02:51<55:08, 14.45s/batch, batch_loss=17.7, batch

Validation:  69%|▋| 514/743 [2:03:08<55:08, 14.45s/batch, batch_loss=15.3, batch

Validation:  69%|▋| 515/743 [2:03:08<57:59, 15.26s/batch, batch_loss=15.3, batch

Validation:  69%|▋| 515/743 [2:03:22<57:59, 15.26s/batch, batch_loss=17, batch_i

Validation:  69%|▋| 516/743 [2:03:22<56:03, 14.82s/batch, batch_loss=17, batch_i

Validation:  69%|▋| 516/743 [2:03:36<56:03, 14.82s/batch, batch_loss=6.16e+4, ba

Validation:  70%|▋| 517/743 [2:03:36<55:34, 14.76s/batch, batch_loss=6.16e+4, ba

Validation:  70%|▋| 517/743 [2:03:51<55:34, 14.76s/batch, batch_loss=506, batch_

Validation:  70%|▋| 518/743 [2:03:51<54:48, 14.62s/batch, batch_loss=506, batch_

Validation:  70%|▋| 518/743 [2:04:05<54:48, 14.62s/batch, batch_loss=9.3, batch_

Validation:  70%|▋| 519/743 [2:04:05<54:45, 14.67s/batch, batch_loss=9.3, batch_

Validation:  70%|▋| 519/743 [2:04:21<54:45, 14.67s/batch, batch_loss=19.4, batch

Validation:  70%|▋| 520/743 [2:04:21<55:11, 14.85s/batch, batch_loss=19.4, batch

Validation:  70%|▋| 520/743 [2:04:36<55:11, 14.85s/batch, batch_loss=14.4, batch

Validation:  70%|▋| 521/743 [2:04:36<54:57, 14.85s/batch, batch_loss=14.4, batch

Validation:  70%|▋| 521/743 [2:04:49<54:57, 14.85s/batch, batch_loss=14.2, batch

Validation:  70%|▋| 522/743 [2:04:49<53:35, 14.55s/batch, batch_loss=14.2, batch

Validation:  70%|▋| 522/743 [2:05:03<53:35, 14.55s/batch, batch_loss=429, batch_

Validation:  70%|▋| 523/743 [2:05:03<52:45, 14.39s/batch, batch_loss=429, batch_

Validation:  70%|▋| 523/743 [2:05:18<52:45, 14.39s/batch, batch_loss=17.1, batch

Validation:  71%|▋| 524/743 [2:05:18<53:07, 14.55s/batch, batch_loss=17.1, batch

Validation:  71%|▋| 524/743 [2:05:33<53:07, 14.55s/batch, batch_loss=24.3, batch

Validation:  71%|▋| 525/743 [2:05:33<52:44, 14.52s/batch, batch_loss=24.3, batch

Validation:  71%|▋| 525/743 [2:05:47<52:44, 14.52s/batch, batch_loss=9.39, batch

Validation:  71%|▋| 526/743 [2:05:47<52:23, 14.48s/batch, batch_loss=9.39, batch

Validation:  71%|▋| 526/743 [2:06:01<52:23, 14.48s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:06:01<51:52, 14.41s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:06:15<51:52, 14.41s/batch, batch_loss=512, batch_

Validation:  71%|▋| 528/743 [2:06:15<50:49, 14.18s/batch, batch_loss=512, batch_

Validation:  71%|▋| 528/743 [2:06:30<50:49, 14.18s/batch, batch_loss=6.5e+3, bat

Validation:  71%|▋| 529/743 [2:06:30<50:49, 14.25s/batch, batch_loss=6.5e+3, bat

Validation:  71%|▋| 529/743 [2:06:46<50:49, 14.25s/batch, batch_loss=217, batch_

Validation:  71%|▋| 530/743 [2:06:46<52:55, 14.91s/batch, batch_loss=217, batch_

Validation:  71%|▋| 530/743 [2:07:00<52:55, 14.91s/batch, batch_loss=44, batch_i

Validation:  71%|▋| 531/743 [2:07:00<52:13, 14.78s/batch, batch_loss=44, batch_i

Validation:  71%|▋| 531/743 [2:07:14<52:13, 14.78s/batch, batch_loss=263, batch_

Validation:  72%|▋| 532/743 [2:07:14<51:05, 14.53s/batch, batch_loss=263, batch_

Validation:  72%|▋| 532/743 [2:07:28<51:05, 14.53s/batch, batch_loss=9.52, batch

Validation:  72%|▋| 533/743 [2:07:28<50:11, 14.34s/batch, batch_loss=9.52, batch

Validation:  72%|▋| 533/743 [2:07:42<50:11, 14.34s/batch, batch_loss=12.9, batch

Validation:  72%|▋| 534/743 [2:07:42<49:33, 14.23s/batch, batch_loss=12.9, batch

Validation:  72%|▋| 534/743 [2:07:55<49:33, 14.23s/batch, batch_loss=24.3, batch

Validation:  72%|▋| 535/743 [2:07:55<47:44, 13.77s/batch, batch_loss=24.3, batch

Validation:  72%|▋| 535/743 [2:08:08<47:44, 13.77s/batch, batch_loss=20.2, batch

Validation:  72%|▋| 536/743 [2:08:08<46:16, 13.41s/batch, batch_loss=20.2, batch

Validation:  72%|▋| 536/743 [2:08:20<46:16, 13.41s/batch, batch_loss=15.2, batch

Validation:  72%|▋| 537/743 [2:08:20<45:15, 13.18s/batch, batch_loss=15.2, batch

Validation:  72%|▋| 537/743 [2:08:34<45:15, 13.18s/batch, batch_loss=18.1, batch

Validation:  72%|▋| 538/743 [2:08:34<45:30, 13.32s/batch, batch_loss=18.1, batch

Validation:  72%|▋| 538/743 [2:08:49<45:30, 13.32s/batch, batch_loss=253, batch_

Validation:  73%|▋| 539/743 [2:08:49<47:13, 13.89s/batch, batch_loss=253, batch_

Validation:  73%|▋| 539/743 [2:09:03<47:13, 13.89s/batch, batch_loss=19.9, batch

Validation:  73%|▋| 540/743 [2:09:03<47:23, 14.01s/batch, batch_loss=19.9, batch

Validation:  73%|▋| 540/743 [2:09:18<47:23, 14.01s/batch, batch_loss=31.2, batch

Validation:  73%|▋| 541/743 [2:09:18<47:47, 14.19s/batch, batch_loss=31.2, batch

Validation:  73%|▋| 541/743 [2:09:32<47:47, 14.19s/batch, batch_loss=1.95e+3, ba

Validation:  73%|▋| 542/743 [2:09:32<47:31, 14.19s/batch, batch_loss=1.95e+3, ba

Validation:  73%|▋| 542/743 [2:09:46<47:31, 14.19s/batch, batch_loss=19.8, batch

Validation:  73%|▋| 543/743 [2:09:46<47:06, 14.13s/batch, batch_loss=19.8, batch

Validation:  73%|▋| 543/743 [2:09:59<47:06, 14.13s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:09:59<45:57, 13.86s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:10:14<45:57, 13.86s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:10:14<46:28, 14.08s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:10:28<46:28, 14.08s/batch, batch_loss=9.29, batch

Validation:  73%|▋| 546/743 [2:10:28<46:17, 14.10s/batch, batch_loss=9.29, batch

Validation:  73%|▋| 546/743 [2:10:41<46:17, 14.10s/batch, batch_loss=266, batch_

Validation:  74%|▋| 547/743 [2:10:41<45:15, 13.85s/batch, batch_loss=266, batch_

Validation:  74%|▋| 547/743 [2:10:56<45:15, 13.85s/batch, batch_loss=28.7, batch

Validation:  74%|▋| 548/743 [2:10:56<45:37, 14.04s/batch, batch_loss=28.7, batch

Validation:  74%|▋| 548/743 [2:11:13<45:37, 14.04s/batch, batch_loss=4.1e+3, bat

Validation:  74%|▋| 549/743 [2:11:13<48:21, 14.96s/batch, batch_loss=4.1e+3, bat

Validation:  74%|▋| 549/743 [2:11:27<48:21, 14.96s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:11:27<47:20, 14.72s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:11:42<47:20, 14.72s/batch, batch_loss=16.1, batch

Validation:  74%|▋| 551/743 [2:11:42<47:09, 14.74s/batch, batch_loss=16.1, batch

Validation:  74%|▋| 551/743 [2:11:56<47:09, 14.74s/batch, batch_loss=6.76e+3, ba

Validation:  74%|▋| 552/743 [2:11:56<46:27, 14.60s/batch, batch_loss=6.76e+3, ba

Validation:  74%|▋| 552/743 [2:12:10<46:27, 14.60s/batch, batch_loss=30.3, batch

Validation:  74%|▋| 553/743 [2:12:10<45:12, 14.28s/batch, batch_loss=30.3, batch

Validation:  74%|▋| 553/743 [2:12:24<45:12, 14.28s/batch, batch_loss=24.6, batch

Validation:  75%|▋| 554/743 [2:12:24<44:46, 14.21s/batch, batch_loss=24.6, batch

Validation:  75%|▋| 554/743 [2:12:38<44:46, 14.21s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:12:38<44:51, 14.32s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:12:52<44:51, 14.32s/batch, batch_loss=40.9, batch

Validation:  75%|▋| 556/743 [2:12:52<44:23, 14.24s/batch, batch_loss=40.9, batch

Validation:  75%|▋| 556/743 [2:13:09<44:23, 14.24s/batch, batch_loss=8.98, batch

Validation:  75%|▋| 557/743 [2:13:09<46:36, 15.04s/batch, batch_loss=8.98, batch

Validation:  75%|▋| 557/743 [2:13:24<46:36, 15.04s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:13:24<45:39, 14.81s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:13:38<45:39, 14.81s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:13:38<44:49, 14.62s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:13:52<44:49, 14.62s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:13:52<44:40, 14.65s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:14:07<44:40, 14.65s/batch, batch_loss=12.8, batch

Validation:  76%|▊| 561/743 [2:14:07<44:22, 14.63s/batch, batch_loss=12.8, batch

Validation:  76%|▊| 561/743 [2:14:21<44:22, 14.63s/batch, batch_loss=21.8, batch

Validation:  76%|▊| 562/743 [2:14:21<43:26, 14.40s/batch, batch_loss=21.8, batch

Validation:  76%|▊| 562/743 [2:14:35<43:26, 14.40s/batch, batch_loss=20.3, batch

Validation:  76%|▊| 563/743 [2:14:35<42:38, 14.21s/batch, batch_loss=20.3, batch

Validation:  76%|▊| 563/743 [2:14:49<42:38, 14.21s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:14:49<42:14, 14.16s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:15:02<42:14, 14.16s/batch, batch_loss=3.7e+3, bat

Validation:  76%|▊| 565/743 [2:15:02<41:25, 13.97s/batch, batch_loss=3.7e+3, bat

Validation:  76%|▊| 565/743 [2:15:19<41:25, 13.97s/batch, batch_loss=11.8, batch

Validation:  76%|▊| 566/743 [2:15:19<43:19, 14.69s/batch, batch_loss=11.8, batch

Validation:  76%|▊| 566/743 [2:15:33<43:19, 14.69s/batch, batch_loss=18.3, batch

Validation:  76%|▊| 567/743 [2:15:33<42:27, 14.47s/batch, batch_loss=18.3, batch

Validation:  76%|▊| 567/743 [2:15:46<42:27, 14.47s/batch, batch_loss=13.4, batch

Validation:  76%|▊| 568/743 [2:15:46<41:30, 14.23s/batch, batch_loss=13.4, batch

Validation:  76%|▊| 568/743 [2:16:00<41:30, 14.23s/batch, batch_loss=17.4, batch

Validation:  77%|▊| 569/743 [2:16:00<41:15, 14.23s/batch, batch_loss=17.4, batch

Validation:  77%|▊| 569/743 [2:16:15<41:15, 14.23s/batch, batch_loss=19.1, batch

Validation:  77%|▊| 570/743 [2:16:15<41:14, 14.31s/batch, batch_loss=19.1, batch

Validation:  77%|▊| 570/743 [2:16:29<41:14, 14.31s/batch, batch_loss=11.8, batch

Validation:  77%|▊| 571/743 [2:16:29<40:58, 14.29s/batch, batch_loss=11.8, batch

Validation:  77%|▊| 571/743 [2:16:43<40:58, 14.29s/batch, batch_loss=29.1, batch

Validation:  77%|▊| 572/743 [2:16:43<40:29, 14.21s/batch, batch_loss=29.1, batch

Validation:  77%|▊| 572/743 [2:16:57<40:29, 14.21s/batch, batch_loss=14.8, batch

Validation:  77%|▊| 573/743 [2:16:57<40:12, 14.19s/batch, batch_loss=14.8, batch

Validation:  77%|▊| 573/743 [2:17:12<40:12, 14.19s/batch, batch_loss=17.1, batch

Validation:  77%|▊| 574/743 [2:17:12<40:11, 14.27s/batch, batch_loss=17.1, batch

Validation:  77%|▊| 574/743 [2:17:25<40:11, 14.27s/batch, batch_loss=15.2, batch

Validation:  77%|▊| 575/743 [2:17:25<39:14, 14.02s/batch, batch_loss=15.2, batch

Validation:  77%|▊| 575/743 [2:17:40<39:14, 14.02s/batch, batch_loss=25.8, batch

Validation:  78%|▊| 576/743 [2:17:40<39:14, 14.10s/batch, batch_loss=25.8, batch

Validation:  78%|▊| 576/743 [2:17:54<39:14, 14.10s/batch, batch_loss=27.5, batch

Validation:  78%|▊| 577/743 [2:17:54<39:01, 14.11s/batch, batch_loss=27.5, batch

Validation:  78%|▊| 577/743 [2:18:08<39:01, 14.11s/batch, batch_loss=29.5, batch

Validation:  78%|▊| 578/743 [2:18:08<38:52, 14.14s/batch, batch_loss=29.5, batch

Validation:  78%|▊| 578/743 [2:18:22<38:52, 14.14s/batch, batch_loss=321, batch_

Validation:  78%|▊| 579/743 [2:18:22<38:45, 14.18s/batch, batch_loss=321, batch_

Validation:  78%|▊| 579/743 [2:18:36<38:45, 14.18s/batch, batch_loss=8.03, batch

Validation:  78%|▊| 580/743 [2:18:36<38:17, 14.10s/batch, batch_loss=8.03, batch

Validation:  78%|▊| 580/743 [2:18:51<38:17, 14.10s/batch, batch_loss=12, batch_i

Validation:  78%|▊| 581/743 [2:18:51<38:22, 14.22s/batch, batch_loss=12, batch_i

Validation:  78%|▊| 581/743 [2:19:07<38:22, 14.22s/batch, batch_loss=17.7, batch

Validation:  78%|▊| 582/743 [2:19:07<40:17, 15.02s/batch, batch_loss=17.7, batch

Validation:  78%|▊| 582/743 [2:19:22<40:17, 15.02s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:19:22<39:43, 14.90s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:19:36<39:43, 14.90s/batch, batch_loss=1.97, batch

Validation:  79%|▊| 584/743 [2:19:36<38:39, 14.59s/batch, batch_loss=1.97, batch

Validation:  79%|▊| 584/743 [2:19:50<38:39, 14.59s/batch, batch_loss=21.2, batch

Validation:  79%|▊| 585/743 [2:19:50<37:37, 14.29s/batch, batch_loss=21.2, batch

Validation:  79%|▊| 585/743 [2:20:03<37:37, 14.29s/batch, batch_loss=552, batch_

Validation:  79%|▊| 586/743 [2:20:03<36:59, 14.14s/batch, batch_loss=552, batch_

Validation:  79%|▊| 586/743 [2:20:17<36:59, 14.14s/batch, batch_loss=11.5, batch

Validation:  79%|▊| 587/743 [2:20:17<36:25, 14.01s/batch, batch_loss=11.5, batch

Validation:  79%|▊| 587/743 [2:20:31<36:25, 14.01s/batch, batch_loss=405, batch_

Validation:  79%|▊| 588/743 [2:20:31<35:50, 13.87s/batch, batch_loss=405, batch_

Validation:  79%|▊| 588/743 [2:20:44<35:50, 13.87s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:20:44<35:31, 13.84s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:20:59<35:31, 13.84s/batch, batch_loss=21.1, batch

Validation:  79%|▊| 590/743 [2:20:59<35:53, 14.08s/batch, batch_loss=21.1, batch

Validation:  79%|▊| 590/743 [2:21:13<35:53, 14.08s/batch, batch_loss=14.5, batch

Validation:  80%|▊| 591/743 [2:21:13<35:33, 14.03s/batch, batch_loss=14.5, batch

Validation:  80%|▊| 591/743 [2:21:27<35:33, 14.03s/batch, batch_loss=13.5, batch

Validation:  80%|▊| 592/743 [2:21:27<35:24, 14.07s/batch, batch_loss=13.5, batch

Validation:  80%|▊| 592/743 [2:21:42<35:24, 14.07s/batch, batch_loss=2.38e+4, ba

Validation:  80%|▊| 593/743 [2:21:42<35:48, 14.32s/batch, batch_loss=2.38e+4, ba

Validation:  80%|▊| 593/743 [2:21:57<35:48, 14.32s/batch, batch_loss=3.73, batch

Validation:  80%|▊| 594/743 [2:21:57<35:48, 14.42s/batch, batch_loss=3.73, batch

Validation:  80%|▊| 594/743 [2:22:11<35:48, 14.42s/batch, batch_loss=6.52, batch

Validation:  80%|▊| 595/743 [2:22:11<35:21, 14.34s/batch, batch_loss=6.52, batch

Validation:  80%|▊| 595/743 [2:22:26<35:21, 14.34s/batch, batch_loss=7.78, batch

Validation:  80%|▊| 596/743 [2:22:26<35:41, 14.56s/batch, batch_loss=7.78, batch

Validation:  80%|▊| 596/743 [2:22:40<35:41, 14.56s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:22:40<35:23, 14.54s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:22:54<35:23, 14.54s/batch, batch_loss=14.3, batch

Validation:  80%|▊| 598/743 [2:22:54<34:41, 14.35s/batch, batch_loss=14.3, batch

Validation:  80%|▊| 598/743 [2:23:10<34:41, 14.35s/batch, batch_loss=17.2, batch

Validation:  81%|▊| 599/743 [2:23:10<35:33, 14.81s/batch, batch_loss=17.2, batch

Validation:  81%|▊| 599/743 [2:23:24<35:33, 14.81s/batch, batch_loss=23, batch_i

Validation:  81%|▊| 600/743 [2:23:24<34:52, 14.63s/batch, batch_loss=23, batch_i

Validation:  81%|▊| 600/743 [2:23:39<34:52, 14.63s/batch, batch_loss=14.3, batch

Validation:  81%|▊| 601/743 [2:23:39<34:34, 14.61s/batch, batch_loss=14.3, batch

Validation:  81%|▊| 601/743 [2:23:54<34:34, 14.61s/batch, batch_loss=19, batch_i

Validation:  81%|▊| 602/743 [2:23:54<34:28, 14.67s/batch, batch_loss=19, batch_i

Validation:  81%|▊| 602/743 [2:24:07<34:28, 14.67s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:24:07<33:22, 14.30s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:24:21<33:22, 14.30s/batch, batch_loss=22.6, batch

Validation:  81%|▊| 604/743 [2:24:21<32:55, 14.21s/batch, batch_loss=22.6, batch

Validation:  81%|▊| 604/743 [2:24:35<32:55, 14.21s/batch, batch_loss=30, batch_i

Validation:  81%|▊| 605/743 [2:24:35<32:33, 14.16s/batch, batch_loss=30, batch_i

Validation:  81%|▊| 605/743 [2:24:52<32:33, 14.16s/batch, batch_loss=257, batch_

Validation:  82%|▊| 606/743 [2:24:52<33:53, 14.84s/batch, batch_loss=257, batch_

Validation:  82%|▊| 606/743 [2:25:06<33:53, 14.84s/batch, batch_loss=30.8, batch

Validation:  82%|▊| 607/743 [2:25:06<33:29, 14.77s/batch, batch_loss=30.8, batch

Validation:  82%|▊| 607/743 [2:25:20<33:29, 14.77s/batch, batch_loss=25.7, batch

Validation:  82%|▊| 608/743 [2:25:20<32:28, 14.44s/batch, batch_loss=25.7, batch

Validation:  82%|▊| 608/743 [2:25:34<32:28, 14.44s/batch, batch_loss=18.8, batch

Validation:  82%|▊| 609/743 [2:25:34<32:01, 14.34s/batch, batch_loss=18.8, batch

Validation:  82%|▊| 609/743 [2:25:48<32:01, 14.34s/batch, batch_loss=20.1, batch

Validation:  82%|▊| 610/743 [2:25:48<31:44, 14.32s/batch, batch_loss=20.1, batch

Validation:  82%|▊| 610/743 [2:26:02<31:44, 14.32s/batch, batch_loss=19.4, batch

Validation:  82%|▊| 611/743 [2:26:02<31:20, 14.25s/batch, batch_loss=19.4, batch

Validation:  82%|▊| 611/743 [2:26:16<31:20, 14.25s/batch, batch_loss=10.6, batch

Validation:  82%|▊| 612/743 [2:26:16<30:51, 14.13s/batch, batch_loss=10.6, batch

Validation:  82%|▊| 612/743 [2:26:30<30:51, 14.13s/batch, batch_loss=15.7, batch

Validation:  83%|▊| 613/743 [2:26:30<30:11, 13.94s/batch, batch_loss=15.7, batch

Validation:  83%|▊| 613/743 [2:26:43<30:11, 13.94s/batch, batch_loss=5.61e+3, ba

Validation:  83%|▊| 614/743 [2:26:43<29:51, 13.89s/batch, batch_loss=5.61e+3, ba

Validation:  83%|▊| 614/743 [2:26:59<29:51, 13.89s/batch, batch_loss=12.4, batch

Validation:  83%|▊| 615/743 [2:26:59<30:21, 14.23s/batch, batch_loss=12.4, batch

Validation:  83%|▊| 615/743 [2:27:12<30:21, 14.23s/batch, batch_loss=14.4, batch

Validation:  83%|▊| 616/743 [2:27:12<29:44, 14.05s/batch, batch_loss=14.4, batch

Validation:  83%|▊| 616/743 [2:27:26<29:44, 14.05s/batch, batch_loss=7.39, batch

Validation:  83%|▊| 617/743 [2:27:26<29:21, 13.98s/batch, batch_loss=7.39, batch

Validation:  83%|▊| 617/743 [2:27:40<29:21, 13.98s/batch, batch_loss=12.7, batch

Validation:  83%|▊| 618/743 [2:27:40<29:22, 14.10s/batch, batch_loss=12.7, batch

Validation:  83%|▊| 618/743 [2:27:54<29:22, 14.10s/batch, batch_loss=347, batch_

Validation:  83%|▊| 619/743 [2:27:54<28:58, 14.02s/batch, batch_loss=347, batch_

Validation:  83%|▊| 619/743 [2:28:08<28:58, 14.02s/batch, batch_loss=19.8, batch

Validation:  83%|▊| 620/743 [2:28:08<28:38, 13.97s/batch, batch_loss=19.8, batch

Validation:  83%|▊| 620/743 [2:28:22<28:38, 13.97s/batch, batch_loss=8.86, batch

Validation:  84%|▊| 621/743 [2:28:22<28:31, 14.03s/batch, batch_loss=8.86, batch

Validation:  84%|▊| 621/743 [2:28:37<28:31, 14.03s/batch, batch_loss=12.9, batch

Validation:  84%|▊| 622/743 [2:28:37<28:48, 14.28s/batch, batch_loss=12.9, batch

Validation:  84%|▊| 622/743 [2:28:51<28:48, 14.28s/batch, batch_loss=192, batch_

Validation:  84%|▊| 623/743 [2:28:51<28:35, 14.30s/batch, batch_loss=192, batch_

Validation:  84%|▊| 623/743 [2:29:06<28:35, 14.30s/batch, batch_loss=16.9, batch

Validation:  84%|▊| 624/743 [2:29:06<28:23, 14.31s/batch, batch_loss=16.9, batch

Validation:  84%|▊| 624/743 [2:29:19<28:23, 14.31s/batch, batch_loss=2.4e+3, bat

Validation:  84%|▊| 625/743 [2:29:19<27:39, 14.06s/batch, batch_loss=2.4e+3, bat

Validation:  84%|▊| 625/743 [2:29:34<27:39, 14.06s/batch, batch_loss=20.4, batch

Validation:  84%|▊| 626/743 [2:29:34<27:52, 14.29s/batch, batch_loss=20.4, batch

Validation:  84%|▊| 626/743 [2:29:48<27:52, 14.29s/batch, batch_loss=19.2, batch

Validation:  84%|▊| 627/743 [2:29:48<27:26, 14.19s/batch, batch_loss=19.2, batch

Validation:  84%|▊| 627/743 [2:30:02<27:26, 14.19s/batch, batch_loss=20.4, batch

Validation:  85%|▊| 628/743 [2:30:02<27:03, 14.12s/batch, batch_loss=20.4, batch

Validation:  85%|▊| 628/743 [2:30:16<27:03, 14.12s/batch, batch_loss=14.9, batch

Validation:  85%|▊| 629/743 [2:30:16<26:43, 14.06s/batch, batch_loss=14.9, batch

Validation:  85%|▊| 629/743 [2:30:30<26:43, 14.06s/batch, batch_loss=19.6, batch

Validation:  85%|▊| 630/743 [2:30:30<26:23, 14.02s/batch, batch_loss=19.6, batch

Validation:  85%|▊| 630/743 [2:30:44<26:23, 14.02s/batch, batch_loss=247, batch_

Validation:  85%|▊| 631/743 [2:30:44<26:04, 13.97s/batch, batch_loss=247, batch_

Validation:  85%|▊| 631/743 [2:30:58<26:04, 13.97s/batch, batch_loss=24, batch_i

Validation:  85%|▊| 632/743 [2:30:58<25:57, 14.04s/batch, batch_loss=24, batch_i

Validation:  85%|▊| 632/743 [2:31:13<25:57, 14.04s/batch, batch_loss=17.7, batch

Validation:  85%|▊| 633/743 [2:31:13<26:06, 14.24s/batch, batch_loss=17.7, batch

Validation:  85%|▊| 633/743 [2:31:26<26:06, 14.24s/batch, batch_loss=11.6, batch

Validation:  85%|▊| 634/743 [2:31:26<25:38, 14.11s/batch, batch_loss=11.6, batch

Validation:  85%|▊| 634/743 [2:31:41<25:38, 14.11s/batch, batch_loss=8.34, batch

Validation:  85%|▊| 635/743 [2:31:41<25:24, 14.12s/batch, batch_loss=8.34, batch

Validation:  85%|▊| 635/743 [2:31:55<25:24, 14.12s/batch, batch_loss=799, batch_

Validation:  86%|▊| 636/743 [2:31:55<25:22, 14.23s/batch, batch_loss=799, batch_

Validation:  86%|▊| 636/743 [2:32:09<25:22, 14.23s/batch, batch_loss=715, batch_

Validation:  86%|▊| 637/743 [2:32:09<25:02, 14.17s/batch, batch_loss=715, batch_

Validation:  86%|▊| 637/743 [2:32:26<25:02, 14.17s/batch, batch_loss=24.9, batch

Validation:  86%|▊| 638/743 [2:32:26<26:19, 15.05s/batch, batch_loss=24.9, batch

Validation:  86%|▊| 638/743 [2:32:41<26:19, 15.05s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:32:41<26:09, 15.09s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:32:57<26:09, 15.09s/batch, batch_loss=28.4, batch

Validation:  86%|▊| 640/743 [2:32:57<25:57, 15.12s/batch, batch_loss=28.4, batch

Validation:  86%|▊| 640/743 [2:33:11<25:57, 15.12s/batch, batch_loss=34.8, batch

Validation:  86%|▊| 641/743 [2:33:11<25:17, 14.87s/batch, batch_loss=34.8, batch

Validation:  86%|▊| 641/743 [2:33:25<25:17, 14.87s/batch, batch_loss=38.9, batch

Validation:  86%|▊| 642/743 [2:33:25<24:29, 14.55s/batch, batch_loss=38.9, batch

Validation:  86%|▊| 642/743 [2:33:38<24:29, 14.55s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [2:33:38<23:43, 14.23s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [2:33:52<23:43, 14.23s/batch, batch_loss=22.2, batch

Validation:  87%|▊| 644/743 [2:33:52<23:22, 14.16s/batch, batch_loss=22.2, batch

Validation:  87%|▊| 644/743 [2:34:06<23:22, 14.16s/batch, batch_loss=20.2, batch

Validation:  87%|▊| 645/743 [2:34:06<23:07, 14.16s/batch, batch_loss=20.2, batch

Validation:  87%|▊| 645/743 [2:34:20<23:07, 14.16s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [2:34:20<22:37, 14.00s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [2:34:34<22:37, 14.00s/batch, batch_loss=22.5, batch

Validation:  87%|▊| 647/743 [2:34:34<22:27, 14.03s/batch, batch_loss=22.5, batch

Validation:  87%|▊| 647/743 [2:34:49<22:27, 14.03s/batch, batch_loss=6.24, batch

Validation:  87%|▊| 648/743 [2:34:49<22:27, 14.18s/batch, batch_loss=6.24, batch

Validation:  87%|▊| 648/743 [2:35:03<22:27, 14.18s/batch, batch_loss=11.3, batch

Validation:  87%|▊| 649/743 [2:35:03<22:20, 14.26s/batch, batch_loss=11.3, batch

Validation:  87%|▊| 649/743 [2:35:17<22:20, 14.26s/batch, batch_loss=20.1, batch

Validation:  87%|▊| 650/743 [2:35:17<21:57, 14.17s/batch, batch_loss=20.1, batch

Validation:  87%|▊| 650/743 [2:35:30<21:57, 14.17s/batch, batch_loss=24.9, batch

Validation:  88%|▉| 651/743 [2:35:31<21:27, 13.99s/batch, batch_loss=24.9, batch

Validation:  88%|▉| 651/743 [2:35:44<21:27, 13.99s/batch, batch_loss=28.9, batch

Validation:  88%|▉| 652/743 [2:35:44<21:11, 13.97s/batch, batch_loss=28.9, batch

Validation:  88%|▉| 652/743 [2:35:58<21:11, 13.97s/batch, batch_loss=15, batch_i

Validation:  88%|▉| 653/743 [2:35:58<20:53, 13.93s/batch, batch_loss=15, batch_i

Validation:  88%|▉| 653/743 [2:36:13<20:53, 13.93s/batch, batch_loss=28.7, batch

Validation:  88%|▉| 654/743 [2:36:13<20:55, 14.11s/batch, batch_loss=28.7, batch

Validation:  88%|▉| 654/743 [2:36:27<20:55, 14.11s/batch, batch_loss=30.5, batch

Validation:  88%|▉| 655/743 [2:36:27<20:56, 14.27s/batch, batch_loss=30.5, batch

Validation:  88%|▉| 655/743 [2:36:42<20:56, 14.27s/batch, batch_loss=17, batch_i

Validation:  88%|▉| 656/743 [2:36:42<20:39, 14.25s/batch, batch_loss=17, batch_i

Validation:  88%|▉| 656/743 [2:36:56<20:39, 14.25s/batch, batch_loss=14.4, batch

Validation:  88%|▉| 657/743 [2:36:56<20:27, 14.28s/batch, batch_loss=14.4, batch

Validation:  88%|▉| 657/743 [2:37:13<20:27, 14.28s/batch, batch_loss=18.3, batch

Validation:  89%|▉| 658/743 [2:37:13<21:16, 15.02s/batch, batch_loss=18.3, batch

Validation:  89%|▉| 658/743 [2:37:28<21:16, 15.02s/batch, batch_loss=31.6, batch

Validation:  89%|▉| 659/743 [2:37:28<20:59, 15.00s/batch, batch_loss=31.6, batch

Validation:  89%|▉| 659/743 [2:37:43<20:59, 15.00s/batch, batch_loss=26.4, batch

Validation:  89%|▉| 660/743 [2:37:43<20:57, 15.15s/batch, batch_loss=26.4, batch

Validation:  89%|▉| 660/743 [2:37:56<20:57, 15.15s/batch, batch_loss=19.5, batch

Validation:  89%|▉| 661/743 [2:37:56<19:43, 14.43s/batch, batch_loss=19.5, batch

Validation:  89%|▉| 661/743 [2:38:08<19:43, 14.43s/batch, batch_loss=8.06, batch

Validation:  89%|▉| 662/743 [2:38:08<18:33, 13.75s/batch, batch_loss=8.06, batch

Validation:  89%|▉| 662/743 [2:38:21<18:33, 13.75s/batch, batch_loss=3.59e+3, ba

Validation:  89%|▉| 663/743 [2:38:21<18:04, 13.55s/batch, batch_loss=3.59e+3, ba

Validation:  89%|▉| 663/743 [2:38:36<18:04, 13.55s/batch, batch_loss=17.8, batch

Validation:  89%|▉| 664/743 [2:38:36<18:24, 13.98s/batch, batch_loss=17.8, batch

Validation:  89%|▉| 664/743 [2:38:54<18:24, 13.98s/batch, batch_loss=23.2, batch

Validation:  90%|▉| 665/743 [2:38:54<19:33, 15.04s/batch, batch_loss=23.2, batch

Validation:  90%|▉| 665/743 [2:39:07<19:33, 15.04s/batch, batch_loss=12.2, batch

Validation:  90%|▉| 666/743 [2:39:07<18:49, 14.67s/batch, batch_loss=12.2, batch

Validation:  90%|▉| 666/743 [2:39:21<18:49, 14.67s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:39:21<18:10, 14.35s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:39:36<18:10, 14.35s/batch, batch_loss=20.1, batch

Validation:  90%|▉| 668/743 [2:39:36<17:58, 14.38s/batch, batch_loss=20.1, batch

Validation:  90%|▉| 668/743 [2:39:50<17:58, 14.38s/batch, batch_loss=25.7, batch

Validation:  90%|▉| 669/743 [2:39:50<17:41, 14.34s/batch, batch_loss=25.7, batch

Validation:  90%|▉| 669/743 [2:40:05<17:41, 14.34s/batch, batch_loss=28.7, batch

Validation:  90%|▉| 670/743 [2:40:05<17:35, 14.46s/batch, batch_loss=28.7, batch

Validation:  90%|▉| 670/743 [2:40:18<17:35, 14.46s/batch, batch_loss=3.1e+3, bat

Validation:  90%|▉| 671/743 [2:40:18<16:55, 14.10s/batch, batch_loss=3.1e+3, bat

Validation:  90%|▉| 671/743 [2:40:32<16:55, 14.10s/batch, batch_loss=22.2, batch

Validation:  90%|▉| 672/743 [2:40:32<16:44, 14.15s/batch, batch_loss=22.2, batch

Validation:  90%|▉| 672/743 [2:40:49<16:44, 14.15s/batch, batch_loss=18.4, batch

Validation:  91%|▉| 673/743 [2:40:49<17:36, 15.09s/batch, batch_loss=18.4, batch

Validation:  91%|▉| 673/743 [2:41:04<17:36, 15.09s/batch, batch_loss=13.7, batch

Validation:  91%|▉| 674/743 [2:41:04<17:10, 14.94s/batch, batch_loss=13.7, batch

Validation:  91%|▉| 674/743 [2:41:18<17:10, 14.94s/batch, batch_loss=23.1, batch

Validation:  91%|▉| 675/743 [2:41:18<16:41, 14.73s/batch, batch_loss=23.1, batch

Validation:  91%|▉| 675/743 [2:41:33<16:41, 14.73s/batch, batch_loss=23.8, batch

Validation:  91%|▉| 676/743 [2:41:33<16:28, 14.75s/batch, batch_loss=23.8, batch

Validation:  91%|▉| 676/743 [2:41:48<16:28, 14.75s/batch, batch_loss=21.9, batch

Validation:  91%|▉| 677/743 [2:41:48<16:12, 14.73s/batch, batch_loss=21.9, batch

Validation:  91%|▉| 677/743 [2:42:02<16:12, 14.73s/batch, batch_loss=16.5, batch

Validation:  91%|▉| 678/743 [2:42:02<15:47, 14.57s/batch, batch_loss=16.5, batch

Validation:  91%|▉| 678/743 [2:42:17<15:47, 14.57s/batch, batch_loss=15.7, batch

Validation:  91%|▉| 679/743 [2:42:17<15:36, 14.63s/batch, batch_loss=15.7, batch

Validation:  91%|▉| 679/743 [2:42:31<15:36, 14.63s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 680/743 [2:42:31<15:18, 14.58s/batch, batch_loss=23.4, batch

Validation:  92%|▉| 680/743 [2:42:48<15:18, 14.58s/batch, batch_loss=26.7, batch

Validation:  92%|▉| 681/743 [2:42:48<15:40, 15.17s/batch, batch_loss=26.7, batch

Validation:  92%|▉| 681/743 [2:43:02<15:40, 15.17s/batch, batch_loss=38.3, batch

Validation:  92%|▉| 682/743 [2:43:02<15:03, 14.81s/batch, batch_loss=38.3, batch

Validation:  92%|▉| 682/743 [2:43:16<15:03, 14.81s/batch, batch_loss=26.8, batch

Validation:  92%|▉| 683/743 [2:43:16<14:38, 14.65s/batch, batch_loss=26.8, batch

Validation:  92%|▉| 683/743 [2:43:30<14:38, 14.65s/batch, batch_loss=14.4, batch

Validation:  92%|▉| 684/743 [2:43:30<14:07, 14.37s/batch, batch_loss=14.4, batch

Validation:  92%|▉| 684/743 [2:43:44<14:07, 14.37s/batch, batch_loss=16.6, batch

Validation:  92%|▉| 685/743 [2:43:44<13:56, 14.43s/batch, batch_loss=16.6, batch

Validation:  92%|▉| 685/743 [2:43:58<13:56, 14.43s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:43:58<13:38, 14.36s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [2:44:13<13:38, 14.36s/batch, batch_loss=24.2, batch

Validation:  92%|▉| 687/743 [2:44:13<13:32, 14.51s/batch, batch_loss=24.2, batch

Validation:  92%|▉| 687/743 [2:44:27<13:32, 14.51s/batch, batch_loss=14.9, batch

Validation:  93%|▉| 688/743 [2:44:27<13:12, 14.42s/batch, batch_loss=14.9, batch

Validation:  93%|▉| 688/743 [2:44:42<13:12, 14.42s/batch, batch_loss=17.2, batch

Validation:  93%|▉| 689/743 [2:44:42<12:54, 14.34s/batch, batch_loss=17.2, batch

Validation:  93%|▉| 689/743 [2:44:58<12:54, 14.34s/batch, batch_loss=22.9, batch

Validation:  93%|▉| 690/743 [2:44:58<13:19, 15.08s/batch, batch_loss=22.9, batch

Validation:  93%|▉| 690/743 [2:45:13<13:19, 15.08s/batch, batch_loss=14.1, batch

Validation:  93%|▉| 691/743 [2:45:13<12:49, 14.80s/batch, batch_loss=14.1, batch

Validation:  93%|▉| 691/743 [2:45:27<12:49, 14.80s/batch, batch_loss=24.2, batch

Validation:  93%|▉| 692/743 [2:45:27<12:26, 14.63s/batch, batch_loss=24.2, batch

Validation:  93%|▉| 692/743 [2:45:41<12:26, 14.63s/batch, batch_loss=24.4, batch

Validation:  93%|▉| 693/743 [2:45:41<12:07, 14.55s/batch, batch_loss=24.4, batch

Validation:  93%|▉| 693/743 [2:45:56<12:07, 14.55s/batch, batch_loss=32.6, batch

Validation:  93%|▉| 694/743 [2:45:56<11:53, 14.55s/batch, batch_loss=32.6, batch

Validation:  93%|▉| 694/743 [2:46:10<11:53, 14.55s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [2:46:10<11:40, 14.60s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [2:46:25<11:40, 14.60s/batch, batch_loss=8.33, batch

Validation:  94%|▉| 696/743 [2:46:25<11:22, 14.52s/batch, batch_loss=8.33, batch

Validation:  94%|▉| 696/743 [2:46:40<11:22, 14.52s/batch, batch_loss=35.9, batch

Validation:  94%|▉| 697/743 [2:46:40<11:14, 14.67s/batch, batch_loss=35.9, batch

Validation:  94%|▉| 697/743 [2:46:54<11:14, 14.67s/batch, batch_loss=752, batch_

Validation:  94%|▉| 698/743 [2:46:54<10:49, 14.43s/batch, batch_loss=752, batch_

Validation:  94%|▉| 698/743 [2:47:10<10:49, 14.43s/batch, batch_loss=7.28, batch

Validation:  94%|▉| 699/743 [2:47:10<10:59, 14.99s/batch, batch_loss=7.28, batch

Validation:  94%|▉| 699/743 [2:47:23<10:59, 14.99s/batch, batch_loss=948, batch_

Validation:  94%|▉| 700/743 [2:47:23<10:25, 14.54s/batch, batch_loss=948, batch_

Validation:  94%|▉| 700/743 [2:47:38<10:25, 14.54s/batch, batch_loss=7.39, batch

Validation:  94%|▉| 701/743 [2:47:38<10:11, 14.56s/batch, batch_loss=7.39, batch

Validation:  94%|▉| 701/743 [2:47:52<10:11, 14.56s/batch, batch_loss=7.77, batch

Validation:  94%|▉| 702/743 [2:47:52<09:44, 14.25s/batch, batch_loss=7.77, batch

Validation:  94%|▉| 702/743 [2:48:06<09:44, 14.25s/batch, batch_loss=177, batch_

Validation:  95%|▉| 703/743 [2:48:06<09:27, 14.18s/batch, batch_loss=177, batch_

Validation:  95%|▉| 703/743 [2:48:19<09:27, 14.18s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [2:48:19<09:06, 14.02s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [2:48:33<09:06, 14.02s/batch, batch_loss=9.06, batch

Validation:  95%|▉| 705/743 [2:48:33<08:53, 14.04s/batch, batch_loss=9.06, batch

Validation:  95%|▉| 705/743 [2:48:47<08:53, 14.04s/batch, batch_loss=18.7, batch

Validation:  95%|▉| 706/743 [2:48:47<08:40, 14.07s/batch, batch_loss=18.7, batch

Validation:  95%|▉| 706/743 [2:49:02<08:40, 14.07s/batch, batch_loss=416, batch_

Validation:  95%|▉| 707/743 [2:49:02<08:28, 14.11s/batch, batch_loss=416, batch_

Validation:  95%|▉| 707/743 [2:49:16<08:28, 14.11s/batch, batch_loss=18.5, batch

Validation:  95%|▉| 708/743 [2:49:16<08:12, 14.06s/batch, batch_loss=18.5, batch

Validation:  95%|▉| 708/743 [2:49:30<08:12, 14.06s/batch, batch_loss=31.4, batch

Validation:  95%|▉| 709/743 [2:49:30<08:00, 14.12s/batch, batch_loss=31.4, batch

Validation:  95%|▉| 709/743 [2:49:44<08:00, 14.12s/batch, batch_loss=17.3, batch

Validation:  96%|▉| 710/743 [2:49:44<07:49, 14.22s/batch, batch_loss=17.3, batch

Validation:  96%|▉| 710/743 [2:49:58<07:49, 14.22s/batch, batch_loss=18.2, batch

Validation:  96%|▉| 711/743 [2:49:58<07:26, 13.96s/batch, batch_loss=18.2, batch

Validation:  96%|▉| 711/743 [2:50:12<07:26, 13.96s/batch, batch_loss=24.8, batch

Validation:  96%|▉| 712/743 [2:50:12<07:17, 14.10s/batch, batch_loss=24.8, batch

Validation:  96%|▉| 712/743 [2:50:26<07:17, 14.10s/batch, batch_loss=17.5, batch

Validation:  96%|▉| 713/743 [2:50:26<07:05, 14.19s/batch, batch_loss=17.5, batch

Validation:  96%|▉| 713/743 [2:50:40<07:05, 14.19s/batch, batch_loss=7.71, batch

Validation:  96%|▉| 714/743 [2:50:40<06:46, 14.00s/batch, batch_loss=7.71, batch

Validation:  96%|▉| 714/743 [2:50:57<06:46, 14.00s/batch, batch_loss=11.1, batch

Validation:  96%|▉| 715/743 [2:50:57<06:54, 14.80s/batch, batch_loss=11.1, batch

Validation:  96%|▉| 715/743 [2:51:11<06:54, 14.80s/batch, batch_loss=21.4, batch

Validation:  96%|▉| 716/743 [2:51:11<06:33, 14.57s/batch, batch_loss=21.4, batch

Validation:  96%|▉| 716/743 [2:51:25<06:33, 14.57s/batch, batch_loss=391, batch_

Validation:  97%|▉| 717/743 [2:51:25<06:17, 14.51s/batch, batch_loss=391, batch_

Validation:  97%|▉| 717/743 [2:51:39<06:17, 14.51s/batch, batch_loss=26, batch_i

Validation:  97%|▉| 718/743 [2:51:39<06:00, 14.42s/batch, batch_loss=26, batch_i

Validation:  97%|▉| 718/743 [2:51:53<06:00, 14.42s/batch, batch_loss=24.3, batch

Validation:  97%|▉| 719/743 [2:51:53<05:37, 14.08s/batch, batch_loss=24.3, batch

Validation:  97%|▉| 719/743 [2:52:07<05:37, 14.08s/batch, batch_loss=21.6, batch

Validation:  97%|▉| 720/743 [2:52:07<05:24, 14.09s/batch, batch_loss=21.6, batch

Validation:  97%|▉| 720/743 [2:52:21<05:24, 14.09s/batch, batch_loss=12.2, batch

Validation:  97%|▉| 721/743 [2:52:21<05:08, 14.02s/batch, batch_loss=12.2, batch

Validation:  97%|▉| 721/743 [2:52:35<05:08, 14.02s/batch, batch_loss=29.4, batch

Validation:  97%|▉| 722/743 [2:52:35<04:55, 14.08s/batch, batch_loss=29.4, batch

Validation:  97%|▉| 722/743 [2:52:49<04:55, 14.08s/batch, batch_loss=5.3e+3, bat

Validation:  97%|▉| 723/743 [2:52:49<04:39, 14.00s/batch, batch_loss=5.3e+3, bat

Validation:  97%|▉| 723/743 [2:53:02<04:39, 14.00s/batch, batch_loss=22.4, batch

Validation:  97%|▉| 724/743 [2:53:02<04:24, 13.95s/batch, batch_loss=22.4, batch

Validation:  97%|▉| 724/743 [2:53:19<04:24, 13.95s/batch, batch_loss=15.8, batch

Validation:  98%|▉| 725/743 [2:53:19<04:26, 14.78s/batch, batch_loss=15.8, batch

Validation:  98%|▉| 725/743 [2:53:33<04:26, 14.78s/batch, batch_loss=20.8, batch

Validation:  98%|▉| 726/743 [2:53:33<04:07, 14.57s/batch, batch_loss=20.8, batch

Validation:  98%|▉| 726/743 [2:53:47<04:07, 14.57s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:53:47<03:49, 14.37s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:54:00<03:49, 14.37s/batch, batch_loss=33.5, batch

Validation:  98%|▉| 728/743 [2:54:00<03:29, 14.00s/batch, batch_loss=33.5, batch

Validation:  98%|▉| 728/743 [2:54:15<03:29, 14.00s/batch, batch_loss=32.9, batch

Validation:  98%|▉| 729/743 [2:54:15<03:18, 14.17s/batch, batch_loss=32.9, batch

Validation:  98%|▉| 729/743 [2:54:29<03:18, 14.17s/batch, batch_loss=25, batch_i

Validation:  98%|▉| 730/743 [2:54:29<03:04, 14.23s/batch, batch_loss=25, batch_i

Validation:  98%|▉| 730/743 [2:54:44<03:04, 14.23s/batch, batch_loss=14.7, batch

Validation:  98%|▉| 731/743 [2:54:44<02:53, 14.48s/batch, batch_loss=14.7, batch

Validation:  98%|▉| 731/743 [2:54:59<02:53, 14.48s/batch, batch_loss=10.5, batch

Validation:  99%|▉| 732/743 [2:54:59<02:40, 14.58s/batch, batch_loss=10.5, batch

Validation:  99%|▉| 732/743 [2:55:13<02:40, 14.58s/batch, batch_loss=30, batch_i

Validation:  99%|▉| 733/743 [2:55:13<02:25, 14.52s/batch, batch_loss=30, batch_i

Validation:  99%|▉| 733/743 [2:55:28<02:25, 14.52s/batch, batch_loss=3.44, batch

Validation:  99%|▉| 734/743 [2:55:28<02:10, 14.47s/batch, batch_loss=3.44, batch

Validation:  99%|▉| 734/743 [2:55:42<02:10, 14.47s/batch, batch_loss=7.03, batch

Validation:  99%|▉| 735/743 [2:55:42<01:55, 14.40s/batch, batch_loss=7.03, batch

Validation:  99%|▉| 735/743 [2:55:57<01:55, 14.40s/batch, batch_loss=1.19, batch

Validation:  99%|▉| 736/743 [2:55:57<01:41, 14.50s/batch, batch_loss=1.19, batch

Validation:  99%|▉| 736/743 [2:56:09<01:41, 14.50s/batch, batch_loss=0.042, batc

Validation:  99%|▉| 737/743 [2:56:09<01:23, 13.94s/batch, batch_loss=0.042, batc

Validation:  99%|▉| 737/743 [2:56:22<01:23, 13.94s/batch, batch_loss=0.042, batc

Validation:  99%|▉| 738/743 [2:56:22<01:07, 13.52s/batch, batch_loss=0.042, batc

Validation:  99%|▉| 738/743 [2:56:34<01:07, 13.52s/batch, batch_loss=0.042, batc

Validation:  99%|▉| 739/743 [2:56:34<00:52, 13.20s/batch, batch_loss=0.042, batc

Validation:  99%|▉| 739/743 [2:56:47<00:52, 13.20s/batch, batch_loss=0.042, batc

Validation: 100%|▉| 740/743 [2:56:47<00:38, 12.99s/batch, batch_loss=0.042, batc

Validation: 100%|▉| 740/743 [2:57:00<00:38, 12.99s/batch, batch_loss=0.042, batc

Validation: 100%|▉| 741/743 [2:57:00<00:25, 12.89s/batch, batch_loss=0.042, batc

Validation: 100%|▉| 741/743 [2:57:13<00:25, 12.89s/batch, batch_loss=0.042, batc

Validation: 100%|▉| 742/743 [2:57:13<00:12, 12.92s/batch, batch_loss=0.042, batc

Validation: 100%|▉| 742/743 [2:57:24<00:12, 12.92s/batch, batch_loss=0.042, batc

Validation: 100%|█| 743/743 [2:57:24<00:00, 12.56s/batch, batch_loss=0.042, batc

Validation: 100%|█| 743/743 [2:57:24<00:00, 14.33s/batch, batch_loss=0.042, batc




Val Loss: 1298.2505


Epoch 8/10:   0%|                                    | 0/991 [00:00<?, ?batch/s]

Epoch 8/10:   0%| | 0/991 [00:15<?, ?batch/s, batch_loss=18.6, batch_index=1, ba

Epoch 8/10:   0%| | 1/991 [00:15<4:09:03, 15.09s/batch, batch_loss=18.6, batch_i

Epoch 8/10:   0%| | 1/991 [00:29<4:09:03, 15.09s/batch, batch_loss=18.6, batch_i

Epoch 8/10:   0%| | 2/991 [00:29<4:00:24, 14.58s/batch, batch_loss=18.6, batch_i

Epoch 8/10:   0%| | 2/991 [00:44<4:00:24, 14.58s/batch, batch_loss=14.4, batch_i

Epoch 8/10:   0%| | 3/991 [00:44<4:01:19, 14.66s/batch, batch_loss=14.4, batch_i

Epoch 8/10:   0%| | 3/991 [00:58<4:01:19, 14.66s/batch, batch_loss=9.11, batch_i

Epoch 8/10:   0%| | 4/991 [00:58<3:58:21, 14.49s/batch, batch_loss=9.11, batch_i

Epoch 8/10:   0%| | 4/991 [01:13<3:58:21, 14.49s/batch, batch_loss=26.8, batch_i

Epoch 8/10:   1%| | 5/991 [01:13<4:01:36, 14.70s/batch, batch_loss=26.8, batch_i

Epoch 8/10:   1%| | 5/991 [01:27<4:01:36, 14.70s/batch, batch_loss=25.1, batch_i

Epoch 8/10:   1%| | 6/991 [01:27<4:00:03, 14.62s/batch, batch_loss=25.1, batch_i

Epoch 8/10:   1%| | 6/991 [01:45<4:00:03, 14.62s/batch, batch_loss=19, batch_ind

Epoch 8/10:   1%| | 7/991 [01:45<4:13:51, 15.48s/batch, batch_loss=19, batch_ind

Epoch 8/10:   1%| | 7/991 [01:59<4:13:51, 15.48s/batch, batch_loss=608, batch_in

Epoch 8/10:   1%| | 8/991 [01:59<4:06:29, 15.05s/batch, batch_loss=608, batch_in

Epoch 8/10:   1%| | 8/991 [02:13<4:06:29, 15.05s/batch, batch_loss=15.7, batch_i

Epoch 8/10:   1%| | 9/991 [02:13<4:04:24, 14.93s/batch, batch_loss=15.7, batch_i

Epoch 8/10:   1%| | 9/991 [02:29<4:04:24, 14.93s/batch, batch_loss=15.7, batch_i

Epoch 8/10:   1%| | 10/991 [02:29<4:06:44, 15.09s/batch, batch_loss=15.7, batch_

Epoch 8/10:   1%| | 10/991 [02:44<4:06:44, 15.09s/batch, batch_loss=13.4, batch_

Epoch 8/10:   1%| | 11/991 [02:44<4:07:02, 15.12s/batch, batch_loss=13.4, batch_

Epoch 8/10:   1%| | 11/991 [02:59<4:07:02, 15.12s/batch, batch_loss=2e+3, batch_

Epoch 8/10:   1%| | 12/991 [02:59<4:05:49, 15.07s/batch, batch_loss=2e+3, batch_

Epoch 8/10:   1%| | 12/991 [03:13<4:05:49, 15.07s/batch, batch_loss=20.7, batch_

Epoch 8/10:   1%| | 13/991 [03:13<4:02:38, 14.89s/batch, batch_loss=20.7, batch_

Epoch 8/10:   1%| | 13/991 [03:29<4:02:38, 14.89s/batch, batch_loss=13, batch_in

Epoch 8/10:   1%| | 14/991 [03:29<4:04:34, 15.02s/batch, batch_loss=13, batch_in

Epoch 8/10:   1%| | 14/991 [03:47<4:04:34, 15.02s/batch, batch_loss=10.7, batch_

Epoch 8/10:   2%| | 15/991 [03:47<4:18:15, 15.88s/batch, batch_loss=10.7, batch_

Epoch 8/10:   2%| | 15/991 [04:02<4:18:15, 15.88s/batch, batch_loss=14, batch_in

Epoch 8/10:   2%| | 16/991 [04:02<4:13:25, 15.60s/batch, batch_loss=14, batch_in

Epoch 8/10:   2%| | 16/991 [04:16<4:13:25, 15.60s/batch, batch_loss=18.2, batch_

Epoch 8/10:   2%| | 17/991 [04:16<4:08:16, 15.29s/batch, batch_loss=18.2, batch_

Epoch 8/10:   2%| | 17/991 [04:32<4:08:16, 15.29s/batch, batch_loss=10.9, batch_

Epoch 8/10:   2%| | 18/991 [04:32<4:08:30, 15.32s/batch, batch_loss=10.9, batch_

Epoch 8/10:   2%| | 18/991 [04:47<4:08:30, 15.32s/batch, batch_loss=8.96e+3, bat

Epoch 8/10:   2%| | 19/991 [04:47<4:08:10, 15.32s/batch, batch_loss=8.96e+3, bat

Epoch 8/10:   2%| | 19/991 [05:02<4:08:10, 15.32s/batch, batch_loss=11.8, batch_

Epoch 8/10:   2%| | 20/991 [05:02<4:07:17, 15.28s/batch, batch_loss=11.8, batch_

Epoch 8/10:   2%| | 20/991 [05:17<4:07:17, 15.28s/batch, batch_loss=19.8, batch_

Epoch 8/10:   2%| | 21/991 [05:17<4:03:24, 15.06s/batch, batch_loss=19.8, batch_

Epoch 8/10:   2%| | 21/991 [05:31<4:03:24, 15.06s/batch, batch_loss=1.08e+3, bat

Epoch 8/10:   2%| | 22/991 [05:31<4:00:44, 14.91s/batch, batch_loss=1.08e+3, bat

Epoch 8/10:   2%| | 22/991 [05:48<4:00:44, 14.91s/batch, batch_loss=9.31, batch_

Epoch 8/10:   2%| | 23/991 [05:48<4:11:56, 15.62s/batch, batch_loss=9.31, batch_

Epoch 8/10:   2%| | 23/991 [06:04<4:11:56, 15.62s/batch, batch_loss=13.6, batch_

Epoch 8/10:   2%| | 24/991 [06:04<4:13:23, 15.72s/batch, batch_loss=13.6, batch_

Epoch 8/10:   2%| | 24/991 [06:19<4:13:23, 15.72s/batch, batch_loss=14.2, batch_

Epoch 8/10:   3%| | 25/991 [06:19<4:09:02, 15.47s/batch, batch_loss=14.2, batch_

Epoch 8/10:   3%| | 25/991 [06:33<4:09:02, 15.47s/batch, batch_loss=19.2, batch_

Epoch 8/10:   3%| | 26/991 [06:33<4:01:06, 14.99s/batch, batch_loss=19.2, batch_

Epoch 8/10:   3%| | 26/991 [06:49<4:01:06, 14.99s/batch, batch_loss=16.5, batch_

Epoch 8/10:   3%| | 27/991 [06:49<4:03:55, 15.18s/batch, batch_loss=16.5, batch_

Epoch 8/10:   3%| | 27/991 [07:03<4:03:55, 15.18s/batch, batch_loss=1.13e+3, bat

Epoch 8/10:   3%| | 28/991 [07:03<4:00:48, 15.00s/batch, batch_loss=1.13e+3, bat

Epoch 8/10:   3%| | 28/991 [07:19<4:00:48, 15.00s/batch, batch_loss=11, batch_in

Epoch 8/10:   3%| | 29/991 [07:19<4:02:42, 15.14s/batch, batch_loss=11, batch_in

Epoch 8/10:   3%| | 29/991 [07:34<4:02:42, 15.14s/batch, batch_loss=11.7, batch_

Epoch 8/10:   3%| | 30/991 [07:34<4:03:47, 15.22s/batch, batch_loss=11.7, batch_

Epoch 8/10:   3%| | 30/991 [07:49<4:03:47, 15.22s/batch, batch_loss=9.81, batch_

Epoch 8/10:   3%| | 31/991 [07:49<4:02:46, 15.17s/batch, batch_loss=9.81, batch_

Epoch 8/10:   3%| | 31/991 [08:04<4:02:46, 15.17s/batch, batch_loss=1.3e+4, batc

Epoch 8/10:   3%| | 32/991 [08:04<4:01:36, 15.12s/batch, batch_loss=1.3e+4, batc

Epoch 8/10:   3%| | 32/991 [08:21<4:01:36, 15.12s/batch, batch_loss=15.2, batch_

Epoch 8/10:   3%| | 33/991 [08:21<4:07:56, 15.53s/batch, batch_loss=15.2, batch_

Epoch 8/10:   3%| | 33/991 [08:36<4:07:56, 15.53s/batch, batch_loss=8.59, batch_

Epoch 8/10:   3%| | 34/991 [08:36<4:06:25, 15.45s/batch, batch_loss=8.59, batch_

Epoch 8/10:   3%| | 34/991 [08:51<4:06:25, 15.45s/batch, batch_loss=12.5, batch_

Epoch 8/10:   4%| | 35/991 [08:51<4:04:11, 15.33s/batch, batch_loss=12.5, batch_

Epoch 8/10:   4%| | 35/991 [09:05<4:04:11, 15.33s/batch, batch_loss=10, batch_in

Epoch 8/10:   4%| | 36/991 [09:05<3:59:16, 15.03s/batch, batch_loss=10, batch_in

Epoch 8/10:   4%| | 36/991 [09:23<3:59:16, 15.03s/batch, batch_loss=11.9, batch_

Epoch 8/10:   4%| | 37/991 [09:23<4:09:36, 15.70s/batch, batch_loss=11.9, batch_

Epoch 8/10:   4%| | 37/991 [09:38<4:09:36, 15.70s/batch, batch_loss=7.07, batch_

Epoch 8/10:   4%| | 38/991 [09:38<4:06:31, 15.52s/batch, batch_loss=7.07, batch_

Epoch 8/10:   4%| | 38/991 [09:53<4:06:31, 15.52s/batch, batch_loss=1.72e+3, bat

Epoch 8/10:   4%| | 39/991 [09:53<4:03:16, 15.33s/batch, batch_loss=1.72e+3, bat

Epoch 8/10:   4%| | 39/991 [10:07<4:03:16, 15.33s/batch, batch_loss=14.9, batch_

Epoch 8/10:   4%| | 40/991 [10:07<3:58:15, 15.03s/batch, batch_loss=14.9, batch_

Epoch 8/10:   4%| | 40/991 [10:21<3:58:15, 15.03s/batch, batch_loss=6.2e+3, batc

Epoch 8/10:   4%| | 41/991 [10:21<3:50:54, 14.58s/batch, batch_loss=6.2e+3, batc

Epoch 8/10:   4%| | 41/991 [10:34<3:50:54, 14.58s/batch, batch_loss=16.4, batch_

Epoch 8/10:   4%| | 42/991 [10:34<3:43:02, 14.10s/batch, batch_loss=16.4, batch_

Epoch 8/10:   4%| | 42/991 [10:47<3:43:02, 14.10s/batch, batch_loss=10.3, batch_

Epoch 8/10:   4%| | 43/991 [10:47<3:37:52, 13.79s/batch, batch_loss=10.3, batch_

Epoch 8/10:   4%| | 43/991 [11:01<3:37:52, 13.79s/batch, batch_loss=15, batch_in

Epoch 8/10:   4%| | 44/991 [11:01<3:40:21, 13.96s/batch, batch_loss=15, batch_in

Epoch 8/10:   4%| | 44/991 [11:18<3:40:21, 13.96s/batch, batch_loss=16.2, batch_

Epoch 8/10:   5%| | 45/991 [11:18<3:56:07, 14.98s/batch, batch_loss=16.2, batch_

Epoch 8/10:   5%| | 45/991 [11:34<3:56:07, 14.98s/batch, batch_loss=13.2, batch_

Epoch 8/10:   5%| | 46/991 [11:34<4:01:07, 15.31s/batch, batch_loss=13.2, batch_

Epoch 8/10:   5%| | 46/991 [11:49<4:01:07, 15.31s/batch, batch_loss=6.02, batch_

Epoch 8/10:   5%| | 47/991 [11:49<3:56:46, 15.05s/batch, batch_loss=6.02, batch_

Epoch 8/10:   5%| | 47/991 [12:04<3:56:46, 15.05s/batch, batch_loss=13.1, batch_

Epoch 8/10:   5%| | 48/991 [12:04<3:57:43, 15.13s/batch, batch_loss=13.1, batch_

Epoch 8/10:   5%| | 48/991 [12:18<3:57:43, 15.13s/batch, batch_loss=13.4, batch_

Epoch 8/10:   5%| | 49/991 [12:18<3:52:13, 14.79s/batch, batch_loss=13.4, batch_

Epoch 8/10:   5%| | 49/991 [12:33<3:52:13, 14.79s/batch, batch_loss=14.2, batch_

Epoch 8/10:   5%| | 50/991 [12:33<3:51:37, 14.77s/batch, batch_loss=14.2, batch_

Epoch 8/10:   5%| | 50/991 [12:47<3:51:37, 14.77s/batch, batch_loss=8.56, batch_

Epoch 8/10:   5%| | 51/991 [12:47<3:48:23, 14.58s/batch, batch_loss=8.56, batch_

Epoch 8/10:   5%| | 51/991 [13:02<3:48:23, 14.58s/batch, batch_loss=14.4, batch_

Epoch 8/10:   5%| | 52/991 [13:02<3:50:09, 14.71s/batch, batch_loss=14.4, batch_

Epoch 8/10:   5%| | 52/991 [13:17<3:50:09, 14.71s/batch, batch_loss=14.8, batch_

Epoch 8/10:   5%| | 53/991 [13:17<3:50:57, 14.77s/batch, batch_loss=14.8, batch_

Epoch 8/10:   5%| | 53/991 [13:33<3:50:57, 14.77s/batch, batch_loss=9.04, batch_

Epoch 8/10:   5%| | 54/991 [13:33<3:54:51, 15.04s/batch, batch_loss=9.04, batch_

Epoch 8/10:   5%| | 54/991 [13:50<3:54:51, 15.04s/batch, batch_loss=11.3, batch_

Epoch 8/10:   6%| | 55/991 [13:50<4:05:31, 15.74s/batch, batch_loss=11.3, batch_

Epoch 8/10:   6%| | 55/991 [14:05<4:05:31, 15.74s/batch, batch_loss=12.8, batch_

Epoch 8/10:   6%| | 56/991 [14:05<4:03:01, 15.60s/batch, batch_loss=12.8, batch_

Epoch 8/10:   6%| | 56/991 [14:20<4:03:01, 15.60s/batch, batch_loss=7.87, batch_

Epoch 8/10:   6%| | 57/991 [14:20<3:57:50, 15.28s/batch, batch_loss=7.87, batch_

Epoch 8/10:   6%| | 57/991 [14:35<3:57:50, 15.28s/batch, batch_loss=14.5, batch_

Epoch 8/10:   6%| | 58/991 [14:35<3:59:34, 15.41s/batch, batch_loss=14.5, batch_

Epoch 8/10:   6%| | 58/991 [14:51<3:59:34, 15.41s/batch, batch_loss=10, batch_in

Epoch 8/10:   6%| | 59/991 [14:51<3:58:05, 15.33s/batch, batch_loss=10, batch_in

Epoch 8/10:   6%| | 59/991 [15:05<3:58:05, 15.33s/batch, batch_loss=17.7, batch_

Epoch 8/10:   6%| | 60/991 [15:05<3:55:41, 15.19s/batch, batch_loss=17.7, batch_

Epoch 8/10:   6%| | 60/991 [15:20<3:55:41, 15.19s/batch, batch_loss=11.5, batch_

Epoch 8/10:   6%| | 61/991 [15:20<3:53:11, 15.04s/batch, batch_loss=11.5, batch_

Epoch 8/10:   6%| | 61/991 [15:35<3:53:11, 15.04s/batch, batch_loss=11.2, batch_

Epoch 8/10:   6%| | 62/991 [15:35<3:54:06, 15.12s/batch, batch_loss=11.2, batch_

Epoch 8/10:   6%| | 62/991 [15:54<3:54:06, 15.12s/batch, batch_loss=417, batch_i

Epoch 8/10:   6%| | 63/991 [15:54<4:07:33, 16.01s/batch, batch_loss=417, batch_i

Epoch 8/10:   6%| | 63/991 [16:09<4:07:33, 16.01s/batch, batch_loss=798, batch_i

Epoch 8/10:   6%| | 64/991 [16:09<4:03:06, 15.74s/batch, batch_loss=798, batch_i

Epoch 8/10:   6%| | 64/991 [16:23<4:03:06, 15.74s/batch, batch_loss=2.55e+3, bat

Epoch 8/10:   7%| | 65/991 [16:23<3:56:36, 15.33s/batch, batch_loss=2.55e+3, bat

Epoch 8/10:   7%| | 65/991 [16:38<3:56:36, 15.33s/batch, batch_loss=4.3, batch_i

Epoch 8/10:   7%| | 66/991 [16:38<3:53:28, 15.14s/batch, batch_loss=4.3, batch_i

Epoch 8/10:   7%| | 66/991 [16:53<3:53:28, 15.14s/batch, batch_loss=11.1, batch_

Epoch 8/10:   7%| | 67/991 [16:53<3:51:55, 15.06s/batch, batch_loss=11.1, batch_

Epoch 8/10:   7%| | 67/991 [17:07<3:51:55, 15.06s/batch, batch_loss=7.81, batch_

Epoch 8/10:   7%| | 68/991 [17:07<3:48:14, 14.84s/batch, batch_loss=7.81, batch_

Epoch 8/10:   7%| | 68/991 [17:22<3:48:14, 14.84s/batch, batch_loss=19.5, batch_

Epoch 8/10:   7%| | 69/991 [17:22<3:47:21, 14.80s/batch, batch_loss=19.5, batch_

Epoch 8/10:   7%| | 69/991 [17:36<3:47:21, 14.80s/batch, batch_loss=8.47, batch_

Epoch 8/10:   7%| | 70/991 [17:36<3:45:53, 14.72s/batch, batch_loss=8.47, batch_

Epoch 8/10:   7%| | 70/991 [17:53<3:45:53, 14.72s/batch, batch_loss=10.8, batch_

Epoch 8/10:   7%| | 71/991 [17:53<3:54:57, 15.32s/batch, batch_loss=10.8, batch_

Epoch 8/10:   7%| | 71/991 [18:08<3:54:57, 15.32s/batch, batch_loss=14.4, batch_

Epoch 8/10:   7%| | 72/991 [18:08<3:51:47, 15.13s/batch, batch_loss=14.4, batch_

Epoch 8/10:   7%| | 72/991 [18:22<3:51:47, 15.13s/batch, batch_loss=25.7, batch_

Epoch 8/10:   7%| | 73/991 [18:22<3:49:15, 14.98s/batch, batch_loss=25.7, batch_

Epoch 8/10:   7%| | 73/991 [18:37<3:49:15, 14.98s/batch, batch_loss=1.74e+3, bat

Epoch 8/10:   7%| | 74/991 [18:37<3:49:13, 15.00s/batch, batch_loss=1.74e+3, bat

Epoch 8/10:   7%| | 74/991 [18:51<3:49:13, 15.00s/batch, batch_loss=15.7, batch_

Epoch 8/10:   8%| | 75/991 [18:51<3:45:28, 14.77s/batch, batch_loss=15.7, batch_

Epoch 8/10:   8%| | 75/991 [19:06<3:45:28, 14.77s/batch, batch_loss=11, batch_in

Epoch 8/10:   8%| | 76/991 [19:06<3:41:51, 14.55s/batch, batch_loss=11, batch_in

Epoch 8/10:   8%| | 76/991 [19:21<3:41:51, 14.55s/batch, batch_loss=11.7, batch_

Epoch 8/10:   8%| | 77/991 [19:21<3:45:15, 14.79s/batch, batch_loss=11.7, batch_

Epoch 8/10:   8%| | 77/991 [19:36<3:45:15, 14.79s/batch, batch_loss=13.9, batch_

Epoch 8/10:   8%| | 78/991 [19:36<3:45:23, 14.81s/batch, batch_loss=13.9, batch_

Epoch 8/10:   8%| | 78/991 [19:50<3:45:23, 14.81s/batch, batch_loss=10.5, batch_

Epoch 8/10:   8%| | 79/991 [19:50<3:42:45, 14.66s/batch, batch_loss=10.5, batch_

Epoch 8/10:   8%| | 79/991 [20:04<3:42:45, 14.66s/batch, batch_loss=8.72, batch_

Epoch 8/10:   8%| | 80/991 [20:04<3:41:42, 14.60s/batch, batch_loss=8.72, batch_

Epoch 8/10:   8%| | 80/991 [20:19<3:41:42, 14.60s/batch, batch_loss=14.4, batch_

Epoch 8/10:   8%| | 81/991 [20:19<3:41:18, 14.59s/batch, batch_loss=14.4, batch_

Epoch 8/10:   8%| | 81/991 [20:35<3:41:18, 14.59s/batch, batch_loss=12.2, batch_

Epoch 8/10:   8%| | 82/991 [20:35<3:45:36, 14.89s/batch, batch_loss=12.2, batch_

Epoch 8/10:   8%| | 82/991 [20:50<3:45:36, 14.89s/batch, batch_loss=7.65, batch_

Epoch 8/10:   8%| | 83/991 [20:50<3:46:54, 14.99s/batch, batch_loss=7.65, batch_

Epoch 8/10:   8%| | 83/991 [21:04<3:46:54, 14.99s/batch, batch_loss=11.3, batch_

Epoch 8/10:   8%| | 84/991 [21:04<3:43:06, 14.76s/batch, batch_loss=11.3, batch_

Epoch 8/10:   8%| | 84/991 [21:19<3:43:06, 14.76s/batch, batch_loss=9.36, batch_

Epoch 8/10:   9%| | 85/991 [21:19<3:45:25, 14.93s/batch, batch_loss=9.36, batch_

Epoch 8/10:   9%| | 85/991 [21:34<3:45:25, 14.93s/batch, batch_loss=11.7, batch_

Epoch 8/10:   9%| | 86/991 [21:34<3:42:44, 14.77s/batch, batch_loss=11.7, batch_

Epoch 8/10:   9%| | 86/991 [21:51<3:42:44, 14.77s/batch, batch_loss=10.9, batch_

Epoch 8/10:   9%| | 87/991 [21:51<3:52:20, 15.42s/batch, batch_loss=10.9, batch_

Epoch 8/10:   9%| | 87/991 [22:06<3:52:20, 15.42s/batch, batch_loss=10.5, batch_

Epoch 8/10:   9%| | 88/991 [22:06<3:49:19, 15.24s/batch, batch_loss=10.5, batch_

Epoch 8/10:   9%| | 88/991 [22:21<3:49:19, 15.24s/batch, batch_loss=5.91, batch_

Epoch 8/10:   9%| | 89/991 [22:21<3:49:29, 15.27s/batch, batch_loss=5.91, batch_

Epoch 8/10:   9%| | 89/991 [22:36<3:49:29, 15.27s/batch, batch_loss=248, batch_i

Epoch 8/10:   9%| | 90/991 [22:36<3:48:47, 15.24s/batch, batch_loss=248, batch_i

Epoch 8/10:   9%| | 90/991 [22:51<3:48:47, 15.24s/batch, batch_loss=1.54e+3, bat

Epoch 8/10:   9%| | 91/991 [22:51<3:47:51, 15.19s/batch, batch_loss=1.54e+3, bat

Epoch 8/10:   9%| | 91/991 [23:06<3:47:51, 15.19s/batch, batch_loss=14.6, batch_

Epoch 8/10:   9%| | 92/991 [23:06<3:45:34, 15.05s/batch, batch_loss=14.6, batch_

Epoch 8/10:   9%| | 92/991 [23:21<3:45:34, 15.05s/batch, batch_loss=19, batch_in

Epoch 8/10:   9%| | 93/991 [23:21<3:46:06, 15.11s/batch, batch_loss=19, batch_in

Epoch 8/10:   9%| | 93/991 [23:37<3:46:06, 15.11s/batch, batch_loss=20.1, batch_

Epoch 8/10:   9%| | 94/991 [23:37<3:47:54, 15.24s/batch, batch_loss=20.1, batch_

Epoch 8/10:   9%| | 94/991 [23:52<3:47:54, 15.24s/batch, batch_loss=19, batch_in

Epoch 8/10:  10%| | 95/991 [23:52<3:46:10, 15.15s/batch, batch_loss=19, batch_in

Epoch 8/10:  10%| | 95/991 [24:06<3:46:10, 15.15s/batch, batch_loss=19, batch_in

Epoch 8/10:  10%| | 96/991 [24:06<3:42:38, 14.93s/batch, batch_loss=19, batch_in

Epoch 8/10:  10%| | 96/991 [24:21<3:42:38, 14.93s/batch, batch_loss=17.8, batch_

Epoch 8/10:  10%| | 97/991 [24:21<3:43:43, 15.01s/batch, batch_loss=17.8, batch_

Epoch 8/10:  10%| | 97/991 [24:36<3:43:43, 15.01s/batch, batch_loss=16.9, batch_

Epoch 8/10:  10%| | 98/991 [24:36<3:44:05, 15.06s/batch, batch_loss=16.9, batch_

Epoch 8/10:  10%| | 98/991 [24:52<3:44:05, 15.06s/batch, batch_loss=16.3, batch_

Epoch 8/10:  10%| | 99/991 [24:52<3:45:37, 15.18s/batch, batch_loss=16.3, batch_

Epoch 8/10:  10%| | 99/991 [25:07<3:45:37, 15.18s/batch, batch_loss=17.5, batch_

Epoch 8/10:  10%| | 100/991 [25:07<3:45:45, 15.20s/batch, batch_loss=17.5, batch

Epoch 8/10:  10%| | 100/991 [25:23<3:45:45, 15.20s/batch, batch_loss=13.9, batch

Epoch 8/10:  10%| | 101/991 [25:23<3:47:20, 15.33s/batch, batch_loss=13.9, batch

Epoch 8/10:  10%| | 101/991 [25:38<3:47:20, 15.33s/batch, batch_loss=20.1, batch

Epoch 8/10:  10%| | 102/991 [25:38<3:47:38, 15.36s/batch, batch_loss=20.1, batch

Epoch 8/10:  10%| | 102/991 [25:56<3:47:38, 15.36s/batch, batch_loss=912, batch_

Epoch 8/10:  10%| | 103/991 [25:56<3:58:55, 16.14s/batch, batch_loss=912, batch_

Epoch 8/10:  10%| | 103/991 [26:12<3:58:55, 16.14s/batch, batch_loss=14.5, batch

Epoch 8/10:  10%| | 104/991 [26:12<3:56:24, 15.99s/batch, batch_loss=14.5, batch

Epoch 8/10:  10%| | 104/991 [26:26<3:56:24, 15.99s/batch, batch_loss=9.98, batch

Epoch 8/10:  11%| | 105/991 [26:26<3:47:17, 15.39s/batch, batch_loss=9.98, batch

Epoch 8/10:  11%| | 105/991 [26:40<3:47:17, 15.39s/batch, batch_loss=9.98, batch

Epoch 8/10:  11%| | 106/991 [26:40<3:43:08, 15.13s/batch, batch_loss=9.98, batch

Epoch 8/10:  11%| | 106/991 [26:54<3:43:08, 15.13s/batch, batch_loss=19.2, batch

Epoch 8/10:  11%| | 107/991 [26:54<3:37:12, 14.74s/batch, batch_loss=19.2, batch

Epoch 8/10:  11%| | 107/991 [27:08<3:37:12, 14.74s/batch, batch_loss=26.2, batch

Epoch 8/10:  11%| | 108/991 [27:08<3:32:18, 14.43s/batch, batch_loss=26.2, batch

Epoch 8/10:  11%| | 108/991 [27:24<3:32:18, 14.43s/batch, batch_loss=14.7, batch

Epoch 8/10:  11%| | 109/991 [27:24<3:41:34, 15.07s/batch, batch_loss=14.7, batch

Epoch 8/10:  11%| | 109/991 [27:38<3:41:34, 15.07s/batch, batch_loss=14.5, batch

Epoch 8/10:  11%| | 110/991 [27:38<3:36:44, 14.76s/batch, batch_loss=14.5, batch

Epoch 8/10:  11%| | 110/991 [27:53<3:36:44, 14.76s/batch, batch_loss=15.7, batch

Epoch 8/10:  11%| | 111/991 [27:53<3:36:45, 14.78s/batch, batch_loss=15.7, batch

Epoch 8/10:  11%| | 111/991 [28:08<3:36:45, 14.78s/batch, batch_loss=18.9, batch

Epoch 8/10:  11%| | 112/991 [28:08<3:35:37, 14.72s/batch, batch_loss=18.9, batch

Epoch 8/10:  11%| | 112/991 [28:22<3:35:37, 14.72s/batch, batch_loss=10.2, batch

Epoch 8/10:  11%| | 113/991 [28:22<3:34:38, 14.67s/batch, batch_loss=10.2, batch

Epoch 8/10:  11%| | 113/991 [28:38<3:34:38, 14.67s/batch, batch_loss=17.9, batch

Epoch 8/10:  12%| | 114/991 [28:38<3:38:41, 14.96s/batch, batch_loss=17.9, batch

Epoch 8/10:  12%| | 114/991 [28:53<3:38:41, 14.96s/batch, batch_loss=19.8, batch

Epoch 8/10:  12%| | 115/991 [28:53<3:37:01, 14.87s/batch, batch_loss=19.8, batch

Epoch 8/10:  12%| | 115/991 [29:08<3:37:01, 14.87s/batch, batch_loss=10.6, batch

Epoch 8/10:  12%| | 116/991 [29:08<3:40:06, 15.09s/batch, batch_loss=10.6, batch

Epoch 8/10:  12%| | 116/991 [29:23<3:40:06, 15.09s/batch, batch_loss=19.2, batch

Epoch 8/10:  12%| | 117/991 [29:23<3:39:58, 15.10s/batch, batch_loss=19.2, batch

Epoch 8/10:  12%| | 117/991 [29:38<3:39:58, 15.10s/batch, batch_loss=15.9, batch

Epoch 8/10:  12%| | 118/991 [29:38<3:38:30, 15.02s/batch, batch_loss=15.9, batch

Epoch 8/10:  12%| | 118/991 [29:53<3:38:30, 15.02s/batch, batch_loss=24, batch_i

Epoch 8/10:  12%| | 119/991 [29:53<3:38:28, 15.03s/batch, batch_loss=24, batch_i

Epoch 8/10:  12%| | 119/991 [30:09<3:38:28, 15.03s/batch, batch_loss=18.2, batch

Epoch 8/10:  12%| | 120/991 [30:09<3:39:22, 15.11s/batch, batch_loss=18.2, batch

Epoch 8/10:  12%| | 120/991 [30:24<3:39:22, 15.11s/batch, batch_loss=22.8, batch

Epoch 8/10:  12%| | 121/991 [30:24<3:39:11, 15.12s/batch, batch_loss=22.8, batch

Epoch 8/10:  12%| | 121/991 [30:37<3:39:11, 15.12s/batch, batch_loss=8.77, batch

Epoch 8/10:  12%| | 122/991 [30:37<3:32:11, 14.65s/batch, batch_loss=8.77, batch

Epoch 8/10:  12%| | 122/991 [30:52<3:32:11, 14.65s/batch, batch_loss=14.1, batch

Epoch 8/10:  12%| | 123/991 [30:52<3:34:11, 14.81s/batch, batch_loss=14.1, batch

Epoch 8/10:  12%| | 123/991 [31:08<3:34:11, 14.81s/batch, batch_loss=3.47e+3, ba

Epoch 8/10:  13%|▏| 124/991 [31:08<3:35:32, 14.92s/batch, batch_loss=3.47e+3, ba

Epoch 8/10:  13%|▏| 124/991 [31:23<3:35:32, 14.92s/batch, batch_loss=7.96, batch

Epoch 8/10:  13%|▏| 125/991 [31:23<3:38:19, 15.13s/batch, batch_loss=7.96, batch

Epoch 8/10:  13%|▏| 125/991 [31:38<3:38:19, 15.13s/batch, batch_loss=13.6, batch

Epoch 8/10:  13%|▏| 126/991 [31:38<3:38:20, 15.14s/batch, batch_loss=13.6, batch

Epoch 8/10:  13%|▏| 126/991 [31:53<3:38:20, 15.14s/batch, batch_loss=1.88e+3, ba

Epoch 8/10:  13%|▏| 127/991 [31:53<3:37:11, 15.08s/batch, batch_loss=1.88e+3, ba

Epoch 8/10:  13%|▏| 127/991 [32:08<3:37:11, 15.08s/batch, batch_loss=1.57e+3, ba

Epoch 8/10:  13%|▏| 128/991 [32:08<3:36:16, 15.04s/batch, batch_loss=1.57e+3, ba

Epoch 8/10:  13%|▏| 128/991 [32:23<3:36:16, 15.04s/batch, batch_loss=227, batch_

Epoch 8/10:  13%|▏| 129/991 [32:23<3:32:43, 14.81s/batch, batch_loss=227, batch_

Epoch 8/10:  13%|▏| 129/991 [32:37<3:32:43, 14.81s/batch, batch_loss=989, batch_

Epoch 8/10:  13%|▏| 130/991 [32:37<3:31:43, 14.75s/batch, batch_loss=989, batch_

Epoch 8/10:  13%|▏| 130/991 [32:52<3:31:43, 14.75s/batch, batch_loss=8.33e+3, ba

Epoch 8/10:  13%|▏| 131/991 [32:52<3:31:22, 14.75s/batch, batch_loss=8.33e+3, ba

Epoch 8/10:  13%|▏| 131/991 [33:09<3:31:22, 14.75s/batch, batch_loss=16.8, batch

Epoch 8/10:  13%|▏| 132/991 [33:09<3:41:00, 15.44s/batch, batch_loss=16.8, batch

Epoch 8/10:  13%|▏| 132/991 [33:25<3:41:00, 15.44s/batch, batch_loss=7.87, batch

Epoch 8/10:  13%|▏| 133/991 [33:25<3:41:15, 15.47s/batch, batch_loss=7.87, batch

Epoch 8/10:  13%|▏| 133/991 [33:39<3:41:15, 15.47s/batch, batch_loss=11.8, batch

Epoch 8/10:  14%|▏| 134/991 [33:40<3:38:52, 15.32s/batch, batch_loss=11.8, batch

Epoch 8/10:  14%|▏| 134/991 [33:54<3:38:52, 15.32s/batch, batch_loss=15.6, batch

Epoch 8/10:  14%|▏| 135/991 [33:54<3:34:57, 15.07s/batch, batch_loss=15.6, batch

Epoch 8/10:  14%|▏| 135/991 [34:08<3:34:57, 15.07s/batch, batch_loss=7.51, batch

Epoch 8/10:  14%|▏| 136/991 [34:08<3:30:35, 14.78s/batch, batch_loss=7.51, batch

Epoch 8/10:  14%|▏| 136/991 [34:23<3:30:35, 14.78s/batch, batch_loss=12.4, batch

Epoch 8/10:  14%|▏| 137/991 [34:23<3:31:09, 14.84s/batch, batch_loss=12.4, batch

Epoch 8/10:  14%|▏| 137/991 [34:37<3:31:09, 14.84s/batch, batch_loss=15.3, batch

Epoch 8/10:  14%|▏| 138/991 [34:37<3:28:44, 14.68s/batch, batch_loss=15.3, batch

Epoch 8/10:  14%|▏| 138/991 [34:52<3:28:44, 14.68s/batch, batch_loss=6.35, batch

Epoch 8/10:  14%|▏| 139/991 [34:52<3:26:09, 14.52s/batch, batch_loss=6.35, batch

Epoch 8/10:  14%|▏| 139/991 [35:06<3:26:09, 14.52s/batch, batch_loss=10.2, batch

Epoch 8/10:  14%|▏| 140/991 [35:06<3:26:04, 14.53s/batch, batch_loss=10.2, batch

Epoch 8/10:  14%|▏| 140/991 [35:21<3:26:04, 14.53s/batch, batch_loss=5.86, batch

Epoch 8/10:  14%|▏| 141/991 [35:21<3:27:47, 14.67s/batch, batch_loss=5.86, batch

Epoch 8/10:  14%|▏| 141/991 [35:35<3:27:47, 14.67s/batch, batch_loss=6.71, batch

Epoch 8/10:  14%|▏| 142/991 [35:35<3:24:55, 14.48s/batch, batch_loss=6.71, batch

Epoch 8/10:  14%|▏| 142/991 [35:50<3:24:55, 14.48s/batch, batch_loss=12.9, batch

Epoch 8/10:  14%|▏| 143/991 [35:50<3:24:43, 14.49s/batch, batch_loss=12.9, batch

Epoch 8/10:  14%|▏| 143/991 [36:04<3:24:43, 14.49s/batch, batch_loss=12.6, batch

Epoch 8/10:  15%|▏| 144/991 [36:04<3:26:05, 14.60s/batch, batch_loss=12.6, batch

Epoch 8/10:  15%|▏| 144/991 [36:20<3:26:05, 14.60s/batch, batch_loss=17.8, batch

Epoch 8/10:  15%|▏| 145/991 [36:20<3:29:12, 14.84s/batch, batch_loss=17.8, batch

Epoch 8/10:  15%|▏| 145/991 [36:35<3:29:12, 14.84s/batch, batch_loss=14.6, batch

Epoch 8/10:  15%|▏| 146/991 [36:35<3:30:46, 14.97s/batch, batch_loss=14.6, batch

Epoch 8/10:  15%|▏| 146/991 [36:50<3:30:46, 14.97s/batch, batch_loss=7.77, batch

Epoch 8/10:  15%|▏| 147/991 [36:50<3:32:05, 15.08s/batch, batch_loss=7.77, batch

Epoch 8/10:  15%|▏| 147/991 [37:05<3:32:05, 15.08s/batch, batch_loss=20.3, batch

Epoch 8/10:  15%|▏| 148/991 [37:05<3:31:05, 15.02s/batch, batch_loss=20.3, batch

Epoch 8/10:  15%|▏| 148/991 [37:21<3:31:05, 15.02s/batch, batch_loss=10.8, batch

Epoch 8/10:  15%|▏| 149/991 [37:21<3:31:47, 15.09s/batch, batch_loss=10.8, batch

Epoch 8/10:  15%|▏| 149/991 [37:35<3:31:47, 15.09s/batch, batch_loss=9.61, batch

Epoch 8/10:  15%|▏| 150/991 [37:35<3:29:17, 14.93s/batch, batch_loss=9.61, batch

Epoch 8/10:  15%|▏| 150/991 [37:50<3:29:17, 14.93s/batch, batch_loss=17, batch_i

Epoch 8/10:  15%|▏| 151/991 [37:50<3:27:01, 14.79s/batch, batch_loss=17, batch_i

Epoch 8/10:  15%|▏| 151/991 [38:04<3:27:01, 14.79s/batch, batch_loss=15.9, batch

Epoch 8/10:  15%|▏| 152/991 [38:04<3:25:15, 14.68s/batch, batch_loss=15.9, batch

Epoch 8/10:  15%|▏| 152/991 [38:19<3:25:15, 14.68s/batch, batch_loss=17.3, batch

Epoch 8/10:  15%|▏| 153/991 [38:19<3:24:22, 14.63s/batch, batch_loss=17.3, batch

Epoch 8/10:  15%|▏| 153/991 [38:34<3:24:22, 14.63s/batch, batch_loss=20.9, batch

Epoch 8/10:  16%|▏| 154/991 [38:34<3:27:15, 14.86s/batch, batch_loss=20.9, batch

Epoch 8/10:  16%|▏| 154/991 [38:48<3:27:15, 14.86s/batch, batch_loss=20, batch_i

Epoch 8/10:  16%|▏| 155/991 [38:48<3:24:08, 14.65s/batch, batch_loss=20, batch_i

Epoch 8/10:  16%|▏| 155/991 [39:04<3:24:08, 14.65s/batch, batch_loss=8.83, batch

Epoch 8/10:  16%|▏| 156/991 [39:04<3:27:14, 14.89s/batch, batch_loss=8.83, batch

Epoch 8/10:  16%|▏| 156/991 [39:18<3:27:14, 14.89s/batch, batch_loss=22.8, batch

Epoch 8/10:  16%|▏| 157/991 [39:18<3:23:16, 14.62s/batch, batch_loss=22.8, batch

Epoch 8/10:  16%|▏| 157/991 [39:32<3:23:16, 14.62s/batch, batch_loss=8.71, batch

Epoch 8/10:  16%|▏| 158/991 [39:32<3:21:12, 14.49s/batch, batch_loss=8.71, batch

Epoch 8/10:  16%|▏| 158/991 [39:49<3:21:12, 14.49s/batch, batch_loss=7.06, batch

Epoch 8/10:  16%|▏| 159/991 [39:49<3:31:21, 15.24s/batch, batch_loss=7.06, batch

Epoch 8/10:  16%|▏| 159/991 [40:04<3:31:21, 15.24s/batch, batch_loss=12.6, batch

Epoch 8/10:  16%|▏| 160/991 [40:04<3:29:06, 15.10s/batch, batch_loss=12.6, batch

Epoch 8/10:  16%|▏| 160/991 [40:19<3:29:06, 15.10s/batch, batch_loss=460, batch_

Epoch 8/10:  16%|▏| 161/991 [40:19<3:30:14, 15.20s/batch, batch_loss=460, batch_

Epoch 8/10:  16%|▏| 161/991 [40:32<3:30:14, 15.20s/batch, batch_loss=14.7, batch

Epoch 8/10:  16%|▏| 162/991 [40:32<3:19:39, 14.45s/batch, batch_loss=14.7, batch

Epoch 8/10:  16%|▏| 162/991 [40:45<3:19:39, 14.45s/batch, batch_loss=8.14, batch

Epoch 8/10:  16%|▏| 163/991 [40:45<3:14:24, 14.09s/batch, batch_loss=8.14, batch

Epoch 8/10:  16%|▏| 163/991 [40:58<3:14:24, 14.09s/batch, batch_loss=12.2, batch

Epoch 8/10:  17%|▏| 164/991 [40:58<3:10:56, 13.85s/batch, batch_loss=12.2, batch

Epoch 8/10:  17%|▏| 164/991 [41:12<3:10:56, 13.85s/batch, batch_loss=10.3, batch

Epoch 8/10:  17%|▏| 165/991 [41:12<3:11:59, 13.95s/batch, batch_loss=10.3, batch

Epoch 8/10:  17%|▏| 165/991 [41:30<3:11:59, 13.95s/batch, batch_loss=11.7, batch

Epoch 8/10:  17%|▏| 166/991 [41:30<3:27:00, 15.06s/batch, batch_loss=11.7, batch

Epoch 8/10:  17%|▏| 166/991 [41:45<3:27:00, 15.06s/batch, batch_loss=16.4, batch

Epoch 8/10:  17%|▏| 167/991 [41:45<3:25:37, 14.97s/batch, batch_loss=16.4, batch

Epoch 8/10:  17%|▏| 167/991 [41:59<3:25:37, 14.97s/batch, batch_loss=11.6, batch

Epoch 8/10:  17%|▏| 168/991 [41:59<3:23:19, 14.82s/batch, batch_loss=11.6, batch

Epoch 8/10:  17%|▏| 168/991 [42:14<3:23:19, 14.82s/batch, batch_loss=14.6, batch

Epoch 8/10:  17%|▏| 169/991 [42:14<3:21:41, 14.72s/batch, batch_loss=14.6, batch

Epoch 8/10:  17%|▏| 169/991 [42:28<3:21:41, 14.72s/batch, batch_loss=9.69, batch

Epoch 8/10:  17%|▏| 170/991 [42:28<3:19:52, 14.61s/batch, batch_loss=9.69, batch

Epoch 8/10:  17%|▏| 170/991 [42:43<3:19:52, 14.61s/batch, batch_loss=5.07, batch

Epoch 8/10:  17%|▏| 171/991 [42:43<3:20:44, 14.69s/batch, batch_loss=5.07, batch

Epoch 8/10:  17%|▏| 171/991 [42:58<3:20:44, 14.69s/batch, batch_loss=7.98, batch

Epoch 8/10:  17%|▏| 172/991 [42:58<3:23:16, 14.89s/batch, batch_loss=7.98, batch

Epoch 8/10:  17%|▏| 172/991 [43:13<3:23:16, 14.89s/batch, batch_loss=7.73, batch

Epoch 8/10:  17%|▏| 173/991 [43:13<3:21:52, 14.81s/batch, batch_loss=7.73, batch

Epoch 8/10:  17%|▏| 173/991 [43:31<3:21:52, 14.81s/batch, batch_loss=3.02e+4, ba

Epoch 8/10:  18%|▏| 174/991 [43:31<3:34:28, 15.75s/batch, batch_loss=3.02e+4, ba

Epoch 8/10:  18%|▏| 174/991 [43:46<3:34:28, 15.75s/batch, batch_loss=20.3, batch

Epoch 8/10:  18%|▏| 175/991 [43:46<3:33:24, 15.69s/batch, batch_loss=20.3, batch

Epoch 8/10:  18%|▏| 175/991 [44:01<3:33:24, 15.69s/batch, batch_loss=20.7, batch

Epoch 8/10:  18%|▏| 176/991 [44:01<3:29:27, 15.42s/batch, batch_loss=20.7, batch

Epoch 8/10:  18%|▏| 176/991 [44:16<3:29:27, 15.42s/batch, batch_loss=22, batch_i

Epoch 8/10:  18%|▏| 177/991 [44:16<3:28:25, 15.36s/batch, batch_loss=22, batch_i

Epoch 8/10:  18%|▏| 177/991 [44:31<3:28:25, 15.36s/batch, batch_loss=21.1, batch

Epoch 8/10:  18%|▏| 178/991 [44:31<3:26:18, 15.23s/batch, batch_loss=21.1, batch

Epoch 8/10:  18%|▏| 178/991 [44:46<3:26:18, 15.23s/batch, batch_loss=12.1, batch

Epoch 8/10:  18%|▏| 179/991 [44:46<3:24:42, 15.13s/batch, batch_loss=12.1, batch

Epoch 8/10:  18%|▏| 179/991 [45:02<3:24:42, 15.13s/batch, batch_loss=7.51, batch

Epoch 8/10:  18%|▏| 180/991 [45:02<3:25:15, 15.19s/batch, batch_loss=7.51, batch

Epoch 8/10:  18%|▏| 180/991 [45:16<3:25:15, 15.19s/batch, batch_loss=2.52e+4, ba

Epoch 8/10:  18%|▏| 181/991 [45:16<3:21:26, 14.92s/batch, batch_loss=2.52e+4, ba

Epoch 8/10:  18%|▏| 181/991 [45:33<3:21:26, 14.92s/batch, batch_loss=13.5, batch

Epoch 8/10:  18%|▏| 182/991 [45:33<3:28:15, 15.45s/batch, batch_loss=13.5, batch

Epoch 8/10:  18%|▏| 182/991 [45:48<3:28:15, 15.45s/batch, batch_loss=20.4, batch

Epoch 8/10:  18%|▏| 183/991 [45:48<3:27:53, 15.44s/batch, batch_loss=20.4, batch

Epoch 8/10:  18%|▏| 183/991 [46:02<3:27:53, 15.44s/batch, batch_loss=18.1, batch

Epoch 8/10:  19%|▏| 184/991 [46:02<3:23:01, 15.10s/batch, batch_loss=18.1, batch

Epoch 8/10:  19%|▏| 184/991 [46:17<3:23:01, 15.10s/batch, batch_loss=12.2, batch

Epoch 8/10:  19%|▏| 185/991 [46:17<3:19:52, 14.88s/batch, batch_loss=12.2, batch

Epoch 8/10:  19%|▏| 185/991 [46:31<3:19:52, 14.88s/batch, batch_loss=18.4, batch

Epoch 8/10:  19%|▏| 186/991 [46:31<3:18:24, 14.79s/batch, batch_loss=18.4, batch

Epoch 8/10:  19%|▏| 186/991 [46:46<3:18:24, 14.79s/batch, batch_loss=16.2, batch

Epoch 8/10:  19%|▏| 187/991 [46:46<3:19:25, 14.88s/batch, batch_loss=16.2, batch

Epoch 8/10:  19%|▏| 187/991 [47:01<3:19:25, 14.88s/batch, batch_loss=17.6, batch

Epoch 8/10:  19%|▏| 188/991 [47:01<3:17:49, 14.78s/batch, batch_loss=17.6, batch

Epoch 8/10:  19%|▏| 188/991 [47:15<3:17:49, 14.78s/batch, batch_loss=18.3, batch

Epoch 8/10:  19%|▏| 189/991 [47:15<3:16:51, 14.73s/batch, batch_loss=18.3, batch

Epoch 8/10:  19%|▏| 189/991 [47:33<3:16:51, 14.73s/batch, batch_loss=20.1, batch

Epoch 8/10:  19%|▏| 190/991 [47:33<3:28:11, 15.59s/batch, batch_loss=20.1, batch

Epoch 8/10:  19%|▏| 190/991 [47:48<3:28:11, 15.59s/batch, batch_loss=18.1, batch

Epoch 8/10:  19%|▏| 191/991 [47:48<3:24:03, 15.30s/batch, batch_loss=18.1, batch

Epoch 8/10:  19%|▏| 191/991 [48:02<3:24:03, 15.30s/batch, batch_loss=11.7, batch

Epoch 8/10:  19%|▏| 192/991 [48:02<3:19:50, 15.01s/batch, batch_loss=11.7, batch

Epoch 8/10:  19%|▏| 192/991 [48:17<3:19:50, 15.01s/batch, batch_loss=19.3, batch

Epoch 8/10:  19%|▏| 193/991 [48:17<3:19:43, 15.02s/batch, batch_loss=19.3, batch

Epoch 8/10:  19%|▏| 193/991 [48:31<3:19:43, 15.02s/batch, batch_loss=7.58, batch

Epoch 8/10:  20%|▏| 194/991 [48:31<3:16:41, 14.81s/batch, batch_loss=7.58, batch

Epoch 8/10:  20%|▏| 194/991 [48:46<3:16:41, 14.81s/batch, batch_loss=3.15, batch

Epoch 8/10:  20%|▏| 195/991 [48:46<3:14:40, 14.67s/batch, batch_loss=3.15, batch

Epoch 8/10:  20%|▏| 195/991 [49:00<3:14:40, 14.67s/batch, batch_loss=5.77, batch

Epoch 8/10:  20%|▏| 196/991 [49:00<3:12:45, 14.55s/batch, batch_loss=5.77, batch

Epoch 8/10:  20%|▏| 196/991 [49:16<3:12:45, 14.55s/batch, batch_loss=11.3, batch

Epoch 8/10:  20%|▏| 197/991 [49:16<3:17:00, 14.89s/batch, batch_loss=11.3, batch

Epoch 8/10:  20%|▏| 197/991 [49:33<3:17:00, 14.89s/batch, batch_loss=8.73, batch

Epoch 8/10:  20%|▏| 198/991 [49:33<3:25:54, 15.58s/batch, batch_loss=8.73, batch

Epoch 8/10:  20%|▏| 198/991 [49:48<3:25:54, 15.58s/batch, batch_loss=14.4, batch

Epoch 8/10:  20%|▏| 199/991 [49:48<3:22:56, 15.37s/batch, batch_loss=14.4, batch

Epoch 8/10:  20%|▏| 199/991 [50:03<3:22:56, 15.37s/batch, batch_loss=8.53, batch

Epoch 8/10:  20%|▏| 200/991 [50:03<3:20:47, 15.23s/batch, batch_loss=8.53, batch

Epoch 8/10:  20%|▏| 200/991 [50:17<3:20:47, 15.23s/batch, batch_loss=12.4, batch

Epoch 8/10:  20%|▏| 201/991 [50:17<3:18:43, 15.09s/batch, batch_loss=12.4, batch

Epoch 8/10:  20%|▏| 201/991 [50:32<3:18:43, 15.09s/batch, batch_loss=12.3, batch

Epoch 8/10:  20%|▏| 202/991 [50:32<3:17:38, 15.03s/batch, batch_loss=12.3, batch

Epoch 8/10:  20%|▏| 202/991 [50:48<3:17:38, 15.03s/batch, batch_loss=14.7, batch

Epoch 8/10:  20%|▏| 203/991 [50:48<3:19:51, 15.22s/batch, batch_loss=14.7, batch

Epoch 8/10:  20%|▏| 203/991 [51:03<3:19:51, 15.22s/batch, batch_loss=18.1, batch

Epoch 8/10:  21%|▏| 204/991 [51:03<3:19:07, 15.18s/batch, batch_loss=18.1, batch

Epoch 8/10:  21%|▏| 204/991 [51:18<3:19:07, 15.18s/batch, batch_loss=16.7, batch

Epoch 8/10:  21%|▏| 205/991 [51:18<3:19:13, 15.21s/batch, batch_loss=16.7, batch

Epoch 8/10:  21%|▏| 205/991 [51:32<3:19:13, 15.21s/batch, batch_loss=7.46, batch

Epoch 8/10:  21%|▏| 206/991 [51:32<3:11:40, 14.65s/batch, batch_loss=7.46, batch

Epoch 8/10:  21%|▏| 206/991 [51:46<3:11:40, 14.65s/batch, batch_loss=8.65, batch

Epoch 8/10:  21%|▏| 207/991 [51:46<3:11:43, 14.67s/batch, batch_loss=8.65, batch

Epoch 8/10:  21%|▏| 207/991 [52:01<3:11:43, 14.67s/batch, batch_loss=10.9, batch

Epoch 8/10:  21%|▏| 208/991 [52:01<3:09:55, 14.55s/batch, batch_loss=10.9, batch

Epoch 8/10:  21%|▏| 208/991 [52:15<3:09:55, 14.55s/batch, batch_loss=8.69, batch

Epoch 8/10:  21%|▏| 209/991 [52:15<3:07:49, 14.41s/batch, batch_loss=8.69, batch

Epoch 8/10:  21%|▏| 209/991 [52:29<3:07:49, 14.41s/batch, batch_loss=19, batch_i

Epoch 8/10:  21%|▏| 210/991 [52:29<3:07:37, 14.41s/batch, batch_loss=19, batch_i

Epoch 8/10:  21%|▏| 210/991 [52:45<3:07:37, 14.41s/batch, batch_loss=11.4, batch

Epoch 8/10:  21%|▏| 211/991 [52:45<3:11:32, 14.73s/batch, batch_loss=11.4, batch

Epoch 8/10:  21%|▏| 211/991 [53:00<3:11:32, 14.73s/batch, batch_loss=13.8, batch

Epoch 8/10:  21%|▏| 212/991 [53:00<3:12:47, 14.85s/batch, batch_loss=13.8, batch

Epoch 8/10:  21%|▏| 212/991 [53:15<3:12:47, 14.85s/batch, batch_loss=2.84, batch

Epoch 8/10:  21%|▏| 213/991 [53:15<3:13:58, 14.96s/batch, batch_loss=2.84, batch

Epoch 8/10:  21%|▏| 213/991 [53:31<3:13:58, 14.96s/batch, batch_loss=13.2, batch

Epoch 8/10:  22%|▏| 214/991 [53:31<3:16:47, 15.20s/batch, batch_loss=13.2, batch

Epoch 8/10:  22%|▏| 214/991 [53:46<3:16:47, 15.20s/batch, batch_loss=15.9, batch

Epoch 8/10:  22%|▏| 215/991 [53:46<3:15:37, 15.13s/batch, batch_loss=15.9, batch

Epoch 8/10:  22%|▏| 215/991 [54:00<3:15:37, 15.13s/batch, batch_loss=8.83, batch

Epoch 8/10:  22%|▏| 216/991 [54:00<3:13:24, 14.97s/batch, batch_loss=8.83, batch

Epoch 8/10:  22%|▏| 216/991 [54:15<3:13:24, 14.97s/batch, batch_loss=11.5, batch

Epoch 8/10:  22%|▏| 217/991 [54:15<3:11:53, 14.88s/batch, batch_loss=11.5, batch

Epoch 8/10:  22%|▏| 217/991 [54:30<3:11:53, 14.88s/batch, batch_loss=18, batch_i

Epoch 8/10:  22%|▏| 218/991 [54:30<3:12:37, 14.95s/batch, batch_loss=18, batch_i

Epoch 8/10:  22%|▏| 218/991 [54:45<3:12:37, 14.95s/batch, batch_loss=19.1, batch

Epoch 8/10:  22%|▏| 219/991 [54:45<3:13:10, 15.01s/batch, batch_loss=19.1, batch

Epoch 8/10:  22%|▏| 219/991 [55:00<3:13:10, 15.01s/batch, batch_loss=21.9, batch

Epoch 8/10:  22%|▏| 220/991 [55:00<3:11:25, 14.90s/batch, batch_loss=21.9, batch

Epoch 8/10:  22%|▏| 220/991 [55:14<3:11:25, 14.90s/batch, batch_loss=20.6, batch

Epoch 8/10:  22%|▏| 221/991 [55:14<3:09:32, 14.77s/batch, batch_loss=20.6, batch

Epoch 8/10:  22%|▏| 221/991 [55:30<3:09:32, 14.77s/batch, batch_loss=14.2, batch

Epoch 8/10:  22%|▏| 222/991 [55:30<3:13:13, 15.08s/batch, batch_loss=14.2, batch

Epoch 8/10:  22%|▏| 222/991 [55:47<3:13:13, 15.08s/batch, batch_loss=18.4, batch

Epoch 8/10:  23%|▏| 223/991 [55:47<3:20:18, 15.65s/batch, batch_loss=18.4, batch

Epoch 8/10:  23%|▏| 223/991 [56:02<3:20:18, 15.65s/batch, batch_loss=13.1, batch

Epoch 8/10:  23%|▏| 224/991 [56:02<3:18:15, 15.51s/batch, batch_loss=13.1, batch

Epoch 8/10:  23%|▏| 224/991 [56:17<3:18:15, 15.51s/batch, batch_loss=11, batch_i

Epoch 8/10:  23%|▏| 225/991 [56:17<3:13:25, 15.15s/batch, batch_loss=11, batch_i

Epoch 8/10:  23%|▏| 225/991 [56:32<3:13:25, 15.15s/batch, batch_loss=19.6, batch

Epoch 8/10:  23%|▏| 226/991 [56:32<3:12:17, 15.08s/batch, batch_loss=19.6, batch

Epoch 8/10:  23%|▏| 226/991 [56:47<3:12:17, 15.08s/batch, batch_loss=2.41e+3, ba

Epoch 8/10:  23%|▏| 227/991 [56:47<3:11:44, 15.06s/batch, batch_loss=2.41e+3, ba

Epoch 8/10:  23%|▏| 227/991 [57:02<3:11:44, 15.06s/batch, batch_loss=3.6e+3, bat

Epoch 8/10:  23%|▏| 228/991 [57:02<3:11:43, 15.08s/batch, batch_loss=3.6e+3, bat

Epoch 8/10:  23%|▏| 228/991 [57:17<3:11:43, 15.08s/batch, batch_loss=13.7, batch

Epoch 8/10:  23%|▏| 229/991 [57:17<3:10:34, 15.01s/batch, batch_loss=13.7, batch

Epoch 8/10:  23%|▏| 229/991 [57:30<3:10:34, 15.01s/batch, batch_loss=10.1, batch

Epoch 8/10:  23%|▏| 230/991 [57:30<3:05:23, 14.62s/batch, batch_loss=10.1, batch

Epoch 8/10:  23%|▏| 230/991 [57:45<3:05:23, 14.62s/batch, batch_loss=13.2, batch

Epoch 8/10:  23%|▏| 231/991 [57:45<3:05:09, 14.62s/batch, batch_loss=13.2, batch

Epoch 8/10:  23%|▏| 231/991 [58:00<3:05:09, 14.62s/batch, batch_loss=13.2, batch

Epoch 8/10:  23%|▏| 232/991 [58:00<3:06:04, 14.71s/batch, batch_loss=13.2, batch

Epoch 8/10:  23%|▏| 232/991 [58:14<3:06:04, 14.71s/batch, batch_loss=11.1, batch

Epoch 8/10:  24%|▏| 233/991 [58:14<3:04:34, 14.61s/batch, batch_loss=11.1, batch

Epoch 8/10:  24%|▏| 233/991 [58:29<3:04:34, 14.61s/batch, batch_loss=15.7, batch

Epoch 8/10:  24%|▏| 234/991 [58:29<3:03:49, 14.57s/batch, batch_loss=15.7, batch

Epoch 8/10:  24%|▏| 234/991 [58:43<3:03:49, 14.57s/batch, batch_loss=16.8, batch

Epoch 8/10:  24%|▏| 235/991 [58:43<3:02:27, 14.48s/batch, batch_loss=16.8, batch

Epoch 8/10:  24%|▏| 235/991 [58:58<3:02:27, 14.48s/batch, batch_loss=25.3, batch

Epoch 8/10:  24%|▏| 236/991 [58:58<3:03:25, 14.58s/batch, batch_loss=25.3, batch

Epoch 8/10:  24%|▏| 236/991 [59:12<3:03:25, 14.58s/batch, batch_loss=25.5, batch

Epoch 8/10:  24%|▏| 237/991 [59:12<3:02:47, 14.55s/batch, batch_loss=25.5, batch

Epoch 8/10:  24%|▏| 237/991 [59:27<3:02:47, 14.55s/batch, batch_loss=18.4, batch

Epoch 8/10:  24%|▏| 238/991 [59:27<3:03:01, 14.58s/batch, batch_loss=18.4, batch

Epoch 8/10:  24%|▏| 238/991 [59:41<3:03:01, 14.58s/batch, batch_loss=7.11, batch

Epoch 8/10:  24%|▏| 239/991 [59:41<3:02:06, 14.53s/batch, batch_loss=7.11, batch

Epoch 8/10:  24%|▏| 239/991 [59:56<3:02:06, 14.53s/batch, batch_loss=8.87, batch

Epoch 8/10:  24%|▏| 240/991 [59:56<3:03:01, 14.62s/batch, batch_loss=8.87, batch

Epoch 8/10:  24%|▏| 240/991 [1:00:11<3:03:01, 14.62s/batch, batch_loss=13, batch

Epoch 8/10:  24%|▏| 241/991 [1:00:11<3:03:54, 14.71s/batch, batch_loss=13, batch

Epoch 8/10:  24%|▏| 241/991 [1:00:26<3:03:54, 14.71s/batch, batch_loss=20.3, bat

Epoch 8/10:  24%|▏| 242/991 [1:00:26<3:06:12, 14.92s/batch, batch_loss=20.3, bat

Epoch 8/10:  24%|▏| 242/991 [1:00:41<3:06:12, 14.92s/batch, batch_loss=267, batc

Epoch 8/10:  25%|▏| 243/991 [1:00:41<3:06:12, 14.94s/batch, batch_loss=267, batc

Epoch 8/10:  25%|▏| 243/991 [1:00:57<3:06:12, 14.94s/batch, batch_loss=16.3, bat

Epoch 8/10:  25%|▏| 244/991 [1:00:57<3:07:52, 15.09s/batch, batch_loss=16.3, bat

Epoch 8/10:  25%|▏| 244/991 [1:01:12<3:07:52, 15.09s/batch, batch_loss=6.84, bat

Epoch 8/10:  25%|▏| 245/991 [1:01:12<3:06:45, 15.02s/batch, batch_loss=6.84, bat

Epoch 8/10:  25%|▏| 245/991 [1:01:27<3:06:45, 15.02s/batch, batch_loss=6.28, bat

Epoch 8/10:  25%|▏| 246/991 [1:01:27<3:08:01, 15.14s/batch, batch_loss=6.28, bat

Epoch 8/10:  25%|▏| 246/991 [1:01:44<3:08:01, 15.14s/batch, batch_loss=15.4, bat

Epoch 8/10:  25%|▏| 247/991 [1:01:44<3:15:32, 15.77s/batch, batch_loss=15.4, bat

Epoch 8/10:  25%|▏| 247/991 [1:01:59<3:15:32, 15.77s/batch, batch_loss=6.06, bat

Epoch 8/10:  25%|▎| 248/991 [1:01:59<3:12:23, 15.54s/batch, batch_loss=6.06, bat

Epoch 8/10:  25%|▎| 248/991 [1:02:14<3:12:23, 15.54s/batch, batch_loss=16.6, bat

Epoch 8/10:  25%|▎| 249/991 [1:02:14<3:10:10, 15.38s/batch, batch_loss=16.6, bat

Epoch 8/10:  25%|▎| 249/991 [1:02:29<3:10:10, 15.38s/batch, batch_loss=9.41, bat

Epoch 8/10:  25%|▎| 250/991 [1:02:29<3:06:26, 15.10s/batch, batch_loss=9.41, bat

Epoch 8/10:  25%|▎| 250/991 [1:02:44<3:06:26, 15.10s/batch, batch_loss=7.05, bat

Epoch 8/10:  25%|▎| 251/991 [1:02:44<3:05:06, 15.01s/batch, batch_loss=7.05, bat

Epoch 8/10:  25%|▎| 251/991 [1:02:58<3:05:06, 15.01s/batch, batch_loss=15.4, bat

Epoch 8/10:  25%|▎| 252/991 [1:02:58<3:03:40, 14.91s/batch, batch_loss=15.4, bat

Epoch 8/10:  25%|▎| 252/991 [1:03:13<3:03:40, 14.91s/batch, batch_loss=8.97, bat

Epoch 8/10:  26%|▎| 253/991 [1:03:13<3:02:07, 14.81s/batch, batch_loss=8.97, bat

Epoch 8/10:  26%|▎| 253/991 [1:03:27<3:02:07, 14.81s/batch, batch_loss=19.7, bat

Epoch 8/10:  26%|▎| 254/991 [1:03:27<3:00:09, 14.67s/batch, batch_loss=19.7, bat

Epoch 8/10:  26%|▎| 254/991 [1:03:44<3:00:09, 14.67s/batch, batch_loss=15.8, bat

Epoch 8/10:  26%|▎| 255/991 [1:03:44<3:07:00, 15.25s/batch, batch_loss=15.8, bat

Epoch 8/10:  26%|▎| 255/991 [1:03:58<3:07:00, 15.25s/batch, batch_loss=698, batc

Epoch 8/10:  26%|▎| 256/991 [1:03:58<3:02:37, 14.91s/batch, batch_loss=698, batc

Epoch 8/10:  26%|▎| 256/991 [1:04:12<3:02:37, 14.91s/batch, batch_loss=20.3, bat

Epoch 8/10:  26%|▎| 257/991 [1:04:12<2:58:03, 14.55s/batch, batch_loss=20.3, bat

Epoch 8/10:  26%|▎| 257/991 [1:04:26<2:58:03, 14.55s/batch, batch_loss=205, batc

Epoch 8/10:  26%|▎| 258/991 [1:04:26<2:55:48, 14.39s/batch, batch_loss=205, batc

Epoch 8/10:  26%|▎| 258/991 [1:04:40<2:55:48, 14.39s/batch, batch_loss=14.9, bat

Epoch 8/10:  26%|▎| 259/991 [1:04:40<2:56:44, 14.49s/batch, batch_loss=14.9, bat

Epoch 8/10:  26%|▎| 259/991 [1:04:55<2:56:44, 14.49s/batch, batch_loss=15.9, bat

Epoch 8/10:  26%|▎| 260/991 [1:04:55<2:58:09, 14.62s/batch, batch_loss=15.9, bat

Epoch 8/10:  26%|▎| 260/991 [1:05:11<2:58:09, 14.62s/batch, batch_loss=19, batch

Epoch 8/10:  26%|▎| 261/991 [1:05:11<3:00:56, 14.87s/batch, batch_loss=19, batch

Epoch 8/10:  26%|▎| 261/991 [1:05:26<3:00:56, 14.87s/batch, batch_loss=11.6, bat

Epoch 8/10:  26%|▎| 262/991 [1:05:26<3:00:40, 14.87s/batch, batch_loss=11.6, bat

Epoch 8/10:  26%|▎| 262/991 [1:05:43<3:00:40, 14.87s/batch, batch_loss=14.1, bat

Epoch 8/10:  27%|▎| 263/991 [1:05:43<3:10:24, 15.69s/batch, batch_loss=14.1, bat

Epoch 8/10:  27%|▎| 263/991 [1:05:58<3:10:24, 15.69s/batch, batch_loss=17.2, bat

Epoch 8/10:  27%|▎| 264/991 [1:05:58<3:07:31, 15.48s/batch, batch_loss=17.2, bat

Epoch 8/10:  27%|▎| 264/991 [1:06:13<3:07:31, 15.48s/batch, batch_loss=17.3, bat

Epoch 8/10:  27%|▎| 265/991 [1:06:13<3:05:43, 15.35s/batch, batch_loss=17.3, bat

Epoch 8/10:  27%|▎| 265/991 [1:06:28<3:05:43, 15.35s/batch, batch_loss=16.1, bat

Epoch 8/10:  27%|▎| 266/991 [1:06:28<3:03:08, 15.16s/batch, batch_loss=16.1, bat

Epoch 8/10:  27%|▎| 266/991 [1:06:43<3:03:08, 15.16s/batch, batch_loss=12, batch

Epoch 8/10:  27%|▎| 267/991 [1:06:43<3:02:50, 15.15s/batch, batch_loss=12, batch

Epoch 8/10:  27%|▎| 267/991 [1:06:58<3:02:50, 15.15s/batch, batch_loss=9.68, bat

Epoch 8/10:  27%|▎| 268/991 [1:06:58<3:00:20, 14.97s/batch, batch_loss=9.68, bat

Epoch 8/10:  27%|▎| 268/991 [1:07:13<3:00:20, 14.97s/batch, batch_loss=14.2, bat

Epoch 8/10:  27%|▎| 269/991 [1:07:13<3:02:15, 15.15s/batch, batch_loss=14.2, bat

Epoch 8/10:  27%|▎| 269/991 [1:07:29<3:02:15, 15.15s/batch, batch_loss=1.51, bat

Epoch 8/10:  27%|▎| 270/991 [1:07:29<3:03:13, 15.25s/batch, batch_loss=1.51, bat

Epoch 8/10:  27%|▎| 270/991 [1:07:44<3:03:13, 15.25s/batch, batch_loss=20.5, bat

Epoch 8/10:  27%|▎| 271/991 [1:07:44<3:02:23, 15.20s/batch, batch_loss=20.5, bat

Epoch 8/10:  27%|▎| 271/991 [1:07:59<3:02:23, 15.20s/batch, batch_loss=11.2, bat

Epoch 8/10:  27%|▎| 272/991 [1:07:59<3:02:55, 15.26s/batch, batch_loss=11.2, bat

Epoch 8/10:  27%|▎| 272/991 [1:08:14<3:02:55, 15.26s/batch, batch_loss=22.5, bat

Epoch 8/10:  28%|▎| 273/991 [1:08:14<3:02:08, 15.22s/batch, batch_loss=22.5, bat

Epoch 8/10:  28%|▎| 273/991 [1:08:30<3:02:08, 15.22s/batch, batch_loss=13.1, bat

Epoch 8/10:  28%|▎| 274/991 [1:08:30<3:02:31, 15.27s/batch, batch_loss=13.1, bat

Epoch 8/10:  28%|▎| 274/991 [1:08:45<3:02:31, 15.27s/batch, batch_loss=3.33e+3, 

Epoch 8/10:  28%|▎| 275/991 [1:08:45<3:00:49, 15.15s/batch, batch_loss=3.33e+3, 

Epoch 8/10:  28%|▎| 275/991 [1:08:59<3:00:49, 15.15s/batch, batch_loss=14.2, bat

Epoch 8/10:  28%|▎| 276/991 [1:08:59<2:57:52, 14.93s/batch, batch_loss=14.2, bat

Epoch 8/10:  28%|▎| 276/991 [1:09:14<2:57:52, 14.93s/batch, batch_loss=4.75e+3, 

Epoch 8/10:  28%|▎| 277/991 [1:09:14<2:56:48, 14.86s/batch, batch_loss=4.75e+3, 

Epoch 8/10:  28%|▎| 277/991 [1:09:31<2:56:48, 14.86s/batch, batch_loss=14.5, bat

Epoch 8/10:  28%|▎| 278/991 [1:09:31<3:03:42, 15.46s/batch, batch_loss=14.5, bat

Epoch 8/10:  28%|▎| 278/991 [1:09:46<3:03:42, 15.46s/batch, batch_loss=19.1, bat

Epoch 8/10:  28%|▎| 279/991 [1:09:46<3:02:32, 15.38s/batch, batch_loss=19.1, bat

Epoch 8/10:  28%|▎| 279/991 [1:10:00<3:02:32, 15.38s/batch, batch_loss=14.6, bat

Epoch 8/10:  28%|▎| 280/991 [1:10:00<2:59:10, 15.12s/batch, batch_loss=14.6, bat

Epoch 8/10:  28%|▎| 280/991 [1:10:15<2:59:10, 15.12s/batch, batch_loss=11.3, bat

Epoch 8/10:  28%|▎| 281/991 [1:10:15<2:57:19, 14.99s/batch, batch_loss=11.3, bat

Epoch 8/10:  28%|▎| 281/991 [1:10:28<2:57:19, 14.99s/batch, batch_loss=7.85, bat

Epoch 8/10:  28%|▎| 282/991 [1:10:28<2:52:00, 14.56s/batch, batch_loss=7.85, bat

Epoch 8/10:  28%|▎| 282/991 [1:10:42<2:52:00, 14.56s/batch, batch_loss=18.1, bat

Epoch 8/10:  29%|▎| 283/991 [1:10:42<2:48:10, 14.25s/batch, batch_loss=18.1, bat

Epoch 8/10:  29%|▎| 283/991 [1:10:55<2:48:10, 14.25s/batch, batch_loss=18.3, bat

Epoch 8/10:  29%|▎| 284/991 [1:10:55<2:44:18, 13.94s/batch, batch_loss=18.3, bat

Epoch 8/10:  29%|▎| 284/991 [1:11:10<2:44:18, 13.94s/batch, batch_loss=13.4, bat

Epoch 8/10:  29%|▎| 285/991 [1:11:10<2:46:38, 14.16s/batch, batch_loss=13.4, bat

Epoch 8/10:  29%|▎| 285/991 [1:11:24<2:46:38, 14.16s/batch, batch_loss=9.06, bat

Epoch 8/10:  29%|▎| 286/991 [1:11:24<2:46:11, 14.14s/batch, batch_loss=9.06, bat

Epoch 8/10:  29%|▎| 286/991 [1:11:38<2:46:11, 14.14s/batch, batch_loss=8.14, bat

Epoch 8/10:  29%|▎| 287/991 [1:11:38<2:45:04, 14.07s/batch, batch_loss=8.14, bat

Epoch 8/10:  29%|▎| 287/991 [1:11:53<2:45:04, 14.07s/batch, batch_loss=2.6e+3, b

Epoch 8/10:  29%|▎| 288/991 [1:11:53<2:49:02, 14.43s/batch, batch_loss=2.6e+3, b

Epoch 8/10:  29%|▎| 288/991 [1:12:08<2:49:02, 14.43s/batch, batch_loss=1.25e+3, 

Epoch 8/10:  29%|▎| 289/991 [1:12:08<2:49:04, 14.45s/batch, batch_loss=1.25e+3, 

Epoch 8/10:  29%|▎| 289/991 [1:12:23<2:49:04, 14.45s/batch, batch_loss=13.1, bat

Epoch 8/10:  29%|▎| 290/991 [1:12:23<2:51:56, 14.72s/batch, batch_loss=13.1, bat

Epoch 8/10:  29%|▎| 290/991 [1:12:38<2:51:56, 14.72s/batch, batch_loss=4.96, bat

Epoch 8/10:  29%|▎| 291/991 [1:12:38<2:50:57, 14.65s/batch, batch_loss=4.96, bat

Epoch 8/10:  29%|▎| 291/991 [1:12:54<2:50:57, 14.65s/batch, batch_loss=10.7, bat

Epoch 8/10:  29%|▎| 292/991 [1:12:54<2:57:20, 15.22s/batch, batch_loss=10.7, bat

Epoch 8/10:  29%|▎| 292/991 [1:13:16<2:57:20, 15.22s/batch, batch_loss=15.4, bat

Epoch 8/10:  30%|▎| 293/991 [1:13:16<3:20:29, 17.23s/batch, batch_loss=15.4, bat

Epoch 8/10:  30%|▎| 293/991 [1:13:32<3:20:29, 17.23s/batch, batch_loss=14.8, bat

Epoch 8/10:  30%|▎| 294/991 [1:13:32<3:15:08, 16.80s/batch, batch_loss=14.8, bat

Epoch 8/10:  30%|▎| 294/991 [1:13:48<3:15:08, 16.80s/batch, batch_loss=10.6, bat

Epoch 8/10:  30%|▎| 295/991 [1:13:48<3:13:14, 16.66s/batch, batch_loss=10.6, bat

Epoch 8/10:  30%|▎| 295/991 [1:14:03<3:13:14, 16.66s/batch, batch_loss=18.8, bat

Epoch 8/10:  30%|▎| 296/991 [1:14:03<3:06:06, 16.07s/batch, batch_loss=18.8, bat

Epoch 8/10:  30%|▎| 296/991 [1:14:17<3:06:06, 16.07s/batch, batch_loss=14.6, bat

Epoch 8/10:  30%|▎| 297/991 [1:14:17<2:59:57, 15.56s/batch, batch_loss=14.6, bat

Epoch 8/10:  30%|▎| 297/991 [1:14:32<2:59:57, 15.56s/batch, batch_loss=3.24e+4, 

Epoch 8/10:  30%|▎| 298/991 [1:14:32<2:57:05, 15.33s/batch, batch_loss=3.24e+4, 

Epoch 8/10:  30%|▎| 298/991 [1:14:46<2:57:05, 15.33s/batch, batch_loss=15.1, bat

Epoch 8/10:  30%|▎| 299/991 [1:14:46<2:52:47, 14.98s/batch, batch_loss=15.1, bat

Epoch 8/10:  30%|▎| 299/991 [1:15:01<2:52:47, 14.98s/batch, batch_loss=5.67, bat

Epoch 8/10:  30%|▎| 300/991 [1:15:01<2:51:01, 14.85s/batch, batch_loss=5.67, bat

Epoch 8/10:  30%|▎| 300/991 [1:15:15<2:51:01, 14.85s/batch, batch_loss=9.11, bat

Epoch 8/10:  30%|▎| 301/991 [1:15:15<2:48:21, 14.64s/batch, batch_loss=9.11, bat

Epoch 8/10:  30%|▎| 301/991 [1:15:29<2:48:21, 14.64s/batch, batch_loss=11.2, bat

Epoch 8/10:  30%|▎| 302/991 [1:15:29<2:47:20, 14.57s/batch, batch_loss=11.2, bat

Epoch 8/10:  30%|▎| 302/991 [1:15:46<2:47:20, 14.57s/batch, batch_loss=8.35, bat

Epoch 8/10:  31%|▎| 303/991 [1:15:46<2:54:04, 15.18s/batch, batch_loss=8.35, bat

Epoch 8/10:  31%|▎| 303/991 [1:16:01<2:54:04, 15.18s/batch, batch_loss=2.97, bat

Epoch 8/10:  31%|▎| 304/991 [1:16:01<2:52:17, 15.05s/batch, batch_loss=2.97, bat

Epoch 8/10:  31%|▎| 304/991 [1:16:15<2:52:17, 15.05s/batch, batch_loss=14.8, bat

Epoch 8/10:  31%|▎| 305/991 [1:16:15<2:50:22, 14.90s/batch, batch_loss=14.8, bat

Epoch 8/10:  31%|▎| 305/991 [1:16:30<2:50:22, 14.90s/batch, batch_loss=8.09, bat

Epoch 8/10:  31%|▎| 306/991 [1:16:30<2:48:44, 14.78s/batch, batch_loss=8.09, bat

Epoch 8/10:  31%|▎| 306/991 [1:16:43<2:48:44, 14.78s/batch, batch_loss=6.29e+3, 

Epoch 8/10:  31%|▎| 307/991 [1:16:43<2:42:44, 14.28s/batch, batch_loss=6.29e+3, 

Epoch 8/10:  31%|▎| 307/991 [1:16:57<2:42:44, 14.28s/batch, batch_loss=11.5, bat

Epoch 8/10:  31%|▎| 308/991 [1:16:57<2:42:56, 14.31s/batch, batch_loss=11.5, bat

Epoch 8/10:  31%|▎| 308/991 [1:17:11<2:42:56, 14.31s/batch, batch_loss=21.8, bat

Epoch 8/10:  31%|▎| 309/991 [1:17:11<2:42:02, 14.26s/batch, batch_loss=21.8, bat

Epoch 8/10:  31%|▎| 309/991 [1:17:25<2:42:02, 14.26s/batch, batch_loss=15.2, bat

Epoch 8/10:  31%|▎| 310/991 [1:17:25<2:40:36, 14.15s/batch, batch_loss=15.2, bat

Epoch 8/10:  31%|▎| 310/991 [1:17:42<2:40:36, 14.15s/batch, batch_loss=11.5, bat

Epoch 8/10:  31%|▎| 311/991 [1:17:42<2:50:34, 15.05s/batch, batch_loss=11.5, bat

Epoch 8/10:  31%|▎| 311/991 [1:17:57<2:50:34, 15.05s/batch, batch_loss=12.8, bat

Epoch 8/10:  31%|▎| 312/991 [1:17:57<2:49:57, 15.02s/batch, batch_loss=12.8, bat

Epoch 8/10:  31%|▎| 312/991 [1:18:12<2:49:57, 15.02s/batch, batch_loss=1.06e+4, 

Epoch 8/10:  32%|▎| 313/991 [1:18:12<2:48:21, 14.90s/batch, batch_loss=1.06e+4, 

Epoch 8/10:  32%|▎| 313/991 [1:18:26<2:48:21, 14.90s/batch, batch_loss=11.1, bat

Epoch 8/10:  32%|▎| 314/991 [1:18:26<2:46:56, 14.80s/batch, batch_loss=11.1, bat

Epoch 8/10:  32%|▎| 314/991 [1:18:41<2:46:56, 14.80s/batch, batch_loss=13.9, bat

Epoch 8/10:  32%|▎| 315/991 [1:18:41<2:46:07, 14.74s/batch, batch_loss=13.9, bat

Epoch 8/10:  32%|▎| 315/991 [1:18:56<2:46:07, 14.74s/batch, batch_loss=21.5, bat

Epoch 8/10:  32%|▎| 316/991 [1:18:56<2:45:35, 14.72s/batch, batch_loss=21.5, bat

Epoch 8/10:  32%|▎| 316/991 [1:19:10<2:45:35, 14.72s/batch, batch_loss=21, batch

Epoch 8/10:  32%|▎| 317/991 [1:19:10<2:42:22, 14.46s/batch, batch_loss=21, batch

Epoch 8/10:  32%|▎| 317/991 [1:19:24<2:42:22, 14.46s/batch, batch_loss=23.8, bat

Epoch 8/10:  32%|▎| 318/991 [1:19:24<2:40:48, 14.34s/batch, batch_loss=23.8, bat

Epoch 8/10:  32%|▎| 318/991 [1:19:38<2:40:48, 14.34s/batch, batch_loss=14, batch

Epoch 8/10:  32%|▎| 319/991 [1:19:38<2:40:55, 14.37s/batch, batch_loss=14, batch

Epoch 8/10:  32%|▎| 319/991 [1:19:52<2:40:55, 14.37s/batch, batch_loss=17.9, bat

Epoch 8/10:  32%|▎| 320/991 [1:19:52<2:40:47, 14.38s/batch, batch_loss=17.9, bat

Epoch 8/10:  32%|▎| 320/991 [1:20:07<2:40:47, 14.38s/batch, batch_loss=20.5, bat

Epoch 8/10:  32%|▎| 321/991 [1:20:07<2:41:06, 14.43s/batch, batch_loss=20.5, bat

Epoch 8/10:  32%|▎| 321/991 [1:20:22<2:41:06, 14.43s/batch, batch_loss=7.06, bat

Epoch 8/10:  32%|▎| 322/991 [1:20:22<2:41:42, 14.50s/batch, batch_loss=7.06, bat

Epoch 8/10:  32%|▎| 322/991 [1:20:36<2:41:42, 14.50s/batch, batch_loss=9.66, bat

Epoch 8/10:  33%|▎| 323/991 [1:20:36<2:42:10, 14.57s/batch, batch_loss=9.66, bat

Epoch 8/10:  33%|▎| 323/991 [1:20:51<2:42:10, 14.57s/batch, batch_loss=21.6, bat

Epoch 8/10:  33%|▎| 324/991 [1:20:51<2:43:06, 14.67s/batch, batch_loss=21.6, bat

Epoch 8/10:  33%|▎| 324/991 [1:21:06<2:43:06, 14.67s/batch, batch_loss=12.8, bat

Epoch 8/10:  33%|▎| 325/991 [1:21:06<2:42:01, 14.60s/batch, batch_loss=12.8, bat

Epoch 8/10:  33%|▎| 325/991 [1:21:21<2:42:01, 14.60s/batch, batch_loss=19.9, bat

Epoch 8/10:  33%|▎| 326/991 [1:21:21<2:43:40, 14.77s/batch, batch_loss=19.9, bat

Epoch 8/10:  33%|▎| 326/991 [1:21:38<2:43:40, 14.77s/batch, batch_loss=3.04e+3, 

Epoch 8/10:  33%|▎| 327/991 [1:21:38<2:52:32, 15.59s/batch, batch_loss=3.04e+3, 

Epoch 8/10:  33%|▎| 327/991 [1:21:53<2:52:32, 15.59s/batch, batch_loss=13, batch

Epoch 8/10:  33%|▎| 328/991 [1:21:53<2:49:54, 15.38s/batch, batch_loss=13, batch

Epoch 8/10:  33%|▎| 328/991 [1:22:09<2:49:54, 15.38s/batch, batch_loss=18.6, bat

Epoch 8/10:  33%|▎| 329/991 [1:22:09<2:49:14, 15.34s/batch, batch_loss=18.6, bat

Epoch 8/10:  33%|▎| 329/991 [1:22:23<2:49:14, 15.34s/batch, batch_loss=18.9, bat

Epoch 8/10:  33%|▎| 330/991 [1:22:23<2:45:58, 15.07s/batch, batch_loss=18.9, bat

Epoch 8/10:  33%|▎| 330/991 [1:22:38<2:45:58, 15.07s/batch, batch_loss=13.5, bat

Epoch 8/10:  33%|▎| 331/991 [1:22:38<2:45:00, 15.00s/batch, batch_loss=13.5, bat

Epoch 8/10:  33%|▎| 331/991 [1:22:53<2:45:00, 15.00s/batch, batch_loss=14.2, bat

Epoch 8/10:  34%|▎| 332/991 [1:22:53<2:44:31, 14.98s/batch, batch_loss=14.2, bat

Epoch 8/10:  34%|▎| 332/991 [1:23:08<2:44:31, 14.98s/batch, batch_loss=13.5, bat

Epoch 8/10:  34%|▎| 333/991 [1:23:08<2:43:44, 14.93s/batch, batch_loss=13.5, bat

Epoch 8/10:  34%|▎| 333/991 [1:23:22<2:43:44, 14.93s/batch, batch_loss=14.6, bat

Epoch 8/10:  34%|▎| 334/991 [1:23:22<2:41:22, 14.74s/batch, batch_loss=14.6, bat

Epoch 8/10:  34%|▎| 334/991 [1:23:37<2:41:22, 14.74s/batch, batch_loss=3.98, bat

Epoch 8/10:  34%|▎| 335/991 [1:23:37<2:42:32, 14.87s/batch, batch_loss=3.98, bat

Epoch 8/10:  34%|▎| 335/991 [1:23:52<2:42:32, 14.87s/batch, batch_loss=8.5e+3, b

Epoch 8/10:  34%|▎| 336/991 [1:23:52<2:43:43, 15.00s/batch, batch_loss=8.5e+3, b

Epoch 8/10:  34%|▎| 336/991 [1:24:07<2:43:43, 15.00s/batch, batch_loss=2.36e+3, 

Epoch 8/10:  34%|▎| 337/991 [1:24:07<2:42:51, 14.94s/batch, batch_loss=2.36e+3, 

Epoch 8/10:  34%|▎| 337/991 [1:24:22<2:42:51, 14.94s/batch, batch_loss=8.15, bat

Epoch 8/10:  34%|▎| 338/991 [1:24:22<2:41:36, 14.85s/batch, batch_loss=8.15, bat

Epoch 8/10:  34%|▎| 338/991 [1:24:36<2:41:36, 14.85s/batch, batch_loss=26.4, bat

Epoch 8/10:  34%|▎| 339/991 [1:24:36<2:40:18, 14.75s/batch, batch_loss=26.4, bat

Epoch 8/10:  34%|▎| 339/991 [1:24:51<2:40:18, 14.75s/batch, batch_loss=11.6, bat

Epoch 8/10:  34%|▎| 340/991 [1:24:51<2:38:39, 14.62s/batch, batch_loss=11.6, bat

Epoch 8/10:  34%|▎| 340/991 [1:25:05<2:38:39, 14.62s/batch, batch_loss=9.06, bat

Epoch 8/10:  34%|▎| 341/991 [1:25:05<2:37:11, 14.51s/batch, batch_loss=9.06, bat

Epoch 8/10:  34%|▎| 341/991 [1:25:20<2:37:11, 14.51s/batch, batch_loss=0.765, ba

Epoch 8/10:  35%|▎| 342/991 [1:25:20<2:38:34, 14.66s/batch, batch_loss=0.765, ba

Epoch 8/10:  35%|▎| 342/991 [1:25:36<2:38:34, 14.66s/batch, batch_loss=6.85, bat

Epoch 8/10:  35%|▎| 343/991 [1:25:36<2:44:37, 15.24s/batch, batch_loss=6.85, bat

Epoch 8/10:  35%|▎| 343/991 [1:25:51<2:44:37, 15.24s/batch, batch_loss=17.2, bat

Epoch 8/10:  35%|▎| 344/991 [1:25:51<2:42:33, 15.07s/batch, batch_loss=17.2, bat

Epoch 8/10:  35%|▎| 344/991 [1:26:06<2:42:33, 15.07s/batch, batch_loss=113, batc

Epoch 8/10:  35%|▎| 345/991 [1:26:06<2:40:55, 14.95s/batch, batch_loss=113, batc

Epoch 8/10:  35%|▎| 345/991 [1:26:21<2:40:55, 14.95s/batch, batch_loss=14.4, bat

Epoch 8/10:  35%|▎| 346/991 [1:26:21<2:42:27, 15.11s/batch, batch_loss=14.4, bat

Epoch 8/10:  35%|▎| 346/991 [1:26:37<2:42:27, 15.11s/batch, batch_loss=12.1, bat

Epoch 8/10:  35%|▎| 347/991 [1:26:37<2:43:31, 15.23s/batch, batch_loss=12.1, bat

Epoch 8/10:  35%|▎| 347/991 [1:26:52<2:43:31, 15.23s/batch, batch_loss=14.2, bat

Epoch 8/10:  35%|▎| 348/991 [1:26:52<2:42:22, 15.15s/batch, batch_loss=14.2, bat

Epoch 8/10:  35%|▎| 348/991 [1:27:07<2:42:22, 15.15s/batch, batch_loss=9.1, batc

Epoch 8/10:  35%|▎| 349/991 [1:27:07<2:41:42, 15.11s/batch, batch_loss=9.1, batc

Epoch 8/10:  35%|▎| 349/991 [1:27:24<2:41:42, 15.11s/batch, batch_loss=12.4, bat

Epoch 8/10:  35%|▎| 350/991 [1:27:24<2:48:05, 15.73s/batch, batch_loss=12.4, bat

Epoch 8/10:  35%|▎| 350/991 [1:27:39<2:48:05, 15.73s/batch, batch_loss=9.42, bat

Epoch 8/10:  35%|▎| 351/991 [1:27:39<2:45:16, 15.49s/batch, batch_loss=9.42, bat

Epoch 8/10:  35%|▎| 351/991 [1:27:55<2:45:16, 15.49s/batch, batch_loss=13.6, bat

Epoch 8/10:  36%|▎| 352/991 [1:27:55<2:45:17, 15.52s/batch, batch_loss=13.6, bat

Epoch 8/10:  36%|▎| 352/991 [1:28:10<2:45:17, 15.52s/batch, batch_loss=12.8, bat

Epoch 8/10:  36%|▎| 353/991 [1:28:10<2:43:37, 15.39s/batch, batch_loss=12.8, bat

Epoch 8/10:  36%|▎| 353/991 [1:28:25<2:43:37, 15.39s/batch, batch_loss=20.7, bat

Epoch 8/10:  36%|▎| 354/991 [1:28:25<2:43:16, 15.38s/batch, batch_loss=20.7, bat

Epoch 8/10:  36%|▎| 354/991 [1:28:40<2:43:16, 15.38s/batch, batch_loss=8.46, bat

Epoch 8/10:  36%|▎| 355/991 [1:28:40<2:42:09, 15.30s/batch, batch_loss=8.46, bat

Epoch 8/10:  36%|▎| 355/991 [1:28:55<2:42:09, 15.30s/batch, batch_loss=20.8, bat

Epoch 8/10:  36%|▎| 356/991 [1:28:55<2:39:17, 15.05s/batch, batch_loss=20.8, bat

Epoch 8/10:  36%|▎| 356/991 [1:29:09<2:39:17, 15.05s/batch, batch_loss=14.1, bat

Epoch 8/10:  36%|▎| 357/991 [1:29:09<2:37:37, 14.92s/batch, batch_loss=14.1, bat

Epoch 8/10:  36%|▎| 357/991 [1:29:23<2:37:37, 14.92s/batch, batch_loss=14.4, bat

Epoch 8/10:  36%|▎| 358/991 [1:29:23<2:34:04, 14.60s/batch, batch_loss=14.4, bat

Epoch 8/10:  36%|▎| 358/991 [1:29:37<2:34:04, 14.60s/batch, batch_loss=5.01, bat

Epoch 8/10:  36%|▎| 359/991 [1:29:37<2:32:42, 14.50s/batch, batch_loss=5.01, bat

Epoch 8/10:  36%|▎| 359/991 [1:29:52<2:32:42, 14.50s/batch, batch_loss=9.15, bat

Epoch 8/10:  36%|▎| 360/991 [1:29:52<2:33:27, 14.59s/batch, batch_loss=9.15, bat

Epoch 8/10:  36%|▎| 360/991 [1:30:07<2:33:27, 14.59s/batch, batch_loss=26.1, bat

Epoch 8/10:  36%|▎| 361/991 [1:30:07<2:34:49, 14.74s/batch, batch_loss=26.1, bat

Epoch 8/10:  36%|▎| 361/991 [1:30:23<2:34:49, 14.74s/batch, batch_loss=20.3, bat

Epoch 8/10:  37%|▎| 362/991 [1:30:23<2:37:03, 14.98s/batch, batch_loss=20.3, bat

Epoch 8/10:  37%|▎| 362/991 [1:30:38<2:37:03, 14.98s/batch, batch_loss=12.9, bat

Epoch 8/10:  37%|▎| 363/991 [1:30:38<2:38:30, 15.14s/batch, batch_loss=12.9, bat

Epoch 8/10:  37%|▎| 363/991 [1:30:53<2:38:30, 15.14s/batch, batch_loss=13.9, bat

Epoch 8/10:  37%|▎| 364/991 [1:30:53<2:37:06, 15.03s/batch, batch_loss=13.9, bat

Epoch 8/10:  37%|▎| 364/991 [1:31:08<2:37:06, 15.03s/batch, batch_loss=9.95, bat

Epoch 8/10:  37%|▎| 365/991 [1:31:08<2:36:20, 14.98s/batch, batch_loss=9.95, bat

Epoch 8/10:  37%|▎| 365/991 [1:31:23<2:36:20, 14.98s/batch, batch_loss=15.9, bat

Epoch 8/10:  37%|▎| 366/991 [1:31:23<2:36:15, 15.00s/batch, batch_loss=15.9, bat

Epoch 8/10:  37%|▎| 366/991 [1:31:39<2:36:15, 15.00s/batch, batch_loss=12.2, bat

Epoch 8/10:  37%|▎| 367/991 [1:31:39<2:37:54, 15.18s/batch, batch_loss=12.2, bat

Epoch 8/10:  37%|▎| 367/991 [1:31:53<2:37:54, 15.18s/batch, batch_loss=10.7, bat

Epoch 8/10:  37%|▎| 368/991 [1:31:53<2:36:10, 15.04s/batch, batch_loss=10.7, bat

Epoch 8/10:  37%|▎| 368/991 [1:32:09<2:36:10, 15.04s/batch, batch_loss=14.4, bat

Epoch 8/10:  37%|▎| 369/991 [1:32:09<2:38:02, 15.24s/batch, batch_loss=14.4, bat

Epoch 8/10:  37%|▎| 369/991 [1:32:24<2:38:02, 15.24s/batch, batch_loss=1.2e+4, b

Epoch 8/10:  37%|▎| 370/991 [1:32:24<2:37:39, 15.23s/batch, batch_loss=1.2e+4, b

Epoch 8/10:  37%|▎| 370/991 [1:32:39<2:37:39, 15.23s/batch, batch_loss=20.5, bat

Epoch 8/10:  37%|▎| 371/991 [1:32:39<2:34:47, 14.98s/batch, batch_loss=20.5, bat

Epoch 8/10:  37%|▎| 371/991 [1:32:54<2:34:47, 14.98s/batch, batch_loss=16.4, bat

Epoch 8/10:  38%|▍| 372/991 [1:32:54<2:35:27, 15.07s/batch, batch_loss=16.4, bat

Epoch 8/10:  38%|▍| 372/991 [1:33:09<2:35:27, 15.07s/batch, batch_loss=23.7, bat

Epoch 8/10:  38%|▍| 373/991 [1:33:09<2:36:00, 15.15s/batch, batch_loss=23.7, bat

Epoch 8/10:  38%|▍| 373/991 [1:33:24<2:36:00, 15.15s/batch, batch_loss=470, batc

Epoch 8/10:  38%|▍| 374/991 [1:33:24<2:34:27, 15.02s/batch, batch_loss=470, batc

Epoch 8/10:  38%|▍| 374/991 [1:33:39<2:34:27, 15.02s/batch, batch_loss=1.43e+3, 

Epoch 8/10:  38%|▍| 375/991 [1:33:39<2:33:15, 14.93s/batch, batch_loss=1.43e+3, 

Epoch 8/10:  38%|▍| 375/991 [1:33:55<2:33:15, 14.93s/batch, batch_loss=1.22e+3, 

Epoch 8/10:  38%|▍| 376/991 [1:33:55<2:36:08, 15.23s/batch, batch_loss=1.22e+3, 

Epoch 8/10:  38%|▍| 376/991 [1:34:12<2:36:08, 15.23s/batch, batch_loss=20, batch

Epoch 8/10:  38%|▍| 377/991 [1:34:12<2:43:53, 16.01s/batch, batch_loss=20, batch

Epoch 8/10:  38%|▍| 377/991 [1:34:27<2:43:53, 16.01s/batch, batch_loss=1.18e+3, 

Epoch 8/10:  38%|▍| 378/991 [1:34:27<2:40:14, 15.68s/batch, batch_loss=1.18e+3, 

Epoch 8/10:  38%|▍| 378/991 [1:34:42<2:40:14, 15.68s/batch, batch_loss=11.4, bat

Epoch 8/10:  38%|▍| 379/991 [1:34:42<2:35:59, 15.29s/batch, batch_loss=11.4, bat

Epoch 8/10:  38%|▍| 379/991 [1:34:56<2:35:59, 15.29s/batch, batch_loss=14.3, bat

Epoch 8/10:  38%|▍| 380/991 [1:34:56<2:33:12, 15.04s/batch, batch_loss=14.3, bat

Epoch 8/10:  38%|▍| 380/991 [1:35:10<2:33:12, 15.04s/batch, batch_loss=22.2, bat

Epoch 8/10:  38%|▍| 381/991 [1:35:10<2:29:40, 14.72s/batch, batch_loss=22.2, bat

Epoch 8/10:  38%|▍| 381/991 [1:35:25<2:29:40, 14.72s/batch, batch_loss=10.8, bat

Epoch 8/10:  39%|▍| 382/991 [1:35:25<2:28:33, 14.64s/batch, batch_loss=10.8, bat

Epoch 8/10:  39%|▍| 382/991 [1:35:39<2:28:33, 14.64s/batch, batch_loss=10.6, bat

Epoch 8/10:  39%|▍| 383/991 [1:35:39<2:28:36, 14.66s/batch, batch_loss=10.6, bat

Epoch 8/10:  39%|▍| 383/991 [1:35:54<2:28:36, 14.66s/batch, batch_loss=26.6, bat

Epoch 8/10:  39%|▍| 384/991 [1:35:54<2:28:37, 14.69s/batch, batch_loss=26.6, bat

Epoch 8/10:  39%|▍| 384/991 [1:36:08<2:28:37, 14.69s/batch, batch_loss=10.6, bat

Epoch 8/10:  39%|▍| 385/991 [1:36:08<2:27:29, 14.60s/batch, batch_loss=10.6, bat

Epoch 8/10:  39%|▍| 385/991 [1:36:24<2:27:29, 14.60s/batch, batch_loss=17.8, bat

Epoch 8/10:  39%|▍| 386/991 [1:36:24<2:29:54, 14.87s/batch, batch_loss=17.8, bat

Epoch 8/10:  39%|▍| 386/991 [1:36:40<2:29:54, 14.87s/batch, batch_loss=27.1, bat

Epoch 8/10:  39%|▍| 387/991 [1:36:40<2:32:27, 15.14s/batch, batch_loss=27.1, bat

Epoch 8/10:  39%|▍| 387/991 [1:36:53<2:32:27, 15.14s/batch, batch_loss=788, batc

Epoch 8/10:  39%|▍| 388/991 [1:36:53<2:27:31, 14.68s/batch, batch_loss=788, batc

Epoch 8/10:  39%|▍| 388/991 [1:37:08<2:27:31, 14.68s/batch, batch_loss=15.6, bat

Epoch 8/10:  39%|▍| 389/991 [1:37:08<2:27:27, 14.70s/batch, batch_loss=15.6, bat

Epoch 8/10:  39%|▍| 389/991 [1:37:23<2:27:27, 14.70s/batch, batch_loss=869, batc

Epoch 8/10:  39%|▍| 390/991 [1:37:23<2:28:54, 14.87s/batch, batch_loss=869, batc

Epoch 8/10:  39%|▍| 390/991 [1:37:38<2:28:54, 14.87s/batch, batch_loss=20, batch

Epoch 8/10:  39%|▍| 391/991 [1:37:38<2:27:37, 14.76s/batch, batch_loss=20, batch

Epoch 8/10:  39%|▍| 391/991 [1:37:53<2:27:37, 14.76s/batch, batch_loss=13.7, bat

Epoch 8/10:  40%|▍| 392/991 [1:37:53<2:28:43, 14.90s/batch, batch_loss=13.7, bat

Epoch 8/10:  40%|▍| 392/991 [1:38:08<2:28:43, 14.90s/batch, batch_loss=20.7, bat

Epoch 8/10:  40%|▍| 393/991 [1:38:08<2:29:42, 15.02s/batch, batch_loss=20.7, bat

Epoch 8/10:  40%|▍| 393/991 [1:38:23<2:29:42, 15.02s/batch, batch_loss=606, batc

Epoch 8/10:  40%|▍| 394/991 [1:38:23<2:28:52, 14.96s/batch, batch_loss=606, batc

Epoch 8/10:  40%|▍| 394/991 [1:38:38<2:28:52, 14.96s/batch, batch_loss=19.1, bat

Epoch 8/10:  40%|▍| 395/991 [1:38:38<2:27:52, 14.89s/batch, batch_loss=19.1, bat

Epoch 8/10:  40%|▍| 395/991 [1:38:53<2:27:52, 14.89s/batch, batch_loss=11.5, bat

Epoch 8/10:  40%|▍| 396/991 [1:38:53<2:28:03, 14.93s/batch, batch_loss=11.5, bat

Epoch 8/10:  40%|▍| 396/991 [1:39:08<2:28:03, 14.93s/batch, batch_loss=15, batch

Epoch 8/10:  40%|▍| 397/991 [1:39:08<2:27:19, 14.88s/batch, batch_loss=15, batch

Epoch 8/10:  40%|▍| 397/991 [1:39:23<2:27:19, 14.88s/batch, batch_loss=13.5, bat

Epoch 8/10:  40%|▍| 398/991 [1:39:23<2:28:14, 15.00s/batch, batch_loss=13.5, bat

Epoch 8/10:  40%|▍| 398/991 [1:39:38<2:28:14, 15.00s/batch, batch_loss=24, batch

Epoch 8/10:  40%|▍| 399/991 [1:39:38<2:28:20, 15.03s/batch, batch_loss=24, batch

Epoch 8/10:  40%|▍| 399/991 [1:39:56<2:28:20, 15.03s/batch, batch_loss=11.8, bat

Epoch 8/10:  40%|▍| 400/991 [1:39:56<2:36:53, 15.93s/batch, batch_loss=11.8, bat

Epoch 8/10:  40%|▍| 400/991 [1:40:11<2:36:53, 15.93s/batch, batch_loss=10.4, bat

Epoch 8/10:  40%|▍| 401/991 [1:40:11<2:34:26, 15.71s/batch, batch_loss=10.4, bat

Epoch 8/10:  40%|▍| 401/991 [1:40:26<2:34:26, 15.71s/batch, batch_loss=15.5, bat

Epoch 8/10:  41%|▍| 402/991 [1:40:26<2:30:18, 15.31s/batch, batch_loss=15.5, bat

Epoch 8/10:  41%|▍| 402/991 [1:40:39<2:30:18, 15.31s/batch, batch_loss=17.4, bat

Epoch 8/10:  41%|▍| 403/991 [1:40:39<2:24:13, 14.72s/batch, batch_loss=17.4, bat

Epoch 8/10:  41%|▍| 403/991 [1:40:52<2:24:13, 14.72s/batch, batch_loss=10, batch

Epoch 8/10:  41%|▍| 404/991 [1:40:52<2:19:15, 14.23s/batch, batch_loss=10, batch

Epoch 8/10:  41%|▍| 404/991 [1:41:06<2:19:15, 14.23s/batch, batch_loss=13.1, bat

Epoch 8/10:  41%|▍| 405/991 [1:41:06<2:19:22, 14.27s/batch, batch_loss=13.1, bat

Epoch 8/10:  41%|▍| 405/991 [1:41:24<2:19:22, 14.27s/batch, batch_loss=5.59, bat

Epoch 8/10:  41%|▍| 406/991 [1:41:24<2:28:26, 15.22s/batch, batch_loss=5.59, bat

Epoch 8/10:  41%|▍| 406/991 [1:41:38<2:28:26, 15.22s/batch, batch_loss=23.4, bat

Epoch 8/10:  41%|▍| 407/991 [1:41:38<2:26:10, 15.02s/batch, batch_loss=23.4, bat

Epoch 8/10:  41%|▍| 407/991 [1:41:54<2:26:10, 15.02s/batch, batch_loss=6.82, bat

Epoch 8/10:  41%|▍| 408/991 [1:41:54<2:27:42, 15.20s/batch, batch_loss=6.82, bat

Epoch 8/10:  41%|▍| 408/991 [1:42:09<2:27:42, 15.20s/batch, batch_loss=21.9, bat

Epoch 8/10:  41%|▍| 409/991 [1:42:09<2:25:26, 14.99s/batch, batch_loss=21.9, bat

Epoch 8/10:  41%|▍| 409/991 [1:42:23<2:25:26, 14.99s/batch, batch_loss=19.4, bat

Epoch 8/10:  41%|▍| 410/991 [1:42:23<2:23:21, 14.80s/batch, batch_loss=19.4, bat

Epoch 8/10:  41%|▍| 410/991 [1:42:37<2:23:21, 14.80s/batch, batch_loss=11.9, bat

Epoch 8/10:  41%|▍| 411/991 [1:42:37<2:21:14, 14.61s/batch, batch_loss=11.9, bat

Epoch 8/10:  41%|▍| 411/991 [1:42:51<2:21:14, 14.61s/batch, batch_loss=13.6, bat

Epoch 8/10:  42%|▍| 412/991 [1:42:51<2:20:15, 14.53s/batch, batch_loss=13.6, bat

Epoch 8/10:  42%|▍| 412/991 [1:43:06<2:20:15, 14.53s/batch, batch_loss=14.4, bat

Epoch 8/10:  42%|▍| 413/991 [1:43:06<2:21:16, 14.67s/batch, batch_loss=14.4, bat

Epoch 8/10:  42%|▍| 413/991 [1:43:23<2:21:16, 14.67s/batch, batch_loss=14.3, bat

Epoch 8/10:  42%|▍| 414/991 [1:43:23<2:27:00, 15.29s/batch, batch_loss=14.3, bat

Epoch 8/10:  42%|▍| 414/991 [1:43:38<2:27:00, 15.29s/batch, batch_loss=7.63, bat

Epoch 8/10:  42%|▍| 415/991 [1:43:39<2:26:55, 15.31s/batch, batch_loss=7.63, bat

Epoch 8/10:  42%|▍| 415/991 [1:43:53<2:26:55, 15.31s/batch, batch_loss=10.6, bat

Epoch 8/10:  42%|▍| 416/991 [1:43:53<2:24:52, 15.12s/batch, batch_loss=10.6, bat

Epoch 8/10:  42%|▍| 416/991 [1:44:08<2:24:52, 15.12s/batch, batch_loss=9.06, bat

Epoch 8/10:  42%|▍| 417/991 [1:44:08<2:24:31, 15.11s/batch, batch_loss=9.06, bat

Epoch 8/10:  42%|▍| 417/991 [1:44:24<2:24:31, 15.11s/batch, batch_loss=12.3, bat

Epoch 8/10:  42%|▍| 418/991 [1:44:24<2:25:04, 15.19s/batch, batch_loss=12.3, bat

Epoch 8/10:  42%|▍| 418/991 [1:44:38<2:25:04, 15.19s/batch, batch_loss=1.3e+3, b

Epoch 8/10:  42%|▍| 419/991 [1:44:38<2:23:14, 15.03s/batch, batch_loss=1.3e+3, b

Epoch 8/10:  42%|▍| 419/991 [1:44:54<2:23:14, 15.03s/batch, batch_loss=16.3, bat

Epoch 8/10:  42%|▍| 420/991 [1:44:54<2:23:41, 15.10s/batch, batch_loss=16.3, bat

Epoch 8/10:  42%|▍| 420/991 [1:45:08<2:23:41, 15.10s/batch, batch_loss=13.8, bat

Epoch 8/10:  42%|▍| 421/991 [1:45:08<2:22:12, 14.97s/batch, batch_loss=13.8, bat

Epoch 8/10:  42%|▍| 421/991 [1:45:26<2:22:12, 14.97s/batch, batch_loss=9.4, batc

Epoch 8/10:  43%|▍| 422/991 [1:45:26<2:29:56, 15.81s/batch, batch_loss=9.4, batc

Epoch 8/10:  43%|▍| 422/991 [1:45:41<2:29:56, 15.81s/batch, batch_loss=10.3, bat

Epoch 8/10:  43%|▍| 423/991 [1:45:41<2:27:15, 15.55s/batch, batch_loss=10.3, bat

Epoch 8/10:  43%|▍| 423/991 [1:45:56<2:27:15, 15.55s/batch, batch_loss=10.4, bat

Epoch 8/10:  43%|▍| 424/991 [1:45:56<2:25:42, 15.42s/batch, batch_loss=10.4, bat

Epoch 8/10:  43%|▍| 424/991 [1:46:10<2:25:42, 15.42s/batch, batch_loss=6.85, bat

Epoch 8/10:  43%|▍| 425/991 [1:46:10<2:22:39, 15.12s/batch, batch_loss=6.85, bat

Epoch 8/10:  43%|▍| 425/991 [1:46:25<2:22:39, 15.12s/batch, batch_loss=2.47, bat

Epoch 8/10:  43%|▍| 426/991 [1:46:25<2:21:03, 14.98s/batch, batch_loss=2.47, bat

Epoch 8/10:  43%|▍| 426/991 [1:46:40<2:21:03, 14.98s/batch, batch_loss=9.98, bat

Epoch 8/10:  43%|▍| 427/991 [1:46:40<2:20:54, 14.99s/batch, batch_loss=9.98, bat

Epoch 8/10:  43%|▍| 427/991 [1:46:55<2:20:54, 14.99s/batch, batch_loss=14.9, bat

Epoch 8/10:  43%|▍| 428/991 [1:46:55<2:21:14, 15.05s/batch, batch_loss=14.9, bat

Epoch 8/10:  43%|▍| 428/991 [1:47:11<2:21:14, 15.05s/batch, batch_loss=18.6, bat

Epoch 8/10:  43%|▍| 429/991 [1:47:11<2:23:30, 15.32s/batch, batch_loss=18.6, bat

Epoch 8/10:  43%|▍| 429/991 [1:47:29<2:23:30, 15.32s/batch, batch_loss=9.32e+3, 

Epoch 8/10:  43%|▍| 430/991 [1:47:29<2:30:21, 16.08s/batch, batch_loss=9.32e+3, 

Epoch 8/10:  43%|▍| 430/991 [1:47:44<2:30:21, 16.08s/batch, batch_loss=25, batch

Epoch 8/10:  43%|▍| 431/991 [1:47:44<2:25:22, 15.58s/batch, batch_loss=25, batch

Epoch 8/10:  43%|▍| 431/991 [1:48:00<2:25:22, 15.58s/batch, batch_loss=19, batch

Epoch 8/10:  44%|▍| 432/991 [1:48:00<2:26:16, 15.70s/batch, batch_loss=19, batch

Epoch 8/10:  44%|▍| 432/991 [1:48:16<2:26:16, 15.70s/batch, batch_loss=10.8, bat

Epoch 8/10:  44%|▍| 433/991 [1:48:16<2:26:47, 15.78s/batch, batch_loss=10.8, bat

Epoch 8/10:  44%|▍| 433/991 [1:48:31<2:26:47, 15.78s/batch, batch_loss=15.8, bat

Epoch 8/10:  44%|▍| 434/991 [1:48:31<2:25:00, 15.62s/batch, batch_loss=15.8, bat

Epoch 8/10:  44%|▍| 434/991 [1:48:46<2:25:00, 15.62s/batch, batch_loss=13.3, bat

Epoch 8/10:  44%|▍| 435/991 [1:48:46<2:24:35, 15.60s/batch, batch_loss=13.3, bat

Epoch 8/10:  44%|▍| 435/991 [1:49:02<2:24:35, 15.60s/batch, batch_loss=14, batch

Epoch 8/10:  44%|▍| 436/991 [1:49:02<2:24:41, 15.64s/batch, batch_loss=14, batch

Epoch 8/10:  44%|▍| 436/991 [1:49:18<2:24:41, 15.64s/batch, batch_loss=16.9, bat

Epoch 8/10:  44%|▍| 437/991 [1:49:18<2:24:07, 15.61s/batch, batch_loss=16.9, bat

Epoch 8/10:  44%|▍| 437/991 [1:49:33<2:24:07, 15.61s/batch, batch_loss=18.9, bat

Epoch 8/10:  44%|▍| 438/991 [1:49:33<2:24:26, 15.67s/batch, batch_loss=18.9, bat

Epoch 8/10:  44%|▍| 438/991 [1:49:48<2:24:26, 15.67s/batch, batch_loss=12.7, bat

Epoch 8/10:  44%|▍| 439/991 [1:49:48<2:21:07, 15.34s/batch, batch_loss=12.7, bat

Epoch 8/10:  44%|▍| 439/991 [1:50:06<2:21:07, 15.34s/batch, batch_loss=20.3, bat

Epoch 8/10:  44%|▍| 440/991 [1:50:06<2:28:06, 16.13s/batch, batch_loss=20.3, bat

Epoch 8/10:  44%|▍| 440/991 [1:50:21<2:28:06, 16.13s/batch, batch_loss=22.3, bat

Epoch 8/10:  45%|▍| 441/991 [1:50:21<2:25:01, 15.82s/batch, batch_loss=22.3, bat

Epoch 8/10:  45%|▍| 441/991 [1:50:36<2:25:01, 15.82s/batch, batch_loss=18.5, bat

Epoch 8/10:  45%|▍| 442/991 [1:50:36<2:21:24, 15.45s/batch, batch_loss=18.5, bat

Epoch 8/10:  45%|▍| 442/991 [1:50:51<2:21:24, 15.45s/batch, batch_loss=21.6, bat

Epoch 8/10:  45%|▍| 443/991 [1:50:51<2:20:49, 15.42s/batch, batch_loss=21.6, bat

Epoch 8/10:  45%|▍| 443/991 [1:51:05<2:20:49, 15.42s/batch, batch_loss=20, batch

Epoch 8/10:  45%|▍| 444/991 [1:51:05<2:17:56, 15.13s/batch, batch_loss=20, batch

Epoch 8/10:  45%|▍| 444/991 [1:51:21<2:17:56, 15.13s/batch, batch_loss=23.6, bat

Epoch 8/10:  45%|▍| 445/991 [1:51:21<2:18:07, 15.18s/batch, batch_loss=23.6, bat

Epoch 8/10:  45%|▍| 445/991 [1:51:36<2:18:07, 15.18s/batch, batch_loss=26.4, bat

Epoch 8/10:  45%|▍| 446/991 [1:51:36<2:18:50, 15.29s/batch, batch_loss=26.4, bat

Epoch 8/10:  45%|▍| 446/991 [1:51:51<2:18:50, 15.29s/batch, batch_loss=18.7, bat

Epoch 8/10:  45%|▍| 447/991 [1:51:51<2:17:32, 15.17s/batch, batch_loss=18.7, bat

Epoch 8/10:  45%|▍| 447/991 [1:52:06<2:17:32, 15.17s/batch, batch_loss=18.8, bat

Epoch 8/10:  45%|▍| 448/991 [1:52:06<2:16:49, 15.12s/batch, batch_loss=18.8, bat

Epoch 8/10:  45%|▍| 448/991 [1:52:21<2:16:49, 15.12s/batch, batch_loss=16.8, bat

Epoch 8/10:  45%|▍| 449/991 [1:52:21<2:15:01, 14.95s/batch, batch_loss=16.8, bat

Epoch 8/10:  45%|▍| 449/991 [1:52:35<2:15:01, 14.95s/batch, batch_loss=22.4, bat

Epoch 8/10:  45%|▍| 450/991 [1:52:35<2:12:46, 14.73s/batch, batch_loss=22.4, bat

Epoch 8/10:  45%|▍| 450/991 [1:52:51<2:12:46, 14.73s/batch, batch_loss=21.8, bat

Epoch 8/10:  46%|▍| 451/991 [1:52:51<2:15:07, 15.01s/batch, batch_loss=21.8, bat

Epoch 8/10:  46%|▍| 451/991 [1:53:06<2:15:07, 15.01s/batch, batch_loss=16.5, bat

Epoch 8/10:  46%|▍| 452/991 [1:53:06<2:15:08, 15.04s/batch, batch_loss=16.5, bat

Epoch 8/10:  46%|▍| 452/991 [1:53:21<2:15:08, 15.04s/batch, batch_loss=21.2, bat

Epoch 8/10:  46%|▍| 453/991 [1:53:21<2:15:58, 15.16s/batch, batch_loss=21.2, bat

Epoch 8/10:  46%|▍| 453/991 [1:53:39<2:15:58, 15.16s/batch, batch_loss=7.24e+3, 

Epoch 8/10:  46%|▍| 454/991 [1:53:39<2:22:53, 15.97s/batch, batch_loss=7.24e+3, 

Epoch 8/10:  46%|▍| 454/991 [1:53:54<2:22:53, 15.97s/batch, batch_loss=21.6, bat

Epoch 8/10:  46%|▍| 455/991 [1:53:54<2:21:00, 15.78s/batch, batch_loss=21.6, bat

Epoch 8/10:  46%|▍| 455/991 [1:54:10<2:21:00, 15.78s/batch, batch_loss=22.8, bat

Epoch 8/10:  46%|▍| 456/991 [1:54:10<2:20:00, 15.70s/batch, batch_loss=22.8, bat

Epoch 8/10:  46%|▍| 456/991 [1:54:25<2:20:00, 15.70s/batch, batch_loss=15.8, bat

Epoch 8/10:  46%|▍| 457/991 [1:54:25<2:18:35, 15.57s/batch, batch_loss=15.8, bat

Epoch 8/10:  46%|▍| 457/991 [1:54:40<2:18:35, 15.57s/batch, batch_loss=17.2, bat

Epoch 8/10:  46%|▍| 458/991 [1:54:40<2:16:33, 15.37s/batch, batch_loss=17.2, bat

Epoch 8/10:  46%|▍| 458/991 [1:54:55<2:16:33, 15.37s/batch, batch_loss=23.5, bat

Epoch 8/10:  46%|▍| 459/991 [1:54:55<2:14:19, 15.15s/batch, batch_loss=23.5, bat

Epoch 8/10:  46%|▍| 459/991 [1:55:10<2:14:19, 15.15s/batch, batch_loss=19.6, bat

Epoch 8/10:  46%|▍| 460/991 [1:55:10<2:13:51, 15.12s/batch, batch_loss=19.6, bat

Epoch 8/10:  46%|▍| 460/991 [1:55:25<2:13:51, 15.12s/batch, batch_loss=50.9, bat

Epoch 8/10:  47%|▍| 461/991 [1:55:25<2:14:03, 15.18s/batch, batch_loss=50.9, bat

Epoch 8/10:  47%|▍| 461/991 [1:55:41<2:14:03, 15.18s/batch, batch_loss=18, batch

Epoch 8/10:  47%|▍| 462/991 [1:55:41<2:15:17, 15.35s/batch, batch_loss=18, batch

Epoch 8/10:  47%|▍| 462/991 [1:55:58<2:15:17, 15.35s/batch, batch_loss=6.21e+4, 

Epoch 8/10:  47%|▍| 463/991 [1:55:58<2:20:07, 15.92s/batch, batch_loss=6.21e+4, 

Epoch 8/10:  47%|▍| 463/991 [1:56:14<2:20:07, 15.92s/batch, batch_loss=13.3, bat

Epoch 8/10:  47%|▍| 464/991 [1:56:14<2:20:32, 16.00s/batch, batch_loss=13.3, bat

Epoch 8/10:  47%|▍| 464/991 [1:56:30<2:20:32, 16.00s/batch, batch_loss=13.6, bat

Epoch 8/10:  47%|▍| 465/991 [1:56:30<2:19:47, 15.95s/batch, batch_loss=13.6, bat

Epoch 8/10:  47%|▍| 465/991 [1:56:46<2:19:47, 15.95s/batch, batch_loss=19.3, bat

Epoch 8/10:  47%|▍| 466/991 [1:56:46<2:18:51, 15.87s/batch, batch_loss=19.3, bat

Epoch 8/10:  47%|▍| 466/991 [1:57:00<2:18:51, 15.87s/batch, batch_loss=15.2, bat

Epoch 8/10:  47%|▍| 467/991 [1:57:00<2:15:22, 15.50s/batch, batch_loss=15.2, bat

Epoch 8/10:  47%|▍| 467/991 [1:57:16<2:15:22, 15.50s/batch, batch_loss=19.1, bat

Epoch 8/10:  47%|▍| 468/991 [1:57:16<2:14:36, 15.44s/batch, batch_loss=19.1, bat

Epoch 8/10:  47%|▍| 468/991 [1:57:31<2:14:36, 15.44s/batch, batch_loss=15.7, bat

Epoch 8/10:  47%|▍| 469/991 [1:57:31<2:13:45, 15.37s/batch, batch_loss=15.7, bat

Epoch 8/10:  47%|▍| 469/991 [1:57:45<2:13:45, 15.37s/batch, batch_loss=16.4, bat

Epoch 8/10:  47%|▍| 470/991 [1:57:45<2:11:17, 15.12s/batch, batch_loss=16.4, bat

Epoch 8/10:  47%|▍| 470/991 [1:58:01<2:11:17, 15.12s/batch, batch_loss=18.6, bat

Epoch 8/10:  48%|▍| 471/991 [1:58:01<2:11:31, 15.18s/batch, batch_loss=18.6, bat

Epoch 8/10:  48%|▍| 471/991 [1:58:16<2:11:31, 15.18s/batch, batch_loss=22, batch

Epoch 8/10:  48%|▍| 472/991 [1:58:16<2:10:33, 15.09s/batch, batch_loss=22, batch

Epoch 8/10:  48%|▍| 472/991 [1:58:30<2:10:33, 15.09s/batch, batch_loss=20.1, bat

Epoch 8/10:  48%|▍| 473/991 [1:58:30<2:09:24, 14.99s/batch, batch_loss=20.1, bat

Epoch 8/10:  48%|▍| 473/991 [1:58:46<2:09:24, 14.99s/batch, batch_loss=16.9, bat

Epoch 8/10:  48%|▍| 474/991 [1:58:46<2:10:19, 15.12s/batch, batch_loss=16.9, bat

Epoch 8/10:  48%|▍| 474/991 [1:59:01<2:10:19, 15.12s/batch, batch_loss=2.4e+3, b

Epoch 8/10:  48%|▍| 475/991 [1:59:01<2:10:05, 15.13s/batch, batch_loss=2.4e+3, b

Epoch 8/10:  48%|▍| 475/991 [1:59:16<2:10:05, 15.13s/batch, batch_loss=16.6, bat

Epoch 8/10:  48%|▍| 476/991 [1:59:16<2:10:43, 15.23s/batch, batch_loss=16.6, bat

Epoch 8/10:  48%|▍| 476/991 [1:59:31<2:10:43, 15.23s/batch, batch_loss=17.5, bat

Epoch 8/10:  48%|▍| 477/991 [1:59:31<2:08:52, 15.04s/batch, batch_loss=17.5, bat

Epoch 8/10:  48%|▍| 477/991 [1:59:46<2:08:52, 15.04s/batch, batch_loss=18.6, bat

Epoch 8/10:  48%|▍| 478/991 [1:59:46<2:07:45, 14.94s/batch, batch_loss=18.6, bat

Epoch 8/10:  48%|▍| 478/991 [2:00:01<2:07:45, 14.94s/batch, batch_loss=19.4, bat

Epoch 8/10:  48%|▍| 479/991 [2:00:01<2:08:53, 15.10s/batch, batch_loss=19.4, bat

Epoch 8/10:  48%|▍| 479/991 [2:00:18<2:08:53, 15.10s/batch, batch_loss=18.7, bat

Epoch 8/10:  48%|▍| 480/991 [2:00:18<2:12:28, 15.55s/batch, batch_loss=18.7, bat

Epoch 8/10:  48%|▍| 480/991 [2:00:33<2:12:28, 15.55s/batch, batch_loss=27.2, bat

Epoch 8/10:  49%|▍| 481/991 [2:00:33<2:11:21, 15.45s/batch, batch_loss=27.2, bat

Epoch 8/10:  49%|▍| 481/991 [2:00:48<2:11:21, 15.45s/batch, batch_loss=19.4, bat

Epoch 8/10:  49%|▍| 482/991 [2:00:48<2:10:48, 15.42s/batch, batch_loss=19.4, bat

Epoch 8/10:  49%|▍| 482/991 [2:01:03<2:10:48, 15.42s/batch, batch_loss=15.3, bat

Epoch 8/10:  49%|▍| 483/991 [2:01:03<2:08:19, 15.16s/batch, batch_loss=15.3, bat

Epoch 8/10:  49%|▍| 483/991 [2:01:20<2:08:19, 15.16s/batch, batch_loss=23, batch

Epoch 8/10:  49%|▍| 484/991 [2:01:20<2:13:23, 15.79s/batch, batch_loss=23, batch

Epoch 8/10:  49%|▍| 484/991 [2:01:36<2:13:23, 15.79s/batch, batch_loss=9.59, bat

Epoch 8/10:  49%|▍| 485/991 [2:01:36<2:12:01, 15.65s/batch, batch_loss=9.59, bat

Epoch 8/10:  49%|▍| 485/991 [2:01:51<2:12:01, 15.65s/batch, batch_loss=25.9, bat

Epoch 8/10:  49%|▍| 486/991 [2:01:51<2:11:29, 15.62s/batch, batch_loss=25.9, bat

Epoch 8/10:  49%|▍| 486/991 [2:02:06<2:11:29, 15.62s/batch, batch_loss=15, batch

Epoch 8/10:  49%|▍| 487/991 [2:02:06<2:09:28, 15.41s/batch, batch_loss=15, batch

Epoch 8/10:  49%|▍| 487/991 [2:02:21<2:09:28, 15.41s/batch, batch_loss=8.77, bat

Epoch 8/10:  49%|▍| 488/991 [2:02:21<2:09:25, 15.44s/batch, batch_loss=8.77, bat

Epoch 8/10:  49%|▍| 488/991 [2:02:38<2:09:25, 15.44s/batch, batch_loss=10.4, bat

Epoch 8/10:  49%|▍| 489/991 [2:02:38<2:11:17, 15.69s/batch, batch_loss=10.4, bat

Epoch 8/10:  49%|▍| 489/991 [2:02:53<2:11:17, 15.69s/batch, batch_loss=8.49, bat

Epoch 8/10:  49%|▍| 490/991 [2:02:53<2:09:45, 15.54s/batch, batch_loss=8.49, bat

Epoch 8/10:  49%|▍| 490/991 [2:03:08<2:09:45, 15.54s/batch, batch_loss=21.6, bat

Epoch 8/10:  50%|▍| 491/991 [2:03:08<2:09:03, 15.49s/batch, batch_loss=21.6, bat

Epoch 8/10:  50%|▍| 491/991 [2:03:23<2:09:03, 15.49s/batch, batch_loss=17.2, bat

Epoch 8/10:  50%|▍| 492/991 [2:03:23<2:06:55, 15.26s/batch, batch_loss=17.2, bat

Epoch 8/10:  50%|▍| 492/991 [2:03:41<2:06:55, 15.26s/batch, batch_loss=19.6, bat

Epoch 8/10:  50%|▍| 493/991 [2:03:41<2:13:41, 16.11s/batch, batch_loss=19.6, bat

Epoch 8/10:  50%|▍| 493/991 [2:03:57<2:13:41, 16.11s/batch, batch_loss=8.54, bat

Epoch 8/10:  50%|▍| 494/991 [2:03:57<2:11:49, 15.91s/batch, batch_loss=8.54, bat

Epoch 8/10:  50%|▍| 494/991 [2:04:11<2:11:49, 15.91s/batch, batch_loss=8.55e+4, 

Epoch 8/10:  50%|▍| 495/991 [2:04:11<2:08:45, 15.58s/batch, batch_loss=8.55e+4, 

Epoch 8/10:  50%|▍| 495/991 [2:04:27<2:08:45, 15.58s/batch, batch_loss=15.5, bat

Epoch 8/10:  50%|▌| 496/991 [2:04:27<2:07:54, 15.50s/batch, batch_loss=15.5, bat

Epoch 8/10:  50%|▌| 496/991 [2:04:42<2:07:54, 15.50s/batch, batch_loss=174, batc

Epoch 8/10:  50%|▌| 497/991 [2:04:42<2:07:01, 15.43s/batch, batch_loss=174, batc

Epoch 8/10:  50%|▌| 497/991 [2:04:57<2:07:01, 15.43s/batch, batch_loss=12.1, bat

Epoch 8/10:  50%|▌| 498/991 [2:04:57<2:05:45, 15.31s/batch, batch_loss=12.1, bat

Epoch 8/10:  50%|▌| 498/991 [2:05:13<2:05:45, 15.31s/batch, batch_loss=403, batc

Epoch 8/10:  50%|▌| 499/991 [2:05:13<2:08:01, 15.61s/batch, batch_loss=403, batc

Epoch 8/10:  50%|▌| 499/991 [2:05:29<2:08:01, 15.61s/batch, batch_loss=18.1, bat

Epoch 8/10:  50%|▌| 500/991 [2:05:29<2:09:07, 15.78s/batch, batch_loss=18.1, bat

Epoch 8/10:  50%|▌| 500/991 [2:05:49<2:09:07, 15.78s/batch, batch_loss=7.71, bat

Epoch 8/10:  51%|▌| 501/991 [2:05:49<2:16:59, 16.77s/batch, batch_loss=7.71, bat

Epoch 8/10:  51%|▌| 501/991 [2:06:05<2:16:59, 16.77s/batch, batch_loss=10.5, bat

Epoch 8/10:  51%|▌| 502/991 [2:06:05<2:15:30, 16.63s/batch, batch_loss=10.5, bat

Epoch 8/10:  51%|▌| 502/991 [2:06:19<2:15:30, 16.63s/batch, batch_loss=16.2, bat

Epoch 8/10:  51%|▌| 503/991 [2:06:19<2:09:36, 15.94s/batch, batch_loss=16.2, bat

Epoch 8/10:  51%|▌| 503/991 [2:06:35<2:09:36, 15.94s/batch, batch_loss=11.4, bat

Epoch 8/10:  51%|▌| 504/991 [2:06:35<2:08:21, 15.81s/batch, batch_loss=11.4, bat

Epoch 8/10:  51%|▌| 504/991 [2:06:50<2:08:21, 15.81s/batch, batch_loss=7.22, bat

Epoch 8/10:  51%|▌| 505/991 [2:06:50<2:07:05, 15.69s/batch, batch_loss=7.22, bat

Epoch 8/10:  51%|▌| 505/991 [2:07:06<2:07:05, 15.69s/batch, batch_loss=13.4, bat

Epoch 8/10:  51%|▌| 506/991 [2:07:06<2:07:21, 15.76s/batch, batch_loss=13.4, bat

Epoch 8/10:  51%|▌| 506/991 [2:07:22<2:07:21, 15.76s/batch, batch_loss=11.2, bat

Epoch 8/10:  51%|▌| 507/991 [2:07:22<2:06:47, 15.72s/batch, batch_loss=11.2, bat

Epoch 8/10:  51%|▌| 507/991 [2:07:37<2:06:47, 15.72s/batch, batch_loss=14.9, bat

Epoch 8/10:  51%|▌| 508/991 [2:07:37<2:05:31, 15.59s/batch, batch_loss=14.9, bat

Epoch 8/10:  51%|▌| 508/991 [2:07:52<2:05:31, 15.59s/batch, batch_loss=17.2, bat

Epoch 8/10:  51%|▌| 509/991 [2:07:52<2:04:24, 15.49s/batch, batch_loss=17.2, bat

Epoch 8/10:  51%|▌| 509/991 [2:08:08<2:04:24, 15.49s/batch, batch_loss=13.1, bat

Epoch 8/10:  51%|▌| 510/991 [2:08:08<2:04:19, 15.51s/batch, batch_loss=13.1, bat

Epoch 8/10:  51%|▌| 510/991 [2:08:23<2:04:19, 15.51s/batch, batch_loss=12.9, bat

Epoch 8/10:  52%|▌| 511/991 [2:08:23<2:04:02, 15.51s/batch, batch_loss=12.9, bat

Epoch 8/10:  52%|▌| 511/991 [2:08:39<2:04:02, 15.51s/batch, batch_loss=9.09, bat

Epoch 8/10:  52%|▌| 512/991 [2:08:39<2:03:56, 15.52s/batch, batch_loss=9.09, bat

Epoch 8/10:  52%|▌| 512/991 [2:08:54<2:03:56, 15.52s/batch, batch_loss=9.95, bat

Epoch 8/10:  52%|▌| 513/991 [2:08:54<2:02:30, 15.38s/batch, batch_loss=9.95, bat

Epoch 8/10:  52%|▌| 513/991 [2:09:09<2:02:30, 15.38s/batch, batch_loss=13.2, bat

Epoch 8/10:  52%|▌| 514/991 [2:09:09<2:00:57, 15.22s/batch, batch_loss=13.2, bat

Epoch 8/10:  52%|▌| 514/991 [2:09:26<2:00:57, 15.22s/batch, batch_loss=9.39, bat

Epoch 8/10:  52%|▌| 515/991 [2:09:26<2:06:33, 15.95s/batch, batch_loss=9.39, bat

Epoch 8/10:  52%|▌| 515/991 [2:09:43<2:06:33, 15.95s/batch, batch_loss=13.6, bat

Epoch 8/10:  52%|▌| 516/991 [2:09:43<2:06:56, 16.04s/batch, batch_loss=13.6, bat

Epoch 8/10:  52%|▌| 516/991 [2:09:57<2:06:56, 16.04s/batch, batch_loss=12.9, bat

Epoch 8/10:  52%|▌| 517/991 [2:09:57<2:03:32, 15.64s/batch, batch_loss=12.9, bat

Epoch 8/10:  52%|▌| 517/991 [2:10:12<2:03:32, 15.64s/batch, batch_loss=20.6, bat

Epoch 8/10:  52%|▌| 518/991 [2:10:12<2:01:43, 15.44s/batch, batch_loss=20.6, bat

Epoch 8/10:  52%|▌| 518/991 [2:10:26<2:01:43, 15.44s/batch, batch_loss=13.5, bat

Epoch 8/10:  52%|▌| 519/991 [2:10:26<1:58:12, 15.03s/batch, batch_loss=13.5, bat

Epoch 8/10:  52%|▌| 519/991 [2:10:40<1:58:12, 15.03s/batch, batch_loss=12.5, bat

Epoch 8/10:  52%|▌| 520/991 [2:10:40<1:54:17, 14.56s/batch, batch_loss=12.5, bat

Epoch 8/10:  52%|▌| 520/991 [2:10:53<1:54:17, 14.56s/batch, batch_loss=8.65, bat

Epoch 8/10:  53%|▌| 521/991 [2:10:53<1:51:39, 14.25s/batch, batch_loss=8.65, bat

Epoch 8/10:  53%|▌| 521/991 [2:11:08<1:51:39, 14.25s/batch, batch_loss=9.92, bat

Epoch 8/10:  53%|▌| 522/991 [2:11:08<1:52:29, 14.39s/batch, batch_loss=9.92, bat

Epoch 8/10:  53%|▌| 522/991 [2:11:23<1:52:29, 14.39s/batch, batch_loss=3.13, bat

Epoch 8/10:  53%|▌| 523/991 [2:11:23<1:53:44, 14.58s/batch, batch_loss=3.13, bat

Epoch 8/10:  53%|▌| 523/991 [2:11:38<1:53:44, 14.58s/batch, batch_loss=7.86, bat

Epoch 8/10:  53%|▌| 524/991 [2:11:38<1:53:50, 14.63s/batch, batch_loss=7.86, bat

Epoch 8/10:  53%|▌| 524/991 [2:11:54<1:53:50, 14.63s/batch, batch_loss=6.79, bat

Epoch 8/10:  53%|▌| 525/991 [2:11:54<1:58:05, 15.20s/batch, batch_loss=6.79, bat

Epoch 8/10:  53%|▌| 525/991 [2:12:10<1:58:05, 15.20s/batch, batch_loss=7.29, bat

Epoch 8/10:  53%|▌| 526/991 [2:12:10<1:57:55, 15.22s/batch, batch_loss=7.29, bat

Epoch 8/10:  53%|▌| 526/991 [2:12:24<1:57:55, 15.22s/batch, batch_loss=13.8, bat

Epoch 8/10:  53%|▌| 527/991 [2:12:24<1:55:57, 15.00s/batch, batch_loss=13.8, bat

Epoch 8/10:  53%|▌| 527/991 [2:12:39<1:55:57, 15.00s/batch, batch_loss=14.2, bat

Epoch 8/10:  53%|▌| 528/991 [2:12:39<1:56:10, 15.05s/batch, batch_loss=14.2, bat

Epoch 8/10:  53%|▌| 528/991 [2:12:55<1:56:10, 15.05s/batch, batch_loss=9.14, bat

Epoch 8/10:  53%|▌| 529/991 [2:12:55<1:56:43, 15.16s/batch, batch_loss=9.14, bat

Epoch 8/10:  53%|▌| 529/991 [2:13:10<1:56:43, 15.16s/batch, batch_loss=16.6, bat

Epoch 8/10:  53%|▌| 530/991 [2:13:10<1:56:29, 15.16s/batch, batch_loss=16.6, bat

Epoch 8/10:  53%|▌| 530/991 [2:13:25<1:56:29, 15.16s/batch, batch_loss=12.4, bat

Epoch 8/10:  54%|▌| 531/991 [2:13:25<1:55:23, 15.05s/batch, batch_loss=12.4, bat

Epoch 8/10:  54%|▌| 531/991 [2:13:43<1:55:23, 15.05s/batch, batch_loss=11.9, bat

Epoch 8/10:  54%|▌| 532/991 [2:13:43<2:02:18, 15.99s/batch, batch_loss=11.9, bat

Epoch 8/10:  54%|▌| 532/991 [2:13:59<2:02:18, 15.99s/batch, batch_loss=12.5, bat

Epoch 8/10:  54%|▌| 533/991 [2:13:59<2:01:58, 15.98s/batch, batch_loss=12.5, bat

Epoch 8/10:  54%|▌| 533/991 [2:14:14<2:01:58, 15.98s/batch, batch_loss=12.3, bat

Epoch 8/10:  54%|▌| 534/991 [2:14:14<2:00:48, 15.86s/batch, batch_loss=12.3, bat

Epoch 8/10:  54%|▌| 534/991 [2:14:30<2:00:48, 15.86s/batch, batch_loss=15.8, bat

Epoch 8/10:  54%|▌| 535/991 [2:14:30<1:59:17, 15.70s/batch, batch_loss=15.8, bat

Epoch 8/10:  54%|▌| 535/991 [2:14:44<1:59:17, 15.70s/batch, batch_loss=12.4, bat

Epoch 8/10:  54%|▌| 536/991 [2:14:44<1:56:50, 15.41s/batch, batch_loss=12.4, bat

Epoch 8/10:  54%|▌| 536/991 [2:15:00<1:56:50, 15.41s/batch, batch_loss=8.17, bat

Epoch 8/10:  54%|▌| 537/991 [2:15:00<1:56:03, 15.34s/batch, batch_loss=8.17, bat

Epoch 8/10:  54%|▌| 537/991 [2:15:16<1:56:03, 15.34s/batch, batch_loss=1.77e+3, 

Epoch 8/10:  54%|▌| 538/991 [2:15:16<1:58:16, 15.66s/batch, batch_loss=1.77e+3, 

Epoch 8/10:  54%|▌| 538/991 [2:15:32<1:58:16, 15.66s/batch, batch_loss=32.2, bat

Epoch 8/10:  54%|▌| 539/991 [2:15:32<1:57:38, 15.62s/batch, batch_loss=32.2, bat

Epoch 8/10:  54%|▌| 539/991 [2:15:47<1:57:38, 15.62s/batch, batch_loss=27.9, bat

Epoch 8/10:  54%|▌| 540/991 [2:15:47<1:56:17, 15.47s/batch, batch_loss=27.9, bat

Epoch 8/10:  54%|▌| 540/991 [2:16:04<1:56:17, 15.47s/batch, batch_loss=1.3e+4, b

Epoch 8/10:  55%|▌| 541/991 [2:16:04<1:59:20, 15.91s/batch, batch_loss=1.3e+4, b

Epoch 8/10:  55%|▌| 541/991 [2:16:20<1:59:20, 15.91s/batch, batch_loss=2.84e+3, 

Epoch 8/10:  55%|▌| 542/991 [2:16:20<1:59:40, 15.99s/batch, batch_loss=2.84e+3, 

Epoch 8/10:  55%|▌| 542/991 [2:16:35<1:59:40, 15.99s/batch, batch_loss=22.2, bat

Epoch 8/10:  55%|▌| 543/991 [2:16:35<1:56:59, 15.67s/batch, batch_loss=22.2, bat

Epoch 8/10:  55%|▌| 543/991 [2:16:50<1:56:59, 15.67s/batch, batch_loss=18.9, bat

Epoch 8/10:  55%|▌| 544/991 [2:16:50<1:56:33, 15.65s/batch, batch_loss=18.9, bat

Epoch 8/10:  55%|▌| 544/991 [2:17:06<1:56:33, 15.65s/batch, batch_loss=16.1, bat

Epoch 8/10:  55%|▌| 545/991 [2:17:06<1:55:29, 15.54s/batch, batch_loss=16.1, bat

Epoch 8/10:  55%|▌| 545/991 [2:17:20<1:55:29, 15.54s/batch, batch_loss=300, batc

Epoch 8/10:  55%|▌| 546/991 [2:17:20<1:53:49, 15.35s/batch, batch_loss=300, batc

Epoch 8/10:  55%|▌| 546/991 [2:17:36<1:53:49, 15.35s/batch, batch_loss=17.2, bat

Epoch 8/10:  55%|▌| 547/991 [2:17:36<1:54:26, 15.47s/batch, batch_loss=17.2, bat

Epoch 8/10:  55%|▌| 547/991 [2:17:53<1:54:26, 15.47s/batch, batch_loss=15.1, bat

Epoch 8/10:  55%|▌| 548/991 [2:17:53<1:57:44, 15.95s/batch, batch_loss=15.1, bat

Epoch 8/10:  55%|▌| 548/991 [2:18:09<1:57:44, 15.95s/batch, batch_loss=9.02, bat

Epoch 8/10:  55%|▌| 549/991 [2:18:09<1:57:39, 15.97s/batch, batch_loss=9.02, bat

Epoch 8/10:  55%|▌| 549/991 [2:18:25<1:57:39, 15.97s/batch, batch_loss=19.7, bat

Epoch 8/10:  55%|▌| 550/991 [2:18:25<1:56:37, 15.87s/batch, batch_loss=19.7, bat

Epoch 8/10:  55%|▌| 550/991 [2:18:39<1:56:37, 15.87s/batch, batch_loss=16.6, bat

Epoch 8/10:  56%|▌| 551/991 [2:18:39<1:52:49, 15.38s/batch, batch_loss=16.6, bat

Epoch 8/10:  56%|▌| 551/991 [2:18:53<1:52:49, 15.38s/batch, batch_loss=14, batch

Epoch 8/10:  56%|▌| 552/991 [2:18:53<1:49:40, 14.99s/batch, batch_loss=14, batch

Epoch 8/10:  56%|▌| 552/991 [2:19:07<1:49:40, 14.99s/batch, batch_loss=16.8, bat

Epoch 8/10:  56%|▌| 553/991 [2:19:07<1:47:20, 14.70s/batch, batch_loss=16.8, bat

Epoch 8/10:  56%|▌| 553/991 [2:19:22<1:47:20, 14.70s/batch, batch_loss=5.73e+3, 

Epoch 8/10:  56%|▌| 554/991 [2:19:22<1:47:59, 14.83s/batch, batch_loss=5.73e+3, 

Epoch 8/10:  56%|▌| 554/991 [2:19:37<1:47:59, 14.83s/batch, batch_loss=2.57e+3, 

Epoch 8/10:  56%|▌| 555/991 [2:19:37<1:46:35, 14.67s/batch, batch_loss=2.57e+3, 

Epoch 8/10:  56%|▌| 555/991 [2:19:52<1:46:35, 14.67s/batch, batch_loss=16.4, bat

Epoch 8/10:  56%|▌| 556/991 [2:19:52<1:46:42, 14.72s/batch, batch_loss=16.4, bat

Epoch 8/10:  56%|▌| 556/991 [2:20:07<1:46:42, 14.72s/batch, batch_loss=1.27e+4, 

Epoch 8/10:  56%|▌| 557/991 [2:20:07<1:47:31, 14.86s/batch, batch_loss=1.27e+4, 

Epoch 8/10:  56%|▌| 557/991 [2:20:22<1:47:31, 14.86s/batch, batch_loss=9, batch_

Epoch 8/10:  56%|▌| 558/991 [2:20:22<1:47:32, 14.90s/batch, batch_loss=9, batch_

Epoch 8/10:  56%|▌| 558/991 [2:20:37<1:47:32, 14.90s/batch, batch_loss=17.5, bat

Epoch 8/10:  56%|▌| 559/991 [2:20:37<1:47:24, 14.92s/batch, batch_loss=17.5, bat

Epoch 8/10:  56%|▌| 559/991 [2:20:52<1:47:24, 14.92s/batch, batch_loss=6.64, bat

Epoch 8/10:  57%|▌| 560/991 [2:20:52<1:47:37, 14.98s/batch, batch_loss=6.64, bat

Epoch 8/10:  57%|▌| 560/991 [2:21:07<1:47:37, 14.98s/batch, batch_loss=8.47, bat

Epoch 8/10:  57%|▌| 561/991 [2:21:07<1:47:19, 14.97s/batch, batch_loss=8.47, bat

Epoch 8/10:  57%|▌| 561/991 [2:21:21<1:47:19, 14.97s/batch, batch_loss=14.4, bat

Epoch 8/10:  57%|▌| 562/991 [2:21:21<1:44:47, 14.66s/batch, batch_loss=14.4, bat

Epoch 8/10:  57%|▌| 562/991 [2:21:36<1:44:47, 14.66s/batch, batch_loss=7.23, bat

Epoch 8/10:  57%|▌| 563/991 [2:21:36<1:46:23, 14.92s/batch, batch_loss=7.23, bat

Epoch 8/10:  57%|▌| 563/991 [2:21:53<1:46:23, 14.92s/batch, batch_loss=12, batch

Epoch 8/10:  57%|▌| 564/991 [2:21:53<1:49:53, 15.44s/batch, batch_loss=12, batch

Epoch 8/10:  57%|▌| 564/991 [2:22:08<1:49:53, 15.44s/batch, batch_loss=499, batc

Epoch 8/10:  57%|▌| 565/991 [2:22:08<1:48:21, 15.26s/batch, batch_loss=499, batc

Epoch 8/10:  57%|▌| 565/991 [2:22:23<1:48:21, 15.26s/batch, batch_loss=11.6, bat

Epoch 8/10:  57%|▌| 566/991 [2:22:23<1:47:59, 15.25s/batch, batch_loss=11.6, bat

Epoch 8/10:  57%|▌| 566/991 [2:22:39<1:47:59, 15.25s/batch, batch_loss=18.7, bat

Epoch 8/10:  57%|▌| 567/991 [2:22:39<1:48:43, 15.39s/batch, batch_loss=18.7, bat

Epoch 8/10:  57%|▌| 567/991 [2:22:53<1:48:43, 15.39s/batch, batch_loss=298, batc

Epoch 8/10:  57%|▌| 568/991 [2:22:53<1:47:16, 15.22s/batch, batch_loss=298, batc

Epoch 8/10:  57%|▌| 568/991 [2:23:08<1:47:16, 15.22s/batch, batch_loss=26.8, bat

Epoch 8/10:  57%|▌| 569/991 [2:23:08<1:46:32, 15.15s/batch, batch_loss=26.8, bat

Epoch 8/10:  57%|▌| 569/991 [2:23:24<1:46:32, 15.15s/batch, batch_loss=8.49e+3, 

Epoch 8/10:  58%|▌| 570/991 [2:23:24<1:46:06, 15.12s/batch, batch_loss=8.49e+3, 

Epoch 8/10:  58%|▌| 570/991 [2:23:39<1:46:06, 15.12s/batch, batch_loss=8.96, bat

Epoch 8/10:  58%|▌| 571/991 [2:23:39<1:46:18, 15.19s/batch, batch_loss=8.96, bat

Epoch 8/10:  58%|▌| 571/991 [2:23:53<1:46:18, 15.19s/batch, batch_loss=11, batch

Epoch 8/10:  58%|▌| 572/991 [2:23:53<1:44:06, 14.91s/batch, batch_loss=11, batch

Epoch 8/10:  58%|▌| 572/991 [2:24:08<1:44:06, 14.91s/batch, batch_loss=6.72, bat

Epoch 8/10:  58%|▌| 573/991 [2:24:08<1:44:31, 15.00s/batch, batch_loss=6.72, bat

Epoch 8/10:  58%|▌| 573/991 [2:24:24<1:44:31, 15.00s/batch, batch_loss=12.2, bat

Epoch 8/10:  58%|▌| 574/991 [2:24:24<1:44:43, 15.07s/batch, batch_loss=12.2, bat

Epoch 8/10:  58%|▌| 574/991 [2:24:39<1:44:43, 15.07s/batch, batch_loss=17.2, bat

Epoch 8/10:  58%|▌| 575/991 [2:24:39<1:45:30, 15.22s/batch, batch_loss=17.2, bat

Epoch 8/10:  58%|▌| 575/991 [2:24:54<1:45:30, 15.22s/batch, batch_loss=25.1, bat

Epoch 8/10:  58%|▌| 576/991 [2:24:54<1:44:09, 15.06s/batch, batch_loss=25.1, bat

Epoch 8/10:  58%|▌| 576/991 [2:25:08<1:44:09, 15.06s/batch, batch_loss=11.7, bat

Epoch 8/10:  58%|▌| 577/991 [2:25:08<1:41:50, 14.76s/batch, batch_loss=11.7, bat

Epoch 8/10:  58%|▌| 577/991 [2:25:23<1:41:50, 14.76s/batch, batch_loss=7.59, bat

Epoch 8/10:  58%|▌| 578/991 [2:25:23<1:41:45, 14.78s/batch, batch_loss=7.59, bat

Epoch 8/10:  58%|▌| 578/991 [2:25:37<1:41:45, 14.78s/batch, batch_loss=10.4, bat

Epoch 8/10:  58%|▌| 579/991 [2:25:37<1:41:17, 14.75s/batch, batch_loss=10.4, bat

Epoch 8/10:  58%|▌| 579/991 [2:25:54<1:41:17, 14.75s/batch, batch_loss=17.4, bat

Epoch 8/10:  59%|▌| 580/991 [2:25:54<1:45:13, 15.36s/batch, batch_loss=17.4, bat

Epoch 8/10:  59%|▌| 580/991 [2:26:09<1:45:13, 15.36s/batch, batch_loss=5.65, bat

Epoch 8/10:  59%|▌| 581/991 [2:26:09<1:42:59, 15.07s/batch, batch_loss=5.65, bat

Epoch 8/10:  59%|▌| 581/991 [2:26:23<1:42:59, 15.07s/batch, batch_loss=0.12, bat

Epoch 8/10:  59%|▌| 582/991 [2:26:23<1:42:03, 14.97s/batch, batch_loss=0.12, bat

Epoch 8/10:  59%|▌| 582/991 [2:26:38<1:42:03, 14.97s/batch, batch_loss=6.61e+3, 

Epoch 8/10:  59%|▌| 583/991 [2:26:38<1:40:43, 14.81s/batch, batch_loss=6.61e+3, 

Epoch 8/10:  59%|▌| 583/991 [2:26:52<1:40:43, 14.81s/batch, batch_loss=11, batch

Epoch 8/10:  59%|▌| 584/991 [2:26:52<1:39:44, 14.70s/batch, batch_loss=11, batch

Epoch 8/10:  59%|▌| 584/991 [2:27:07<1:39:44, 14.70s/batch, batch_loss=8.47, bat

Epoch 8/10:  59%|▌| 585/991 [2:27:07<1:39:11, 14.66s/batch, batch_loss=8.47, bat

Epoch 8/10:  59%|▌| 585/991 [2:27:24<1:39:11, 14.66s/batch, batch_loss=23.5, bat

Epoch 8/10:  59%|▌| 586/991 [2:27:24<1:44:46, 15.52s/batch, batch_loss=23.5, bat

Epoch 8/10:  59%|▌| 586/991 [2:27:38<1:44:46, 15.52s/batch, batch_loss=21.7, bat

Epoch 8/10:  59%|▌| 587/991 [2:27:38<1:40:58, 15.00s/batch, batch_loss=21.7, bat

Epoch 8/10:  59%|▌| 587/991 [2:27:53<1:40:58, 15.00s/batch, batch_loss=15.5, bat

Epoch 8/10:  59%|▌| 588/991 [2:27:53<1:39:45, 14.85s/batch, batch_loss=15.5, bat

Epoch 8/10:  59%|▌| 588/991 [2:28:07<1:39:45, 14.85s/batch, batch_loss=7.04, bat

Epoch 8/10:  59%|▌| 589/991 [2:28:07<1:39:16, 14.82s/batch, batch_loss=7.04, bat

Epoch 8/10:  59%|▌| 589/991 [2:28:22<1:39:16, 14.82s/batch, batch_loss=17, batch

Epoch 8/10:  60%|▌| 590/991 [2:28:22<1:38:16, 14.71s/batch, batch_loss=17, batch

Epoch 8/10:  60%|▌| 590/991 [2:28:36<1:38:16, 14.71s/batch, batch_loss=15.8, bat

Epoch 8/10:  60%|▌| 591/991 [2:28:36<1:37:00, 14.55s/batch, batch_loss=15.8, bat

Epoch 8/10:  60%|▌| 591/991 [2:28:51<1:37:00, 14.55s/batch, batch_loss=6.77, bat

Epoch 8/10:  60%|▌| 592/991 [2:28:51<1:36:44, 14.55s/batch, batch_loss=6.77, bat

Epoch 8/10:  60%|▌| 592/991 [2:29:06<1:36:44, 14.55s/batch, batch_loss=11.4, bat

Epoch 8/10:  60%|▌| 593/991 [2:29:06<1:37:34, 14.71s/batch, batch_loss=11.4, bat

Epoch 8/10:  60%|▌| 593/991 [2:29:20<1:37:34, 14.71s/batch, batch_loss=10.2, bat

Epoch 8/10:  60%|▌| 594/991 [2:29:20<1:37:38, 14.76s/batch, batch_loss=10.2, bat

Epoch 8/10:  60%|▌| 594/991 [2:29:37<1:37:38, 14.76s/batch, batch_loss=6.54, bat

Epoch 8/10:  60%|▌| 595/991 [2:29:37<1:40:57, 15.30s/batch, batch_loss=6.54, bat

Epoch 8/10:  60%|▌| 595/991 [2:29:52<1:40:57, 15.30s/batch, batch_loss=6.57, bat

Epoch 8/10:  60%|▌| 596/991 [2:29:52<1:39:44, 15.15s/batch, batch_loss=6.57, bat

Epoch 8/10:  60%|▌| 596/991 [2:30:07<1:39:44, 15.15s/batch, batch_loss=22.6, bat

Epoch 8/10:  60%|▌| 597/991 [2:30:07<1:40:05, 15.24s/batch, batch_loss=22.6, bat

Epoch 8/10:  60%|▌| 597/991 [2:30:22<1:40:05, 15.24s/batch, batch_loss=8.84, bat

Epoch 8/10:  60%|▌| 598/991 [2:30:22<1:39:44, 15.23s/batch, batch_loss=8.84, bat

Epoch 8/10:  60%|▌| 598/991 [2:30:38<1:39:44, 15.23s/batch, batch_loss=17.1, bat

Epoch 8/10:  60%|▌| 599/991 [2:30:38<1:39:17, 15.20s/batch, batch_loss=17.1, bat

Epoch 8/10:  60%|▌| 599/991 [2:30:53<1:39:17, 15.20s/batch, batch_loss=11.5, bat

Epoch 8/10:  61%|▌| 600/991 [2:30:53<1:38:53, 15.18s/batch, batch_loss=11.5, bat

Epoch 8/10:  61%|▌| 600/991 [2:31:08<1:38:53, 15.18s/batch, batch_loss=13.9, bat

Epoch 8/10:  61%|▌| 601/991 [2:31:08<1:38:24, 15.14s/batch, batch_loss=13.9, bat

Epoch 8/10:  61%|▌| 601/991 [2:31:23<1:38:24, 15.14s/batch, batch_loss=10.1, bat

Epoch 8/10:  61%|▌| 602/991 [2:31:23<1:37:43, 15.07s/batch, batch_loss=10.1, bat

Epoch 8/10:  61%|▌| 602/991 [2:31:38<1:37:43, 15.07s/batch, batch_loss=5.82, bat

Epoch 8/10:  61%|▌| 603/991 [2:31:38<1:37:30, 15.08s/batch, batch_loss=5.82, bat

Epoch 8/10:  61%|▌| 603/991 [2:31:52<1:37:30, 15.08s/batch, batch_loss=1.01e+4, 

Epoch 8/10:  61%|▌| 604/991 [2:31:52<1:36:18, 14.93s/batch, batch_loss=1.01e+4, 

Epoch 8/10:  61%|▌| 604/991 [2:32:08<1:36:18, 14.93s/batch, batch_loss=9.56, bat

Epoch 8/10:  61%|▌| 605/991 [2:32:08<1:36:32, 15.01s/batch, batch_loss=9.56, bat

Epoch 8/10:  61%|▌| 605/991 [2:32:23<1:36:32, 15.01s/batch, batch_loss=8.04, bat

Epoch 8/10:  61%|▌| 606/991 [2:32:23<1:37:01, 15.12s/batch, batch_loss=8.04, bat

Epoch 8/10:  61%|▌| 606/991 [2:32:38<1:37:01, 15.12s/batch, batch_loss=11.2, bat

Epoch 8/10:  61%|▌| 607/991 [2:32:38<1:36:20, 15.05s/batch, batch_loss=11.2, bat

Epoch 8/10:  61%|▌| 607/991 [2:32:53<1:36:20, 15.05s/batch, batch_loss=12.5, bat

Epoch 8/10:  61%|▌| 608/991 [2:32:53<1:35:37, 14.98s/batch, batch_loss=12.5, bat

Epoch 8/10:  61%|▌| 608/991 [2:33:08<1:35:37, 14.98s/batch, batch_loss=14.7, bat

Epoch 8/10:  61%|▌| 609/991 [2:33:08<1:35:35, 15.01s/batch, batch_loss=14.7, bat

Epoch 8/10:  61%|▌| 609/991 [2:33:23<1:35:35, 15.01s/batch, batch_loss=15, batch

Epoch 8/10:  62%|▌| 610/991 [2:33:23<1:36:32, 15.20s/batch, batch_loss=15, batch

Epoch 8/10:  62%|▌| 610/991 [2:33:39<1:36:32, 15.20s/batch, batch_loss=24.6, bat

Epoch 8/10:  62%|▌| 611/991 [2:33:39<1:37:15, 15.36s/batch, batch_loss=24.6, bat

Epoch 8/10:  62%|▌| 611/991 [2:33:54<1:37:15, 15.36s/batch, batch_loss=6.73, bat

Epoch 8/10:  62%|▌| 612/991 [2:33:54<1:36:43, 15.31s/batch, batch_loss=6.73, bat

Epoch 8/10:  62%|▌| 612/991 [2:34:09<1:36:43, 15.31s/batch, batch_loss=10.8, bat

Epoch 8/10:  62%|▌| 613/991 [2:34:09<1:35:51, 15.22s/batch, batch_loss=10.8, bat

Epoch 8/10:  62%|▌| 613/991 [2:34:24<1:35:51, 15.22s/batch, batch_loss=1.73e+4, 

Epoch 8/10:  62%|▌| 614/991 [2:34:24<1:34:51, 15.10s/batch, batch_loss=1.73e+4, 

Epoch 8/10:  62%|▌| 614/991 [2:34:42<1:34:51, 15.10s/batch, batch_loss=986, batc

Epoch 8/10:  62%|▌| 615/991 [2:34:42<1:39:08, 15.82s/batch, batch_loss=986, batc

Epoch 8/10:  62%|▌| 615/991 [2:34:58<1:39:08, 15.82s/batch, batch_loss=7.69, bat

Epoch 8/10:  62%|▌| 616/991 [2:34:58<1:38:59, 15.84s/batch, batch_loss=7.69, bat

Epoch 8/10:  62%|▌| 616/991 [2:35:13<1:38:59, 15.84s/batch, batch_loss=16.7, bat

Epoch 8/10:  62%|▌| 617/991 [2:35:13<1:37:56, 15.71s/batch, batch_loss=16.7, bat

Epoch 8/10:  62%|▌| 617/991 [2:35:29<1:37:56, 15.71s/batch, batch_loss=11.4, bat

Epoch 8/10:  62%|▌| 618/991 [2:35:29<1:38:13, 15.80s/batch, batch_loss=11.4, bat

Epoch 8/10:  62%|▌| 618/991 [2:35:45<1:38:13, 15.80s/batch, batch_loss=19.2, bat

Epoch 8/10:  62%|▌| 619/991 [2:35:45<1:38:45, 15.93s/batch, batch_loss=19.2, bat

Epoch 8/10:  62%|▌| 619/991 [2:36:01<1:38:45, 15.93s/batch, batch_loss=11.6, bat

Epoch 8/10:  63%|▋| 620/991 [2:36:01<1:37:32, 15.78s/batch, batch_loss=11.6, bat

Epoch 8/10:  63%|▋| 620/991 [2:36:16<1:37:32, 15.78s/batch, batch_loss=8.95, bat

Epoch 8/10:  63%|▋| 621/991 [2:36:16<1:36:13, 15.60s/batch, batch_loss=8.95, bat

Epoch 8/10:  63%|▋| 621/991 [2:36:30<1:36:13, 15.60s/batch, batch_loss=5.48e+3, 

Epoch 8/10:  63%|▋| 622/991 [2:36:30<1:34:07, 15.31s/batch, batch_loss=5.48e+3, 

Epoch 8/10:  63%|▋| 622/991 [2:36:45<1:34:07, 15.31s/batch, batch_loss=21, batch

Epoch 8/10:  63%|▋| 623/991 [2:36:45<1:33:13, 15.20s/batch, batch_loss=21, batch

Epoch 8/10:  63%|▋| 623/991 [2:37:00<1:33:13, 15.20s/batch, batch_loss=1.6e+4, b

Epoch 8/10:  63%|▋| 624/991 [2:37:00<1:32:50, 15.18s/batch, batch_loss=1.6e+4, b

Epoch 8/10:  63%|▋| 624/991 [2:37:15<1:32:50, 15.18s/batch, batch_loss=9.34, bat

Epoch 8/10:  63%|▋| 625/991 [2:37:15<1:31:38, 15.02s/batch, batch_loss=9.34, bat

Epoch 8/10:  63%|▋| 625/991 [2:37:30<1:31:38, 15.02s/batch, batch_loss=6.8, batc

Epoch 8/10:  63%|▋| 626/991 [2:37:30<1:31:16, 15.00s/batch, batch_loss=6.8, batc

Epoch 8/10:  63%|▋| 626/991 [2:37:47<1:31:16, 15.00s/batch, batch_loss=4.32e+3, 

Epoch 8/10:  63%|▋| 627/991 [2:37:47<1:35:03, 15.67s/batch, batch_loss=4.32e+3, 

Epoch 8/10:  63%|▋| 627/991 [2:38:02<1:35:03, 15.67s/batch, batch_loss=1.05e+3, 

Epoch 8/10:  63%|▋| 628/991 [2:38:02<1:32:22, 15.27s/batch, batch_loss=1.05e+3, 

Epoch 8/10:  63%|▋| 628/991 [2:38:16<1:32:22, 15.27s/batch, batch_loss=12.8, bat

Epoch 8/10:  63%|▋| 629/991 [2:38:16<1:31:12, 15.12s/batch, batch_loss=12.8, bat

Epoch 8/10:  63%|▋| 629/991 [2:38:32<1:31:12, 15.12s/batch, batch_loss=19.1, bat

Epoch 8/10:  64%|▋| 630/991 [2:38:32<1:30:55, 15.11s/batch, batch_loss=19.1, bat

Epoch 8/10:  64%|▋| 630/991 [2:38:47<1:30:55, 15.11s/batch, batch_loss=16, batch

Epoch 8/10:  64%|▋| 631/991 [2:38:47<1:31:05, 15.18s/batch, batch_loss=16, batch

Epoch 8/10:  64%|▋| 631/991 [2:39:01<1:31:05, 15.18s/batch, batch_loss=3.01, bat

Epoch 8/10:  64%|▋| 632/991 [2:39:01<1:28:33, 14.80s/batch, batch_loss=3.01, bat

Epoch 8/10:  64%|▋| 632/991 [2:39:16<1:28:33, 14.80s/batch, batch_loss=20.6, bat

Epoch 8/10:  64%|▋| 633/991 [2:39:16<1:28:56, 14.91s/batch, batch_loss=20.6, bat

Epoch 8/10:  64%|▋| 633/991 [2:39:31<1:28:56, 14.91s/batch, batch_loss=24.3, bat

Epoch 8/10:  64%|▋| 634/991 [2:39:31<1:29:04, 14.97s/batch, batch_loss=24.3, bat

Epoch 8/10:  64%|▋| 634/991 [2:39:46<1:29:04, 14.97s/batch, batch_loss=22.7, bat

Epoch 8/10:  64%|▋| 635/991 [2:39:46<1:28:19, 14.89s/batch, batch_loss=22.7, bat

Epoch 8/10:  64%|▋| 635/991 [2:40:04<1:28:19, 14.89s/batch, batch_loss=17, batch

Epoch 8/10:  64%|▋| 636/991 [2:40:04<1:33:12, 15.75s/batch, batch_loss=17, batch

Epoch 8/10:  64%|▋| 636/991 [2:40:19<1:33:12, 15.75s/batch, batch_loss=17.5, bat

Epoch 8/10:  64%|▋| 637/991 [2:40:19<1:32:32, 15.68s/batch, batch_loss=17.5, bat

Epoch 8/10:  64%|▋| 637/991 [2:40:33<1:32:32, 15.68s/batch, batch_loss=15.2, bat

Epoch 8/10:  64%|▋| 638/991 [2:40:33<1:28:33, 15.05s/batch, batch_loss=15.2, bat

Epoch 8/10:  64%|▋| 638/991 [2:40:47<1:28:33, 15.05s/batch, batch_loss=11.2, bat

Epoch 8/10:  64%|▋| 639/991 [2:40:47<1:26:16, 14.71s/batch, batch_loss=11.2, bat

Epoch 8/10:  64%|▋| 639/991 [2:41:02<1:26:16, 14.71s/batch, batch_loss=672, batc

Epoch 8/10:  65%|▋| 640/991 [2:41:02<1:26:45, 14.83s/batch, batch_loss=672, batc

Epoch 8/10:  65%|▋| 640/991 [2:41:19<1:26:45, 14.83s/batch, batch_loss=14.9, bat

Epoch 8/10:  65%|▋| 641/991 [2:41:19<1:31:06, 15.62s/batch, batch_loss=14.9, bat

Epoch 8/10:  65%|▋| 641/991 [2:41:35<1:31:06, 15.62s/batch, batch_loss=8.74, bat

Epoch 8/10:  65%|▋| 642/991 [2:41:35<1:30:41, 15.59s/batch, batch_loss=8.74, bat

Epoch 8/10:  65%|▋| 642/991 [2:41:50<1:30:41, 15.59s/batch, batch_loss=2.12e+4, 

Epoch 8/10:  65%|▋| 643/991 [2:41:50<1:30:04, 15.53s/batch, batch_loss=2.12e+4, 

Epoch 8/10:  65%|▋| 643/991 [2:42:06<1:30:04, 15.53s/batch, batch_loss=1.76e+4, 

Epoch 8/10:  65%|▋| 644/991 [2:42:06<1:30:21, 15.62s/batch, batch_loss=1.76e+4, 

Epoch 8/10:  65%|▋| 644/991 [2:42:21<1:30:21, 15.62s/batch, batch_loss=2.19e+3, 

Epoch 8/10:  65%|▋| 645/991 [2:42:21<1:29:01, 15.44s/batch, batch_loss=2.19e+3, 

Epoch 8/10:  65%|▋| 645/991 [2:42:36<1:29:01, 15.44s/batch, batch_loss=11.1, bat

Epoch 8/10:  65%|▋| 646/991 [2:42:36<1:28:39, 15.42s/batch, batch_loss=11.1, bat

Epoch 8/10:  65%|▋| 646/991 [2:42:52<1:28:39, 15.42s/batch, batch_loss=13, batch

Epoch 8/10:  65%|▋| 647/991 [2:42:52<1:29:14, 15.56s/batch, batch_loss=13, batch

Epoch 8/10:  65%|▋| 647/991 [2:43:07<1:29:14, 15.56s/batch, batch_loss=14.8, bat

Epoch 8/10:  65%|▋| 648/991 [2:43:07<1:27:26, 15.29s/batch, batch_loss=14.8, bat

Epoch 8/10:  65%|▋| 648/991 [2:43:27<1:27:26, 15.29s/batch, batch_loss=15.8, bat

Epoch 8/10:  65%|▋| 649/991 [2:43:27<1:35:31, 16.76s/batch, batch_loss=15.8, bat

Epoch 8/10:  65%|▋| 649/991 [2:43:43<1:35:31, 16.76s/batch, batch_loss=1.34e+4, 

Epoch 8/10:  66%|▋| 650/991 [2:43:43<1:34:41, 16.66s/batch, batch_loss=1.34e+4, 

Epoch 8/10:  66%|▋| 650/991 [2:43:57<1:34:41, 16.66s/batch, batch_loss=8.67, bat

Epoch 8/10:  66%|▋| 651/991 [2:43:57<1:29:39, 15.82s/batch, batch_loss=8.67, bat

Epoch 8/10:  66%|▋| 651/991 [2:44:13<1:29:39, 15.82s/batch, batch_loss=12.1, bat

Epoch 8/10:  66%|▋| 652/991 [2:44:13<1:28:37, 15.69s/batch, batch_loss=12.1, bat

Epoch 8/10:  66%|▋| 652/991 [2:44:27<1:28:37, 15.69s/batch, batch_loss=17.1, bat

Epoch 8/10:  66%|▋| 653/991 [2:44:27<1:26:34, 15.37s/batch, batch_loss=17.1, bat

Epoch 8/10:  66%|▋| 653/991 [2:44:43<1:26:34, 15.37s/batch, batch_loss=19.1, bat

Epoch 8/10:  66%|▋| 654/991 [2:44:43<1:26:52, 15.47s/batch, batch_loss=19.1, bat

Epoch 8/10:  66%|▋| 654/991 [2:44:59<1:26:52, 15.47s/batch, batch_loss=3.83e+3, 

Epoch 8/10:  66%|▋| 655/991 [2:44:59<1:27:05, 15.55s/batch, batch_loss=3.83e+3, 

Epoch 8/10:  66%|▋| 655/991 [2:45:15<1:27:05, 15.55s/batch, batch_loss=5.19e+3, 

Epoch 8/10:  66%|▋| 656/991 [2:45:15<1:27:38, 15.70s/batch, batch_loss=5.19e+3, 

Epoch 8/10:  66%|▋| 656/991 [2:45:31<1:27:38, 15.70s/batch, batch_loss=4.21e+3, 

Epoch 8/10:  66%|▋| 657/991 [2:45:31<1:27:27, 15.71s/batch, batch_loss=4.21e+3, 

Epoch 8/10:  66%|▋| 657/991 [2:45:49<1:27:27, 15.71s/batch, batch_loss=2.2e+4, b

Epoch 8/10:  66%|▋| 658/991 [2:45:49<1:31:52, 16.55s/batch, batch_loss=2.2e+4, b

Epoch 8/10:  66%|▋| 658/991 [2:46:05<1:31:52, 16.55s/batch, batch_loss=3.96, bat

Epoch 8/10:  66%|▋| 659/991 [2:46:05<1:31:27, 16.53s/batch, batch_loss=3.96, bat

Epoch 8/10:  66%|▋| 659/991 [2:46:23<1:31:27, 16.53s/batch, batch_loss=4.98, bat

Epoch 8/10:  67%|▋| 660/991 [2:46:23<1:33:31, 16.95s/batch, batch_loss=4.98, bat

Epoch 8/10:  67%|▋| 660/991 [2:46:41<1:33:31, 16.95s/batch, batch_loss=12.7, bat

Epoch 8/10:  67%|▋| 661/991 [2:46:41<1:34:42, 17.22s/batch, batch_loss=12.7, bat

Epoch 8/10:  67%|▋| 661/991 [2:46:58<1:34:42, 17.22s/batch, batch_loss=13.8, bat

Epoch 8/10:  67%|▋| 662/991 [2:46:58<1:33:19, 17.02s/batch, batch_loss=13.8, bat

Epoch 8/10:  67%|▋| 662/991 [2:47:15<1:33:19, 17.02s/batch, batch_loss=17, batch

Epoch 8/10:  67%|▋| 663/991 [2:47:15<1:33:39, 17.13s/batch, batch_loss=17, batch

Epoch 8/10:  67%|▋| 663/991 [2:47:32<1:33:39, 17.13s/batch, batch_loss=3.04e+3, 

Epoch 8/10:  67%|▋| 664/991 [2:47:32<1:32:24, 16.96s/batch, batch_loss=3.04e+3, 

Epoch 8/10:  67%|▋| 664/991 [2:47:51<1:32:24, 16.96s/batch, batch_loss=12.9, bat

Epoch 8/10:  67%|▋| 665/991 [2:47:51<1:36:37, 17.78s/batch, batch_loss=12.9, bat

Epoch 8/10:  67%|▋| 665/991 [2:48:09<1:36:37, 17.78s/batch, batch_loss=3.06e+3, 

Epoch 8/10:  67%|▋| 666/991 [2:48:09<1:35:10, 17.57s/batch, batch_loss=3.06e+3, 

Epoch 8/10:  67%|▋| 666/991 [2:48:26<1:35:10, 17.57s/batch, batch_loss=19.2, bat

Epoch 8/10:  67%|▋| 667/991 [2:48:26<1:34:51, 17.57s/batch, batch_loss=19.2, bat

Epoch 8/10:  67%|▋| 667/991 [2:48:42<1:34:51, 17.57s/batch, batch_loss=374, batc

Epoch 8/10:  67%|▋| 668/991 [2:48:42<1:32:07, 17.11s/batch, batch_loss=374, batc

Epoch 8/10:  67%|▋| 668/991 [2:48:59<1:32:07, 17.11s/batch, batch_loss=2.96e+3, 

Epoch 8/10:  68%|▋| 669/991 [2:48:59<1:30:57, 16.95s/batch, batch_loss=2.96e+3, 

Epoch 8/10:  68%|▋| 669/991 [2:49:16<1:30:57, 16.95s/batch, batch_loss=1.02e+3, 

Epoch 8/10:  68%|▋| 670/991 [2:49:16<1:30:23, 16.90s/batch, batch_loss=1.02e+3, 

Epoch 8/10:  68%|▋| 670/991 [2:49:31<1:30:23, 16.90s/batch, batch_loss=10.1, bat

Epoch 8/10:  68%|▋| 671/991 [2:49:31<1:28:25, 16.58s/batch, batch_loss=10.1, bat

Epoch 8/10:  68%|▋| 671/991 [2:49:49<1:28:25, 16.58s/batch, batch_loss=14.8, bat

Epoch 8/10:  68%|▋| 672/991 [2:49:49<1:30:06, 16.95s/batch, batch_loss=14.8, bat

Epoch 8/10:  68%|▋| 672/991 [2:50:05<1:30:06, 16.95s/batch, batch_loss=19.8, bat

Epoch 8/10:  68%|▋| 673/991 [2:50:05<1:28:35, 16.72s/batch, batch_loss=19.8, bat

Epoch 8/10:  68%|▋| 673/991 [2:50:22<1:28:35, 16.72s/batch, batch_loss=15.5, bat

Epoch 8/10:  68%|▋| 674/991 [2:50:22<1:27:34, 16.58s/batch, batch_loss=15.5, bat

Epoch 8/10:  68%|▋| 674/991 [2:50:37<1:27:34, 16.58s/batch, batch_loss=4.39, bat

Epoch 8/10:  68%|▋| 675/991 [2:50:37<1:25:14, 16.18s/batch, batch_loss=4.39, bat

Epoch 8/10:  68%|▋| 675/991 [2:50:53<1:25:14, 16.18s/batch, batch_loss=9.12, bat

Epoch 8/10:  68%|▋| 676/991 [2:50:53<1:25:15, 16.24s/batch, batch_loss=9.12, bat

Epoch 8/10:  68%|▋| 676/991 [2:51:10<1:25:15, 16.24s/batch, batch_loss=16.3, bat

Epoch 8/10:  68%|▋| 677/991 [2:51:10<1:25:42, 16.38s/batch, batch_loss=16.3, bat

Epoch 8/10:  68%|▋| 677/991 [2:51:27<1:25:42, 16.38s/batch, batch_loss=6.02, bat

Epoch 8/10:  68%|▋| 678/991 [2:51:27<1:25:46, 16.44s/batch, batch_loss=6.02, bat

Epoch 8/10:  68%|▋| 678/991 [2:51:43<1:25:46, 16.44s/batch, batch_loss=3.81e+3, 

Epoch 8/10:  69%|▋| 679/991 [2:51:43<1:25:11, 16.38s/batch, batch_loss=3.81e+3, 

Epoch 8/10:  69%|▋| 679/991 [2:51:59<1:25:11, 16.38s/batch, batch_loss=6.11e+3, 

Epoch 8/10:  69%|▋| 680/991 [2:51:59<1:24:50, 16.37s/batch, batch_loss=6.11e+3, 

Epoch 8/10:  69%|▋| 680/991 [2:52:15<1:24:50, 16.37s/batch, batch_loss=7.23e+4, 

Epoch 8/10:  69%|▋| 681/991 [2:52:15<1:23:26, 16.15s/batch, batch_loss=7.23e+4, 

Epoch 8/10:  69%|▋| 681/991 [2:52:31<1:23:26, 16.15s/batch, batch_loss=13.7, bat

Epoch 8/10:  69%|▋| 682/991 [2:52:31<1:23:48, 16.27s/batch, batch_loss=13.7, bat

Epoch 8/10:  69%|▋| 682/991 [2:52:48<1:23:48, 16.27s/batch, batch_loss=377, batc

Epoch 8/10:  69%|▋| 683/991 [2:52:48<1:23:46, 16.32s/batch, batch_loss=377, batc

Epoch 8/10:  69%|▋| 683/991 [2:53:04<1:23:46, 16.32s/batch, batch_loss=5.29, bat

Epoch 8/10:  69%|▋| 684/991 [2:53:04<1:23:41, 16.36s/batch, batch_loss=5.29, bat

Epoch 8/10:  69%|▋| 684/991 [2:53:20<1:23:41, 16.36s/batch, batch_loss=12.5, bat

Epoch 8/10:  69%|▋| 685/991 [2:53:20<1:23:05, 16.29s/batch, batch_loss=12.5, bat

Epoch 8/10:  69%|▋| 685/991 [2:53:36<1:23:05, 16.29s/batch, batch_loss=13.4, bat

Epoch 8/10:  69%|▋| 686/991 [2:53:36<1:22:34, 16.25s/batch, batch_loss=13.4, bat

Epoch 8/10:  69%|▋| 686/991 [2:53:54<1:22:34, 16.25s/batch, batch_loss=540, batc

Epoch 8/10:  69%|▋| 687/991 [2:53:54<1:24:54, 16.76s/batch, batch_loss=540, batc

Epoch 8/10:  69%|▋| 687/991 [2:54:10<1:24:54, 16.76s/batch, batch_loss=4.71, bat

Epoch 8/10:  69%|▋| 688/991 [2:54:10<1:22:34, 16.35s/batch, batch_loss=4.71, bat

Epoch 8/10:  69%|▋| 688/991 [2:54:25<1:22:34, 16.35s/batch, batch_loss=7.53, bat

Epoch 8/10:  70%|▋| 689/991 [2:54:25<1:21:15, 16.15s/batch, batch_loss=7.53, bat

Epoch 8/10:  70%|▋| 689/991 [2:54:42<1:21:15, 16.15s/batch, batch_loss=11.6, bat

Epoch 8/10:  70%|▋| 690/991 [2:54:42<1:21:04, 16.16s/batch, batch_loss=11.6, bat

Epoch 8/10:  70%|▋| 690/991 [2:54:58<1:21:04, 16.16s/batch, batch_loss=15.6, bat

Epoch 8/10:  70%|▋| 691/991 [2:54:58<1:20:24, 16.08s/batch, batch_loss=15.6, bat

Epoch 8/10:  70%|▋| 691/991 [2:55:14<1:20:24, 16.08s/batch, batch_loss=5.31, bat

Epoch 8/10:  70%|▋| 692/991 [2:55:14<1:20:34, 16.17s/batch, batch_loss=5.31, bat

Epoch 8/10:  70%|▋| 692/991 [2:55:32<1:20:34, 16.17s/batch, batch_loss=4.65e+3, 

Epoch 8/10:  70%|▋| 693/991 [2:55:32<1:23:45, 16.86s/batch, batch_loss=4.65e+3, 

Epoch 8/10:  70%|▋| 693/991 [2:55:48<1:23:45, 16.86s/batch, batch_loss=456, batc

Epoch 8/10:  70%|▋| 694/991 [2:55:48<1:22:13, 16.61s/batch, batch_loss=456, batc

Epoch 8/10:  70%|▋| 694/991 [2:56:04<1:22:13, 16.61s/batch, batch_loss=776, batc

Epoch 8/10:  70%|▋| 695/991 [2:56:04<1:20:18, 16.28s/batch, batch_loss=776, batc

Epoch 8/10:  70%|▋| 695/991 [2:56:18<1:20:18, 16.28s/batch, batch_loss=9.83, bat

Epoch 8/10:  70%|▋| 696/991 [2:56:18<1:17:25, 15.75s/batch, batch_loss=9.83, bat

Epoch 8/10:  70%|▋| 696/991 [2:56:33<1:17:25, 15.75s/batch, batch_loss=6.8e+3, b

Epoch 8/10:  70%|▋| 697/991 [2:56:33<1:15:36, 15.43s/batch, batch_loss=6.8e+3, b

Epoch 8/10:  70%|▋| 697/991 [2:56:50<1:15:36, 15.43s/batch, batch_loss=13, batch

Epoch 8/10:  70%|▋| 698/991 [2:56:50<1:16:44, 15.72s/batch, batch_loss=13, batch

Epoch 8/10:  70%|▋| 698/991 [2:57:08<1:16:44, 15.72s/batch, batch_loss=8.46, bat

Epoch 8/10:  71%|▋| 699/991 [2:57:08<1:19:53, 16.42s/batch, batch_loss=8.46, bat

Epoch 8/10:  71%|▋| 699/991 [2:57:23<1:19:53, 16.42s/batch, batch_loss=9.63, bat

Epoch 8/10:  71%|▋| 700/991 [2:57:23<1:17:44, 16.03s/batch, batch_loss=9.63, bat

Epoch 8/10:  71%|▋| 700/991 [2:57:38<1:17:44, 16.03s/batch, batch_loss=211, batc

Epoch 8/10:  71%|▋| 701/991 [2:57:38<1:16:27, 15.82s/batch, batch_loss=211, batc

Epoch 8/10:  71%|▋| 701/991 [2:57:53<1:16:27, 15.82s/batch, batch_loss=17.7, bat

Epoch 8/10:  71%|▋| 702/991 [2:57:53<1:14:53, 15.55s/batch, batch_loss=17.7, bat

Epoch 8/10:  71%|▋| 702/991 [2:58:08<1:14:53, 15.55s/batch, batch_loss=274, batc

Epoch 8/10:  71%|▋| 703/991 [2:58:08<1:13:29, 15.31s/batch, batch_loss=274, batc

Epoch 8/10:  71%|▋| 703/991 [2:58:24<1:13:29, 15.31s/batch, batch_loss=8.48, bat

Epoch 8/10:  71%|▋| 704/991 [2:58:24<1:14:23, 15.55s/batch, batch_loss=8.48, bat

Epoch 8/10:  71%|▋| 704/991 [2:58:38<1:14:23, 15.55s/batch, batch_loss=10.4, bat

Epoch 8/10:  71%|▋| 705/991 [2:58:38<1:12:37, 15.24s/batch, batch_loss=10.4, bat

Epoch 8/10:  71%|▋| 705/991 [2:58:53<1:12:37, 15.24s/batch, batch_loss=17.4, bat

Epoch 8/10:  71%|▋| 706/991 [2:58:53<1:11:30, 15.06s/batch, batch_loss=17.4, bat

Epoch 8/10:  71%|▋| 706/991 [2:59:08<1:11:30, 15.06s/batch, batch_loss=16.8, bat

Epoch 8/10:  71%|▋| 707/991 [2:59:08<1:11:02, 15.01s/batch, batch_loss=16.8, bat

Epoch 8/10:  71%|▋| 707/991 [2:59:23<1:11:02, 15.01s/batch, batch_loss=9.29, bat

Epoch 8/10:  71%|▋| 708/991 [2:59:23<1:11:04, 15.07s/batch, batch_loss=9.29, bat

Epoch 8/10:  71%|▋| 708/991 [2:59:38<1:11:04, 15.07s/batch, batch_loss=7.42, bat

Epoch 8/10:  72%|▋| 709/991 [2:59:38<1:11:16, 15.17s/batch, batch_loss=7.42, bat

Epoch 8/10:  72%|▋| 709/991 [2:59:54<1:11:16, 15.17s/batch, batch_loss=29.1, bat

Epoch 8/10:  72%|▋| 710/991 [2:59:54<1:11:55, 15.36s/batch, batch_loss=29.1, bat

Epoch 8/10:  72%|▋| 710/991 [3:00:09<1:11:55, 15.36s/batch, batch_loss=97.5, bat

Epoch 8/10:  72%|▋| 711/991 [3:00:09<1:11:24, 15.30s/batch, batch_loss=97.5, bat

Epoch 8/10:  72%|▋| 711/991 [3:00:25<1:11:24, 15.30s/batch, batch_loss=12.7, bat

Epoch 8/10:  72%|▋| 712/991 [3:00:25<1:12:10, 15.52s/batch, batch_loss=12.7, bat

Epoch 8/10:  72%|▋| 712/991 [3:00:41<1:12:10, 15.52s/batch, batch_loss=76.5, bat

Epoch 8/10:  72%|▋| 713/991 [3:00:41<1:12:32, 15.66s/batch, batch_loss=76.5, bat

Epoch 8/10:  72%|▋| 713/991 [3:00:57<1:12:32, 15.66s/batch, batch_loss=21.4, bat

Epoch 8/10:  72%|▋| 714/991 [3:00:57<1:12:24, 15.68s/batch, batch_loss=21.4, bat

Epoch 8/10:  72%|▋| 714/991 [3:01:13<1:12:24, 15.68s/batch, batch_loss=16.9, bat

Epoch 8/10:  72%|▋| 715/991 [3:01:13<1:11:41, 15.58s/batch, batch_loss=16.9, bat

Epoch 8/10:  72%|▋| 715/991 [3:01:30<1:11:41, 15.58s/batch, batch_loss=15.5, bat

Epoch 8/10:  72%|▋| 716/991 [3:01:30<1:13:19, 16.00s/batch, batch_loss=15.5, bat

Epoch 8/10:  72%|▋| 716/991 [3:01:48<1:13:19, 16.00s/batch, batch_loss=16.2, bat

Epoch 8/10:  72%|▋| 717/991 [3:01:48<1:15:48, 16.60s/batch, batch_loss=16.2, bat

Epoch 8/10:  72%|▋| 717/991 [3:02:04<1:15:48, 16.60s/batch, batch_loss=21.5, bat

Epoch 8/10:  72%|▋| 718/991 [3:02:04<1:14:42, 16.42s/batch, batch_loss=21.5, bat

Epoch 8/10:  72%|▋| 718/991 [3:02:19<1:14:42, 16.42s/batch, batch_loss=11, batch

Epoch 8/10:  73%|▋| 719/991 [3:02:19<1:13:50, 16.29s/batch, batch_loss=11, batch

Epoch 8/10:  73%|▋| 719/991 [3:02:36<1:13:50, 16.29s/batch, batch_loss=12.4, bat

Epoch 8/10:  73%|▋| 720/991 [3:02:36<1:14:13, 16.43s/batch, batch_loss=12.4, bat

Epoch 8/10:  73%|▋| 720/991 [3:02:53<1:14:13, 16.43s/batch, batch_loss=17.1, bat

Epoch 8/10:  73%|▋| 721/991 [3:02:53<1:14:24, 16.54s/batch, batch_loss=17.1, bat

Epoch 8/10:  73%|▋| 721/991 [3:03:10<1:14:24, 16.54s/batch, batch_loss=20.4, bat

Epoch 8/10:  73%|▋| 722/991 [3:03:10<1:14:52, 16.70s/batch, batch_loss=20.4, bat

Epoch 8/10:  73%|▋| 722/991 [3:03:27<1:14:52, 16.70s/batch, batch_loss=7.23e+3, 

Epoch 8/10:  73%|▋| 723/991 [3:03:27<1:15:04, 16.81s/batch, batch_loss=7.23e+3, 

Epoch 8/10:  73%|▋| 723/991 [3:03:46<1:15:04, 16.81s/batch, batch_loss=4.22, bat

Epoch 8/10:  73%|▋| 724/991 [3:03:46<1:17:20, 17.38s/batch, batch_loss=4.22, bat

Epoch 8/10:  73%|▋| 724/991 [3:04:02<1:17:20, 17.38s/batch, batch_loss=16.9, bat

Epoch 8/10:  73%|▋| 725/991 [3:04:02<1:15:36, 17.05s/batch, batch_loss=16.9, bat

Epoch 8/10:  73%|▋| 725/991 [3:04:18<1:15:36, 17.05s/batch, batch_loss=12.8, bat

Epoch 8/10:  73%|▋| 726/991 [3:04:18<1:13:34, 16.66s/batch, batch_loss=12.8, bat

Epoch 8/10:  73%|▋| 726/991 [3:04:35<1:13:34, 16.66s/batch, batch_loss=1.3e+4, b

Epoch 8/10:  73%|▋| 727/991 [3:04:35<1:13:39, 16.74s/batch, batch_loss=1.3e+4, b

Epoch 8/10:  73%|▋| 727/991 [3:04:51<1:13:39, 16.74s/batch, batch_loss=12.8, bat

Epoch 8/10:  73%|▋| 728/991 [3:04:51<1:13:09, 16.69s/batch, batch_loss=12.8, bat

Epoch 8/10:  73%|▋| 728/991 [3:05:07<1:13:09, 16.69s/batch, batch_loss=131, batc

Epoch 8/10:  74%|▋| 729/991 [3:05:07<1:11:28, 16.37s/batch, batch_loss=131, batc

Epoch 8/10:  74%|▋| 729/991 [3:05:24<1:11:28, 16.37s/batch, batch_loss=10.6, bat

Epoch 8/10:  74%|▋| 730/991 [3:05:24<1:12:18, 16.62s/batch, batch_loss=10.6, bat

Epoch 8/10:  74%|▋| 730/991 [3:05:43<1:12:18, 16.62s/batch, batch_loss=120, batc

Epoch 8/10:  74%|▋| 731/991 [3:05:43<1:15:23, 17.40s/batch, batch_loss=120, batc

Epoch 8/10:  74%|▋| 731/991 [3:06:00<1:15:23, 17.40s/batch, batch_loss=1.39e+4, 

Epoch 8/10:  74%|▋| 732/991 [3:06:00<1:13:58, 17.14s/batch, batch_loss=1.39e+4, 

Epoch 8/10:  74%|▋| 732/991 [3:06:17<1:13:58, 17.14s/batch, batch_loss=16.4, bat

Epoch 8/10:  74%|▋| 733/991 [3:06:17<1:13:10, 17.02s/batch, batch_loss=16.4, bat

Epoch 8/10:  74%|▋| 733/991 [3:06:32<1:13:10, 17.02s/batch, batch_loss=6.78e+3, 

Epoch 8/10:  74%|▋| 734/991 [3:06:32<1:10:42, 16.51s/batch, batch_loss=6.78e+3, 

Epoch 8/10:  74%|▋| 734/991 [3:06:48<1:10:42, 16.51s/batch, batch_loss=14, batch

Epoch 8/10:  74%|▋| 735/991 [3:06:48<1:10:19, 16.48s/batch, batch_loss=14, batch

Epoch 8/10:  74%|▋| 735/991 [3:07:05<1:10:19, 16.48s/batch, batch_loss=18.1, bat

Epoch 8/10:  74%|▋| 736/991 [3:07:05<1:10:41, 16.63s/batch, batch_loss=18.1, bat

Epoch 8/10:  74%|▋| 736/991 [3:07:21<1:10:41, 16.63s/batch, batch_loss=10.9, bat

Epoch 8/10:  74%|▋| 737/991 [3:07:21<1:09:04, 16.32s/batch, batch_loss=10.9, bat

Epoch 8/10:  74%|▋| 737/991 [3:07:37<1:09:04, 16.32s/batch, batch_loss=1.49e+3, 

Epoch 8/10:  74%|▋| 738/991 [3:07:37<1:08:25, 16.23s/batch, batch_loss=1.49e+3, 

Epoch 8/10:  74%|▋| 738/991 [3:07:52<1:08:25, 16.23s/batch, batch_loss=27.1, bat

Epoch 8/10:  75%|▋| 739/991 [3:07:52<1:06:14, 15.77s/batch, batch_loss=27.1, bat

Epoch 8/10:  75%|▋| 739/991 [3:08:08<1:06:14, 15.77s/batch, batch_loss=11.5, bat

Epoch 8/10:  75%|▋| 740/991 [3:08:08<1:06:44, 15.95s/batch, batch_loss=11.5, bat

Epoch 8/10:  75%|▋| 740/991 [3:08:25<1:06:44, 15.95s/batch, batch_loss=1.81e+4, 

Epoch 8/10:  75%|▋| 741/991 [3:08:25<1:06:59, 16.08s/batch, batch_loss=1.81e+4, 

Epoch 8/10:  75%|▋| 741/991 [3:08:42<1:06:59, 16.08s/batch, batch_loss=2.28e+3, 

Epoch 8/10:  75%|▋| 742/991 [3:08:42<1:08:13, 16.44s/batch, batch_loss=2.28e+3, 

Epoch 8/10:  75%|▋| 742/991 [3:08:57<1:08:13, 16.44s/batch, batch_loss=12.6, bat

Epoch 8/10:  75%|▋| 743/991 [3:08:57<1:06:28, 16.08s/batch, batch_loss=12.6, bat

Epoch 8/10:  75%|▋| 743/991 [3:09:12<1:06:28, 16.08s/batch, batch_loss=14.2, bat

Epoch 8/10:  75%|▊| 744/991 [3:09:12<1:05:04, 15.81s/batch, batch_loss=14.2, bat

Epoch 8/10:  75%|▊| 744/991 [3:09:30<1:05:04, 15.81s/batch, batch_loss=15.6, bat

Epoch 8/10:  75%|▊| 745/991 [3:09:30<1:07:18, 16.42s/batch, batch_loss=15.6, bat

Epoch 8/10:  75%|▊| 745/991 [3:09:46<1:07:18, 16.42s/batch, batch_loss=1.16e+3, 

Epoch 8/10:  75%|▊| 746/991 [3:09:46<1:06:04, 16.18s/batch, batch_loss=1.16e+3, 

Epoch 8/10:  75%|▊| 746/991 [3:10:01<1:06:04, 16.18s/batch, batch_loss=3.9e+3, b

Epoch 8/10:  75%|▊| 747/991 [3:10:01<1:05:15, 16.05s/batch, batch_loss=3.9e+3, b

Epoch 8/10:  75%|▊| 747/991 [3:10:18<1:05:15, 16.05s/batch, batch_loss=11.1, bat

Epoch 8/10:  75%|▊| 748/991 [3:10:18<1:06:00, 16.30s/batch, batch_loss=11.1, bat

Epoch 8/10:  75%|▊| 748/991 [3:10:42<1:06:00, 16.30s/batch, batch_loss=14.4, bat

Epoch 8/10:  76%|▊| 749/991 [3:10:42<1:14:30, 18.47s/batch, batch_loss=14.4, bat

Epoch 8/10:  76%|▊| 749/991 [3:11:04<1:14:30, 18.47s/batch, batch_loss=10.5, bat

Epoch 8/10:  76%|▊| 750/991 [3:11:04<1:18:42, 19.59s/batch, batch_loss=10.5, bat

Epoch 8/10:  76%|▊| 750/991 [3:11:21<1:18:42, 19.59s/batch, batch_loss=9.76, bat

Epoch 8/10:  76%|▊| 751/991 [3:11:21<1:14:47, 18.70s/batch, batch_loss=9.76, bat

Epoch 8/10:  76%|▊| 751/991 [3:11:36<1:14:47, 18.70s/batch, batch_loss=6.96, bat

Epoch 8/10:  76%|▊| 752/991 [3:11:36<1:10:20, 17.66s/batch, batch_loss=6.96, bat

Epoch 8/10:  76%|▊| 752/991 [3:11:52<1:10:20, 17.66s/batch, batch_loss=7.41, bat

Epoch 8/10:  76%|▊| 753/991 [3:11:52<1:07:59, 17.14s/batch, batch_loss=7.41, bat

Epoch 8/10:  76%|▊| 753/991 [3:12:10<1:07:59, 17.14s/batch, batch_loss=5.17, bat

Epoch 8/10:  76%|▊| 754/991 [3:12:10<1:08:27, 17.33s/batch, batch_loss=5.17, bat

Epoch 8/10:  76%|▊| 754/991 [3:12:27<1:08:27, 17.33s/batch, batch_loss=19, batch

Epoch 8/10:  76%|▊| 755/991 [3:12:27<1:08:09, 17.33s/batch, batch_loss=19, batch

Epoch 8/10:  76%|▊| 755/991 [3:12:43<1:08:09, 17.33s/batch, batch_loss=15.2, bat

Epoch 8/10:  76%|▊| 756/991 [3:12:43<1:06:50, 17.07s/batch, batch_loss=15.2, bat

Epoch 8/10:  76%|▊| 756/991 [3:13:00<1:06:50, 17.07s/batch, batch_loss=5.29, bat

Epoch 8/10:  76%|▊| 757/991 [3:13:00<1:05:38, 16.83s/batch, batch_loss=5.29, bat

Epoch 8/10:  76%|▊| 757/991 [3:13:24<1:05:38, 16.83s/batch, batch_loss=15.2, bat

Epoch 8/10:  76%|▊| 758/991 [3:13:24<1:14:09, 19.10s/batch, batch_loss=15.2, bat

Epoch 8/10:  76%|▊| 758/991 [3:13:40<1:14:09, 19.10s/batch, batch_loss=15.3, bat

Epoch 8/10:  77%|▊| 759/991 [3:13:40<1:10:23, 18.21s/batch, batch_loss=15.3, bat

Epoch 8/10:  77%|▊| 759/991 [3:13:59<1:10:23, 18.21s/batch, batch_loss=15.2, bat

Epoch 8/10:  77%|▊| 760/991 [3:13:59<1:11:04, 18.46s/batch, batch_loss=15.2, bat

Epoch 8/10:  77%|▊| 760/991 [3:14:16<1:11:04, 18.46s/batch, batch_loss=20.7, bat

Epoch 8/10:  77%|▊| 761/991 [3:14:16<1:08:35, 17.89s/batch, batch_loss=20.7, bat

Epoch 8/10:  77%|▊| 761/991 [3:14:31<1:08:35, 17.89s/batch, batch_loss=24.5, bat

Epoch 8/10:  77%|▊| 762/991 [3:14:31<1:05:26, 17.15s/batch, batch_loss=24.5, bat

Epoch 8/10:  77%|▊| 762/991 [3:14:47<1:05:26, 17.15s/batch, batch_loss=510, batc

Epoch 8/10:  77%|▊| 763/991 [3:14:47<1:04:06, 16.87s/batch, batch_loss=510, batc

Epoch 8/10:  77%|▊| 763/991 [3:15:05<1:04:06, 16.87s/batch, batch_loss=12.1, bat

Epoch 8/10:  77%|▊| 764/991 [3:15:05<1:04:10, 16.96s/batch, batch_loss=12.1, bat

Epoch 8/10:  77%|▊| 764/991 [3:15:21<1:04:10, 16.96s/batch, batch_loss=2.97, bat

Epoch 8/10:  77%|▊| 765/991 [3:15:21<1:03:41, 16.91s/batch, batch_loss=2.97, bat

Epoch 8/10:  77%|▊| 765/991 [3:15:38<1:03:41, 16.91s/batch, batch_loss=12.2, bat

Epoch 8/10:  77%|▊| 766/991 [3:15:38<1:02:35, 16.69s/batch, batch_loss=12.2, bat

Epoch 8/10:  77%|▊| 766/991 [3:15:55<1:02:35, 16.69s/batch, batch_loss=16.7, bat

Epoch 8/10:  77%|▊| 767/991 [3:15:55<1:02:44, 16.81s/batch, batch_loss=16.7, bat

Epoch 8/10:  77%|▊| 767/991 [3:16:11<1:02:44, 16.81s/batch, batch_loss=4.27, bat

Epoch 8/10:  77%|▊| 768/991 [3:16:11<1:01:35, 16.57s/batch, batch_loss=4.27, bat

Epoch 8/10:  77%|▊| 768/991 [3:16:26<1:01:35, 16.57s/batch, batch_loss=1.86, bat

Epoch 8/10:  78%|▊| 769/991 [3:16:26<59:27, 16.07s/batch, batch_loss=1.86, batch

Epoch 8/10:  78%|▊| 769/991 [3:16:42<59:27, 16.07s/batch, batch_loss=12.2, batch

Epoch 8/10:  78%|▊| 770/991 [3:16:42<59:03, 16.03s/batch, batch_loss=12.2, batch

Epoch 8/10:  78%|▊| 770/991 [3:16:58<59:03, 16.03s/batch, batch_loss=2.76e+3, ba

Epoch 8/10:  78%|▊| 771/991 [3:16:58<59:21, 16.19s/batch, batch_loss=2.76e+3, ba

Epoch 8/10:  78%|▊| 771/991 [3:17:14<59:21, 16.19s/batch, batch_loss=5.11, batch

Epoch 8/10:  78%|▊| 772/991 [3:17:14<58:56, 16.15s/batch, batch_loss=5.11, batch

Epoch 8/10:  78%|▊| 772/991 [3:17:30<58:56, 16.15s/batch, batch_loss=1.22, batch

Epoch 8/10:  78%|▊| 773/991 [3:17:30<57:50, 15.92s/batch, batch_loss=1.22, batch

Epoch 8/10:  78%|▊| 773/991 [3:17:45<57:50, 15.92s/batch, batch_loss=9.49, batch

Epoch 8/10:  78%|▊| 774/991 [3:17:45<57:11, 15.81s/batch, batch_loss=9.49, batch

Epoch 8/10:  78%|▊| 774/991 [3:18:01<57:11, 15.81s/batch, batch_loss=7.51, batch

Epoch 8/10:  78%|▊| 775/991 [3:18:01<56:39, 15.74s/batch, batch_loss=7.51, batch

Epoch 8/10:  78%|▊| 775/991 [3:18:16<56:39, 15.74s/batch, batch_loss=261, batch_

Epoch 8/10:  78%|▊| 776/991 [3:18:16<56:25, 15.75s/batch, batch_loss=261, batch_

Epoch 8/10:  78%|▊| 776/991 [3:18:33<56:25, 15.75s/batch, batch_loss=0.39, batch

Epoch 8/10:  78%|▊| 777/991 [3:18:33<56:53, 15.95s/batch, batch_loss=0.39, batch

Epoch 8/10:  78%|▊| 777/991 [3:18:47<56:53, 15.95s/batch, batch_loss=0.659, batc

Epoch 8/10:  79%|▊| 778/991 [3:18:47<55:01, 15.50s/batch, batch_loss=0.659, batc

Epoch 8/10:  79%|▊| 778/991 [3:19:03<55:01, 15.50s/batch, batch_loss=6.73, batch

Epoch 8/10:  79%|▊| 779/991 [3:19:03<55:21, 15.67s/batch, batch_loss=6.73, batch

Epoch 8/10:  79%|▊| 779/991 [3:19:20<55:21, 15.67s/batch, batch_loss=2.6, batch_

Epoch 8/10:  79%|▊| 780/991 [3:19:20<55:40, 15.83s/batch, batch_loss=2.6, batch_

Epoch 8/10:  79%|▊| 780/991 [3:19:35<55:40, 15.83s/batch, batch_loss=3.18, batch

Epoch 8/10:  79%|▊| 781/991 [3:19:35<54:49, 15.66s/batch, batch_loss=3.18, batch

Epoch 8/10:  79%|▊| 781/991 [3:19:50<54:49, 15.66s/batch, batch_loss=2.51e+4, ba

Epoch 8/10:  79%|▊| 782/991 [3:19:50<54:31, 15.65s/batch, batch_loss=2.51e+4, ba

Epoch 8/10:  79%|▊| 782/991 [3:20:07<54:31, 15.65s/batch, batch_loss=20.4, batch

Epoch 8/10:  79%|▊| 783/991 [3:20:07<54:54, 15.84s/batch, batch_loss=20.4, batch

Epoch 8/10:  79%|▊| 783/991 [3:20:23<54:54, 15.84s/batch, batch_loss=13, batch_i

Epoch 8/10:  79%|▊| 784/991 [3:20:23<54:38, 15.84s/batch, batch_loss=13, batch_i

Epoch 8/10:  79%|▊| 784/991 [3:20:39<54:38, 15.84s/batch, batch_loss=13.2, batch

Epoch 8/10:  79%|▊| 785/991 [3:20:39<54:34, 15.90s/batch, batch_loss=13.2, batch

Epoch 8/10:  79%|▊| 785/991 [3:20:56<54:34, 15.90s/batch, batch_loss=8.19, batch

Epoch 8/10:  79%|▊| 786/991 [3:20:56<55:32, 16.26s/batch, batch_loss=8.19, batch

Epoch 8/10:  79%|▊| 786/991 [3:21:11<55:32, 16.26s/batch, batch_loss=2.48e+4, ba

Epoch 8/10:  79%|▊| 787/991 [3:21:11<54:28, 16.02s/batch, batch_loss=2.48e+4, ba

Epoch 8/10:  79%|▊| 787/991 [3:21:26<54:28, 16.02s/batch, batch_loss=681, batch_

Epoch 8/10:  80%|▊| 788/991 [3:21:26<53:22, 15.78s/batch, batch_loss=681, batch_

Epoch 8/10:  80%|▊| 788/991 [3:21:43<53:22, 15.78s/batch, batch_loss=19.3, batch

Epoch 8/10:  80%|▊| 789/991 [3:21:43<53:39, 15.94s/batch, batch_loss=19.3, batch

Epoch 8/10:  80%|▊| 789/991 [3:21:59<53:39, 15.94s/batch, batch_loss=15.4, batch

Epoch 8/10:  80%|▊| 790/991 [3:21:59<53:16, 15.90s/batch, batch_loss=15.4, batch

Epoch 8/10:  80%|▊| 790/991 [3:22:15<53:16, 15.90s/batch, batch_loss=12.3, batch

Epoch 8/10:  80%|▊| 791/991 [3:22:15<53:50, 16.15s/batch, batch_loss=12.3, batch

Epoch 8/10:  80%|▊| 791/991 [3:22:31<53:50, 16.15s/batch, batch_loss=1.04e+4, ba

Epoch 8/10:  80%|▊| 792/991 [3:22:31<53:30, 16.13s/batch, batch_loss=1.04e+4, ba

Epoch 8/10:  80%|▊| 792/991 [3:22:47<53:30, 16.13s/batch, batch_loss=8.29, batch

Epoch 8/10:  80%|▊| 793/991 [3:22:48<53:15, 16.14s/batch, batch_loss=8.29, batch

Epoch 8/10:  80%|▊| 793/991 [3:23:02<53:15, 16.14s/batch, batch_loss=1.86, batch

Epoch 8/10:  80%|▊| 794/991 [3:23:02<51:37, 15.73s/batch, batch_loss=1.86, batch

Epoch 8/10:  80%|▊| 794/991 [3:23:18<51:37, 15.73s/batch, batch_loss=7.31, batch

Epoch 8/10:  80%|▊| 795/991 [3:23:18<51:12, 15.68s/batch, batch_loss=7.31, batch

Epoch 8/10:  80%|▊| 795/991 [3:23:34<51:12, 15.68s/batch, batch_loss=11.8, batch

Epoch 8/10:  80%|▊| 796/991 [3:23:34<51:09, 15.74s/batch, batch_loss=11.8, batch

Epoch 8/10:  80%|▊| 796/991 [3:23:49<51:09, 15.74s/batch, batch_loss=21.5, batch

Epoch 8/10:  80%|▊| 797/991 [3:23:49<50:50, 15.72s/batch, batch_loss=21.5, batch

Epoch 8/10:  80%|▊| 797/991 [3:24:05<50:50, 15.72s/batch, batch_loss=339, batch_

Epoch 8/10:  81%|▊| 798/991 [3:24:05<50:23, 15.67s/batch, batch_loss=339, batch_

Epoch 8/10:  81%|▊| 798/991 [3:24:20<50:23, 15.67s/batch, batch_loss=9.71, batch

Epoch 8/10:  81%|▊| 799/991 [3:24:20<49:59, 15.62s/batch, batch_loss=9.71, batch

Epoch 8/10:  81%|▊| 799/991 [3:24:36<49:59, 15.62s/batch, batch_loss=15.6, batch

Epoch 8/10:  81%|▊| 800/991 [3:24:36<49:53, 15.67s/batch, batch_loss=15.6, batch

Epoch 8/10:  81%|▊| 800/991 [3:24:53<49:53, 15.67s/batch, batch_loss=12.2, batch

Epoch 8/10:  81%|▊| 801/991 [3:24:53<50:33, 15.97s/batch, batch_loss=12.2, batch

Epoch 8/10:  81%|▊| 801/991 [3:25:09<50:33, 15.97s/batch, batch_loss=16.3, batch

Epoch 8/10:  81%|▊| 802/991 [3:25:09<50:43, 16.10s/batch, batch_loss=16.3, batch

Epoch 8/10:  81%|▊| 802/991 [3:25:26<50:43, 16.10s/batch, batch_loss=6.18, batch

Epoch 8/10:  81%|▊| 803/991 [3:25:26<50:57, 16.26s/batch, batch_loss=6.18, batch

Epoch 8/10:  81%|▊| 803/991 [3:25:42<50:57, 16.26s/batch, batch_loss=13.9, batch

Epoch 8/10:  81%|▊| 804/991 [3:25:42<50:09, 16.09s/batch, batch_loss=13.9, batch

Epoch 8/10:  81%|▊| 804/991 [3:25:57<50:09, 16.09s/batch, batch_loss=5.97, batch

Epoch 8/10:  81%|▊| 805/991 [3:25:57<49:06, 15.84s/batch, batch_loss=5.97, batch

Epoch 8/10:  81%|▊| 805/991 [3:26:12<49:06, 15.84s/batch, batch_loss=10, batch_i

Epoch 8/10:  81%|▊| 806/991 [3:26:12<48:34, 15.76s/batch, batch_loss=10, batch_i

Epoch 8/10:  81%|▊| 806/991 [3:26:28<48:34, 15.76s/batch, batch_loss=9.59, batch

Epoch 8/10:  81%|▊| 807/991 [3:26:28<48:14, 15.73s/batch, batch_loss=9.59, batch

Epoch 8/10:  81%|▊| 807/991 [3:26:44<48:14, 15.73s/batch, batch_loss=15.4, batch

Epoch 8/10:  82%|▊| 808/991 [3:26:44<47:39, 15.63s/batch, batch_loss=15.4, batch

Epoch 8/10:  82%|▊| 808/991 [3:26:59<47:39, 15.63s/batch, batch_loss=1.21e+4, ba

Epoch 8/10:  82%|▊| 809/991 [3:26:59<47:07, 15.53s/batch, batch_loss=1.21e+4, ba

Epoch 8/10:  82%|▊| 809/991 [3:27:14<47:07, 15.53s/batch, batch_loss=14.4, batch

Epoch 8/10:  82%|▊| 810/991 [3:27:14<46:51, 15.53s/batch, batch_loss=14.4, batch

Epoch 8/10:  82%|▊| 810/991 [3:27:31<46:51, 15.53s/batch, batch_loss=7.51, batch

Epoch 8/10:  82%|▊| 811/991 [3:27:31<47:25, 15.81s/batch, batch_loss=7.51, batch

Epoch 8/10:  82%|▊| 811/991 [3:27:47<47:25, 15.81s/batch, batch_loss=7.15, batch

Epoch 8/10:  82%|▊| 812/991 [3:27:47<47:14, 15.83s/batch, batch_loss=7.15, batch

Epoch 8/10:  82%|▊| 812/991 [3:28:03<47:14, 15.83s/batch, batch_loss=8.38, batch

Epoch 8/10:  82%|▊| 813/991 [3:28:03<47:08, 15.89s/batch, batch_loss=8.38, batch

Epoch 8/10:  82%|▊| 813/991 [3:28:18<47:08, 15.89s/batch, batch_loss=11.7, batch

Epoch 8/10:  82%|▊| 814/991 [3:28:18<46:08, 15.64s/batch, batch_loss=11.7, batch

Epoch 8/10:  82%|▊| 814/991 [3:28:33<46:08, 15.64s/batch, batch_loss=7.6, batch_

Epoch 8/10:  82%|▊| 815/991 [3:28:33<45:05, 15.37s/batch, batch_loss=7.6, batch_

Epoch 8/10:  82%|▊| 815/991 [3:28:48<45:05, 15.37s/batch, batch_loss=91, batch_i

Epoch 8/10:  82%|▊| 816/991 [3:28:48<45:05, 15.46s/batch, batch_loss=91, batch_i

Epoch 8/10:  82%|▊| 816/991 [3:29:03<45:05, 15.46s/batch, batch_loss=360, batch_

Epoch 8/10:  82%|▊| 817/991 [3:29:03<44:29, 15.34s/batch, batch_loss=360, batch_

Epoch 8/10:  82%|▊| 817/991 [3:29:19<44:29, 15.34s/batch, batch_loss=366, batch_

Epoch 8/10:  83%|▊| 818/991 [3:29:19<44:10, 15.32s/batch, batch_loss=366, batch_

Epoch 8/10:  83%|▊| 818/991 [3:29:34<44:10, 15.32s/batch, batch_loss=12.6, batch

Epoch 8/10:  83%|▊| 819/991 [3:29:34<43:57, 15.33s/batch, batch_loss=12.6, batch

Epoch 8/10:  83%|▊| 819/991 [3:29:49<43:57, 15.33s/batch, batch_loss=7.38, batch

Epoch 8/10:  83%|▊| 820/991 [3:29:49<43:37, 15.30s/batch, batch_loss=7.38, batch

Epoch 8/10:  83%|▊| 820/991 [3:30:04<43:37, 15.30s/batch, batch_loss=7.46, batch

Epoch 8/10:  83%|▊| 821/991 [3:30:04<42:38, 15.05s/batch, batch_loss=7.46, batch

Epoch 8/10:  83%|▊| 821/991 [3:30:19<42:38, 15.05s/batch, batch_loss=8.56, batch

Epoch 8/10:  83%|▊| 822/991 [3:30:19<42:49, 15.21s/batch, batch_loss=8.56, batch

Epoch 8/10:  83%|▊| 822/991 [3:30:36<42:49, 15.21s/batch, batch_loss=154, batch_

Epoch 8/10:  83%|▊| 823/991 [3:30:36<44:13, 15.80s/batch, batch_loss=154, batch_

Epoch 8/10:  83%|▊| 823/991 [3:30:55<44:13, 15.80s/batch, batch_loss=8.31, batch

Epoch 8/10:  83%|▊| 824/991 [3:30:55<46:12, 16.60s/batch, batch_loss=8.31, batch

Epoch 8/10:  83%|▊| 824/991 [3:31:13<46:12, 16.60s/batch, batch_loss=14, batch_i

Epoch 8/10:  83%|▊| 825/991 [3:31:13<47:39, 17.23s/batch, batch_loss=14, batch_i

Epoch 8/10:  83%|▊| 825/991 [3:31:32<47:39, 17.23s/batch, batch_loss=2.61e+3, ba

Epoch 8/10:  83%|▊| 826/991 [3:31:32<48:49, 17.76s/batch, batch_loss=2.61e+3, ba

Epoch 8/10:  83%|▊| 826/991 [3:31:51<48:49, 17.76s/batch, batch_loss=22.3, batch

Epoch 8/10:  83%|▊| 827/991 [3:31:51<49:18, 18.04s/batch, batch_loss=22.3, batch

Epoch 8/10:  83%|▊| 827/991 [3:32:08<49:18, 18.04s/batch, batch_loss=17.6, batch

Epoch 8/10:  84%|▊| 828/991 [3:32:08<48:15, 17.76s/batch, batch_loss=17.6, batch

Epoch 8/10:  84%|▊| 828/991 [3:32:27<48:15, 17.76s/batch, batch_loss=7.93, batch

Epoch 8/10:  84%|▊| 829/991 [3:32:27<48:42, 18.04s/batch, batch_loss=7.93, batch

Epoch 8/10:  84%|▊| 829/991 [3:32:45<48:42, 18.04s/batch, batch_loss=12.4, batch

Epoch 8/10:  84%|▊| 830/991 [3:32:45<48:30, 18.08s/batch, batch_loss=12.4, batch

Epoch 8/10:  84%|▊| 830/991 [3:33:03<48:30, 18.08s/batch, batch_loss=10.5, batch

Epoch 8/10:  84%|▊| 831/991 [3:33:03<47:47, 17.92s/batch, batch_loss=10.5, batch

Epoch 8/10:  84%|▊| 831/991 [3:33:23<47:47, 17.92s/batch, batch_loss=14.6, batch

Epoch 8/10:  84%|▊| 832/991 [3:33:23<49:19, 18.61s/batch, batch_loss=14.6, batch

Epoch 8/10:  84%|▊| 832/991 [3:33:44<49:19, 18.61s/batch, batch_loss=217, batch_

Epoch 8/10:  84%|▊| 833/991 [3:33:44<50:51, 19.31s/batch, batch_loss=217, batch_

Epoch 8/10:  84%|▊| 833/991 [3:34:02<50:51, 19.31s/batch, batch_loss=15.6, batch

Epoch 8/10:  84%|▊| 834/991 [3:34:02<49:35, 18.95s/batch, batch_loss=15.6, batch

Epoch 8/10:  84%|▊| 834/991 [3:34:22<49:35, 18.95s/batch, batch_loss=12.9, batch

Epoch 8/10:  84%|▊| 835/991 [3:34:22<49:44, 19.13s/batch, batch_loss=12.9, batch

Epoch 8/10:  84%|▊| 835/991 [3:34:41<49:44, 19.13s/batch, batch_loss=3.27e+3, ba

Epoch 8/10:  84%|▊| 836/991 [3:34:41<49:17, 19.08s/batch, batch_loss=3.27e+3, ba

Epoch 8/10:  84%|▊| 836/991 [3:35:01<49:17, 19.08s/batch, batch_loss=4.9e+3, bat

Epoch 8/10:  84%|▊| 837/991 [3:35:01<50:04, 19.51s/batch, batch_loss=4.9e+3, bat

Epoch 8/10:  84%|▊| 837/991 [3:35:23<50:04, 19.51s/batch, batch_loss=15.4, batch

Epoch 8/10:  85%|▊| 838/991 [3:35:23<51:22, 20.14s/batch, batch_loss=15.4, batch

Epoch 8/10:  85%|▊| 838/991 [3:35:43<51:22, 20.14s/batch, batch_loss=5.32, batch

Epoch 8/10:  85%|▊| 839/991 [3:35:43<51:06, 20.18s/batch, batch_loss=5.32, batch

Epoch 8/10:  85%|▊| 839/991 [3:36:01<51:06, 20.18s/batch, batch_loss=4.7, batch_

Epoch 8/10:  85%|▊| 840/991 [3:36:01<49:10, 19.54s/batch, batch_loss=4.7, batch_

Epoch 8/10:  85%|▊| 840/991 [3:36:19<49:10, 19.54s/batch, batch_loss=14.4, batch

Epoch 8/10:  85%|▊| 841/991 [3:36:19<47:44, 19.10s/batch, batch_loss=14.4, batch

Epoch 8/10:  85%|▊| 841/991 [3:36:38<47:44, 19.10s/batch, batch_loss=16.1, batch

Epoch 8/10:  85%|▊| 842/991 [3:36:38<47:09, 18.99s/batch, batch_loss=16.1, batch

Epoch 8/10:  85%|▊| 842/991 [3:36:54<47:09, 18.99s/batch, batch_loss=8.83, batch

Epoch 8/10:  85%|▊| 843/991 [3:36:54<45:09, 18.30s/batch, batch_loss=8.83, batch

Epoch 8/10:  85%|▊| 843/991 [3:37:12<45:09, 18.30s/batch, batch_loss=1.69e+3, ba

Epoch 8/10:  85%|▊| 844/991 [3:37:12<43:58, 17.95s/batch, batch_loss=1.69e+3, ba

Epoch 8/10:  85%|▊| 844/991 [3:37:32<43:58, 17.95s/batch, batch_loss=14.1, batch

Epoch 8/10:  85%|▊| 845/991 [3:37:32<45:08, 18.55s/batch, batch_loss=14.1, batch

Epoch 8/10:  85%|▊| 845/991 [3:37:49<45:08, 18.55s/batch, batch_loss=1.18e+4, ba

Epoch 8/10:  85%|▊| 846/991 [3:37:49<44:02, 18.22s/batch, batch_loss=1.18e+4, ba

Epoch 8/10:  85%|▊| 846/991 [3:38:06<44:02, 18.22s/batch, batch_loss=18.1, batch

Epoch 8/10:  85%|▊| 847/991 [3:38:06<43:04, 17.95s/batch, batch_loss=18.1, batch

Epoch 8/10:  85%|▊| 847/991 [3:38:25<43:04, 17.95s/batch, batch_loss=29.2, batch

Epoch 8/10:  86%|▊| 848/991 [3:38:25<43:36, 18.30s/batch, batch_loss=29.2, batch

Epoch 8/10:  86%|▊| 848/991 [3:38:44<43:36, 18.30s/batch, batch_loss=1.01e+3, ba

Epoch 8/10:  86%|▊| 849/991 [3:38:44<43:20, 18.31s/batch, batch_loss=1.01e+3, ba

Epoch 8/10:  86%|▊| 849/991 [3:39:01<43:20, 18.31s/batch, batch_loss=7.48, batch

Epoch 8/10:  86%|▊| 850/991 [3:39:01<41:58, 17.86s/batch, batch_loss=7.48, batch

Epoch 8/10:  86%|▊| 850/991 [3:39:18<41:58, 17.86s/batch, batch_loss=13.7, batch

Epoch 8/10:  86%|▊| 851/991 [3:39:18<41:43, 17.88s/batch, batch_loss=13.7, batch

Epoch 8/10:  86%|▊| 851/991 [3:39:37<41:43, 17.88s/batch, batch_loss=14.5, batch

Epoch 8/10:  86%|▊| 852/991 [3:39:37<41:30, 17.92s/batch, batch_loss=14.5, batch

Epoch 8/10:  86%|▊| 852/991 [3:39:54<41:30, 17.92s/batch, batch_loss=7.65e+3, ba

Epoch 8/10:  86%|▊| 853/991 [3:39:54<40:49, 17.75s/batch, batch_loss=7.65e+3, ba

Epoch 8/10:  86%|▊| 853/991 [3:40:11<40:49, 17.75s/batch, batch_loss=17.5, batch

Epoch 8/10:  86%|▊| 854/991 [3:40:11<40:12, 17.61s/batch, batch_loss=17.5, batch

Epoch 8/10:  86%|▊| 854/991 [3:40:28<40:12, 17.61s/batch, batch_loss=7.76, batch

Epoch 8/10:  86%|▊| 855/991 [3:40:28<39:19, 17.35s/batch, batch_loss=7.76, batch

Epoch 8/10:  86%|▊| 855/991 [3:40:42<39:19, 17.35s/batch, batch_loss=7.98, batch

Epoch 8/10:  86%|▊| 856/991 [3:40:42<36:42, 16.32s/batch, batch_loss=7.98, batch

Epoch 8/10:  86%|▊| 856/991 [3:40:56<36:42, 16.32s/batch, batch_loss=9.13, batch

Epoch 8/10:  86%|▊| 857/991 [3:40:56<35:02, 15.69s/batch, batch_loss=9.13, batch

Epoch 8/10:  86%|▊| 857/991 [3:41:11<35:02, 15.69s/batch, batch_loss=19.6, batch

Epoch 8/10:  87%|▊| 858/991 [3:41:11<34:29, 15.56s/batch, batch_loss=19.6, batch

Epoch 8/10:  87%|▊| 858/991 [3:41:28<34:29, 15.56s/batch, batch_loss=14.4, batch

Epoch 8/10:  87%|▊| 859/991 [3:41:28<34:59, 15.90s/batch, batch_loss=14.4, batch

Epoch 8/10:  87%|▊| 859/991 [3:41:44<34:59, 15.90s/batch, batch_loss=17.8, batch

Epoch 8/10:  87%|▊| 860/991 [3:41:44<35:06, 16.08s/batch, batch_loss=17.8, batch

Epoch 8/10:  87%|▊| 860/991 [3:42:00<35:06, 16.08s/batch, batch_loss=7.17, batch

Epoch 8/10:  87%|▊| 861/991 [3:42:00<34:42, 16.02s/batch, batch_loss=7.17, batch

Epoch 8/10:  87%|▊| 861/991 [3:42:17<34:42, 16.02s/batch, batch_loss=17.7, batch

Epoch 8/10:  87%|▊| 862/991 [3:42:17<34:37, 16.10s/batch, batch_loss=17.7, batch

Epoch 8/10:  87%|▊| 862/991 [3:42:33<34:37, 16.10s/batch, batch_loss=26.7, batch

Epoch 8/10:  87%|▊| 863/991 [3:42:33<34:37, 16.23s/batch, batch_loss=26.7, batch

Epoch 8/10:  87%|▊| 863/991 [3:42:51<34:37, 16.23s/batch, batch_loss=9.05, batch

Epoch 8/10:  87%|▊| 864/991 [3:42:51<35:06, 16.59s/batch, batch_loss=9.05, batch

Epoch 8/10:  87%|▊| 864/991 [3:43:07<35:06, 16.59s/batch, batch_loss=16.6, batch

Epoch 8/10:  87%|▊| 865/991 [3:43:07<34:52, 16.60s/batch, batch_loss=16.6, batch

Epoch 8/10:  87%|▊| 865/991 [3:43:23<34:52, 16.60s/batch, batch_loss=20.7, batch

Epoch 8/10:  87%|▊| 866/991 [3:43:23<34:05, 16.36s/batch, batch_loss=20.7, batch

Epoch 8/10:  87%|▊| 866/991 [3:43:40<34:05, 16.36s/batch, batch_loss=20.1, batch

Epoch 8/10:  87%|▊| 867/991 [3:43:40<33:54, 16.41s/batch, batch_loss=20.1, batch

Epoch 8/10:  87%|▊| 867/991 [3:43:56<33:54, 16.41s/batch, batch_loss=20.6, batch

Epoch 8/10:  88%|▉| 868/991 [3:43:56<33:24, 16.30s/batch, batch_loss=20.6, batch

Epoch 8/10:  88%|▉| 868/991 [3:44:12<33:24, 16.30s/batch, batch_loss=10.8, batch

Epoch 8/10:  88%|▉| 869/991 [3:44:12<33:07, 16.29s/batch, batch_loss=10.8, batch

Epoch 8/10:  88%|▉| 869/991 [3:44:29<33:07, 16.29s/batch, batch_loss=12.6, batch

Epoch 8/10:  88%|▉| 870/991 [3:44:29<33:35, 16.65s/batch, batch_loss=12.6, batch

Epoch 8/10:  88%|▉| 870/991 [3:44:45<33:35, 16.65s/batch, batch_loss=8.01, batch

Epoch 8/10:  88%|▉| 871/991 [3:44:45<32:40, 16.33s/batch, batch_loss=8.01, batch

Epoch 8/10:  88%|▉| 871/991 [3:45:02<32:40, 16.33s/batch, batch_loss=19.5, batch

Epoch 8/10:  88%|▉| 872/991 [3:45:02<32:35, 16.43s/batch, batch_loss=19.5, batch

Epoch 8/10:  88%|▉| 872/991 [3:45:18<32:35, 16.43s/batch, batch_loss=12.4, batch

Epoch 8/10:  88%|▉| 873/991 [3:45:18<32:07, 16.33s/batch, batch_loss=12.4, batch

Epoch 8/10:  88%|▉| 873/991 [3:45:35<32:07, 16.33s/batch, batch_loss=7.08, batch

Epoch 8/10:  88%|▉| 874/991 [3:45:35<32:26, 16.63s/batch, batch_loss=7.08, batch

Epoch 8/10:  88%|▉| 874/991 [3:45:52<32:26, 16.63s/batch, batch_loss=13.7, batch

Epoch 8/10:  88%|▉| 875/991 [3:45:52<32:21, 16.74s/batch, batch_loss=13.7, batch

Epoch 8/10:  88%|▉| 875/991 [3:46:07<32:21, 16.74s/batch, batch_loss=19.6, batch

Epoch 8/10:  88%|▉| 876/991 [3:46:07<31:19, 16.35s/batch, batch_loss=19.6, batch

Epoch 8/10:  88%|▉| 876/991 [3:46:23<31:19, 16.35s/batch, batch_loss=18, batch_i

Epoch 8/10:  88%|▉| 877/991 [3:46:23<30:24, 16.00s/batch, batch_loss=18, batch_i

Epoch 8/10:  88%|▉| 877/991 [3:46:39<30:24, 16.00s/batch, batch_loss=23.5, batch

Epoch 8/10:  89%|▉| 878/991 [3:46:39<30:02, 15.95s/batch, batch_loss=23.5, batch

Epoch 8/10:  89%|▉| 878/991 [3:46:55<30:02, 15.95s/batch, batch_loss=17.5, batch

Epoch 8/10:  89%|▉| 879/991 [3:46:55<30:01, 16.09s/batch, batch_loss=17.5, batch

Epoch 8/10:  89%|▉| 879/991 [3:47:11<30:01, 16.09s/batch, batch_loss=12.7, batch

Epoch 8/10:  89%|▉| 880/991 [3:47:11<29:34, 15.99s/batch, batch_loss=12.7, batch

Epoch 8/10:  89%|▉| 880/991 [3:47:27<29:34, 15.99s/batch, batch_loss=5.12e+3, ba

Epoch 8/10:  89%|▉| 881/991 [3:47:27<29:41, 16.19s/batch, batch_loss=5.12e+3, ba

Epoch 8/10:  89%|▉| 881/991 [3:47:43<29:41, 16.19s/batch, batch_loss=15.2, batch

Epoch 8/10:  89%|▉| 882/991 [3:47:43<29:19, 16.14s/batch, batch_loss=15.2, batch

Epoch 8/10:  89%|▉| 882/991 [3:48:00<29:19, 16.14s/batch, batch_loss=16.8, batch

Epoch 8/10:  89%|▉| 883/991 [3:48:00<29:16, 16.26s/batch, batch_loss=16.8, batch

Epoch 8/10:  89%|▉| 883/991 [3:48:17<29:16, 16.26s/batch, batch_loss=9.01, batch

Epoch 8/10:  89%|▉| 884/991 [3:48:17<29:32, 16.56s/batch, batch_loss=9.01, batch

Epoch 8/10:  89%|▉| 884/991 [3:48:34<29:32, 16.56s/batch, batch_loss=12.6, batch

Epoch 8/10:  89%|▉| 885/991 [3:48:34<29:32, 16.72s/batch, batch_loss=12.6, batch

Epoch 8/10:  89%|▉| 885/991 [3:48:51<29:32, 16.72s/batch, batch_loss=16.1, batch

Epoch 8/10:  89%|▉| 886/991 [3:48:51<29:14, 16.71s/batch, batch_loss=16.1, batch

Epoch 8/10:  89%|▉| 886/991 [3:49:08<29:14, 16.71s/batch, batch_loss=1.93e+4, ba

Epoch 8/10:  90%|▉| 887/991 [3:49:08<28:55, 16.69s/batch, batch_loss=1.93e+4, ba

Epoch 8/10:  90%|▉| 887/991 [3:49:24<28:55, 16.69s/batch, batch_loss=17.2, batch

Epoch 8/10:  90%|▉| 888/991 [3:49:24<28:17, 16.48s/batch, batch_loss=17.2, batch

Epoch 8/10:  90%|▉| 888/991 [3:49:40<28:17, 16.48s/batch, batch_loss=18.6, batch

Epoch 8/10:  90%|▉| 889/991 [3:49:40<28:03, 16.51s/batch, batch_loss=18.6, batch

Epoch 8/10:  90%|▉| 889/991 [3:49:55<28:03, 16.51s/batch, batch_loss=12.2, batch

Epoch 8/10:  90%|▉| 890/991 [3:49:55<26:59, 16.04s/batch, batch_loss=12.2, batch

Epoch 8/10:  90%|▉| 890/991 [3:50:10<26:59, 16.04s/batch, batch_loss=14.3, batch

Epoch 8/10:  90%|▉| 891/991 [3:50:10<26:23, 15.84s/batch, batch_loss=14.3, batch

Epoch 8/10:  90%|▉| 891/991 [3:50:27<26:23, 15.84s/batch, batch_loss=15.5, batch

Epoch 8/10:  90%|▉| 892/991 [3:50:27<26:37, 16.14s/batch, batch_loss=15.5, batch

Epoch 8/10:  90%|▉| 892/991 [3:50:45<26:37, 16.14s/batch, batch_loss=3.71e+3, ba

Epoch 8/10:  90%|▉| 893/991 [3:50:45<26:57, 16.51s/batch, batch_loss=3.71e+3, ba

Epoch 8/10:  90%|▉| 893/991 [3:51:01<26:57, 16.51s/batch, batch_loss=8.38, batch

Epoch 8/10:  90%|▉| 894/991 [3:51:01<26:39, 16.49s/batch, batch_loss=8.38, batch

Epoch 8/10:  90%|▉| 894/991 [3:51:17<26:39, 16.49s/batch, batch_loss=13.7, batch

Epoch 8/10:  90%|▉| 895/991 [3:51:17<26:11, 16.37s/batch, batch_loss=13.7, batch

Epoch 8/10:  90%|▉| 895/991 [3:51:33<26:11, 16.37s/batch, batch_loss=11, batch_i

Epoch 8/10:  90%|▉| 896/991 [3:51:33<25:51, 16.33s/batch, batch_loss=11, batch_i

Epoch 8/10:  90%|▉| 896/991 [3:51:49<25:51, 16.33s/batch, batch_loss=16.6, batch

Epoch 8/10:  91%|▉| 897/991 [3:51:49<25:07, 16.03s/batch, batch_loss=16.6, batch

Epoch 8/10:  91%|▉| 897/991 [3:52:04<25:07, 16.03s/batch, batch_loss=17.8, batch

Epoch 8/10:  91%|▉| 898/991 [3:52:04<24:24, 15.75s/batch, batch_loss=17.8, batch

Epoch 8/10:  91%|▉| 898/991 [3:52:19<24:24, 15.75s/batch, batch_loss=15.9, batch

Epoch 8/10:  91%|▉| 899/991 [3:52:19<23:57, 15.62s/batch, batch_loss=15.9, batch

Epoch 8/10:  91%|▉| 899/991 [3:52:35<23:57, 15.62s/batch, batch_loss=16.7, batch

Epoch 8/10:  91%|▉| 900/991 [3:52:35<23:50, 15.72s/batch, batch_loss=16.7, batch

Epoch 8/10:  91%|▉| 900/991 [3:52:50<23:50, 15.72s/batch, batch_loss=16.4, batch

Epoch 8/10:  91%|▉| 901/991 [3:52:50<23:10, 15.45s/batch, batch_loss=16.4, batch

Epoch 8/10:  91%|▉| 901/991 [3:53:06<23:10, 15.45s/batch, batch_loss=11.5, batch

Epoch 8/10:  91%|▉| 902/991 [3:53:06<23:14, 15.67s/batch, batch_loss=11.5, batch

Epoch 8/10:  91%|▉| 902/991 [3:53:23<23:14, 15.67s/batch, batch_loss=6.98, batch

Epoch 8/10:  91%|▉| 903/991 [3:53:23<23:26, 15.99s/batch, batch_loss=6.98, batch

Epoch 8/10:  91%|▉| 903/991 [3:53:39<23:26, 15.99s/batch, batch_loss=9.11, batch

Epoch 8/10:  91%|▉| 904/991 [3:53:39<23:20, 16.10s/batch, batch_loss=9.11, batch

Epoch 8/10:  91%|▉| 904/991 [3:53:55<23:20, 16.10s/batch, batch_loss=24.2, batch

Epoch 8/10:  91%|▉| 905/991 [3:53:55<22:59, 16.04s/batch, batch_loss=24.2, batch

Epoch 8/10:  91%|▉| 905/991 [3:54:11<22:59, 16.04s/batch, batch_loss=18.4, batch

Epoch 8/10:  91%|▉| 906/991 [3:54:11<22:41, 16.02s/batch, batch_loss=18.4, batch

Epoch 8/10:  91%|▉| 906/991 [3:54:27<22:41, 16.02s/batch, batch_loss=18.5, batch

Epoch 8/10:  92%|▉| 907/991 [3:54:27<22:21, 15.97s/batch, batch_loss=18.5, batch

Epoch 8/10:  92%|▉| 907/991 [3:54:45<22:21, 15.97s/batch, batch_loss=15.2, batch

Epoch 8/10:  92%|▉| 908/991 [3:54:45<22:53, 16.54s/batch, batch_loss=15.2, batch

Epoch 8/10:  92%|▉| 908/991 [3:55:01<22:53, 16.54s/batch, batch_loss=5.78, batch

Epoch 8/10:  92%|▉| 909/991 [3:55:01<22:29, 16.45s/batch, batch_loss=5.78, batch

Epoch 8/10:  92%|▉| 909/991 [3:55:17<22:29, 16.45s/batch, batch_loss=684, batch_

Epoch 8/10:  92%|▉| 910/991 [3:55:17<21:53, 16.22s/batch, batch_loss=684, batch_

Epoch 8/10:  92%|▉| 910/991 [3:55:33<21:53, 16.22s/batch, batch_loss=1.02e+3, ba

Epoch 8/10:  92%|▉| 911/991 [3:55:33<21:45, 16.32s/batch, batch_loss=1.02e+3, ba

Epoch 8/10:  92%|▉| 911/991 [3:55:51<21:45, 16.32s/batch, batch_loss=24.2, batch

Epoch 8/10:  92%|▉| 912/991 [3:55:51<22:12, 16.86s/batch, batch_loss=24.2, batch

Epoch 8/10:  92%|▉| 912/991 [3:56:10<22:12, 16.86s/batch, batch_loss=22.7, batch

Epoch 8/10:  92%|▉| 913/991 [3:56:10<22:25, 17.25s/batch, batch_loss=22.7, batch

Epoch 8/10:  92%|▉| 913/991 [3:56:27<22:25, 17.25s/batch, batch_loss=19.7, batch

Epoch 8/10:  92%|▉| 914/991 [3:56:27<22:00, 17.15s/batch, batch_loss=19.7, batch

Epoch 8/10:  92%|▉| 914/991 [3:56:44<22:00, 17.15s/batch, batch_loss=17.5, batch

Epoch 8/10:  92%|▉| 915/991 [3:56:44<21:44, 17.16s/batch, batch_loss=17.5, batch

Epoch 8/10:  92%|▉| 915/991 [3:57:01<21:44, 17.16s/batch, batch_loss=14.5, batch

Epoch 8/10:  92%|▉| 916/991 [3:57:01<21:35, 17.27s/batch, batch_loss=14.5, batch

Epoch 8/10:  92%|▉| 916/991 [3:57:17<21:35, 17.27s/batch, batch_loss=7.8, batch_

Epoch 8/10:  93%|▉| 917/991 [3:57:17<20:45, 16.84s/batch, batch_loss=7.8, batch_

Epoch 8/10:  93%|▉| 917/991 [3:57:34<20:45, 16.84s/batch, batch_loss=11.4, batch

Epoch 8/10:  93%|▉| 918/991 [3:57:34<20:25, 16.78s/batch, batch_loss=11.4, batch

Epoch 8/10:  93%|▉| 918/991 [3:57:50<20:25, 16.78s/batch, batch_loss=11.4, batch

Epoch 8/10:  93%|▉| 919/991 [3:57:50<19:59, 16.65s/batch, batch_loss=11.4, batch

Epoch 8/10:  93%|▉| 919/991 [3:58:05<19:59, 16.65s/batch, batch_loss=13.3, batch

Epoch 8/10:  93%|▉| 920/991 [3:58:05<19:11, 16.22s/batch, batch_loss=13.3, batch

Epoch 8/10:  93%|▉| 920/991 [3:58:21<19:11, 16.22s/batch, batch_loss=16.7, batch

Epoch 8/10:  93%|▉| 921/991 [3:58:21<18:43, 16.05s/batch, batch_loss=16.7, batch

Epoch 8/10:  93%|▉| 921/991 [3:58:36<18:43, 16.05s/batch, batch_loss=20.4, batch

Epoch 8/10:  93%|▉| 922/991 [3:58:36<18:02, 15.69s/batch, batch_loss=20.4, batch

Epoch 8/10:  93%|▉| 922/991 [3:58:51<18:02, 15.69s/batch, batch_loss=7.51, batch

Epoch 8/10:  93%|▉| 923/991 [3:58:51<17:39, 15.58s/batch, batch_loss=7.51, batch

Epoch 8/10:  93%|▉| 923/991 [3:59:08<17:39, 15.58s/batch, batch_loss=11, batch_i

Epoch 8/10:  93%|▉| 924/991 [3:59:08<17:40, 15.83s/batch, batch_loss=11, batch_i

Epoch 8/10:  93%|▉| 924/991 [3:59:24<17:40, 15.83s/batch, batch_loss=10.8, batch

Epoch 8/10:  93%|▉| 925/991 [3:59:24<17:43, 16.11s/batch, batch_loss=10.8, batch

Epoch 8/10:  93%|▉| 925/991 [3:59:41<17:43, 16.11s/batch, batch_loss=3e+4, batch

Epoch 8/10:  93%|▉| 926/991 [3:59:41<17:32, 16.20s/batch, batch_loss=3e+4, batch

Epoch 8/10:  93%|▉| 926/991 [3:59:58<17:32, 16.20s/batch, batch_loss=6.33, batch

Epoch 8/10:  94%|▉| 927/991 [3:59:58<17:28, 16.39s/batch, batch_loss=6.33, batch

Epoch 8/10:  94%|▉| 927/991 [4:00:14<17:28, 16.39s/batch, batch_loss=853, batch_

Epoch 8/10:  94%|▉| 928/991 [4:00:14<17:13, 16.41s/batch, batch_loss=853, batch_

Epoch 8/10:  94%|▉| 928/991 [4:00:30<17:13, 16.41s/batch, batch_loss=9.85, batch

Epoch 8/10:  94%|▉| 929/991 [4:00:30<16:51, 16.32s/batch, batch_loss=9.85, batch

Epoch 8/10:  94%|▉| 929/991 [4:00:46<16:51, 16.32s/batch, batch_loss=7.93, batch

Epoch 8/10:  94%|▉| 930/991 [4:00:46<16:32, 16.27s/batch, batch_loss=7.93, batch

Epoch 8/10:  94%|▉| 930/991 [4:01:02<16:32, 16.27s/batch, batch_loss=11.2, batch

Epoch 8/10:  94%|▉| 931/991 [4:01:02<16:13, 16.23s/batch, batch_loss=11.2, batch

Epoch 8/10:  94%|▉| 931/991 [4:01:19<16:13, 16.23s/batch, batch_loss=10, batch_i

Epoch 8/10:  94%|▉| 932/991 [4:01:19<16:03, 16.34s/batch, batch_loss=10, batch_i

Epoch 8/10:  94%|▉| 932/991 [4:01:36<16:03, 16.34s/batch, batch_loss=11.3, batch

Epoch 8/10:  94%|▉| 933/991 [4:01:36<15:57, 16.51s/batch, batch_loss=11.3, batch

Epoch 8/10:  94%|▉| 933/991 [4:01:53<15:57, 16.51s/batch, batch_loss=2.01, batch

Epoch 8/10:  94%|▉| 934/991 [4:01:53<15:54, 16.74s/batch, batch_loss=2.01, batch

Epoch 8/10:  94%|▉| 934/991 [4:02:10<15:54, 16.74s/batch, batch_loss=1.81, batch

Epoch 8/10:  94%|▉| 935/991 [4:02:10<15:42, 16.82s/batch, batch_loss=1.81, batch

Epoch 8/10:  94%|▉| 935/991 [4:02:27<15:42, 16.82s/batch, batch_loss=164, batch_

Epoch 8/10:  94%|▉| 936/991 [4:02:27<15:20, 16.74s/batch, batch_loss=164, batch_

Epoch 8/10:  94%|▉| 936/991 [4:02:43<15:20, 16.74s/batch, batch_loss=36.3, batch

Epoch 8/10:  95%|▉| 937/991 [4:02:43<15:00, 16.68s/batch, batch_loss=36.3, batch

Epoch 8/10:  95%|▉| 937/991 [4:02:59<15:00, 16.68s/batch, batch_loss=9.3, batch_

Epoch 8/10:  95%|▉| 938/991 [4:02:59<14:26, 16.36s/batch, batch_loss=9.3, batch_

Epoch 8/10:  95%|▉| 938/991 [4:03:15<14:26, 16.36s/batch, batch_loss=8.86, batch

Epoch 8/10:  95%|▉| 939/991 [4:03:15<14:09, 16.33s/batch, batch_loss=8.86, batch

Epoch 8/10:  95%|▉| 939/991 [4:03:32<14:09, 16.33s/batch, batch_loss=418, batch_

Epoch 8/10:  95%|▉| 940/991 [4:03:32<14:02, 16.52s/batch, batch_loss=418, batch_

Epoch 8/10:  95%|▉| 940/991 [4:03:49<14:02, 16.52s/batch, batch_loss=16.5, batch

Epoch 8/10:  95%|▉| 941/991 [4:03:49<13:54, 16.70s/batch, batch_loss=16.5, batch

Epoch 8/10:  95%|▉| 941/991 [4:04:06<13:54, 16.70s/batch, batch_loss=14.7, batch

Epoch 8/10:  95%|▉| 942/991 [4:04:06<13:37, 16.69s/batch, batch_loss=14.7, batch

Epoch 8/10:  95%|▉| 942/991 [4:04:22<13:37, 16.69s/batch, batch_loss=11.1, batch

Epoch 8/10:  95%|▉| 943/991 [4:04:22<13:15, 16.58s/batch, batch_loss=11.1, batch

Epoch 8/10:  95%|▉| 943/991 [4:04:39<13:15, 16.58s/batch, batch_loss=12.9, batch

Epoch 8/10:  95%|▉| 944/991 [4:04:39<12:59, 16.59s/batch, batch_loss=12.9, batch

Epoch 8/10:  95%|▉| 944/991 [4:04:56<12:59, 16.59s/batch, batch_loss=2.47, batch

Epoch 8/10:  95%|▉| 945/991 [4:04:56<12:46, 16.66s/batch, batch_loss=2.47, batch

Epoch 8/10:  95%|▉| 945/991 [4:05:12<12:46, 16.66s/batch, batch_loss=13.8, batch

Epoch 8/10:  95%|▉| 946/991 [4:05:12<12:29, 16.66s/batch, batch_loss=13.8, batch

Epoch 8/10:  95%|▉| 946/991 [4:05:29<12:29, 16.66s/batch, batch_loss=14.3, batch

Epoch 8/10:  96%|▉| 947/991 [4:05:29<12:16, 16.74s/batch, batch_loss=14.3, batch

Epoch 8/10:  96%|▉| 947/991 [4:05:46<12:16, 16.74s/batch, batch_loss=10.2, batch

Epoch 8/10:  96%|▉| 948/991 [4:05:46<11:59, 16.74s/batch, batch_loss=10.2, batch

Epoch 8/10:  96%|▉| 948/991 [4:06:03<11:59, 16.74s/batch, batch_loss=6.25, batch

Epoch 8/10:  96%|▉| 949/991 [4:06:03<11:44, 16.78s/batch, batch_loss=6.25, batch

Epoch 8/10:  96%|▉| 949/991 [4:06:19<11:44, 16.78s/batch, batch_loss=8.29, batch

Epoch 8/10:  96%|▉| 950/991 [4:06:19<11:20, 16.59s/batch, batch_loss=8.29, batch

Epoch 8/10:  96%|▉| 950/991 [4:06:36<11:20, 16.59s/batch, batch_loss=15.8, batch

Epoch 8/10:  96%|▉| 951/991 [4:06:36<11:12, 16.81s/batch, batch_loss=15.8, batch

Epoch 8/10:  96%|▉| 951/991 [4:06:54<11:12, 16.81s/batch, batch_loss=15.7, batch

Epoch 8/10:  96%|▉| 952/991 [4:06:54<11:07, 17.11s/batch, batch_loss=15.7, batch

Epoch 8/10:  96%|▉| 952/991 [4:07:12<11:07, 17.11s/batch, batch_loss=6.96, batch

Epoch 8/10:  96%|▉| 953/991 [4:07:12<10:53, 17.19s/batch, batch_loss=6.96, batch

Epoch 8/10:  96%|▉| 953/991 [4:07:30<10:53, 17.19s/batch, batch_loss=333, batch_

Epoch 8/10:  96%|▉| 954/991 [4:07:30<10:53, 17.66s/batch, batch_loss=333, batch_

Epoch 8/10:  96%|▉| 954/991 [4:07:49<10:53, 17.66s/batch, batch_loss=12.6, batch

Epoch 8/10:  96%|▉| 955/991 [4:07:49<10:52, 18.13s/batch, batch_loss=12.6, batch

Epoch 8/10:  96%|▉| 955/991 [4:08:08<10:52, 18.13s/batch, batch_loss=14.5, batch

Epoch 8/10:  96%|▉| 956/991 [4:08:08<10:43, 18.39s/batch, batch_loss=14.5, batch

Epoch 8/10:  96%|▉| 956/991 [4:08:26<10:43, 18.39s/batch, batch_loss=14.9, batch

Epoch 8/10:  97%|▉| 957/991 [4:08:26<10:13, 18.03s/batch, batch_loss=14.9, batch

Epoch 8/10:  97%|▉| 957/991 [4:08:43<10:13, 18.03s/batch, batch_loss=12.5, batch

Epoch 8/10:  97%|▉| 958/991 [4:08:43<09:51, 17.93s/batch, batch_loss=12.5, batch

Epoch 8/10:  97%|▉| 958/991 [4:09:01<09:51, 17.93s/batch, batch_loss=7.22, batch

Epoch 8/10:  97%|▉| 959/991 [4:09:01<09:33, 17.91s/batch, batch_loss=7.22, batch

Epoch 8/10:  97%|▉| 959/991 [4:09:19<09:33, 17.91s/batch, batch_loss=12.1, batch

Epoch 8/10:  97%|▉| 960/991 [4:09:19<09:15, 17.92s/batch, batch_loss=12.1, batch

Epoch 8/10:  97%|▉| 960/991 [4:09:37<09:15, 17.92s/batch, batch_loss=15.3, batch

Epoch 8/10:  97%|▉| 961/991 [4:09:37<08:58, 17.95s/batch, batch_loss=15.3, batch

Epoch 8/10:  97%|▉| 961/991 [4:09:55<08:58, 17.95s/batch, batch_loss=4.53, batch

Epoch 8/10:  97%|▉| 962/991 [4:09:55<08:37, 17.84s/batch, batch_loss=4.53, batch

Epoch 8/10:  97%|▉| 962/991 [4:10:13<08:37, 17.84s/batch, batch_loss=6.67, batch

Epoch 8/10:  97%|▉| 963/991 [4:10:13<08:25, 18.05s/batch, batch_loss=6.67, batch

Epoch 8/10:  97%|▉| 963/991 [4:10:34<08:25, 18.05s/batch, batch_loss=9.44e+3, ba

Epoch 8/10:  97%|▉| 964/991 [4:10:34<08:27, 18.79s/batch, batch_loss=9.44e+3, ba

Epoch 8/10:  97%|▉| 964/991 [4:10:50<08:27, 18.79s/batch, batch_loss=19.1, batch

Epoch 8/10:  97%|▉| 965/991 [4:10:50<07:49, 18.04s/batch, batch_loss=19.1, batch

Epoch 8/10:  97%|▉| 965/991 [4:11:07<07:49, 18.04s/batch, batch_loss=17.1, batch

Epoch 8/10:  97%|▉| 966/991 [4:11:07<07:22, 17.71s/batch, batch_loss=17.1, batch

Epoch 8/10:  97%|▉| 966/991 [4:11:26<07:22, 17.71s/batch, batch_loss=2.41e+4, ba

Epoch 8/10:  98%|▉| 967/991 [4:11:26<07:13, 18.06s/batch, batch_loss=2.41e+4, ba

Epoch 8/10:  98%|▉| 967/991 [4:11:43<07:13, 18.06s/batch, batch_loss=411, batch_

Epoch 8/10:  98%|▉| 968/991 [4:11:43<06:50, 17.84s/batch, batch_loss=411, batch_

Epoch 8/10:  98%|▉| 968/991 [4:12:01<06:50, 17.84s/batch, batch_loss=20.4, batch

Epoch 8/10:  98%|▉| 969/991 [4:12:01<06:31, 17.78s/batch, batch_loss=20.4, batch

Epoch 8/10:  98%|▉| 969/991 [4:12:19<06:31, 17.78s/batch, batch_loss=1.38, batch

Epoch 8/10:  98%|▉| 970/991 [4:12:19<06:15, 17.88s/batch, batch_loss=1.38, batch

Epoch 8/10:  98%|▉| 970/991 [4:12:38<06:15, 17.88s/batch, batch_loss=10.1, batch

Epoch 8/10:  98%|▉| 971/991 [4:12:38<06:02, 18.11s/batch, batch_loss=10.1, batch

Epoch 8/10:  98%|▉| 971/991 [4:12:57<06:02, 18.11s/batch, batch_loss=23.7, batch

Epoch 8/10:  98%|▉| 972/991 [4:12:57<05:48, 18.33s/batch, batch_loss=23.7, batch

Epoch 8/10:  98%|▉| 972/991 [4:13:16<05:48, 18.33s/batch, batch_loss=19.3, batch

Epoch 8/10:  98%|▉| 973/991 [4:13:16<05:34, 18.60s/batch, batch_loss=19.3, batch

Epoch 8/10:  98%|▉| 973/991 [4:13:35<05:34, 18.60s/batch, batch_loss=13.6, batch

Epoch 8/10:  98%|▉| 974/991 [4:13:35<05:17, 18.68s/batch, batch_loss=13.6, batch

Epoch 8/10:  98%|▉| 974/991 [4:13:54<05:17, 18.68s/batch, batch_loss=7.13, batch

Epoch 8/10:  98%|▉| 975/991 [4:13:54<05:04, 19.01s/batch, batch_loss=7.13, batch

Epoch 8/10:  98%|▉| 975/991 [4:14:14<05:04, 19.01s/batch, batch_loss=25.3, batch

Epoch 8/10:  98%|▉| 976/991 [4:14:14<04:46, 19.10s/batch, batch_loss=25.3, batch

Epoch 8/10:  98%|▉| 976/991 [4:14:35<04:46, 19.10s/batch, batch_loss=0.658, batc

Epoch 8/10:  99%|▉| 977/991 [4:14:35<04:37, 19.80s/batch, batch_loss=0.658, batc

Epoch 8/10:  99%|▉| 977/991 [4:14:54<04:37, 19.80s/batch, batch_loss=0.504, batc

Epoch 8/10:  99%|▉| 978/991 [4:14:54<04:13, 19.53s/batch, batch_loss=0.504, batc

Epoch 8/10:  99%|▉| 978/991 [4:15:13<04:13, 19.53s/batch, batch_loss=0.309, batc

Epoch 8/10:  99%|▉| 979/991 [4:15:13<03:50, 19.22s/batch, batch_loss=0.309, batc

Epoch 8/10:  99%|▉| 979/991 [4:15:32<03:50, 19.22s/batch, batch_loss=0.227, batc

Epoch 8/10:  99%|▉| 980/991 [4:15:32<03:32, 19.28s/batch, batch_loss=0.227, batc

Epoch 8/10:  99%|▉| 980/991 [4:15:49<03:32, 19.28s/batch, batch_loss=0.167, batc

Epoch 8/10:  99%|▉| 981/991 [4:15:49<03:06, 18.68s/batch, batch_loss=0.167, batc

Epoch 8/10:  99%|▉| 981/991 [4:16:08<03:06, 18.68s/batch, batch_loss=0.125, batc

Epoch 8/10:  99%|▉| 982/991 [4:16:08<02:47, 18.64s/batch, batch_loss=0.125, batc

Epoch 8/10:  99%|▉| 982/991 [4:16:27<02:47, 18.64s/batch, batch_loss=0.0969, bat

Epoch 8/10:  99%|▉| 983/991 [4:16:27<02:30, 18.80s/batch, batch_loss=0.0969, bat

Epoch 8/10:  99%|▉| 983/991 [4:16:46<02:30, 18.80s/batch, batch_loss=0.0782, bat

Epoch 8/10:  99%|▉| 984/991 [4:16:46<02:11, 18.77s/batch, batch_loss=0.0782, bat

Epoch 8/10:  99%|▉| 984/991 [4:17:05<02:11, 18.77s/batch, batch_loss=0.0651, bat

Epoch 8/10:  99%|▉| 985/991 [4:17:05<01:52, 18.81s/batch, batch_loss=0.0651, bat

Epoch 8/10:  99%|▉| 985/991 [4:17:23<01:52, 18.81s/batch, batch_loss=0.0562, bat

Epoch 8/10:  99%|▉| 986/991 [4:17:23<01:33, 18.65s/batch, batch_loss=0.0562, bat

Epoch 8/10:  99%|▉| 986/991 [4:17:43<01:33, 18.65s/batch, batch_loss=0.051, batc

Epoch 8/10: 100%|▉| 987/991 [4:17:43<01:16, 19.06s/batch, batch_loss=0.051, batc

Epoch 8/10: 100%|▉| 987/991 [4:18:02<01:16, 19.06s/batch, batch_loss=0.0491, bat

Epoch 8/10: 100%|▉| 988/991 [4:18:02<00:56, 18.94s/batch, batch_loss=0.0491, bat

Epoch 8/10: 100%|▉| 988/991 [4:18:22<00:56, 18.94s/batch, batch_loss=0.0495, bat

Epoch 8/10: 100%|▉| 989/991 [4:18:22<00:38, 19.48s/batch, batch_loss=0.0495, bat

Epoch 8/10: 100%|▉| 989/991 [4:18:41<00:38, 19.48s/batch, batch_loss=0.0507, bat

Epoch 8/10: 100%|▉| 990/991 [4:18:41<00:19, 19.32s/batch, batch_loss=0.0507, bat

Epoch 8/10: 100%|▉| 990/991 [4:18:56<00:19, 19.32s/batch, batch_loss=0.0532, bat

Epoch 8/10: 100%|█| 991/991 [4:18:56<00:00, 17.93s/batch, batch_loss=0.0532, bat

Epoch 8/10: 100%|█| 991/991 [4:18:56<00:00, 15.68s/batch, batch_loss=0.0532, bat




Epoch 8, Loss: 986.7820


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:20<?, ?batch/s, batch_loss=16.7, batch_index=1, ba

Validation:   0%| | 1/743 [00:20<4:14:40, 20.59s/batch, batch_loss=16.7, batch_i

Validation:   0%| | 1/743 [00:39<4:14:40, 20.59s/batch, batch_loss=17, batch_ind

Validation:   0%| | 2/743 [00:39<4:05:23, 19.87s/batch, batch_loss=17, batch_ind

Validation:   0%| | 2/743 [01:00<4:05:23, 19.87s/batch, batch_loss=14.9, batch_i

Validation:   0%| | 3/743 [01:00<4:07:12, 20.04s/batch, batch_loss=14.9, batch_i

Validation:   0%| | 3/743 [01:16<4:07:12, 20.04s/batch, batch_loss=10.7, batch_i

Validation:   1%| | 4/743 [01:16<3:48:41, 18.57s/batch, batch_loss=10.7, batch_i

Validation:   1%| | 4/743 [01:35<3:48:41, 18.57s/batch, batch_loss=20.2, batch_i

Validation:   1%| | 5/743 [01:35<3:52:11, 18.88s/batch, batch_loss=20.2, batch_i

Validation:   1%| | 5/743 [01:52<3:52:11, 18.88s/batch, batch_loss=21.2, batch_i

Validation:   1%| | 6/743 [01:52<3:42:33, 18.12s/batch, batch_loss=21.2, batch_i

Validation:   1%| | 6/743 [02:07<3:42:33, 18.12s/batch, batch_loss=555, batch_in

Validation:   1%| | 7/743 [02:07<3:29:32, 17.08s/batch, batch_loss=555, batch_in

Validation:   1%| | 7/743 [02:24<3:29:32, 17.08s/batch, batch_loss=16.5, batch_i

Validation:   1%| | 8/743 [02:24<3:26:54, 16.89s/batch, batch_loss=16.5, batch_i

Validation:   1%| | 8/743 [02:41<3:26:54, 16.89s/batch, batch_loss=14.6, batch_i

Validation:   1%| | 9/743 [02:41<3:27:18, 16.95s/batch, batch_loss=14.6, batch_i

Validation:   1%| | 9/743 [02:59<3:27:18, 16.95s/batch, batch_loss=14.2, batch_i

Validation:   1%| | 10/743 [02:59<3:31:14, 17.29s/batch, batch_loss=14.2, batch_

Validation:   1%| | 10/743 [03:19<3:31:14, 17.29s/batch, batch_loss=11.2, batch_

Validation:   1%| | 11/743 [03:19<3:40:39, 18.09s/batch, batch_loss=11.2, batch_

Validation:   1%| | 11/743 [03:35<3:40:39, 18.09s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:35<3:34:43, 17.62s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:52<3:34:43, 17.62s/batch, batch_loss=13.8, batch_

Validation:   2%| | 13/743 [03:52<3:31:00, 17.34s/batch, batch_loss=13.8, batch_

Validation:   2%| | 13/743 [04:09<3:31:00, 17.34s/batch, batch_loss=9.56, batch_

Validation:   2%| | 14/743 [04:09<3:28:50, 17.19s/batch, batch_loss=9.56, batch_

Validation:   2%| | 14/743 [04:26<3:28:50, 17.19s/batch, batch_loss=18.4, batch_

Validation:   2%| | 15/743 [04:26<3:28:44, 17.20s/batch, batch_loss=18.4, batch_

Validation:   2%| | 15/743 [04:43<3:28:44, 17.20s/batch, batch_loss=16.3, batch_

Validation:   2%| | 16/743 [04:43<3:28:20, 17.19s/batch, batch_loss=16.3, batch_

Validation:   2%| | 16/743 [05:01<3:28:20, 17.19s/batch, batch_loss=11.9, batch_

Validation:   2%| | 17/743 [05:01<3:32:03, 17.53s/batch, batch_loss=11.9, batch_

Validation:   2%| | 17/743 [05:19<3:32:03, 17.53s/batch, batch_loss=4.53e+3, bat

Validation:   2%| | 18/743 [05:19<3:32:19, 17.57s/batch, batch_loss=4.53e+3, bat

Validation:   2%| | 18/743 [05:37<3:32:19, 17.57s/batch, batch_loss=9.99, batch_

Validation:   3%| | 19/743 [05:37<3:34:11, 17.75s/batch, batch_loss=9.99, batch_

Validation:   3%| | 19/743 [05:54<3:34:11, 17.75s/batch, batch_loss=14.8, batch_

Validation:   3%| | 20/743 [05:54<3:29:13, 17.36s/batch, batch_loss=14.8, batch_

Validation:   3%| | 20/743 [06:09<3:29:13, 17.36s/batch, batch_loss=959, batch_i

Validation:   3%| | 21/743 [06:09<3:23:26, 16.91s/batch, batch_loss=959, batch_i

Validation:   3%| | 21/743 [06:25<3:23:26, 16.91s/batch, batch_loss=14.4, batch_

Validation:   3%| | 22/743 [06:25<3:19:18, 16.59s/batch, batch_loss=14.4, batch_

Validation:   3%| | 22/743 [06:42<3:19:18, 16.59s/batch, batch_loss=6.87, batch_

Validation:   3%| | 23/743 [06:42<3:18:57, 16.58s/batch, batch_loss=6.87, batch_

Validation:   3%| | 23/743 [06:58<3:18:57, 16.58s/batch, batch_loss=16.2, batch_

Validation:   3%| | 24/743 [06:58<3:15:47, 16.34s/batch, batch_loss=16.2, batch_

Validation:   3%| | 24/743 [07:16<3:15:47, 16.34s/batch, batch_loss=13, batch_in

Validation:   3%| | 25/743 [07:16<3:23:50, 17.03s/batch, batch_loss=13, batch_in

Validation:   3%| | 25/743 [07:32<3:23:50, 17.03s/batch, batch_loss=18.7, batch_

Validation:   3%| | 26/743 [07:32<3:18:43, 16.63s/batch, batch_loss=18.7, batch_

Validation:   3%| | 26/743 [07:50<3:18:43, 16.63s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [07:50<3:22:39, 16.98s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [08:07<3:22:39, 16.98s/batch, batch_loss=16.9, batch_

Validation:   4%| | 28/743 [08:07<3:21:47, 16.93s/batch, batch_loss=16.9, batch_

Validation:   4%| | 28/743 [08:28<3:21:47, 16.93s/batch, batch_loss=15, batch_in

Validation:   4%| | 29/743 [08:28<3:36:10, 18.17s/batch, batch_loss=15, batch_in

Validation:   4%| | 29/743 [08:44<3:36:10, 18.17s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [08:44<3:30:46, 17.74s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [09:01<3:30:46, 17.74s/batch, batch_loss=17.7, batch_

Validation:   4%| | 31/743 [09:01<3:27:20, 17.47s/batch, batch_loss=17.7, batch_

Validation:   4%| | 31/743 [09:18<3:27:20, 17.47s/batch, batch_loss=14.6, batch_

Validation:   4%| | 32/743 [09:18<3:24:29, 17.26s/batch, batch_loss=14.6, batch_

Validation:   4%| | 32/743 [09:35<3:24:29, 17.26s/batch, batch_loss=16.7, batch_

Validation:   4%| | 33/743 [09:35<3:23:00, 17.16s/batch, batch_loss=16.7, batch_

Validation:   4%| | 33/743 [09:50<3:23:00, 17.16s/batch, batch_loss=15, batch_in

Validation:   5%| | 34/743 [09:50<3:16:21, 16.62s/batch, batch_loss=15, batch_in

Validation:   5%| | 34/743 [10:06<3:16:21, 16.62s/batch, batch_loss=2.83e+3, bat

Validation:   5%| | 35/743 [10:06<3:13:27, 16.39s/batch, batch_loss=2.83e+3, bat

Validation:   5%| | 35/743 [10:22<3:13:27, 16.39s/batch, batch_loss=15, batch_in

Validation:   5%| | 36/743 [10:22<3:11:01, 16.21s/batch, batch_loss=15, batch_in

Validation:   5%| | 36/743 [10:40<3:11:01, 16.21s/batch, batch_loss=163, batch_i

Validation:   5%| | 37/743 [10:40<3:17:15, 16.76s/batch, batch_loss=163, batch_i

Validation:   5%| | 37/743 [10:57<3:17:15, 16.76s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [10:57<3:17:53, 16.84s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [11:14<3:17:53, 16.84s/batch, batch_loss=15.5, batch_

Validation:   5%| | 39/743 [11:14<3:17:20, 16.82s/batch, batch_loss=15.5, batch_

Validation:   5%| | 39/743 [11:31<3:17:20, 16.82s/batch, batch_loss=19.9, batch_

Validation:   5%| | 40/743 [11:31<3:17:52, 16.89s/batch, batch_loss=19.9, batch_

Validation:   5%| | 40/743 [11:47<3:17:52, 16.89s/batch, batch_loss=14.7, batch_

Validation:   6%| | 41/743 [11:47<3:16:02, 16.76s/batch, batch_loss=14.7, batch_

Validation:   6%| | 41/743 [12:03<3:16:02, 16.76s/batch, batch_loss=16, batch_in

Validation:   6%| | 42/743 [12:03<3:13:42, 16.58s/batch, batch_loss=16, batch_in

Validation:   6%| | 42/743 [12:20<3:13:42, 16.58s/batch, batch_loss=11, batch_in

Validation:   6%| | 43/743 [12:20<3:13:33, 16.59s/batch, batch_loss=11, batch_in

Validation:   6%| | 43/743 [12:37<3:13:33, 16.59s/batch, batch_loss=14.9, batch_

Validation:   6%| | 44/743 [12:37<3:13:59, 16.65s/batch, batch_loss=14.9, batch_

Validation:   6%| | 44/743 [12:54<3:13:59, 16.65s/batch, batch_loss=19, batch_in

Validation:   6%| | 45/743 [12:54<3:14:40, 16.73s/batch, batch_loss=19, batch_in

Validation:   6%| | 45/743 [13:11<3:14:40, 16.73s/batch, batch_loss=10.2, batch_

Validation:   6%| | 46/743 [13:11<3:15:02, 16.79s/batch, batch_loss=10.2, batch_

Validation:   6%| | 46/743 [13:26<3:15:02, 16.79s/batch, batch_loss=17.7, batch_

Validation:   6%| | 47/743 [13:26<3:11:09, 16.48s/batch, batch_loss=17.7, batch_

Validation:   6%| | 47/743 [13:42<3:11:09, 16.48s/batch, batch_loss=18.2, batch_

Validation:   6%| | 48/743 [13:42<3:08:41, 16.29s/batch, batch_loss=18.2, batch_

Validation:   6%| | 48/743 [13:59<3:08:41, 16.29s/batch, batch_loss=19.1, batch_

Validation:   7%| | 49/743 [13:59<3:10:03, 16.43s/batch, batch_loss=19.1, batch_

Validation:   7%| | 49/743 [14:16<3:10:03, 16.43s/batch, batch_loss=13, batch_in

Validation:   7%| | 50/743 [14:16<3:10:00, 16.45s/batch, batch_loss=13, batch_in

Validation:   7%| | 50/743 [14:33<3:10:00, 16.45s/batch, batch_loss=13.3, batch_

Validation:   7%| | 51/743 [14:33<3:13:58, 16.82s/batch, batch_loss=13.3, batch_

Validation:   7%| | 51/743 [14:49<3:13:58, 16.82s/batch, batch_loss=15.8, batch_

Validation:   7%| | 52/743 [14:49<3:11:25, 16.62s/batch, batch_loss=15.8, batch_

Validation:   7%| | 52/743 [15:06<3:11:25, 16.62s/batch, batch_loss=22.6, batch_

Validation:   7%| | 53/743 [15:06<3:11:59, 16.70s/batch, batch_loss=22.6, batch_

Validation:   7%| | 53/743 [15:23<3:11:59, 16.70s/batch, batch_loss=13.2, batch_

Validation:   7%| | 54/743 [15:23<3:12:12, 16.74s/batch, batch_loss=13.2, batch_

Validation:   7%| | 54/743 [15:39<3:12:12, 16.74s/batch, batch_loss=20.4, batch_

Validation:   7%| | 55/743 [15:39<3:10:15, 16.59s/batch, batch_loss=20.4, batch_

Validation:   7%| | 55/743 [15:56<3:10:15, 16.59s/batch, batch_loss=16.4, batch_

Validation:   8%| | 56/743 [15:56<3:10:44, 16.66s/batch, batch_loss=16.4, batch_

Validation:   8%| | 56/743 [16:12<3:10:44, 16.66s/batch, batch_loss=13, batch_in

Validation:   8%| | 57/743 [16:12<3:09:13, 16.55s/batch, batch_loss=13, batch_in

Validation:   8%| | 57/743 [16:28<3:09:13, 16.55s/batch, batch_loss=18.5, batch_

Validation:   8%| | 58/743 [16:28<3:06:38, 16.35s/batch, batch_loss=18.5, batch_

Validation:   8%| | 58/743 [16:45<3:06:38, 16.35s/batch, batch_loss=106, batch_i

Validation:   8%| | 59/743 [16:45<3:06:33, 16.37s/batch, batch_loss=106, batch_i

Validation:   8%| | 59/743 [17:02<3:06:33, 16.37s/batch, batch_loss=6.14e+3, bat

Validation:   8%| | 60/743 [17:02<3:08:54, 16.59s/batch, batch_loss=6.14e+3, bat

Validation:   8%| | 60/743 [17:20<3:08:54, 16.59s/batch, batch_loss=7.61, batch_

Validation:   8%| | 61/743 [17:20<3:12:50, 16.97s/batch, batch_loss=7.61, batch_

Validation:   8%| | 61/743 [17:36<3:12:50, 16.97s/batch, batch_loss=8.71, batch_

Validation:   8%| | 62/743 [17:36<3:09:21, 16.68s/batch, batch_loss=8.71, batch_

Validation:   8%| | 62/743 [17:53<3:09:21, 16.68s/batch, batch_loss=21.2, batch_

Validation:   8%| | 63/743 [17:53<3:12:13, 16.96s/batch, batch_loss=21.2, batch_

Validation:   8%| | 63/743 [18:10<3:12:13, 16.96s/batch, batch_loss=11.5, batch_

Validation:   9%| | 64/743 [18:10<3:10:52, 16.87s/batch, batch_loss=11.5, batch_

Validation:   9%| | 64/743 [18:27<3:10:52, 16.87s/batch, batch_loss=17.1, batch_

Validation:   9%| | 65/743 [18:27<3:11:35, 16.95s/batch, batch_loss=17.1, batch_

Validation:   9%| | 65/743 [18:44<3:11:35, 16.95s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [18:44<3:10:18, 16.87s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [19:00<3:10:18, 16.87s/batch, batch_loss=13.3, batch_

Validation:   9%| | 67/743 [19:00<3:07:18, 16.63s/batch, batch_loss=13.3, batch_

Validation:   9%| | 67/743 [19:17<3:07:18, 16.63s/batch, batch_loss=15.2, batch_

Validation:   9%| | 68/743 [19:17<3:09:50, 16.87s/batch, batch_loss=15.2, batch_

Validation:   9%| | 68/743 [19:37<3:09:50, 16.87s/batch, batch_loss=10.1, batch_

Validation:   9%| | 69/743 [19:37<3:19:03, 17.72s/batch, batch_loss=10.1, batch_

Validation:   9%| | 69/743 [19:55<3:19:03, 17.72s/batch, batch_loss=14.5, batch_

Validation:   9%| | 70/743 [19:55<3:18:58, 17.74s/batch, batch_loss=14.5, batch_

Validation:   9%| | 70/743 [20:13<3:18:58, 17.74s/batch, batch_loss=9.16, batch_

Validation:  10%| | 71/743 [20:13<3:18:42, 17.74s/batch, batch_loss=9.16, batch_

Validation:  10%| | 71/743 [20:29<3:18:42, 17.74s/batch, batch_loss=12.4, batch_

Validation:  10%| | 72/743 [20:29<3:15:18, 17.46s/batch, batch_loss=12.4, batch_

Validation:  10%| | 72/743 [20:48<3:15:18, 17.46s/batch, batch_loss=13.9, batch_

Validation:  10%| | 73/743 [20:48<3:17:44, 17.71s/batch, batch_loss=13.9, batch_

Validation:  10%| | 73/743 [21:06<3:17:44, 17.71s/batch, batch_loss=16.8, batch_

Validation:  10%| | 74/743 [21:06<3:19:24, 17.88s/batch, batch_loss=16.8, batch_

Validation:  10%| | 74/743 [21:27<3:19:24, 17.88s/batch, batch_loss=12.5, batch_

Validation:  10%| | 75/743 [21:27<3:29:29, 18.82s/batch, batch_loss=12.5, batch_

Validation:  10%| | 75/743 [21:42<3:29:29, 18.82s/batch, batch_loss=14.5, batch_

Validation:  10%| | 76/743 [21:42<3:17:12, 17.74s/batch, batch_loss=14.5, batch_

Validation:  10%| | 76/743 [21:57<3:17:12, 17.74s/batch, batch_loss=12.5, batch_

Validation:  10%| | 77/743 [21:57<3:06:12, 16.78s/batch, batch_loss=12.5, batch_

Validation:  10%| | 77/743 [22:15<3:06:12, 16.78s/batch, batch_loss=15.2, batch_

Validation:  10%| | 78/743 [22:15<3:09:28, 17.10s/batch, batch_loss=15.2, batch_

Validation:  10%| | 78/743 [22:34<3:09:28, 17.10s/batch, batch_loss=8.88, batch_

Validation:  11%| | 79/743 [22:34<3:18:30, 17.94s/batch, batch_loss=8.88, batch_

Validation:  11%| | 79/743 [22:52<3:18:30, 17.94s/batch, batch_loss=7.22, batch_

Validation:  11%| | 80/743 [22:52<3:17:59, 17.92s/batch, batch_loss=7.22, batch_

Validation:  11%| | 80/743 [23:10<3:17:59, 17.92s/batch, batch_loss=150, batch_i

Validation:  11%| | 81/743 [23:10<3:16:24, 17.80s/batch, batch_loss=150, batch_i

Validation:  11%| | 81/743 [23:27<3:16:24, 17.80s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [23:27<3:13:16, 17.54s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [23:44<3:13:16, 17.54s/batch, batch_loss=30.9, batch_

Validation:  11%| | 83/743 [23:44<3:13:17, 17.57s/batch, batch_loss=30.9, batch_

Validation:  11%| | 83/743 [24:02<3:13:17, 17.57s/batch, batch_loss=16.3, batch_

Validation:  11%| | 84/743 [24:02<3:11:24, 17.43s/batch, batch_loss=16.3, batch_

Validation:  11%| | 84/743 [24:19<3:11:24, 17.43s/batch, batch_loss=22.1, batch_

Validation:  11%| | 85/743 [24:19<3:10:28, 17.37s/batch, batch_loss=22.1, batch_

Validation:  11%| | 85/743 [24:39<3:10:28, 17.37s/batch, batch_loss=24.1, batch_

Validation:  12%| | 86/743 [24:39<3:18:40, 18.14s/batch, batch_loss=24.1, batch_

Validation:  12%| | 86/743 [24:56<3:18:40, 18.14s/batch, batch_loss=30.9, batch_

Validation:  12%| | 87/743 [24:56<3:14:45, 17.81s/batch, batch_loss=30.9, batch_

Validation:  12%| | 87/743 [25:13<3:14:45, 17.81s/batch, batch_loss=21.3, batch_

Validation:  12%| | 88/743 [25:13<3:12:22, 17.62s/batch, batch_loss=21.3, batch_

Validation:  12%| | 88/743 [25:29<3:12:22, 17.62s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [25:29<3:08:03, 17.25s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [25:47<3:08:03, 17.25s/batch, batch_loss=4.12, batch_

Validation:  12%| | 90/743 [25:47<3:09:01, 17.37s/batch, batch_loss=4.12, batch_

Validation:  12%| | 90/743 [26:05<3:09:01, 17.37s/batch, batch_loss=31.7, batch_

Validation:  12%| | 91/743 [26:05<3:09:31, 17.44s/batch, batch_loss=31.7, batch_

Validation:  12%| | 91/743 [26:23<3:09:31, 17.44s/batch, batch_loss=27.5, batch_

Validation:  12%| | 92/743 [26:23<3:11:29, 17.65s/batch, batch_loss=27.5, batch_

Validation:  12%| | 92/743 [26:40<3:11:29, 17.65s/batch, batch_loss=22.8, batch_

Validation:  13%|▏| 93/743 [26:40<3:09:15, 17.47s/batch, batch_loss=22.8, batch_

Validation:  13%|▏| 93/743 [26:56<3:09:15, 17.47s/batch, batch_loss=30.9, batch_

Validation:  13%|▏| 94/743 [26:56<3:05:44, 17.17s/batch, batch_loss=30.9, batch_

Validation:  13%|▏| 94/743 [27:16<3:05:44, 17.17s/batch, batch_loss=11.9, batch_

Validation:  13%|▏| 95/743 [27:16<3:14:36, 18.02s/batch, batch_loss=11.9, batch_

Validation:  13%|▏| 95/743 [27:33<3:14:36, 18.02s/batch, batch_loss=17.8, batch_

Validation:  13%|▏| 96/743 [27:33<3:09:18, 17.56s/batch, batch_loss=17.8, batch_

Validation:  13%|▏| 96/743 [27:49<3:09:18, 17.56s/batch, batch_loss=27.2, batch_

Validation:  13%|▏| 97/743 [27:49<3:03:29, 17.04s/batch, batch_loss=27.2, batch_

Validation:  13%|▏| 97/743 [28:05<3:03:29, 17.04s/batch, batch_loss=18.4, batch_

Validation:  13%|▏| 98/743 [28:05<3:00:55, 16.83s/batch, batch_loss=18.4, batch_

Validation:  13%|▏| 98/743 [28:24<3:00:55, 16.83s/batch, batch_loss=23.4, batch_

Validation:  13%|▏| 99/743 [28:24<3:07:38, 17.48s/batch, batch_loss=23.4, batch_

Validation:  13%|▏| 99/743 [28:42<3:07:38, 17.48s/batch, batch_loss=10.8, batch_

Validation:  13%|▏| 100/743 [28:42<3:08:32, 17.59s/batch, batch_loss=10.8, batch

Validation:  13%|▏| 100/743 [28:59<3:08:32, 17.59s/batch, batch_loss=15.2, batch

Validation:  14%|▏| 101/743 [28:59<3:08:00, 17.57s/batch, batch_loss=15.2, batch

Validation:  14%|▏| 101/743 [29:17<3:08:00, 17.57s/batch, batch_loss=13.7, batch

Validation:  14%|▏| 102/743 [29:17<3:09:38, 17.75s/batch, batch_loss=13.7, batch

Validation:  14%|▏| 102/743 [29:34<3:09:38, 17.75s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [29:34<3:07:02, 17.54s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [29:53<3:07:02, 17.54s/batch, batch_loss=14.9, batch

Validation:  14%|▏| 104/743 [29:53<3:09:21, 17.78s/batch, batch_loss=14.9, batch

Validation:  14%|▏| 104/743 [30:11<3:09:21, 17.78s/batch, batch_loss=8.71, batch

Validation:  14%|▏| 105/743 [30:11<3:10:48, 17.94s/batch, batch_loss=8.71, batch

Validation:  14%|▏| 105/743 [30:31<3:10:48, 17.94s/batch, batch_loss=15.2, batch

Validation:  14%|▏| 106/743 [30:31<3:17:15, 18.58s/batch, batch_loss=15.2, batch

Validation:  14%|▏| 106/743 [30:49<3:17:15, 18.58s/batch, batch_loss=715, batch_

Validation:  14%|▏| 107/743 [30:49<3:14:07, 18.31s/batch, batch_loss=715, batch_

Validation:  14%|▏| 107/743 [31:08<3:14:07, 18.31s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [31:08<3:16:10, 18.54s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [31:27<3:16:10, 18.54s/batch, batch_loss=203, batch_

Validation:  15%|▏| 109/743 [31:27<3:16:20, 18.58s/batch, batch_loss=203, batch_

Validation:  15%|▏| 109/743 [31:44<3:16:20, 18.58s/batch, batch_loss=25.3, batch

Validation:  15%|▏| 110/743 [31:44<3:10:38, 18.07s/batch, batch_loss=25.3, batch

Validation:  15%|▏| 110/743 [32:01<3:10:38, 18.07s/batch, batch_loss=13.6, batch

Validation:  15%|▏| 111/743 [32:01<3:09:00, 17.94s/batch, batch_loss=13.6, batch

Validation:  15%|▏| 111/743 [32:18<3:09:00, 17.94s/batch, batch_loss=24.9, batch

Validation:  15%|▏| 112/743 [32:18<3:05:51, 17.67s/batch, batch_loss=24.9, batch

Validation:  15%|▏| 112/743 [32:36<3:05:51, 17.67s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [32:36<3:06:08, 17.73s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [32:53<3:06:08, 17.73s/batch, batch_loss=18.3, batch

Validation:  15%|▏| 114/743 [32:53<3:01:52, 17.35s/batch, batch_loss=18.3, batch

Validation:  15%|▏| 114/743 [33:11<3:01:52, 17.35s/batch, batch_loss=19.4, batch

Validation:  15%|▏| 115/743 [33:11<3:05:02, 17.68s/batch, batch_loss=19.4, batch

Validation:  15%|▏| 115/743 [33:29<3:05:02, 17.68s/batch, batch_loss=14.5, batch

Validation:  16%|▏| 116/743 [33:29<3:06:01, 17.80s/batch, batch_loss=14.5, batch

Validation:  16%|▏| 116/743 [33:46<3:06:01, 17.80s/batch, batch_loss=24.2, batch

Validation:  16%|▏| 117/743 [33:46<3:03:33, 17.59s/batch, batch_loss=24.2, batch

Validation:  16%|▏| 117/743 [34:03<3:03:33, 17.59s/batch, batch_loss=22.4, batch

Validation:  16%|▏| 118/743 [34:03<2:59:46, 17.26s/batch, batch_loss=22.4, batch

Validation:  16%|▏| 118/743 [34:21<2:59:46, 17.26s/batch, batch_loss=16.3, batch

Validation:  16%|▏| 119/743 [34:21<3:02:49, 17.58s/batch, batch_loss=16.3, batch

Validation:  16%|▏| 119/743 [34:39<3:02:49, 17.58s/batch, batch_loss=20.2, batch

Validation:  16%|▏| 120/743 [34:39<3:04:32, 17.77s/batch, batch_loss=20.2, batch

Validation:  16%|▏| 120/743 [34:58<3:04:32, 17.77s/batch, batch_loss=13.2, batch

Validation:  16%|▏| 121/743 [34:58<3:08:14, 18.16s/batch, batch_loss=13.2, batch

Validation:  16%|▏| 121/743 [35:17<3:08:14, 18.16s/batch, batch_loss=5.13, batch

Validation:  16%|▏| 122/743 [35:17<3:08:41, 18.23s/batch, batch_loss=5.13, batch

Validation:  16%|▏| 122/743 [35:37<3:08:41, 18.23s/batch, batch_loss=8.56, batch

Validation:  17%|▏| 123/743 [35:37<3:16:26, 19.01s/batch, batch_loss=8.56, batch

Validation:  17%|▏| 123/743 [35:56<3:16:26, 19.01s/batch, batch_loss=11.9, batch

Validation:  17%|▏| 124/743 [35:56<3:15:21, 18.94s/batch, batch_loss=11.9, batch

Validation:  17%|▏| 124/743 [36:14<3:15:21, 18.94s/batch, batch_loss=26.7, batch

Validation:  17%|▏| 125/743 [36:14<3:11:21, 18.58s/batch, batch_loss=26.7, batch

Validation:  17%|▏| 125/743 [36:35<3:11:21, 18.58s/batch, batch_loss=13.1, batch

Validation:  17%|▏| 126/743 [36:35<3:17:31, 19.21s/batch, batch_loss=13.1, batch

Validation:  17%|▏| 126/743 [36:52<3:17:31, 19.21s/batch, batch_loss=12.2, batch

Validation:  17%|▏| 127/743 [36:52<3:11:58, 18.70s/batch, batch_loss=12.2, batch

Validation:  17%|▏| 127/743 [37:09<3:11:58, 18.70s/batch, batch_loss=20, batch_i

Validation:  17%|▏| 128/743 [37:09<3:06:44, 18.22s/batch, batch_loss=20, batch_i

Validation:  17%|▏| 128/743 [37:28<3:06:44, 18.22s/batch, batch_loss=13.4, batch

Validation:  17%|▏| 129/743 [37:28<3:09:00, 18.47s/batch, batch_loss=13.4, batch

Validation:  17%|▏| 129/743 [37:46<3:09:00, 18.47s/batch, batch_loss=18.1, batch

Validation:  17%|▏| 130/743 [37:46<3:07:01, 18.31s/batch, batch_loss=18.1, batch

Validation:  17%|▏| 130/743 [38:04<3:07:01, 18.31s/batch, batch_loss=21.7, batch

Validation:  18%|▏| 131/743 [38:04<3:05:26, 18.18s/batch, batch_loss=21.7, batch

Validation:  18%|▏| 131/743 [38:22<3:05:26, 18.18s/batch, batch_loss=25.1, batch

Validation:  18%|▏| 132/743 [38:22<3:03:51, 18.06s/batch, batch_loss=25.1, batch

Validation:  18%|▏| 132/743 [38:40<3:03:51, 18.06s/batch, batch_loss=35.9, batch

Validation:  18%|▏| 133/743 [38:40<3:02:27, 17.95s/batch, batch_loss=35.9, batch

Validation:  18%|▏| 133/743 [38:58<3:02:27, 17.95s/batch, batch_loss=19, batch_i

Validation:  18%|▏| 134/743 [38:58<3:03:12, 18.05s/batch, batch_loss=19, batch_i

Validation:  18%|▏| 134/743 [39:16<3:03:12, 18.05s/batch, batch_loss=36.3, batch

Validation:  18%|▏| 135/743 [39:16<3:02:38, 18.02s/batch, batch_loss=36.3, batch

Validation:  18%|▏| 135/743 [39:35<3:02:38, 18.02s/batch, batch_loss=16, batch_i

Validation:  18%|▏| 136/743 [39:35<3:05:35, 18.34s/batch, batch_loss=16, batch_i

Validation:  18%|▏| 136/743 [39:54<3:05:35, 18.34s/batch, batch_loss=24.8, batch

Validation:  18%|▏| 137/743 [39:54<3:07:29, 18.56s/batch, batch_loss=24.8, batch

Validation:  18%|▏| 137/743 [40:12<3:07:29, 18.56s/batch, batch_loss=7.07, batch

Validation:  19%|▏| 138/743 [40:12<3:06:45, 18.52s/batch, batch_loss=7.07, batch

Validation:  19%|▏| 138/743 [40:31<3:06:45, 18.52s/batch, batch_loss=252, batch_

Validation:  19%|▏| 139/743 [40:31<3:05:16, 18.40s/batch, batch_loss=252, batch_

Validation:  19%|▏| 139/743 [40:50<3:05:16, 18.40s/batch, batch_loss=17.4, batch

Validation:  19%|▏| 140/743 [40:50<3:06:56, 18.60s/batch, batch_loss=17.4, batch

Validation:  19%|▏| 140/743 [41:10<3:06:56, 18.60s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 141/743 [41:10<3:11:23, 19.08s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 141/743 [41:29<3:11:23, 19.08s/batch, batch_loss=13.1, batch

Validation:  19%|▏| 142/743 [41:29<3:12:13, 19.19s/batch, batch_loss=13.1, batch

Validation:  19%|▏| 142/743 [41:48<3:12:13, 19.19s/batch, batch_loss=13, batch_i

Validation:  19%|▏| 143/743 [41:48<3:09:27, 18.95s/batch, batch_loss=13, batch_i

Validation:  19%|▏| 143/743 [42:06<3:09:27, 18.95s/batch, batch_loss=19.4, batch

Validation:  19%|▏| 144/743 [42:06<3:08:29, 18.88s/batch, batch_loss=19.4, batch

Validation:  19%|▏| 144/743 [42:25<3:08:29, 18.88s/batch, batch_loss=14.6, batch

Validation:  20%|▏| 145/743 [42:25<3:08:47, 18.94s/batch, batch_loss=14.6, batch

Validation:  20%|▏| 145/743 [42:44<3:08:47, 18.94s/batch, batch_loss=15.9, batch

Validation:  20%|▏| 146/743 [42:44<3:08:24, 18.94s/batch, batch_loss=15.9, batch

Validation:  20%|▏| 146/743 [43:03<3:08:24, 18.94s/batch, batch_loss=18.1, batch

Validation:  20%|▏| 147/743 [43:03<3:05:54, 18.72s/batch, batch_loss=18.1, batch

Validation:  20%|▏| 147/743 [43:23<3:05:54, 18.72s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [43:23<3:09:37, 19.12s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [43:41<3:09:37, 19.12s/batch, batch_loss=25.1, batch

Validation:  20%|▏| 149/743 [43:41<3:07:25, 18.93s/batch, batch_loss=25.1, batch

Validation:  20%|▏| 149/743 [44:00<3:07:25, 18.93s/batch, batch_loss=25, batch_i

Validation:  20%|▏| 150/743 [44:00<3:06:28, 18.87s/batch, batch_loss=25, batch_i

Validation:  20%|▏| 150/743 [44:19<3:06:28, 18.87s/batch, batch_loss=14.5, batch

Validation:  20%|▏| 151/743 [44:19<3:05:51, 18.84s/batch, batch_loss=14.5, batch

Validation:  20%|▏| 151/743 [44:39<3:05:51, 18.84s/batch, batch_loss=1.03e+4, ba

Validation:  20%|▏| 152/743 [44:39<3:08:45, 19.16s/batch, batch_loss=1.03e+4, ba

Validation:  20%|▏| 152/743 [45:01<3:08:45, 19.16s/batch, batch_loss=15.6, batch

Validation:  21%|▏| 153/743 [45:01<3:19:13, 20.26s/batch, batch_loss=15.6, batch

Validation:  21%|▏| 153/743 [45:27<3:19:13, 20.26s/batch, batch_loss=15.5, batch

Validation:  21%|▏| 154/743 [45:27<3:34:47, 21.88s/batch, batch_loss=15.5, batch

Validation:  21%|▏| 154/743 [45:49<3:34:47, 21.88s/batch, batch_loss=18.8, batch

Validation:  21%|▏| 155/743 [45:49<3:34:56, 21.93s/batch, batch_loss=18.8, batch

Validation:  21%|▏| 155/743 [46:08<3:34:56, 21.93s/batch, batch_loss=17.3, batch

Validation:  21%|▏| 156/743 [46:08<3:24:53, 20.94s/batch, batch_loss=17.3, batch

Validation:  21%|▏| 156/743 [46:26<3:24:53, 20.94s/batch, batch_loss=17.4, batch

Validation:  21%|▏| 157/743 [46:26<3:17:03, 20.18s/batch, batch_loss=17.4, batch

Validation:  21%|▏| 157/743 [46:45<3:17:03, 20.18s/batch, batch_loss=22.4, batch

Validation:  21%|▏| 158/743 [46:45<3:12:45, 19.77s/batch, batch_loss=22.4, batch

Validation:  21%|▏| 158/743 [47:03<3:12:45, 19.77s/batch, batch_loss=22.2, batch

Validation:  21%|▏| 159/743 [47:03<3:06:44, 19.19s/batch, batch_loss=22.2, batch

Validation:  21%|▏| 159/743 [47:26<3:06:44, 19.19s/batch, batch_loss=14.8, batch

Validation:  22%|▏| 160/743 [47:26<3:17:03, 20.28s/batch, batch_loss=14.8, batch

Validation:  22%|▏| 160/743 [47:46<3:17:03, 20.28s/batch, batch_loss=16.4, batch

Validation:  22%|▏| 161/743 [47:46<3:17:06, 20.32s/batch, batch_loss=16.4, batch

Validation:  22%|▏| 161/743 [48:05<3:17:06, 20.32s/batch, batch_loss=19.6, batch

Validation:  22%|▏| 162/743 [48:05<3:12:43, 19.90s/batch, batch_loss=19.6, batch

Validation:  22%|▏| 162/743 [48:22<3:12:43, 19.90s/batch, batch_loss=12.8, batch

Validation:  22%|▏| 163/743 [48:22<3:03:53, 19.02s/batch, batch_loss=12.8, batch

Validation:  22%|▏| 163/743 [48:40<3:03:53, 19.02s/batch, batch_loss=10.3, batch

Validation:  22%|▏| 164/743 [48:40<2:59:27, 18.60s/batch, batch_loss=10.3, batch

Validation:  22%|▏| 164/743 [48:56<2:59:27, 18.60s/batch, batch_loss=12.2, batch

Validation:  22%|▏| 165/743 [48:56<2:53:56, 18.06s/batch, batch_loss=12.2, batch

Validation:  22%|▏| 165/743 [49:15<2:53:56, 18.06s/batch, batch_loss=11.8, batch

Validation:  22%|▏| 166/743 [49:15<2:55:27, 18.25s/batch, batch_loss=11.8, batch

Validation:  22%|▏| 166/743 [49:33<2:55:27, 18.25s/batch, batch_loss=13.8, batch

Validation:  22%|▏| 167/743 [49:33<2:55:24, 18.27s/batch, batch_loss=13.8, batch

Validation:  22%|▏| 167/743 [49:52<2:55:24, 18.27s/batch, batch_loss=21, batch_i

Validation:  23%|▏| 168/743 [49:52<2:55:49, 18.35s/batch, batch_loss=21, batch_i

Validation:  23%|▏| 168/743 [50:10<2:55:49, 18.35s/batch, batch_loss=22.1, batch

Validation:  23%|▏| 169/743 [50:10<2:55:51, 18.38s/batch, batch_loss=22.1, batch

Validation:  23%|▏| 169/743 [50:32<2:55:51, 18.38s/batch, batch_loss=18.9, batch

Validation:  23%|▏| 170/743 [50:32<3:03:48, 19.25s/batch, batch_loss=18.9, batch

Validation:  23%|▏| 170/743 [50:50<3:03:48, 19.25s/batch, batch_loss=16.8, batch

Validation:  23%|▏| 171/743 [50:50<3:00:33, 18.94s/batch, batch_loss=16.8, batch

Validation:  23%|▏| 171/743 [51:08<3:00:33, 18.94s/batch, batch_loss=17.5, batch

Validation:  23%|▏| 172/743 [51:08<2:57:42, 18.67s/batch, batch_loss=17.5, batch

Validation:  23%|▏| 172/743 [51:26<2:57:42, 18.67s/batch, batch_loss=21.5, batch

Validation:  23%|▏| 173/743 [51:26<2:55:05, 18.43s/batch, batch_loss=21.5, batch

Validation:  23%|▏| 173/743 [51:42<2:55:05, 18.43s/batch, batch_loss=14.4, batch

Validation:  23%|▏| 174/743 [51:42<2:49:40, 17.89s/batch, batch_loss=14.4, batch

Validation:  23%|▏| 174/743 [51:59<2:49:40, 17.89s/batch, batch_loss=19.5, batch

Validation:  24%|▏| 175/743 [51:59<2:45:45, 17.51s/batch, batch_loss=19.5, batch

Validation:  24%|▏| 175/743 [52:17<2:45:45, 17.51s/batch, batch_loss=14.6, batch

Validation:  24%|▏| 176/743 [52:17<2:47:46, 17.75s/batch, batch_loss=14.6, batch

Validation:  24%|▏| 176/743 [52:36<2:47:46, 17.75s/batch, batch_loss=17.1, batch

Validation:  24%|▏| 177/743 [52:36<2:50:29, 18.07s/batch, batch_loss=17.1, batch

Validation:  24%|▏| 177/743 [52:54<2:50:29, 18.07s/batch, batch_loss=21.4, batch

Validation:  24%|▏| 178/743 [52:54<2:48:30, 17.90s/batch, batch_loss=21.4, batch

Validation:  24%|▏| 178/743 [53:12<2:48:30, 17.90s/batch, batch_loss=18.3, batch

Validation:  24%|▏| 179/743 [53:12<2:49:03, 17.98s/batch, batch_loss=18.3, batch

Validation:  24%|▏| 179/743 [53:29<2:49:03, 17.98s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [53:29<2:47:03, 17.80s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [53:46<2:47:03, 17.80s/batch, batch_loss=15.4, batch

Validation:  24%|▏| 181/743 [53:46<2:45:09, 17.63s/batch, batch_loss=15.4, batch

Validation:  24%|▏| 181/743 [54:03<2:45:09, 17.63s/batch, batch_loss=19.5, batch

Validation:  24%|▏| 182/743 [54:03<2:43:21, 17.47s/batch, batch_loss=19.5, batch

Validation:  24%|▏| 182/743 [54:21<2:43:21, 17.47s/batch, batch_loss=16.5, batch

Validation:  25%|▏| 183/743 [54:21<2:42:53, 17.45s/batch, batch_loss=16.5, batch

Validation:  25%|▏| 183/743 [54:39<2:42:53, 17.45s/batch, batch_loss=9.62, batch

Validation:  25%|▏| 184/743 [54:39<2:44:46, 17.69s/batch, batch_loss=9.62, batch

Validation:  25%|▏| 184/743 [55:00<2:44:46, 17.69s/batch, batch_loss=16, batch_i

Validation:  25%|▏| 185/743 [55:00<2:52:47, 18.58s/batch, batch_loss=16, batch_i

Validation:  25%|▏| 185/743 [55:18<2:52:47, 18.58s/batch, batch_loss=22.9, batch

Validation:  25%|▎| 186/743 [55:18<2:50:23, 18.36s/batch, batch_loss=22.9, batch

Validation:  25%|▎| 186/743 [55:36<2:50:23, 18.36s/batch, batch_loss=25.9, batch

Validation:  25%|▎| 187/743 [55:36<2:48:57, 18.23s/batch, batch_loss=25.9, batch

Validation:  25%|▎| 187/743 [55:54<2:48:57, 18.23s/batch, batch_loss=14, batch_i

Validation:  25%|▎| 188/743 [55:54<2:50:17, 18.41s/batch, batch_loss=14, batch_i

Validation:  25%|▎| 188/743 [56:12<2:50:17, 18.41s/batch, batch_loss=16.2, batch

Validation:  25%|▎| 189/743 [56:12<2:47:59, 18.19s/batch, batch_loss=16.2, batch

Validation:  25%|▎| 189/743 [56:30<2:47:59, 18.19s/batch, batch_loss=973, batch_

Validation:  26%|▎| 190/743 [56:30<2:48:13, 18.25s/batch, batch_loss=973, batch_

Validation:  26%|▎| 190/743 [56:51<2:48:13, 18.25s/batch, batch_loss=21.5, batch

Validation:  26%|▎| 191/743 [56:51<2:55:16, 19.05s/batch, batch_loss=21.5, batch

Validation:  26%|▎| 191/743 [57:08<2:55:16, 19.05s/batch, batch_loss=12.5, batch

Validation:  26%|▎| 192/743 [57:08<2:49:25, 18.45s/batch, batch_loss=12.5, batch

Validation:  26%|▎| 192/743 [57:26<2:49:25, 18.45s/batch, batch_loss=16.8, batch

Validation:  26%|▎| 193/743 [57:26<2:46:11, 18.13s/batch, batch_loss=16.8, batch

Validation:  26%|▎| 193/743 [57:43<2:46:11, 18.13s/batch, batch_loss=16.7, batch

Validation:  26%|▎| 194/743 [57:43<2:43:19, 17.85s/batch, batch_loss=16.7, batch

Validation:  26%|▎| 194/743 [58:02<2:43:19, 17.85s/batch, batch_loss=12.5, batch

Validation:  26%|▎| 195/743 [58:02<2:45:20, 18.10s/batch, batch_loss=12.5, batch

Validation:  26%|▎| 195/743 [58:19<2:45:20, 18.10s/batch, batch_loss=16.9, batch

Validation:  26%|▎| 196/743 [58:19<2:41:30, 17.72s/batch, batch_loss=16.9, batch

Validation:  26%|▎| 196/743 [58:36<2:41:30, 17.72s/batch, batch_loss=8.98, batch

Validation:  27%|▎| 197/743 [58:36<2:40:20, 17.62s/batch, batch_loss=8.98, batch

Validation:  27%|▎| 197/743 [58:55<2:40:20, 17.62s/batch, batch_loss=18.2, batch

Validation:  27%|▎| 198/743 [58:55<2:43:52, 18.04s/batch, batch_loss=18.2, batch

Validation:  27%|▎| 198/743 [59:13<2:43:52, 18.04s/batch, batch_loss=17.9, batch

Validation:  27%|▎| 199/743 [59:13<2:43:13, 18.00s/batch, batch_loss=17.9, batch

Validation:  27%|▎| 199/743 [59:30<2:43:13, 18.00s/batch, batch_loss=279, batch_

Validation:  27%|▎| 200/743 [59:30<2:41:15, 17.82s/batch, batch_loss=279, batch_

Validation:  27%|▎| 200/743 [59:47<2:41:15, 17.82s/batch, batch_loss=34.8, batch

Validation:  27%|▎| 201/743 [59:47<2:38:38, 17.56s/batch, batch_loss=34.8, batch

Validation:  27%|▎| 201/743 [1:00:03<2:38:38, 17.56s/batch, batch_loss=20.6, bat

Validation:  27%|▎| 202/743 [1:00:03<2:33:53, 17.07s/batch, batch_loss=20.6, bat

Validation:  27%|▎| 202/743 [1:00:21<2:33:53, 17.07s/batch, batch_loss=14.6, bat

Validation:  27%|▎| 203/743 [1:00:21<2:35:40, 17.30s/batch, batch_loss=14.6, bat

Validation:  27%|▎| 203/743 [1:00:38<2:35:40, 17.30s/batch, batch_loss=18.7, bat

Validation:  27%|▎| 204/743 [1:00:38<2:33:40, 17.11s/batch, batch_loss=18.7, bat

Validation:  27%|▎| 204/743 [1:00:55<2:33:40, 17.11s/batch, batch_loss=20.5, bat

Validation:  28%|▎| 205/743 [1:00:55<2:34:29, 17.23s/batch, batch_loss=20.5, bat

Validation:  28%|▎| 205/743 [1:01:12<2:34:29, 17.23s/batch, batch_loss=12.5, bat

Validation:  28%|▎| 206/743 [1:01:12<2:33:09, 17.11s/batch, batch_loss=12.5, bat

Validation:  28%|▎| 206/743 [1:01:29<2:33:09, 17.11s/batch, batch_loss=18.7, bat

Validation:  28%|▎| 207/743 [1:01:29<2:32:49, 17.11s/batch, batch_loss=18.7, bat

Validation:  28%|▎| 207/743 [1:01:46<2:32:49, 17.11s/batch, batch_loss=18, batch

Validation:  28%|▎| 208/743 [1:01:46<2:33:07, 17.17s/batch, batch_loss=18, batch

Validation:  28%|▎| 208/743 [1:02:04<2:33:07, 17.17s/batch, batch_loss=7.68, bat

Validation:  28%|▎| 209/743 [1:02:04<2:34:21, 17.34s/batch, batch_loss=7.68, bat

Validation:  28%|▎| 209/743 [1:02:21<2:34:21, 17.34s/batch, batch_loss=10.2, bat

Validation:  28%|▎| 210/743 [1:02:21<2:32:42, 17.19s/batch, batch_loss=10.2, bat

Validation:  28%|▎| 210/743 [1:02:37<2:32:42, 17.19s/batch, batch_loss=13.2, bat

Validation:  28%|▎| 211/743 [1:02:37<2:30:35, 16.98s/batch, batch_loss=13.2, bat

Validation:  28%|▎| 211/743 [1:02:57<2:30:35, 16.98s/batch, batch_loss=12.8, bat

Validation:  29%|▎| 212/743 [1:02:57<2:37:17, 17.77s/batch, batch_loss=12.8, bat

Validation:  29%|▎| 212/743 [1:03:14<2:37:17, 17.77s/batch, batch_loss=540, batc

Validation:  29%|▎| 213/743 [1:03:14<2:34:48, 17.53s/batch, batch_loss=540, batc

Validation:  29%|▎| 213/743 [1:03:31<2:34:48, 17.53s/batch, batch_loss=12.8, bat

Validation:  29%|▎| 214/743 [1:03:31<2:33:58, 17.46s/batch, batch_loss=12.8, bat

Validation:  29%|▎| 214/743 [1:03:48<2:33:58, 17.46s/batch, batch_loss=13.8, bat

Validation:  29%|▎| 215/743 [1:03:48<2:32:20, 17.31s/batch, batch_loss=13.8, bat

Validation:  29%|▎| 215/743 [1:04:05<2:32:20, 17.31s/batch, batch_loss=2.57e+3, 

Validation:  29%|▎| 216/743 [1:04:05<2:29:10, 16.98s/batch, batch_loss=2.57e+3, 

Validation:  29%|▎| 216/743 [1:04:22<2:29:10, 16.98s/batch, batch_loss=19.9, bat

Validation:  29%|▎| 217/743 [1:04:22<2:29:46, 17.08s/batch, batch_loss=19.9, bat

Validation:  29%|▎| 217/743 [1:04:38<2:29:46, 17.08s/batch, batch_loss=13.1, bat

Validation:  29%|▎| 218/743 [1:04:38<2:28:08, 16.93s/batch, batch_loss=13.1, bat

Validation:  29%|▎| 218/743 [1:04:55<2:28:08, 16.93s/batch, batch_loss=25.5, bat

Validation:  29%|▎| 219/743 [1:04:55<2:27:41, 16.91s/batch, batch_loss=25.5, bat

Validation:  29%|▎| 219/743 [1:05:13<2:27:41, 16.91s/batch, batch_loss=26.7, bat

Validation:  30%|▎| 220/743 [1:05:13<2:29:31, 17.15s/batch, batch_loss=26.7, bat

Validation:  30%|▎| 220/743 [1:05:30<2:29:31, 17.15s/batch, batch_loss=17.4, bat

Validation:  30%|▎| 221/743 [1:05:30<2:29:47, 17.22s/batch, batch_loss=17.4, bat

Validation:  30%|▎| 221/743 [1:05:48<2:29:47, 17.22s/batch, batch_loss=12.6, bat

Validation:  30%|▎| 222/743 [1:05:48<2:30:08, 17.29s/batch, batch_loss=12.6, bat

Validation:  30%|▎| 222/743 [1:06:05<2:30:08, 17.29s/batch, batch_loss=11.3, bat

Validation:  30%|▎| 223/743 [1:06:05<2:30:25, 17.36s/batch, batch_loss=11.3, bat

Validation:  30%|▎| 223/743 [1:06:23<2:30:25, 17.36s/batch, batch_loss=9.74, bat

Validation:  30%|▎| 224/743 [1:06:23<2:31:27, 17.51s/batch, batch_loss=9.74, bat

Validation:  30%|▎| 224/743 [1:06:41<2:31:27, 17.51s/batch, batch_loss=4.93e+3, 

Validation:  30%|▎| 225/743 [1:06:41<2:32:23, 17.65s/batch, batch_loss=4.93e+3, 

Validation:  30%|▎| 225/743 [1:07:00<2:32:23, 17.65s/batch, batch_loss=16.6, bat

Validation:  30%|▎| 226/743 [1:07:00<2:35:19, 18.03s/batch, batch_loss=16.6, bat

Validation:  30%|▎| 226/743 [1:07:18<2:35:19, 18.03s/batch, batch_loss=15.9, bat

Validation:  31%|▎| 227/743 [1:07:18<2:34:53, 18.01s/batch, batch_loss=15.9, bat

Validation:  31%|▎| 227/743 [1:07:35<2:34:53, 18.01s/batch, batch_loss=16.8, bat

Validation:  31%|▎| 228/743 [1:07:35<2:32:51, 17.81s/batch, batch_loss=16.8, bat

Validation:  31%|▎| 228/743 [1:07:53<2:32:51, 17.81s/batch, batch_loss=18.9, bat

Validation:  31%|▎| 229/743 [1:07:53<2:32:51, 17.84s/batch, batch_loss=18.9, bat

Validation:  31%|▎| 229/743 [1:08:10<2:32:51, 17.84s/batch, batch_loss=20.2, bat

Validation:  31%|▎| 230/743 [1:08:10<2:30:07, 17.56s/batch, batch_loss=20.2, bat

Validation:  31%|▎| 230/743 [1:08:31<2:30:07, 17.56s/batch, batch_loss=3.23e+4, 

Validation:  31%|▎| 231/743 [1:08:31<2:38:45, 18.61s/batch, batch_loss=3.23e+4, 

Validation:  31%|▎| 231/743 [1:08:50<2:38:45, 18.61s/batch, batch_loss=17.5, bat

Validation:  31%|▎| 232/743 [1:08:50<2:37:45, 18.52s/batch, batch_loss=17.5, bat

Validation:  31%|▎| 232/743 [1:09:08<2:37:45, 18.52s/batch, batch_loss=10.9, bat

Validation:  31%|▎| 233/743 [1:09:08<2:36:50, 18.45s/batch, batch_loss=10.9, bat

Validation:  31%|▎| 233/743 [1:09:26<2:36:50, 18.45s/batch, batch_loss=14.4, bat

Validation:  31%|▎| 234/743 [1:09:26<2:36:50, 18.49s/batch, batch_loss=14.4, bat

Validation:  31%|▎| 234/743 [1:09:44<2:36:50, 18.49s/batch, batch_loss=16.2, bat

Validation:  32%|▎| 235/743 [1:09:44<2:33:50, 18.17s/batch, batch_loss=16.2, bat

Validation:  32%|▎| 235/743 [1:10:00<2:33:50, 18.17s/batch, batch_loss=2.73, bat

Validation:  32%|▎| 236/743 [1:10:00<2:29:17, 17.67s/batch, batch_loss=2.73, bat

Validation:  32%|▎| 236/743 [1:10:18<2:29:17, 17.67s/batch, batch_loss=19.1, bat

Validation:  32%|▎| 237/743 [1:10:18<2:29:19, 17.71s/batch, batch_loss=19.1, bat

Validation:  32%|▎| 237/743 [1:10:35<2:29:19, 17.71s/batch, batch_loss=13.8, bat

Validation:  32%|▎| 238/743 [1:10:35<2:26:12, 17.37s/batch, batch_loss=13.8, bat

Validation:  32%|▎| 238/743 [1:10:54<2:26:12, 17.37s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:10:54<2:29:55, 17.85s/batch, batch_loss=4.5e+3, b

Validation:  32%|▎| 239/743 [1:11:11<2:29:55, 17.85s/batch, batch_loss=19.8, bat

Validation:  32%|▎| 240/743 [1:11:11<2:29:12, 17.80s/batch, batch_loss=19.8, bat

Validation:  32%|▎| 240/743 [1:11:30<2:29:12, 17.80s/batch, batch_loss=16.7, bat

Validation:  32%|▎| 241/743 [1:11:30<2:31:24, 18.10s/batch, batch_loss=16.7, bat

Validation:  32%|▎| 241/743 [1:11:47<2:31:24, 18.10s/batch, batch_loss=232, batc

Validation:  33%|▎| 242/743 [1:11:47<2:28:51, 17.83s/batch, batch_loss=232, batc

Validation:  33%|▎| 242/743 [1:12:06<2:28:51, 17.83s/batch, batch_loss=10.2, bat

Validation:  33%|▎| 243/743 [1:12:06<2:29:44, 17.97s/batch, batch_loss=10.2, bat

Validation:  33%|▎| 243/743 [1:12:22<2:29:44, 17.97s/batch, batch_loss=14.7, bat

Validation:  33%|▎| 244/743 [1:12:22<2:26:28, 17.61s/batch, batch_loss=14.7, bat

Validation:  33%|▎| 244/743 [1:12:40<2:26:28, 17.61s/batch, batch_loss=22.4, bat

Validation:  33%|▎| 245/743 [1:12:40<2:25:14, 17.50s/batch, batch_loss=22.4, bat

Validation:  33%|▎| 245/743 [1:12:57<2:25:14, 17.50s/batch, batch_loss=6.58, bat

Validation:  33%|▎| 246/743 [1:12:57<2:25:02, 17.51s/batch, batch_loss=6.58, bat

Validation:  33%|▎| 246/743 [1:13:18<2:25:02, 17.51s/batch, batch_loss=14.4, bat

Validation:  33%|▎| 247/743 [1:13:18<2:31:54, 18.38s/batch, batch_loss=14.4, bat

Validation:  33%|▎| 247/743 [1:13:35<2:31:54, 18.38s/batch, batch_loss=39.4, bat

Validation:  33%|▎| 248/743 [1:13:35<2:28:44, 18.03s/batch, batch_loss=39.4, bat

Validation:  33%|▎| 248/743 [1:13:53<2:28:44, 18.03s/batch, batch_loss=12.6, bat

Validation:  34%|▎| 249/743 [1:13:53<2:28:02, 17.98s/batch, batch_loss=12.6, bat

Validation:  34%|▎| 249/743 [1:14:11<2:28:02, 17.98s/batch, batch_loss=19.5, bat

Validation:  34%|▎| 250/743 [1:14:11<2:28:18, 18.05s/batch, batch_loss=19.5, bat

Validation:  34%|▎| 250/743 [1:14:31<2:28:18, 18.05s/batch, batch_loss=19.1, bat

Validation:  34%|▎| 251/743 [1:14:31<2:32:45, 18.63s/batch, batch_loss=19.1, bat

Validation:  34%|▎| 251/743 [1:14:50<2:32:45, 18.63s/batch, batch_loss=21.4, bat

Validation:  34%|▎| 252/743 [1:14:50<2:32:24, 18.62s/batch, batch_loss=21.4, bat

Validation:  34%|▎| 252/743 [1:15:08<2:32:24, 18.62s/batch, batch_loss=18.6, bat

Validation:  34%|▎| 253/743 [1:15:08<2:31:31, 18.55s/batch, batch_loss=18.6, bat

Validation:  34%|▎| 253/743 [1:15:25<2:31:31, 18.55s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:15:25<2:27:07, 18.05s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:15:44<2:27:07, 18.05s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:15:44<2:29:27, 18.38s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:16:02<2:29:27, 18.38s/batch, batch_loss=20.2, bat

Validation:  34%|▎| 256/743 [1:16:02<2:28:29, 18.30s/batch, batch_loss=20.2, bat

Validation:  34%|▎| 256/743 [1:16:19<2:28:29, 18.30s/batch, batch_loss=21.1, bat

Validation:  35%|▎| 257/743 [1:16:19<2:26:00, 18.03s/batch, batch_loss=21.1, bat

Validation:  35%|▎| 257/743 [1:16:36<2:26:00, 18.03s/batch, batch_loss=13.4, bat

Validation:  35%|▎| 258/743 [1:16:36<2:23:05, 17.70s/batch, batch_loss=13.4, bat

Validation:  35%|▎| 258/743 [1:16:53<2:23:05, 17.70s/batch, batch_loss=3.37, bat

Validation:  35%|▎| 259/743 [1:16:53<2:21:07, 17.50s/batch, batch_loss=3.37, bat

Validation:  35%|▎| 259/743 [1:17:10<2:21:07, 17.50s/batch, batch_loss=1.6, batc

Validation:  35%|▎| 260/743 [1:17:10<2:19:26, 17.32s/batch, batch_loss=1.6, batc

Validation:  35%|▎| 260/743 [1:17:28<2:19:26, 17.32s/batch, batch_loss=7.27, bat

Validation:  35%|▎| 261/743 [1:17:28<2:19:38, 17.38s/batch, batch_loss=7.27, bat

Validation:  35%|▎| 261/743 [1:17:46<2:19:38, 17.38s/batch, batch_loss=27.6, bat

Validation:  35%|▎| 262/743 [1:17:46<2:20:15, 17.50s/batch, batch_loss=27.6, bat

Validation:  35%|▎| 262/743 [1:18:04<2:20:15, 17.50s/batch, batch_loss=2.72e+3, 

Validation:  35%|▎| 263/743 [1:18:04<2:21:04, 17.63s/batch, batch_loss=2.72e+3, 

Validation:  35%|▎| 263/743 [1:18:21<2:21:04, 17.63s/batch, batch_loss=9.95, bat

Validation:  36%|▎| 264/743 [1:18:21<2:21:17, 17.70s/batch, batch_loss=9.95, bat

Validation:  36%|▎| 264/743 [1:18:39<2:21:17, 17.70s/batch, batch_loss=20.7, bat

Validation:  36%|▎| 265/743 [1:18:39<2:19:50, 17.55s/batch, batch_loss=20.7, bat

Validation:  36%|▎| 265/743 [1:18:56<2:19:50, 17.55s/batch, batch_loss=24.2, bat

Validation:  36%|▎| 266/743 [1:18:56<2:19:42, 17.57s/batch, batch_loss=24.2, bat

Validation:  36%|▎| 266/743 [1:19:12<2:19:42, 17.57s/batch, batch_loss=21.8, bat

Validation:  36%|▎| 267/743 [1:19:12<2:15:11, 17.04s/batch, batch_loss=21.8, bat

Validation:  36%|▎| 267/743 [1:19:30<2:15:11, 17.04s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:19:30<2:18:08, 17.45s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:19:48<2:18:08, 17.45s/batch, batch_loss=34.2, bat

Validation:  36%|▎| 269/743 [1:19:48<2:17:52, 17.45s/batch, batch_loss=34.2, bat

Validation:  36%|▎| 269/743 [1:20:05<2:17:52, 17.45s/batch, batch_loss=31.1, bat

Validation:  36%|▎| 270/743 [1:20:05<2:17:04, 17.39s/batch, batch_loss=31.1, bat

Validation:  36%|▎| 270/743 [1:20:23<2:17:04, 17.39s/batch, batch_loss=26.8, bat

Validation:  36%|▎| 271/743 [1:20:23<2:18:25, 17.60s/batch, batch_loss=26.8, bat

Validation:  36%|▎| 271/743 [1:20:39<2:18:25, 17.60s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:20:39<2:14:47, 17.17s/batch, batch_loss=1.05e+3, 

Validation:  37%|▎| 272/743 [1:20:56<2:14:47, 17.17s/batch, batch_loss=17.5, bat

Validation:  37%|▎| 273/743 [1:20:56<2:13:34, 17.05s/batch, batch_loss=17.5, bat

Validation:  37%|▎| 273/743 [1:21:15<2:13:34, 17.05s/batch, batch_loss=22.3, bat

Validation:  37%|▎| 274/743 [1:21:15<2:17:27, 17.59s/batch, batch_loss=22.3, bat

Validation:  37%|▎| 274/743 [1:21:33<2:17:27, 17.59s/batch, batch_loss=18.8, bat

Validation:  37%|▎| 275/743 [1:21:33<2:18:53, 17.81s/batch, batch_loss=18.8, bat

Validation:  37%|▎| 275/743 [1:21:48<2:18:53, 17.81s/batch, batch_loss=13.9, bat

Validation:  37%|▎| 276/743 [1:21:48<2:11:47, 16.93s/batch, batch_loss=13.9, bat

Validation:  37%|▎| 276/743 [1:22:05<2:11:47, 16.93s/batch, batch_loss=24.3, bat

Validation:  37%|▎| 277/743 [1:22:05<2:10:07, 16.75s/batch, batch_loss=24.3, bat

Validation:  37%|▎| 277/743 [1:22:21<2:10:07, 16.75s/batch, batch_loss=18.4, bat

Validation:  37%|▎| 278/743 [1:22:21<2:09:51, 16.76s/batch, batch_loss=18.4, bat

Validation:  37%|▎| 278/743 [1:22:39<2:09:51, 16.76s/batch, batch_loss=8.6, batc

Validation:  38%|▍| 279/743 [1:22:39<2:11:51, 17.05s/batch, batch_loss=8.6, batc

Validation:  38%|▍| 279/743 [1:22:59<2:11:51, 17.05s/batch, batch_loss=13.9, bat

Validation:  38%|▍| 280/743 [1:22:59<2:18:17, 17.92s/batch, batch_loss=13.9, bat

Validation:  38%|▍| 280/743 [1:23:16<2:18:17, 17.92s/batch, batch_loss=18.4, bat

Validation:  38%|▍| 281/743 [1:23:16<2:16:06, 17.68s/batch, batch_loss=18.4, bat

Validation:  38%|▍| 281/743 [1:23:33<2:16:06, 17.68s/batch, batch_loss=23.3, bat

Validation:  38%|▍| 282/743 [1:23:33<2:14:06, 17.45s/batch, batch_loss=23.3, bat

Validation:  38%|▍| 282/743 [1:23:49<2:14:06, 17.45s/batch, batch_loss=16.4, bat

Validation:  38%|▍| 283/743 [1:23:49<2:10:33, 17.03s/batch, batch_loss=16.4, bat

Validation:  38%|▍| 283/743 [1:24:07<2:10:33, 17.03s/batch, batch_loss=15, batch

Validation:  38%|▍| 284/743 [1:24:07<2:13:07, 17.40s/batch, batch_loss=15, batch

Validation:  38%|▍| 284/743 [1:24:27<2:13:07, 17.40s/batch, batch_loss=14.1, bat

Validation:  38%|▍| 285/743 [1:24:27<2:18:00, 18.08s/batch, batch_loss=14.1, bat

Validation:  38%|▍| 285/743 [1:24:44<2:18:00, 18.08s/batch, batch_loss=16.6, bat

Validation:  38%|▍| 286/743 [1:24:44<2:16:09, 17.88s/batch, batch_loss=16.6, bat

Validation:  38%|▍| 286/743 [1:25:00<2:16:09, 17.88s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:25:00<2:11:22, 17.29s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:25:17<2:11:22, 17.29s/batch, batch_loss=20.9, bat

Validation:  39%|▍| 288/743 [1:25:17<2:10:04, 17.15s/batch, batch_loss=20.9, bat

Validation:  39%|▍| 288/743 [1:25:35<2:10:04, 17.15s/batch, batch_loss=20.5, bat

Validation:  39%|▍| 289/743 [1:25:35<2:12:24, 17.50s/batch, batch_loss=20.5, bat

Validation:  39%|▍| 289/743 [1:25:53<2:12:24, 17.50s/batch, batch_loss=483, batc

Validation:  39%|▍| 290/743 [1:25:53<2:12:39, 17.57s/batch, batch_loss=483, batc

Validation:  39%|▍| 290/743 [1:26:11<2:12:39, 17.57s/batch, batch_loss=1.52e+3, 

Validation:  39%|▍| 291/743 [1:26:11<2:12:26, 17.58s/batch, batch_loss=1.52e+3, 

Validation:  39%|▍| 291/743 [1:26:28<2:12:26, 17.58s/batch, batch_loss=1.2e+3, b

Validation:  39%|▍| 292/743 [1:26:28<2:11:52, 17.54s/batch, batch_loss=1.2e+3, b

Validation:  39%|▍| 292/743 [1:26:48<2:11:52, 17.54s/batch, batch_loss=26.7, bat

Validation:  39%|▍| 293/743 [1:26:48<2:15:53, 18.12s/batch, batch_loss=26.7, bat

Validation:  39%|▍| 293/743 [1:27:05<2:15:53, 18.12s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:27:05<2:12:55, 17.76s/batch, batch_loss=1.1e+3, b

Validation:  40%|▍| 294/743 [1:27:23<2:12:55, 17.76s/batch, batch_loss=18, batch

Validation:  40%|▍| 295/743 [1:27:23<2:12:50, 17.79s/batch, batch_loss=18, batch

Validation:  40%|▍| 295/743 [1:27:40<2:12:50, 17.79s/batch, batch_loss=16.1, bat

Validation:  40%|▍| 296/743 [1:27:40<2:11:00, 17.59s/batch, batch_loss=16.1, bat

Validation:  40%|▍| 296/743 [1:27:57<2:11:00, 17.59s/batch, batch_loss=10.8, bat

Validation:  40%|▍| 297/743 [1:27:57<2:10:13, 17.52s/batch, batch_loss=10.8, bat

Validation:  40%|▍| 297/743 [1:28:15<2:10:13, 17.52s/batch, batch_loss=22.6, bat

Validation:  40%|▍| 298/743 [1:28:15<2:10:22, 17.58s/batch, batch_loss=22.6, bat

Validation:  40%|▍| 298/743 [1:28:35<2:10:22, 17.58s/batch, batch_loss=31.1, bat

Validation:  40%|▍| 299/743 [1:28:35<2:14:58, 18.24s/batch, batch_loss=31.1, bat

Validation:  40%|▍| 299/743 [1:28:51<2:14:58, 18.24s/batch, batch_loss=34.8, bat

Validation:  40%|▍| 300/743 [1:28:51<2:11:01, 17.75s/batch, batch_loss=34.8, bat

Validation:  40%|▍| 300/743 [1:29:08<2:11:01, 17.75s/batch, batch_loss=835, batc

Validation:  41%|▍| 301/743 [1:29:08<2:09:15, 17.55s/batch, batch_loss=835, batc

Validation:  41%|▍| 301/743 [1:29:26<2:09:15, 17.55s/batch, batch_loss=12, batch

Validation:  41%|▍| 302/743 [1:29:26<2:09:50, 17.67s/batch, batch_loss=12, batch

Validation:  41%|▍| 302/743 [1:29:44<2:09:50, 17.67s/batch, batch_loss=14.6, bat

Validation:  41%|▍| 303/743 [1:29:44<2:10:16, 17.76s/batch, batch_loss=14.6, bat

Validation:  41%|▍| 303/743 [1:30:02<2:10:16, 17.76s/batch, batch_loss=16, batch

Validation:  41%|▍| 304/743 [1:30:02<2:09:11, 17.66s/batch, batch_loss=16, batch

Validation:  41%|▍| 304/743 [1:30:18<2:09:11, 17.66s/batch, batch_loss=12, batch

Validation:  41%|▍| 305/743 [1:30:18<2:06:19, 17.31s/batch, batch_loss=12, batch

Validation:  41%|▍| 305/743 [1:30:36<2:06:19, 17.31s/batch, batch_loss=19.2, bat

Validation:  41%|▍| 306/743 [1:30:36<2:06:57, 17.43s/batch, batch_loss=19.2, bat

Validation:  41%|▍| 306/743 [1:30:53<2:06:57, 17.43s/batch, batch_loss=20.9, bat

Validation:  41%|▍| 307/743 [1:30:53<2:06:14, 17.37s/batch, batch_loss=20.9, bat

Validation:  41%|▍| 307/743 [1:31:10<2:06:14, 17.37s/batch, batch_loss=881, batc

Validation:  41%|▍| 308/743 [1:31:10<2:05:36, 17.33s/batch, batch_loss=881, batc

Validation:  41%|▍| 308/743 [1:31:31<2:05:36, 17.33s/batch, batch_loss=24.8, bat

Validation:  42%|▍| 309/743 [1:31:31<2:12:34, 18.33s/batch, batch_loss=24.8, bat

Validation:  42%|▍| 309/743 [1:31:48<2:12:34, 18.33s/batch, batch_loss=17.2, bat

Validation:  42%|▍| 310/743 [1:31:48<2:10:35, 18.10s/batch, batch_loss=17.2, bat

Validation:  42%|▍| 310/743 [1:32:07<2:10:35, 18.10s/batch, batch_loss=17.5, bat

Validation:  42%|▍| 311/743 [1:32:07<2:11:29, 18.26s/batch, batch_loss=17.5, bat

Validation:  42%|▍| 311/743 [1:32:24<2:11:29, 18.26s/batch, batch_loss=16.3, bat

Validation:  42%|▍| 312/743 [1:32:24<2:09:11, 17.98s/batch, batch_loss=16.3, bat

Validation:  42%|▍| 312/743 [1:32:41<2:09:11, 17.98s/batch, batch_loss=7.01, bat

Validation:  42%|▍| 313/743 [1:32:41<2:04:56, 17.43s/batch, batch_loss=7.01, bat

Validation:  42%|▍| 313/743 [1:32:58<2:04:56, 17.43s/batch, batch_loss=11.5, bat

Validation:  42%|▍| 314/743 [1:32:58<2:04:10, 17.37s/batch, batch_loss=11.5, bat

Validation:  42%|▍| 314/743 [1:33:14<2:04:10, 17.37s/batch, batch_loss=20.4, bat

Validation:  42%|▍| 315/743 [1:33:14<2:02:08, 17.12s/batch, batch_loss=20.4, bat

Validation:  42%|▍| 315/743 [1:33:30<2:02:08, 17.12s/batch, batch_loss=19.6, bat

Validation:  43%|▍| 316/743 [1:33:30<1:59:46, 16.83s/batch, batch_loss=19.6, bat

Validation:  43%|▍| 316/743 [1:33:48<1:59:46, 16.83s/batch, batch_loss=19.7, bat

Validation:  43%|▍| 317/743 [1:33:48<2:00:43, 17.00s/batch, batch_loss=19.7, bat

Validation:  43%|▍| 317/743 [1:34:04<2:00:43, 17.00s/batch, batch_loss=14.4, bat

Validation:  43%|▍| 318/743 [1:34:04<1:59:05, 16.81s/batch, batch_loss=14.4, bat

Validation:  43%|▍| 318/743 [1:34:21<1:59:05, 16.81s/batch, batch_loss=19.7, bat

Validation:  43%|▍| 319/743 [1:34:21<1:58:24, 16.76s/batch, batch_loss=19.7, bat

Validation:  43%|▍| 319/743 [1:34:40<1:58:24, 16.76s/batch, batch_loss=18.9, bat

Validation:  43%|▍| 320/743 [1:34:40<2:04:00, 17.59s/batch, batch_loss=18.9, bat

Validation:  43%|▍| 320/743 [1:34:56<2:04:00, 17.59s/batch, batch_loss=17.7, bat

Validation:  43%|▍| 321/743 [1:34:56<1:58:49, 16.90s/batch, batch_loss=17.7, bat

Validation:  43%|▍| 321/743 [1:35:11<1:58:49, 16.90s/batch, batch_loss=15.8, bat

Validation:  43%|▍| 322/743 [1:35:11<1:55:56, 16.52s/batch, batch_loss=15.8, bat

Validation:  43%|▍| 322/743 [1:35:28<1:55:56, 16.52s/batch, batch_loss=18, batch

Validation:  43%|▍| 323/743 [1:35:28<1:55:06, 16.44s/batch, batch_loss=18, batch

Validation:  43%|▍| 323/743 [1:35:43<1:55:06, 16.44s/batch, batch_loss=297, batc

Validation:  44%|▍| 324/743 [1:35:43<1:53:03, 16.19s/batch, batch_loss=297, batc

Validation:  44%|▍| 324/743 [1:36:00<1:53:03, 16.19s/batch, batch_loss=18.4, bat

Validation:  44%|▍| 325/743 [1:36:00<1:53:24, 16.28s/batch, batch_loss=18.4, bat

Validation:  44%|▍| 325/743 [1:36:16<1:53:24, 16.28s/batch, batch_loss=17.6, bat

Validation:  44%|▍| 326/743 [1:36:16<1:53:54, 16.39s/batch, batch_loss=17.6, bat

Validation:  44%|▍| 326/743 [1:36:36<1:53:54, 16.39s/batch, batch_loss=18.2, bat

Validation:  44%|▍| 327/743 [1:36:36<2:01:03, 17.46s/batch, batch_loss=18.2, bat

Validation:  44%|▍| 327/743 [1:36:53<2:01:03, 17.46s/batch, batch_loss=16.7, bat

Validation:  44%|▍| 328/743 [1:36:53<1:58:27, 17.13s/batch, batch_loss=16.7, bat

Validation:  44%|▍| 328/743 [1:37:10<1:58:27, 17.13s/batch, batch_loss=6.65, bat

Validation:  44%|▍| 329/743 [1:37:10<1:57:42, 17.06s/batch, batch_loss=6.65, bat

Validation:  44%|▍| 329/743 [1:37:26<1:57:42, 17.06s/batch, batch_loss=14.1, bat

Validation:  44%|▍| 330/743 [1:37:26<1:56:45, 16.96s/batch, batch_loss=14.1, bat

Validation:  44%|▍| 330/743 [1:37:44<1:56:45, 16.96s/batch, batch_loss=21.9, bat

Validation:  45%|▍| 331/743 [1:37:44<1:58:23, 17.24s/batch, batch_loss=21.9, bat

Validation:  45%|▍| 331/743 [1:38:02<1:58:23, 17.24s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:38:02<1:58:46, 17.34s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:38:19<1:58:46, 17.34s/batch, batch_loss=31.1, bat

Validation:  45%|▍| 333/743 [1:38:19<1:57:43, 17.23s/batch, batch_loss=31.1, bat

Validation:  45%|▍| 333/743 [1:38:36<1:57:43, 17.23s/batch, batch_loss=24.4, bat

Validation:  45%|▍| 334/743 [1:38:36<1:57:06, 17.18s/batch, batch_loss=24.4, bat

Validation:  45%|▍| 334/743 [1:38:52<1:57:06, 17.18s/batch, batch_loss=33, batch

Validation:  45%|▍| 335/743 [1:38:52<1:54:32, 16.84s/batch, batch_loss=33, batch

Validation:  45%|▍| 335/743 [1:39:09<1:54:32, 16.84s/batch, batch_loss=12.3, bat

Validation:  45%|▍| 336/743 [1:39:09<1:54:33, 16.89s/batch, batch_loss=12.3, bat

Validation:  45%|▍| 336/743 [1:39:26<1:54:33, 16.89s/batch, batch_loss=22.6, bat

Validation:  45%|▍| 337/743 [1:39:26<1:55:00, 17.00s/batch, batch_loss=22.6, bat

Validation:  45%|▍| 337/743 [1:39:45<1:55:00, 17.00s/batch, batch_loss=37.1, bat

Validation:  45%|▍| 338/743 [1:39:45<1:58:45, 17.59s/batch, batch_loss=37.1, bat

Validation:  45%|▍| 338/743 [1:40:02<1:58:45, 17.59s/batch, batch_loss=33.7, bat

Validation:  46%|▍| 339/743 [1:40:02<1:57:40, 17.48s/batch, batch_loss=33.7, bat

Validation:  46%|▍| 339/743 [1:40:19<1:57:40, 17.48s/batch, batch_loss=31.9, bat

Validation:  46%|▍| 340/743 [1:40:19<1:56:42, 17.38s/batch, batch_loss=31.9, bat

Validation:  46%|▍| 340/743 [1:40:37<1:56:42, 17.38s/batch, batch_loss=15, batch

Validation:  46%|▍| 341/743 [1:40:37<1:57:32, 17.54s/batch, batch_loss=15, batch

Validation:  46%|▍| 341/743 [1:40:55<1:57:32, 17.54s/batch, batch_loss=23.1, bat

Validation:  46%|▍| 342/743 [1:40:55<1:57:07, 17.53s/batch, batch_loss=23.1, bat

Validation:  46%|▍| 342/743 [1:41:13<1:57:07, 17.53s/batch, batch_loss=21.5, bat

Validation:  46%|▍| 343/743 [1:41:13<1:57:33, 17.63s/batch, batch_loss=21.5, bat

Validation:  46%|▍| 343/743 [1:41:35<1:57:33, 17.63s/batch, batch_loss=24.2, bat

Validation:  46%|▍| 344/743 [1:41:35<2:05:41, 18.90s/batch, batch_loss=24.2, bat

Validation:  46%|▍| 344/743 [1:41:51<2:05:41, 18.90s/batch, batch_loss=18.9, bat

Validation:  46%|▍| 345/743 [1:41:51<2:00:46, 18.21s/batch, batch_loss=18.9, bat

Validation:  46%|▍| 345/743 [1:42:08<2:00:46, 18.21s/batch, batch_loss=30.9, bat

Validation:  47%|▍| 346/743 [1:42:08<1:56:53, 17.67s/batch, batch_loss=30.9, bat

Validation:  47%|▍| 346/743 [1:42:26<1:56:53, 17.67s/batch, batch_loss=21.7, bat

Validation:  47%|▍| 347/743 [1:42:26<1:57:46, 17.85s/batch, batch_loss=21.7, bat

Validation:  47%|▍| 347/743 [1:42:45<1:57:46, 17.85s/batch, batch_loss=28.3, bat

Validation:  47%|▍| 348/743 [1:42:45<2:00:55, 18.37s/batch, batch_loss=28.3, bat

Validation:  47%|▍| 348/743 [1:43:05<2:00:55, 18.37s/batch, batch_loss=21.8, bat

Validation:  47%|▍| 349/743 [1:43:05<2:02:07, 18.60s/batch, batch_loss=21.8, bat

Validation:  47%|▍| 349/743 [1:43:25<2:02:07, 18.60s/batch, batch_loss=20.1, bat

Validation:  47%|▍| 350/743 [1:43:25<2:05:40, 19.19s/batch, batch_loss=20.1, bat

Validation:  47%|▍| 350/743 [1:43:43<2:05:40, 19.19s/batch, batch_loss=13110.0, 

Validation:  47%|▍| 351/743 [1:43:43<2:03:36, 18.92s/batch, batch_loss=13110.0, 

Validation:  47%|▍| 351/743 [1:44:00<2:03:36, 18.92s/batch, batch_loss=30.7, bat

Validation:  47%|▍| 352/743 [1:44:00<1:59:21, 18.32s/batch, batch_loss=30.7, bat

Validation:  47%|▍| 352/743 [1:44:17<1:59:21, 18.32s/batch, batch_loss=17.1, bat

Validation:  48%|▍| 353/743 [1:44:17<1:56:35, 17.94s/batch, batch_loss=17.1, bat

Validation:  48%|▍| 353/743 [1:44:36<1:56:35, 17.94s/batch, batch_loss=22.1, bat

Validation:  48%|▍| 354/743 [1:44:36<1:57:03, 18.06s/batch, batch_loss=22.1, bat

Validation:  48%|▍| 354/743 [1:44:56<1:57:03, 18.06s/batch, batch_loss=26.8, bat

Validation:  48%|▍| 355/743 [1:44:56<2:00:56, 18.70s/batch, batch_loss=26.8, bat

Validation:  48%|▍| 355/743 [1:45:14<2:00:56, 18.70s/batch, batch_loss=37.9, bat

Validation:  48%|▍| 356/743 [1:45:14<1:59:09, 18.47s/batch, batch_loss=37.9, bat

Validation:  48%|▍| 356/743 [1:45:32<1:59:09, 18.47s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:45:32<1:57:29, 18.26s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:45:49<1:57:29, 18.26s/batch, batch_loss=15, batch

Validation:  48%|▍| 358/743 [1:45:49<1:55:44, 18.04s/batch, batch_loss=15, batch

Validation:  48%|▍| 358/743 [1:46:07<1:55:44, 18.04s/batch, batch_loss=11.9, bat

Validation:  48%|▍| 359/743 [1:46:07<1:54:24, 17.88s/batch, batch_loss=11.9, bat

Validation:  48%|▍| 359/743 [1:46:23<1:54:24, 17.88s/batch, batch_loss=22.1, bat

Validation:  48%|▍| 360/743 [1:46:23<1:52:06, 17.56s/batch, batch_loss=22.1, bat

Validation:  48%|▍| 360/743 [1:46:41<1:52:06, 17.56s/batch, batch_loss=15.1, bat

Validation:  49%|▍| 361/743 [1:46:41<1:51:39, 17.54s/batch, batch_loss=15.1, bat

Validation:  49%|▍| 361/743 [1:47:02<1:51:39, 17.54s/batch, batch_loss=24.7, bat

Validation:  49%|▍| 362/743 [1:47:02<1:57:58, 18.58s/batch, batch_loss=24.7, bat

Validation:  49%|▍| 362/743 [1:47:18<1:57:58, 18.58s/batch, batch_loss=25.3, bat

Validation:  49%|▍| 363/743 [1:47:18<1:53:44, 17.96s/batch, batch_loss=25.3, bat

Validation:  49%|▍| 363/743 [1:47:34<1:53:44, 17.96s/batch, batch_loss=24, batch

Validation:  49%|▍| 364/743 [1:47:34<1:49:12, 17.29s/batch, batch_loss=24, batch

Validation:  49%|▍| 364/743 [1:47:51<1:49:12, 17.29s/batch, batch_loss=19.2, bat

Validation:  49%|▍| 365/743 [1:47:51<1:48:29, 17.22s/batch, batch_loss=19.2, bat

Validation:  49%|▍| 365/743 [1:48:08<1:48:29, 17.22s/batch, batch_loss=13.8, bat

Validation:  49%|▍| 366/743 [1:48:08<1:47:30, 17.11s/batch, batch_loss=13.8, bat

Validation:  49%|▍| 366/743 [1:48:26<1:47:30, 17.11s/batch, batch_loss=17, batch

Validation:  49%|▍| 367/743 [1:48:26<1:47:52, 17.21s/batch, batch_loss=17, batch

Validation:  49%|▍| 367/743 [1:48:43<1:47:52, 17.21s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:48:43<1:47:26, 17.19s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:49:01<1:47:26, 17.19s/batch, batch_loss=17.9, bat

Validation:  50%|▍| 369/743 [1:49:01<1:49:52, 17.63s/batch, batch_loss=17.9, bat

Validation:  50%|▍| 369/743 [1:49:18<1:49:52, 17.63s/batch, batch_loss=26, batch

Validation:  50%|▍| 370/743 [1:49:18<1:48:42, 17.49s/batch, batch_loss=26, batch

Validation:  50%|▍| 370/743 [1:49:36<1:48:42, 17.49s/batch, batch_loss=20.9, bat

Validation:  50%|▍| 371/743 [1:49:36<1:48:12, 17.45s/batch, batch_loss=20.9, bat

Validation:  50%|▍| 371/743 [1:49:53<1:48:12, 17.45s/batch, batch_loss=19.5, bat

Validation:  50%|▌| 372/743 [1:49:53<1:47:30, 17.39s/batch, batch_loss=19.5, bat

Validation:  50%|▌| 372/743 [1:50:08<1:47:30, 17.39s/batch, batch_loss=25.1, bat

Validation:  50%|▌| 373/743 [1:50:08<1:43:04, 16.72s/batch, batch_loss=25.1, bat

Validation:  50%|▌| 373/743 [1:50:30<1:43:04, 16.72s/batch, batch_loss=16.4, bat

Validation:  50%|▌| 374/743 [1:50:30<1:51:38, 18.15s/batch, batch_loss=16.4, bat

Validation:  50%|▌| 374/743 [1:50:49<1:51:38, 18.15s/batch, batch_loss=8.31, bat

Validation:  50%|▌| 375/743 [1:50:49<1:52:26, 18.33s/batch, batch_loss=8.31, bat

Validation:  50%|▌| 375/743 [1:51:05<1:52:26, 18.33s/batch, batch_loss=29.9, bat

Validation:  51%|▌| 376/743 [1:51:05<1:48:25, 17.73s/batch, batch_loss=29.9, bat

Validation:  51%|▌| 376/743 [1:51:21<1:48:25, 17.73s/batch, batch_loss=11.8, bat

Validation:  51%|▌| 377/743 [1:51:21<1:44:54, 17.20s/batch, batch_loss=11.8, bat

Validation:  51%|▌| 377/743 [1:51:37<1:44:54, 17.20s/batch, batch_loss=21.1, bat

Validation:  51%|▌| 378/743 [1:51:37<1:42:04, 16.78s/batch, batch_loss=21.1, bat

Validation:  51%|▌| 378/743 [1:51:52<1:42:04, 16.78s/batch, batch_loss=7.64, bat

Validation:  51%|▌| 379/743 [1:51:52<1:39:44, 16.44s/batch, batch_loss=7.64, bat

Validation:  51%|▌| 379/743 [1:52:10<1:39:44, 16.44s/batch, batch_loss=7.3, batc

Validation:  51%|▌| 380/743 [1:52:10<1:41:48, 16.83s/batch, batch_loss=7.3, batc

Validation:  51%|▌| 380/743 [1:52:29<1:41:48, 16.83s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:52:29<1:45:06, 17.42s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:52:48<1:45:06, 17.42s/batch, batch_loss=915, batc

Validation:  51%|▌| 382/743 [1:52:48<1:47:15, 17.83s/batch, batch_loss=915, batc

Validation:  51%|▌| 382/743 [1:53:04<1:47:15, 17.83s/batch, batch_loss=210, batc

Validation:  52%|▌| 383/743 [1:53:04<1:45:19, 17.56s/batch, batch_loss=210, batc

Validation:  52%|▌| 383/743 [1:53:21<1:45:19, 17.56s/batch, batch_loss=280, batc

Validation:  52%|▌| 384/743 [1:53:21<1:43:45, 17.34s/batch, batch_loss=280, batc

Validation:  52%|▌| 384/743 [1:53:39<1:43:45, 17.34s/batch, batch_loss=19.5, bat

Validation:  52%|▌| 385/743 [1:53:39<1:43:31, 17.35s/batch, batch_loss=19.5, bat

Validation:  52%|▌| 385/743 [1:53:55<1:43:31, 17.35s/batch, batch_loss=11.4, bat

Validation:  52%|▌| 386/743 [1:53:55<1:41:00, 16.97s/batch, batch_loss=11.4, bat

Validation:  52%|▌| 386/743 [1:54:12<1:41:00, 16.97s/batch, batch_loss=7.62, bat

Validation:  52%|▌| 387/743 [1:54:12<1:41:05, 17.04s/batch, batch_loss=7.62, bat

Validation:  52%|▌| 387/743 [1:54:29<1:41:05, 17.04s/batch, batch_loss=17.4, bat

Validation:  52%|▌| 388/743 [1:54:29<1:40:47, 17.04s/batch, batch_loss=17.4, bat

Validation:  52%|▌| 388/743 [1:54:49<1:40:47, 17.04s/batch, batch_loss=12, batch

Validation:  52%|▌| 389/743 [1:54:49<1:45:44, 17.92s/batch, batch_loss=12, batch

Validation:  52%|▌| 389/743 [1:55:06<1:45:44, 17.92s/batch, batch_loss=17.7, bat

Validation:  52%|▌| 390/743 [1:55:06<1:43:58, 17.67s/batch, batch_loss=17.7, bat

Validation:  52%|▌| 390/743 [1:55:24<1:43:58, 17.67s/batch, batch_loss=14.4, bat

Validation:  53%|▌| 391/743 [1:55:24<1:43:55, 17.71s/batch, batch_loss=14.4, bat

Validation:  53%|▌| 391/743 [1:55:41<1:43:55, 17.71s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 392/743 [1:55:41<1:43:11, 17.64s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 392/743 [1:55:58<1:43:11, 17.64s/batch, batch_loss=18.9, bat

Validation:  53%|▌| 393/743 [1:55:58<1:40:29, 17.23s/batch, batch_loss=18.9, bat

Validation:  53%|▌| 393/743 [1:56:15<1:40:29, 17.23s/batch, batch_loss=16.5, bat

Validation:  53%|▌| 394/743 [1:56:15<1:40:23, 17.26s/batch, batch_loss=16.5, bat

Validation:  53%|▌| 394/743 [1:56:32<1:40:23, 17.26s/batch, batch_loss=11.4, bat

Validation:  53%|▌| 395/743 [1:56:32<1:39:29, 17.15s/batch, batch_loss=11.4, bat

Validation:  53%|▌| 395/743 [1:56:51<1:39:29, 17.15s/batch, batch_loss=15.4, bat

Validation:  53%|▌| 396/743 [1:56:51<1:43:06, 17.83s/batch, batch_loss=15.4, bat

Validation:  53%|▌| 396/743 [1:57:08<1:43:06, 17.83s/batch, batch_loss=10.4, bat

Validation:  53%|▌| 397/743 [1:57:08<1:41:16, 17.56s/batch, batch_loss=10.4, bat

Validation:  53%|▌| 397/743 [1:57:25<1:41:16, 17.56s/batch, batch_loss=18.9, bat

Validation:  54%|▌| 398/743 [1:57:25<1:39:43, 17.34s/batch, batch_loss=18.9, bat

Validation:  54%|▌| 398/743 [1:57:42<1:39:43, 17.34s/batch, batch_loss=15.2, bat

Validation:  54%|▌| 399/743 [1:57:42<1:38:32, 17.19s/batch, batch_loss=15.2, bat

Validation:  54%|▌| 399/743 [1:57:58<1:38:32, 17.19s/batch, batch_loss=20.8, bat

Validation:  54%|▌| 400/743 [1:57:58<1:36:20, 16.85s/batch, batch_loss=20.8, bat

Validation:  54%|▌| 400/743 [1:58:15<1:36:20, 16.85s/batch, batch_loss=17.9, bat

Validation:  54%|▌| 401/743 [1:58:15<1:36:41, 16.96s/batch, batch_loss=17.9, bat

Validation:  54%|▌| 401/743 [1:58:33<1:36:41, 16.96s/batch, batch_loss=6.99, bat

Validation:  54%|▌| 402/743 [1:58:33<1:37:16, 17.11s/batch, batch_loss=6.99, bat

Validation:  54%|▌| 402/743 [1:58:52<1:37:16, 17.11s/batch, batch_loss=16.2, bat

Validation:  54%|▌| 403/743 [1:58:52<1:40:31, 17.74s/batch, batch_loss=16.2, bat

Validation:  54%|▌| 403/743 [1:59:09<1:40:31, 17.74s/batch, batch_loss=14.3, bat

Validation:  54%|▌| 404/743 [1:59:09<1:39:51, 17.67s/batch, batch_loss=14.3, bat

Validation:  54%|▌| 404/743 [1:59:26<1:39:51, 17.67s/batch, batch_loss=9.89, bat

Validation:  55%|▌| 405/743 [1:59:26<1:38:15, 17.44s/batch, batch_loss=9.89, bat

Validation:  55%|▌| 405/743 [1:59:43<1:38:15, 17.44s/batch, batch_loss=13.2, bat

Validation:  55%|▌| 406/743 [1:59:43<1:36:06, 17.11s/batch, batch_loss=13.2, bat

Validation:  55%|▌| 406/743 [1:59:59<1:36:06, 17.11s/batch, batch_loss=16.6, bat

Validation:  55%|▌| 407/743 [1:59:59<1:35:13, 17.00s/batch, batch_loss=16.6, bat

Validation:  55%|▌| 407/743 [2:00:17<1:35:13, 17.00s/batch, batch_loss=20.1, bat

Validation:  55%|▌| 408/743 [2:00:17<1:35:25, 17.09s/batch, batch_loss=20.1, bat

Validation:  55%|▌| 408/743 [2:00:34<1:35:25, 17.09s/batch, batch_loss=10.9, bat

Validation:  55%|▌| 409/743 [2:00:34<1:35:07, 17.09s/batch, batch_loss=10.9, bat

Validation:  55%|▌| 409/743 [2:00:50<1:35:07, 17.09s/batch, batch_loss=16.2, bat

Validation:  55%|▌| 410/743 [2:00:50<1:34:09, 16.96s/batch, batch_loss=16.2, bat

Validation:  55%|▌| 410/743 [2:01:08<1:34:09, 16.96s/batch, batch_loss=19, batch

Validation:  55%|▌| 411/743 [2:01:08<1:35:15, 17.22s/batch, batch_loss=19, batch

Validation:  55%|▌| 411/743 [2:01:26<1:35:15, 17.22s/batch, batch_loss=14.7, bat

Validation:  55%|▌| 412/743 [2:01:26<1:35:39, 17.34s/batch, batch_loss=14.7, bat

Validation:  55%|▌| 412/743 [2:01:42<1:35:39, 17.34s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [2:01:42<1:33:35, 17.02s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [2:01:59<1:33:35, 17.02s/batch, batch_loss=22.5, bat

Validation:  56%|▌| 414/743 [2:01:59<1:33:50, 17.11s/batch, batch_loss=22.5, bat

Validation:  56%|▌| 414/743 [2:02:17<1:33:50, 17.11s/batch, batch_loss=23.6, bat

Validation:  56%|▌| 415/743 [2:02:17<1:33:37, 17.13s/batch, batch_loss=23.6, bat

Validation:  56%|▌| 415/743 [2:02:32<1:33:37, 17.13s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [2:02:32<1:31:19, 16.76s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [2:02:51<1:31:19, 16.76s/batch, batch_loss=16.9, bat

Validation:  56%|▌| 417/743 [2:02:51<1:33:47, 17.26s/batch, batch_loss=16.9, bat

Validation:  56%|▌| 417/743 [2:03:08<1:33:47, 17.26s/batch, batch_loss=13.8, bat

Validation:  56%|▌| 418/743 [2:03:08<1:32:42, 17.12s/batch, batch_loss=13.8, bat

Validation:  56%|▌| 418/743 [2:03:25<1:32:42, 17.12s/batch, batch_loss=16.4, bat

Validation:  56%|▌| 419/743 [2:03:25<1:32:53, 17.20s/batch, batch_loss=16.4, bat

Validation:  56%|▌| 419/743 [2:03:42<1:32:53, 17.20s/batch, batch_loss=16.1, bat

Validation:  57%|▌| 420/743 [2:03:42<1:31:42, 17.04s/batch, batch_loss=16.1, bat

Validation:  57%|▌| 420/743 [2:03:59<1:31:42, 17.04s/batch, batch_loss=31.3, bat

Validation:  57%|▌| 421/743 [2:03:59<1:31:37, 17.07s/batch, batch_loss=31.3, bat

Validation:  57%|▌| 421/743 [2:04:16<1:31:37, 17.07s/batch, batch_loss=10.3, bat

Validation:  57%|▌| 422/743 [2:04:16<1:31:11, 17.04s/batch, batch_loss=10.3, bat

Validation:  57%|▌| 422/743 [2:04:34<1:31:11, 17.04s/batch, batch_loss=22.6, bat

Validation:  57%|▌| 423/743 [2:04:34<1:32:01, 17.25s/batch, batch_loss=22.6, bat

Validation:  57%|▌| 423/743 [2:04:50<1:32:01, 17.25s/batch, batch_loss=322, batc

Validation:  57%|▌| 424/743 [2:04:50<1:29:43, 16.88s/batch, batch_loss=322, batc

Validation:  57%|▌| 424/743 [2:05:07<1:29:43, 16.88s/batch, batch_loss=22.7, bat

Validation:  57%|▌| 425/743 [2:05:07<1:30:14, 17.03s/batch, batch_loss=22.7, bat

Validation:  57%|▌| 425/743 [2:05:24<1:30:14, 17.03s/batch, batch_loss=23.8, bat

Validation:  57%|▌| 426/743 [2:05:24<1:30:37, 17.15s/batch, batch_loss=23.8, bat

Validation:  57%|▌| 426/743 [2:05:42<1:30:37, 17.15s/batch, batch_loss=21.4, bat

Validation:  57%|▌| 427/743 [2:05:42<1:30:48, 17.24s/batch, batch_loss=21.4, bat

Validation:  57%|▌| 427/743 [2:06:00<1:30:48, 17.24s/batch, batch_loss=5.28e+3, 

Validation:  58%|▌| 428/743 [2:06:00<1:31:58, 17.52s/batch, batch_loss=5.28e+3, 

Validation:  58%|▌| 428/743 [2:06:18<1:31:58, 17.52s/batch, batch_loss=16.9, bat

Validation:  58%|▌| 429/743 [2:06:18<1:31:39, 17.51s/batch, batch_loss=16.9, bat

Validation:  58%|▌| 429/743 [2:06:36<1:31:39, 17.51s/batch, batch_loss=5.4e+3, b

Validation:  58%|▌| 430/743 [2:06:36<1:32:09, 17.67s/batch, batch_loss=5.4e+3, b

Validation:  58%|▌| 430/743 [2:06:57<1:32:09, 17.67s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [2:06:57<1:38:04, 18.86s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [2:07:14<1:38:04, 18.86s/batch, batch_loss=961, batc

Validation:  58%|▌| 432/743 [2:07:14<1:34:09, 18.17s/batch, batch_loss=961, batc

Validation:  58%|▌| 432/743 [2:07:31<1:34:09, 18.17s/batch, batch_loss=15.6, bat

Validation:  58%|▌| 433/743 [2:07:31<1:33:10, 18.03s/batch, batch_loss=15.6, bat

Validation:  58%|▌| 433/743 [2:07:49<1:33:10, 18.03s/batch, batch_loss=10.5, bat

Validation:  58%|▌| 434/743 [2:07:49<1:32:40, 18.00s/batch, batch_loss=10.5, bat

Validation:  58%|▌| 434/743 [2:08:06<1:32:40, 18.00s/batch, batch_loss=15.1, bat

Validation:  59%|▌| 435/743 [2:08:06<1:30:38, 17.66s/batch, batch_loss=15.1, bat

Validation:  59%|▌| 435/743 [2:08:27<1:30:38, 17.66s/batch, batch_loss=12.8, bat

Validation:  59%|▌| 436/743 [2:08:27<1:34:20, 18.44s/batch, batch_loss=12.8, bat

Validation:  59%|▌| 436/743 [2:08:45<1:34:20, 18.44s/batch, batch_loss=23.6, bat

Validation:  59%|▌| 437/743 [2:08:45<1:34:19, 18.49s/batch, batch_loss=23.6, bat

Validation:  59%|▌| 437/743 [2:09:02<1:34:19, 18.49s/batch, batch_loss=976, batc

Validation:  59%|▌| 438/743 [2:09:02<1:31:53, 18.08s/batch, batch_loss=976, batc

Validation:  59%|▌| 438/743 [2:09:19<1:31:53, 18.08s/batch, batch_loss=901, batc

Validation:  59%|▌| 439/743 [2:09:19<1:29:49, 17.73s/batch, batch_loss=901, batc

Validation:  59%|▌| 439/743 [2:09:37<1:29:49, 17.73s/batch, batch_loss=19.7, bat

Validation:  59%|▌| 440/743 [2:09:37<1:29:11, 17.66s/batch, batch_loss=19.7, bat

Validation:  59%|▌| 440/743 [2:09:55<1:29:11, 17.66s/batch, batch_loss=16.4, bat

Validation:  59%|▌| 441/743 [2:09:55<1:29:24, 17.76s/batch, batch_loss=16.4, bat

Validation:  59%|▌| 441/743 [2:10:13<1:29:24, 17.76s/batch, batch_loss=14.7, bat

Validation:  59%|▌| 442/743 [2:10:13<1:29:32, 17.85s/batch, batch_loss=14.7, bat

Validation:  59%|▌| 442/743 [2:10:31<1:29:32, 17.85s/batch, batch_loss=12.8, bat

Validation:  60%|▌| 443/743 [2:10:31<1:29:50, 17.97s/batch, batch_loss=12.8, bat

Validation:  60%|▌| 443/743 [2:10:51<1:29:50, 17.97s/batch, batch_loss=15.8, bat

Validation:  60%|▌| 444/743 [2:10:51<1:31:59, 18.46s/batch, batch_loss=15.8, bat

Validation:  60%|▌| 444/743 [2:11:10<1:31:59, 18.46s/batch, batch_loss=8.89, bat

Validation:  60%|▌| 445/743 [2:11:10<1:32:57, 18.72s/batch, batch_loss=8.89, bat

Validation:  60%|▌| 445/743 [2:11:29<1:32:57, 18.72s/batch, batch_loss=15.2, bat

Validation:  60%|▌| 446/743 [2:11:29<1:32:40, 18.72s/batch, batch_loss=15.2, bat

Validation:  60%|▌| 446/743 [2:11:47<1:32:40, 18.72s/batch, batch_loss=6.85e+3, 

Validation:  60%|▌| 447/743 [2:11:47<1:31:23, 18.52s/batch, batch_loss=6.85e+3, 

Validation:  60%|▌| 447/743 [2:12:04<1:31:23, 18.52s/batch, batch_loss=6.41, bat

Validation:  60%|▌| 448/743 [2:12:04<1:29:39, 18.24s/batch, batch_loss=6.41, bat

Validation:  60%|▌| 448/743 [2:12:22<1:29:39, 18.24s/batch, batch_loss=11.5, bat

Validation:  60%|▌| 449/743 [2:12:22<1:29:06, 18.19s/batch, batch_loss=11.5, bat

Validation:  60%|▌| 449/743 [2:12:44<1:29:06, 18.19s/batch, batch_loss=18, batch

Validation:  61%|▌| 450/743 [2:12:44<1:33:27, 19.14s/batch, batch_loss=18, batch

Validation:  61%|▌| 450/743 [2:13:03<1:33:27, 19.14s/batch, batch_loss=12.4, bat

Validation:  61%|▌| 451/743 [2:13:03<1:33:39, 19.24s/batch, batch_loss=12.4, bat

Validation:  61%|▌| 451/743 [2:13:22<1:33:39, 19.24s/batch, batch_loss=18.4, bat

Validation:  61%|▌| 452/743 [2:13:22<1:33:09, 19.21s/batch, batch_loss=18.4, bat

Validation:  61%|▌| 452/743 [2:13:42<1:33:09, 19.21s/batch, batch_loss=13, batch

Validation:  61%|▌| 453/743 [2:13:42<1:32:58, 19.24s/batch, batch_loss=13, batch

Validation:  61%|▌| 453/743 [2:14:01<1:32:58, 19.24s/batch, batch_loss=6.4, batc

Validation:  61%|▌| 454/743 [2:14:01<1:32:47, 19.27s/batch, batch_loss=6.4, batc

Validation:  61%|▌| 454/743 [2:14:18<1:32:47, 19.27s/batch, batch_loss=9.8, batc

Validation:  61%|▌| 455/743 [2:14:18<1:29:21, 18.62s/batch, batch_loss=9.8, batc

Validation:  61%|▌| 455/743 [2:14:35<1:29:21, 18.62s/batch, batch_loss=8.31, bat

Validation:  61%|▌| 456/743 [2:14:35<1:26:41, 18.12s/batch, batch_loss=8.31, bat

Validation:  61%|▌| 456/743 [2:14:53<1:26:41, 18.12s/batch, batch_loss=14.9, bat

Validation:  62%|▌| 457/743 [2:14:53<1:26:02, 18.05s/batch, batch_loss=14.9, bat

Validation:  62%|▌| 457/743 [2:15:13<1:26:02, 18.05s/batch, batch_loss=25.8, bat

Validation:  62%|▌| 458/743 [2:15:13<1:28:54, 18.72s/batch, batch_loss=25.8, bat

Validation:  62%|▌| 458/743 [2:15:33<1:28:54, 18.72s/batch, batch_loss=13.9, bat

Validation:  62%|▌| 459/743 [2:15:33<1:30:13, 19.06s/batch, batch_loss=13.9, bat

Validation:  62%|▌| 459/743 [2:15:51<1:30:13, 19.06s/batch, batch_loss=19.5, bat

Validation:  62%|▌| 460/743 [2:15:51<1:28:17, 18.72s/batch, batch_loss=19.5, bat

Validation:  62%|▌| 460/743 [2:16:09<1:28:17, 18.72s/batch, batch_loss=16.8, bat

Validation:  62%|▌| 461/743 [2:16:09<1:27:25, 18.60s/batch, batch_loss=16.8, bat

Validation:  62%|▌| 461/743 [2:16:26<1:27:25, 18.60s/batch, batch_loss=14.7, bat

Validation:  62%|▌| 462/743 [2:16:26<1:24:32, 18.05s/batch, batch_loss=14.7, bat

Validation:  62%|▌| 462/743 [2:16:45<1:24:32, 18.05s/batch, batch_loss=13.3, bat

Validation:  62%|▌| 463/743 [2:16:45<1:24:49, 18.18s/batch, batch_loss=13.3, bat

Validation:  62%|▌| 463/743 [2:17:04<1:24:49, 18.18s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [2:17:04<1:26:40, 18.64s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [2:17:22<1:26:40, 18.64s/batch, batch_loss=18.2, bat

Validation:  63%|▋| 465/743 [2:17:22<1:25:29, 18.45s/batch, batch_loss=18.2, bat

Validation:  63%|▋| 465/743 [2:17:40<1:25:29, 18.45s/batch, batch_loss=15.3, bat

Validation:  63%|▋| 466/743 [2:17:40<1:23:39, 18.12s/batch, batch_loss=15.3, bat

Validation:  63%|▋| 466/743 [2:17:56<1:23:39, 18.12s/batch, batch_loss=24.6, bat

Validation:  63%|▋| 467/743 [2:17:56<1:21:12, 17.65s/batch, batch_loss=24.6, bat

Validation:  63%|▋| 467/743 [2:18:12<1:21:12, 17.65s/batch, batch_loss=13.3, bat

Validation:  63%|▋| 468/743 [2:18:12<1:18:51, 17.21s/batch, batch_loss=13.3, bat

Validation:  63%|▋| 468/743 [2:18:29<1:18:51, 17.21s/batch, batch_loss=19.8, bat

Validation:  63%|▋| 469/743 [2:18:29<1:17:26, 16.96s/batch, batch_loss=19.8, bat

Validation:  63%|▋| 469/743 [2:18:46<1:17:26, 16.96s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [2:18:46<1:17:33, 17.05s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [2:19:02<1:17:33, 17.05s/batch, batch_loss=11.5, bat

Validation:  63%|▋| 471/743 [2:19:02<1:15:39, 16.69s/batch, batch_loss=11.5, bat

Validation:  63%|▋| 471/743 [2:19:17<1:15:39, 16.69s/batch, batch_loss=21.4, bat

Validation:  64%|▋| 472/743 [2:19:17<1:13:32, 16.28s/batch, batch_loss=21.4, bat

Validation:  64%|▋| 472/743 [2:19:33<1:13:32, 16.28s/batch, batch_loss=583, batc

Validation:  64%|▋| 473/743 [2:19:33<1:12:42, 16.16s/batch, batch_loss=583, batc

Validation:  64%|▋| 473/743 [2:19:48<1:12:42, 16.16s/batch, batch_loss=16.5, bat

Validation:  64%|▋| 474/743 [2:19:48<1:11:01, 15.84s/batch, batch_loss=16.5, bat

Validation:  64%|▋| 474/743 [2:20:03<1:11:01, 15.84s/batch, batch_loss=20.1, bat

Validation:  64%|▋| 475/743 [2:20:03<1:09:26, 15.55s/batch, batch_loss=20.1, bat

Validation:  64%|▋| 475/743 [2:20:19<1:09:26, 15.55s/batch, batch_loss=9.59, bat

Validation:  64%|▋| 476/743 [2:20:19<1:09:47, 15.68s/batch, batch_loss=9.59, bat

Validation:  64%|▋| 476/743 [2:20:33<1:09:47, 15.68s/batch, batch_loss=13.1, bat

Validation:  64%|▋| 477/743 [2:20:33<1:06:41, 15.04s/batch, batch_loss=13.1, bat

Validation:  64%|▋| 477/743 [2:20:48<1:06:41, 15.04s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [2:20:48<1:07:33, 15.29s/batch, batch_loss=2.45e+3, 

Validation:  64%|▋| 478/743 [2:21:04<1:07:33, 15.29s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [2:21:04<1:07:10, 15.27s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [2:21:18<1:07:10, 15.27s/batch, batch_loss=10.6, bat

Validation:  65%|▋| 480/743 [2:21:18<1:05:49, 15.02s/batch, batch_loss=10.6, bat

Validation:  65%|▋| 480/743 [2:21:32<1:05:49, 15.02s/batch, batch_loss=11.7, bat

Validation:  65%|▋| 481/743 [2:21:32<1:03:38, 14.57s/batch, batch_loss=11.7, bat

Validation:  65%|▋| 481/743 [2:21:44<1:03:38, 14.57s/batch, batch_loss=6.96e+3, 

Validation:  65%|▋| 482/743 [2:21:44<1:00:49, 13.98s/batch, batch_loss=6.96e+3, 

Validation:  65%|▋| 482/743 [2:21:57<1:00:49, 13.98s/batch, batch_loss=19.2, bat

Validation:  65%|▋| 483/743 [2:21:57<58:58, 13.61s/batch, batch_loss=19.2, batch

Validation:  65%|▋| 483/743 [2:22:12<58:58, 13.61s/batch, batch_loss=2.31e+4, ba

Validation:  65%|▋| 484/743 [2:22:12<1:00:17, 13.97s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:22:26<1:00:17, 13.97s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:22:26<1:00:36, 14.10s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:22:42<1:00:36, 14.10s/batch, batch_loss=16.5, bat

Validation:  65%|▋| 486/743 [2:22:42<1:02:04, 14.49s/batch, batch_loss=16.5, bat

Validation:  65%|▋| 486/743 [2:22:56<1:02:04, 14.49s/batch, batch_loss=33.4, bat

Validation:  66%|▋| 487/743 [2:22:56<1:01:38, 14.45s/batch, batch_loss=33.4, bat

Validation:  66%|▋| 487/743 [2:23:11<1:01:38, 14.45s/batch, batch_loss=23.2, bat

Validation:  66%|▋| 488/743 [2:23:11<1:02:25, 14.69s/batch, batch_loss=23.2, bat

Validation:  66%|▋| 488/743 [2:23:25<1:02:25, 14.69s/batch, batch_loss=11.3, bat

Validation:  66%|▋| 489/743 [2:23:25<1:01:06, 14.44s/batch, batch_loss=11.3, bat

Validation:  66%|▋| 489/743 [2:23:39<1:01:06, 14.44s/batch, batch_loss=19.8, bat

Validation:  66%|▋| 490/743 [2:23:39<59:57, 14.22s/batch, batch_loss=19.8, batch

Validation:  66%|▋| 490/743 [2:23:53<59:57, 14.22s/batch, batch_loss=17.9, batch

Validation:  66%|▋| 491/743 [2:23:53<1:00:11, 14.33s/batch, batch_loss=17.9, bat

Validation:  66%|▋| 491/743 [2:24:09<1:00:11, 14.33s/batch, batch_loss=1.04e+3, 

Validation:  66%|▋| 492/743 [2:24:09<1:01:16, 14.65s/batch, batch_loss=1.04e+3, 

Validation:  66%|▋| 492/743 [2:24:26<1:01:16, 14.65s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:24:26<1:04:04, 15.38s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:24:41<1:04:04, 15.38s/batch, batch_loss=9.36, bat

Validation:  66%|▋| 494/743 [2:24:41<1:03:17, 15.25s/batch, batch_loss=9.36, bat

Validation:  66%|▋| 494/743 [2:24:55<1:03:17, 15.25s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:24:55<1:01:50, 14.96s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:25:10<1:01:50, 14.96s/batch, batch_loss=16.4, bat

Validation:  67%|▋| 496/743 [2:25:10<1:01:21, 14.90s/batch, batch_loss=16.4, bat

Validation:  67%|▋| 496/743 [2:25:25<1:01:21, 14.90s/batch, batch_loss=13.7, bat

Validation:  67%|▋| 497/743 [2:25:25<1:00:58, 14.87s/batch, batch_loss=13.7, bat

Validation:  67%|▋| 497/743 [2:25:39<1:00:58, 14.87s/batch, batch_loss=13.7, bat

Validation:  67%|▋| 498/743 [2:25:39<1:00:00, 14.70s/batch, batch_loss=13.7, bat

Validation:  67%|▋| 498/743 [2:25:54<1:00:00, 14.70s/batch, batch_loss=6.7, batc

Validation:  67%|▋| 499/743 [2:25:54<59:59, 14.75s/batch, batch_loss=6.7, batch_

Validation:  67%|▋| 499/743 [2:26:09<59:59, 14.75s/batch, batch_loss=2.51e+4, ba

Validation:  67%|▋| 500/743 [2:26:09<1:00:41, 14.98s/batch, batch_loss=2.51e+4, 

Validation:  67%|▋| 500/743 [2:26:24<1:00:41, 14.98s/batch, batch_loss=18.3, bat

Validation:  67%|▋| 501/743 [2:26:24<1:00:45, 15.06s/batch, batch_loss=18.3, bat

Validation:  67%|▋| 501/743 [2:26:40<1:00:45, 15.06s/batch, batch_loss=3.15e+3, 

Validation:  68%|▋| 502/743 [2:26:40<1:01:34, 15.33s/batch, batch_loss=3.15e+3, 

Validation:  68%|▋| 502/743 [2:26:55<1:01:34, 15.33s/batch, batch_loss=14.2, bat

Validation:  68%|▋| 503/743 [2:26:55<1:00:57, 15.24s/batch, batch_loss=14.2, bat

Validation:  68%|▋| 503/743 [2:27:10<1:00:57, 15.24s/batch, batch_loss=12.2, bat

Validation:  68%|▋| 504/743 [2:27:10<59:19, 14.89s/batch, batch_loss=12.2, batch

Validation:  68%|▋| 504/743 [2:27:24<59:19, 14.89s/batch, batch_loss=18.5, batch

Validation:  68%|▋| 505/743 [2:27:24<58:46, 14.82s/batch, batch_loss=18.5, batch

Validation:  68%|▋| 505/743 [2:27:39<58:46, 14.82s/batch, batch_loss=2.83e+3, ba

Validation:  68%|▋| 506/743 [2:27:39<58:18, 14.76s/batch, batch_loss=2.83e+3, ba

Validation:  68%|▋| 506/743 [2:27:53<58:18, 14.76s/batch, batch_loss=1.99e+3, ba

Validation:  68%|▋| 507/743 [2:27:53<57:21, 14.58s/batch, batch_loss=1.99e+3, ba

Validation:  68%|▋| 507/743 [2:28:07<57:21, 14.58s/batch, batch_loss=8.37e+3, ba

Validation:  68%|▋| 508/743 [2:28:07<56:59, 14.55s/batch, batch_loss=8.37e+3, ba

Validation:  68%|▋| 508/743 [2:28:22<56:59, 14.55s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:28:22<56:16, 14.43s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:28:39<56:16, 14.43s/batch, batch_loss=13.7, batch

Validation:  69%|▋| 510/743 [2:28:39<59:17, 15.27s/batch, batch_loss=13.7, batch

Validation:  69%|▋| 510/743 [2:28:54<59:17, 15.27s/batch, batch_loss=19.1, batch

Validation:  69%|▋| 511/743 [2:28:54<58:42, 15.18s/batch, batch_loss=19.1, batch

Validation:  69%|▋| 511/743 [2:29:07<58:42, 15.18s/batch, batch_loss=16.4, batch

Validation:  69%|▋| 512/743 [2:29:07<55:52, 14.51s/batch, batch_loss=16.4, batch

Validation:  69%|▋| 512/743 [2:29:21<55:52, 14.51s/batch, batch_loss=17.2, batch

Validation:  69%|▋| 513/743 [2:29:21<54:59, 14.34s/batch, batch_loss=17.2, batch

Validation:  69%|▋| 513/743 [2:29:35<54:59, 14.34s/batch, batch_loss=14.5, batch

Validation:  69%|▋| 514/743 [2:29:35<55:05, 14.43s/batch, batch_loss=14.5, batch

Validation:  69%|▋| 514/743 [2:29:51<55:05, 14.43s/batch, batch_loss=10.3, batch

Validation:  69%|▋| 515/743 [2:29:51<55:50, 14.69s/batch, batch_loss=10.3, batch

Validation:  69%|▋| 515/743 [2:30:06<55:50, 14.69s/batch, batch_loss=13.2, batch

Validation:  69%|▋| 516/743 [2:30:06<55:47, 14.75s/batch, batch_loss=13.2, batch

Validation:  69%|▋| 516/743 [2:30:20<55:47, 14.75s/batch, batch_loss=6.16e+4, ba

Validation:  70%|▋| 517/743 [2:30:20<55:00, 14.60s/batch, batch_loss=6.16e+4, ba

Validation:  70%|▋| 517/743 [2:30:37<55:00, 14.60s/batch, batch_loss=505, batch_

Validation:  70%|▋| 518/743 [2:30:37<57:30, 15.33s/batch, batch_loss=505, batch_

Validation:  70%|▋| 518/743 [2:30:51<57:30, 15.33s/batch, batch_loss=11.5, batch

Validation:  70%|▋| 519/743 [2:30:51<56:16, 15.07s/batch, batch_loss=11.5, batch

Validation:  70%|▋| 519/743 [2:31:06<56:16, 15.07s/batch, batch_loss=18.5, batch

Validation:  70%|▋| 520/743 [2:31:06<55:45, 15.00s/batch, batch_loss=18.5, batch

Validation:  70%|▋| 520/743 [2:31:21<55:45, 15.00s/batch, batch_loss=14.3, batch

Validation:  70%|▋| 521/743 [2:31:21<55:16, 14.94s/batch, batch_loss=14.3, batch

Validation:  70%|▋| 521/743 [2:31:38<55:16, 14.94s/batch, batch_loss=12.9, batch

Validation:  70%|▋| 522/743 [2:31:38<57:41, 15.66s/batch, batch_loss=12.9, batch

Validation:  70%|▋| 522/743 [2:31:53<57:41, 15.66s/batch, batch_loss=429, batch_

Validation:  70%|▋| 523/743 [2:31:53<56:54, 15.52s/batch, batch_loss=429, batch_

Validation:  70%|▋| 523/743 [2:32:08<56:54, 15.52s/batch, batch_loss=15.6, batch

Validation:  71%|▋| 524/743 [2:32:08<55:03, 15.08s/batch, batch_loss=15.6, batch

Validation:  71%|▋| 524/743 [2:32:22<55:03, 15.08s/batch, batch_loss=20.9, batch

Validation:  71%|▋| 525/743 [2:32:22<54:21, 14.96s/batch, batch_loss=20.9, batch

Validation:  71%|▋| 525/743 [2:32:36<54:21, 14.96s/batch, batch_loss=10.4, batch

Validation:  71%|▋| 526/743 [2:32:36<53:20, 14.75s/batch, batch_loss=10.4, batch

Validation:  71%|▋| 526/743 [2:32:50<53:20, 14.75s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:32:50<52:07, 14.48s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:33:05<52:07, 14.48s/batch, batch_loss=511, batch_

Validation:  71%|▋| 528/743 [2:33:05<51:34, 14.40s/batch, batch_loss=511, batch_

Validation:  71%|▋| 528/743 [2:33:19<51:34, 14.40s/batch, batch_loss=6.51e+3, ba

Validation:  71%|▋| 529/743 [2:33:19<51:30, 14.44s/batch, batch_loss=6.51e+3, ba

Validation:  71%|▋| 529/743 [2:33:34<51:30, 14.44s/batch, batch_loss=210, batch_

Validation:  71%|▋| 530/743 [2:33:34<51:31, 14.51s/batch, batch_loss=210, batch_

Validation:  71%|▋| 530/743 [2:33:49<51:31, 14.51s/batch, batch_loss=41.1, batch

Validation:  71%|▋| 531/743 [2:33:49<52:07, 14.75s/batch, batch_loss=41.1, batch

Validation:  71%|▋| 531/743 [2:34:04<52:07, 14.75s/batch, batch_loss=255, batch_

Validation:  72%|▋| 532/743 [2:34:04<52:20, 14.88s/batch, batch_loss=255, batch_

Validation:  72%|▋| 532/743 [2:34:19<52:20, 14.88s/batch, batch_loss=7.86, batch

Validation:  72%|▋| 533/743 [2:34:19<51:31, 14.72s/batch, batch_loss=7.86, batch

Validation:  72%|▋| 533/743 [2:34:36<51:31, 14.72s/batch, batch_loss=12.8, batch

Validation:  72%|▋| 534/743 [2:34:36<54:08, 15.54s/batch, batch_loss=12.8, batch

Validation:  72%|▋| 534/743 [2:34:50<54:08, 15.54s/batch, batch_loss=17.2, batch

Validation:  72%|▋| 535/743 [2:34:50<52:26, 15.13s/batch, batch_loss=17.2, batch

Validation:  72%|▋| 535/743 [2:35:04<52:26, 15.13s/batch, batch_loss=16.8, batch

Validation:  72%|▋| 536/743 [2:35:04<50:55, 14.76s/batch, batch_loss=16.8, batch

Validation:  72%|▋| 536/743 [2:35:19<50:55, 14.76s/batch, batch_loss=13.3, batch

Validation:  72%|▋| 537/743 [2:35:19<50:35, 14.74s/batch, batch_loss=13.3, batch

Validation:  72%|▋| 537/743 [2:35:33<50:35, 14.74s/batch, batch_loss=17.3, batch

Validation:  72%|▋| 538/743 [2:35:33<49:44, 14.56s/batch, batch_loss=17.3, batch

Validation:  72%|▋| 538/743 [2:35:48<49:44, 14.56s/batch, batch_loss=249, batch_

Validation:  73%|▋| 539/743 [2:35:48<49:31, 14.57s/batch, batch_loss=249, batch_

Validation:  73%|▋| 539/743 [2:36:02<49:31, 14.57s/batch, batch_loss=17.1, batch

Validation:  73%|▋| 540/743 [2:36:02<49:34, 14.65s/batch, batch_loss=17.1, batch

Validation:  73%|▋| 540/743 [2:36:17<49:34, 14.65s/batch, batch_loss=28, batch_i

Validation:  73%|▋| 541/743 [2:36:17<49:31, 14.71s/batch, batch_loss=28, batch_i

Validation:  73%|▋| 541/743 [2:36:34<49:31, 14.71s/batch, batch_loss=1.95e+3, ba

Validation:  73%|▋| 542/743 [2:36:34<51:23, 15.34s/batch, batch_loss=1.95e+3, ba

Validation:  73%|▋| 542/743 [2:36:48<51:23, 15.34s/batch, batch_loss=17.1, batch

Validation:  73%|▋| 543/743 [2:36:48<49:31, 14.86s/batch, batch_loss=17.1, batch

Validation:  73%|▋| 543/743 [2:37:02<49:31, 14.86s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:37:02<49:00, 14.78s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:37:17<49:00, 14.78s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:37:17<48:19, 14.64s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:37:31<48:19, 14.64s/batch, batch_loss=7.22, batch

Validation:  73%|▋| 546/743 [2:37:31<47:24, 14.44s/batch, batch_loss=7.22, batch

Validation:  73%|▋| 546/743 [2:37:45<47:24, 14.44s/batch, batch_loss=258, batch_

Validation:  74%|▋| 547/743 [2:37:45<46:45, 14.31s/batch, batch_loss=258, batch_

Validation:  74%|▋| 547/743 [2:37:59<46:45, 14.31s/batch, batch_loss=26.9, batch

Validation:  74%|▋| 548/743 [2:37:59<46:49, 14.41s/batch, batch_loss=26.9, batch

Validation:  74%|▋| 548/743 [2:38:14<46:49, 14.41s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:38:14<46:32, 14.40s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:38:29<46:32, 14.40s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:38:29<46:45, 14.53s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:38:44<46:45, 14.53s/batch, batch_loss=15.7, batch

Validation:  74%|▋| 551/743 [2:38:44<47:24, 14.82s/batch, batch_loss=15.7, batch

Validation:  74%|▋| 551/743 [2:38:58<47:24, 14.82s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:38:58<46:50, 14.72s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:39:13<46:50, 14.72s/batch, batch_loss=22.1, batch

Validation:  74%|▋| 553/743 [2:39:13<46:44, 14.76s/batch, batch_loss=22.1, batch

Validation:  74%|▋| 553/743 [2:39:28<46:44, 14.76s/batch, batch_loss=21.8, batch

Validation:  75%|▋| 554/743 [2:39:28<46:18, 14.70s/batch, batch_loss=21.8, batch

Validation:  75%|▋| 554/743 [2:39:43<46:18, 14.70s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:39:43<46:48, 14.94s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:39:58<46:48, 14.94s/batch, batch_loss=32.8, batch

Validation:  75%|▋| 556/743 [2:39:58<45:58, 14.75s/batch, batch_loss=32.8, batch

Validation:  75%|▋| 556/743 [2:40:12<45:58, 14.75s/batch, batch_loss=8.81, batch

Validation:  75%|▋| 557/743 [2:40:12<45:05, 14.55s/batch, batch_loss=8.81, batch

Validation:  75%|▋| 557/743 [2:40:27<45:05, 14.55s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:40:27<45:01, 14.60s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:40:41<45:01, 14.60s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:40:41<44:32, 14.53s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:40:58<44:32, 14.53s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:40:58<46:55, 15.39s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:41:13<46:55, 15.39s/batch, batch_loss=11.2, batch

Validation:  76%|▊| 561/743 [2:41:13<46:19, 15.27s/batch, batch_loss=11.2, batch

Validation:  76%|▊| 561/743 [2:41:28<46:19, 15.27s/batch, batch_loss=15.3, batch

Validation:  76%|▊| 562/743 [2:41:28<45:42, 15.15s/batch, batch_loss=15.3, batch

Validation:  76%|▊| 562/743 [2:41:42<45:42, 15.15s/batch, batch_loss=18.1, batch

Validation:  76%|▊| 563/743 [2:41:42<44:25, 14.81s/batch, batch_loss=18.1, batch

Validation:  76%|▊| 563/743 [2:41:56<44:25, 14.81s/batch, batch_loss=1.08e+3, ba

Validation:  76%|▊| 564/743 [2:41:56<43:19, 14.52s/batch, batch_loss=1.08e+3, ba

Validation:  76%|▊| 564/743 [2:42:11<43:19, 14.52s/batch, batch_loss=3.69e+3, ba

Validation:  76%|▊| 565/743 [2:42:11<43:17, 14.59s/batch, batch_loss=3.69e+3, ba

Validation:  76%|▊| 565/743 [2:42:26<43:17, 14.59s/batch, batch_loss=13, batch_i

Validation:  76%|▊| 566/743 [2:42:26<43:51, 14.87s/batch, batch_loss=13, batch_i

Validation:  76%|▊| 566/743 [2:42:42<43:51, 14.87s/batch, batch_loss=15.5, batch

Validation:  76%|▊| 567/743 [2:42:42<44:22, 15.13s/batch, batch_loss=15.5, batch

Validation:  76%|▊| 567/743 [2:42:57<44:22, 15.13s/batch, batch_loss=11.5, batch

Validation:  76%|▊| 568/743 [2:42:57<44:06, 15.12s/batch, batch_loss=11.5, batch

Validation:  76%|▊| 568/743 [2:43:12<44:06, 15.12s/batch, batch_loss=14.9, batch

Validation:  77%|▊| 569/743 [2:43:12<43:21, 14.95s/batch, batch_loss=14.9, batch

Validation:  77%|▊| 569/743 [2:43:27<43:21, 14.95s/batch, batch_loss=19.1, batch

Validation:  77%|▊| 570/743 [2:43:27<43:46, 15.18s/batch, batch_loss=19.1, batch

Validation:  77%|▊| 570/743 [2:43:43<43:46, 15.18s/batch, batch_loss=11.2, batch

Validation:  77%|▊| 571/743 [2:43:43<43:36, 15.21s/batch, batch_loss=11.2, batch

Validation:  77%|▊| 571/743 [2:43:58<43:36, 15.21s/batch, batch_loss=22.3, batch

Validation:  77%|▊| 572/743 [2:43:58<43:32, 15.28s/batch, batch_loss=22.3, batch

Validation:  77%|▊| 572/743 [2:44:13<43:32, 15.28s/batch, batch_loss=14.1, batch

Validation:  77%|▊| 573/743 [2:44:13<43:13, 15.26s/batch, batch_loss=14.1, batch

Validation:  77%|▊| 573/743 [2:44:30<43:13, 15.26s/batch, batch_loss=15.7, batch

Validation:  77%|▊| 574/743 [2:44:30<43:50, 15.56s/batch, batch_loss=15.7, batch

Validation:  77%|▊| 574/743 [2:44:47<43:50, 15.56s/batch, batch_loss=15.1, batch

Validation:  77%|▊| 575/743 [2:44:47<45:10, 16.13s/batch, batch_loss=15.1, batch

Validation:  77%|▊| 575/743 [2:45:01<45:10, 16.13s/batch, batch_loss=21.1, batch

Validation:  78%|▊| 576/743 [2:45:01<43:19, 15.56s/batch, batch_loss=21.1, batch

Validation:  78%|▊| 576/743 [2:45:16<43:19, 15.56s/batch, batch_loss=20.1, batch

Validation:  78%|▊| 577/743 [2:45:16<42:23, 15.32s/batch, batch_loss=20.1, batch

Validation:  78%|▊| 577/743 [2:45:30<42:23, 15.32s/batch, batch_loss=24.6, batch

Validation:  78%|▊| 578/743 [2:45:30<41:23, 15.05s/batch, batch_loss=24.6, batch

Validation:  78%|▊| 578/743 [2:45:45<41:23, 15.05s/batch, batch_loss=314, batch_

Validation:  78%|▊| 579/743 [2:45:45<40:22, 14.77s/batch, batch_loss=314, batch_

Validation:  78%|▊| 579/743 [2:45:58<40:22, 14.77s/batch, batch_loss=6.99, batch

Validation:  78%|▊| 580/743 [2:45:58<39:00, 14.36s/batch, batch_loss=6.99, batch

Validation:  78%|▊| 580/743 [2:46:12<39:00, 14.36s/batch, batch_loss=9.87, batch

Validation:  78%|▊| 581/743 [2:46:12<38:40, 14.32s/batch, batch_loss=9.87, batch

Validation:  78%|▊| 581/743 [2:46:27<38:40, 14.32s/batch, batch_loss=16.2, batch

Validation:  78%|▊| 582/743 [2:46:27<38:35, 14.38s/batch, batch_loss=16.2, batch

Validation:  78%|▊| 582/743 [2:46:40<38:35, 14.38s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:46:40<37:33, 14.08s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:46:54<37:33, 14.08s/batch, batch_loss=2.61, batch

Validation:  79%|▊| 584/743 [2:46:54<37:05, 14.00s/batch, batch_loss=2.61, batch

Validation:  79%|▊| 584/743 [2:47:11<37:05, 14.00s/batch, batch_loss=19.5, batch

Validation:  79%|▊| 585/743 [2:47:11<39:20, 14.94s/batch, batch_loss=19.5, batch

Validation:  79%|▊| 585/743 [2:47:25<39:20, 14.94s/batch, batch_loss=550, batch_

Validation:  79%|▊| 586/743 [2:47:25<38:32, 14.73s/batch, batch_loss=550, batch_

Validation:  79%|▊| 586/743 [2:47:40<38:32, 14.73s/batch, batch_loss=8.49, batch

Validation:  79%|▊| 587/743 [2:47:40<38:14, 14.71s/batch, batch_loss=8.49, batch

Validation:  79%|▊| 587/743 [2:47:54<38:14, 14.71s/batch, batch_loss=402, batch_

Validation:  79%|▊| 588/743 [2:47:54<37:32, 14.53s/batch, batch_loss=402, batch_

Validation:  79%|▊| 588/743 [2:48:08<37:32, 14.53s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:48:08<36:36, 14.27s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:48:22<36:36, 14.27s/batch, batch_loss=18.5, batch

Validation:  79%|▊| 590/743 [2:48:22<36:15, 14.22s/batch, batch_loss=18.5, batch

Validation:  79%|▊| 590/743 [2:48:36<36:15, 14.22s/batch, batch_loss=15, batch_i

Validation:  80%|▊| 591/743 [2:48:36<35:44, 14.11s/batch, batch_loss=15, batch_i

Validation:  80%|▊| 591/743 [2:48:50<35:44, 14.11s/batch, batch_loss=13.9, batch

Validation:  80%|▊| 592/743 [2:48:50<35:36, 14.15s/batch, batch_loss=13.9, batch

Validation:  80%|▊| 592/743 [2:49:05<35:36, 14.15s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:49:05<36:04, 14.43s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:49:19<36:04, 14.43s/batch, batch_loss=3.67, batch

Validation:  80%|▊| 594/743 [2:49:19<35:49, 14.42s/batch, batch_loss=3.67, batch

Validation:  80%|▊| 594/743 [2:49:35<35:49, 14.42s/batch, batch_loss=5.41, batch

Validation:  80%|▊| 595/743 [2:49:35<36:05, 14.63s/batch, batch_loss=5.41, batch

Validation:  80%|▊| 595/743 [2:49:49<36:05, 14.63s/batch, batch_loss=6.09, batch

Validation:  80%|▊| 596/743 [2:49:49<35:51, 14.64s/batch, batch_loss=6.09, batch

Validation:  80%|▊| 596/743 [2:50:03<35:51, 14.64s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:50:03<34:58, 14.37s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:50:17<34:58, 14.37s/batch, batch_loss=14.3, batch

Validation:  80%|▊| 598/743 [2:50:17<34:33, 14.30s/batch, batch_loss=14.3, batch

Validation:  80%|▊| 598/743 [2:50:34<34:33, 14.30s/batch, batch_loss=13.5, batch

Validation:  81%|▊| 599/743 [2:50:34<35:57, 14.98s/batch, batch_loss=13.5, batch

Validation:  81%|▊| 599/743 [2:50:48<35:57, 14.98s/batch, batch_loss=20.4, batch

Validation:  81%|▊| 600/743 [2:50:48<35:22, 14.84s/batch, batch_loss=20.4, batch

Validation:  81%|▊| 600/743 [2:51:03<35:22, 14.84s/batch, batch_loss=13.7, batch

Validation:  81%|▊| 601/743 [2:51:03<35:05, 14.82s/batch, batch_loss=13.7, batch

Validation:  81%|▊| 601/743 [2:51:17<35:05, 14.82s/batch, batch_loss=17.3, batch

Validation:  81%|▊| 602/743 [2:51:17<34:36, 14.73s/batch, batch_loss=17.3, batch

Validation:  81%|▊| 602/743 [2:51:30<34:36, 14.73s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:51:30<32:39, 14.00s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:51:42<32:39, 14.00s/batch, batch_loss=19.9, batch

Validation:  81%|▊| 604/743 [2:51:42<31:21, 13.54s/batch, batch_loss=19.9, batch

Validation:  81%|▊| 604/743 [2:51:55<31:21, 13.54s/batch, batch_loss=25, batch_i

Validation:  81%|▊| 605/743 [2:51:55<30:21, 13.20s/batch, batch_loss=25, batch_i

Validation:  81%|▊| 605/743 [2:52:09<30:21, 13.20s/batch, batch_loss=250, batch_

Validation:  82%|▊| 606/743 [2:52:09<31:02, 13.60s/batch, batch_loss=250, batch_

Validation:  82%|▊| 606/743 [2:52:25<31:02, 13.60s/batch, batch_loss=28.4, batch

Validation:  82%|▊| 607/743 [2:52:25<32:29, 14.34s/batch, batch_loss=28.4, batch

Validation:  82%|▊| 607/743 [2:52:40<32:29, 14.34s/batch, batch_loss=19.3, batch

Validation:  82%|▊| 608/743 [2:52:40<32:14, 14.33s/batch, batch_loss=19.3, batch

Validation:  82%|▊| 608/743 [2:52:54<32:14, 14.33s/batch, batch_loss=15.8, batch

Validation:  82%|▊| 609/743 [2:52:54<32:20, 14.48s/batch, batch_loss=15.8, batch

Validation:  82%|▊| 609/743 [2:53:08<32:20, 14.48s/batch, batch_loss=17.2, batch

Validation:  82%|▊| 610/743 [2:53:08<31:36, 14.26s/batch, batch_loss=17.2, batch

Validation:  82%|▊| 610/743 [2:53:22<31:36, 14.26s/batch, batch_loss=20.7, batch

Validation:  82%|▊| 611/743 [2:53:22<31:12, 14.19s/batch, batch_loss=20.7, batch

Validation:  82%|▊| 611/743 [2:53:36<31:12, 14.19s/batch, batch_loss=10.3, batch

Validation:  82%|▊| 612/743 [2:53:36<31:01, 14.21s/batch, batch_loss=10.3, batch

Validation:  82%|▊| 612/743 [2:53:51<31:01, 14.21s/batch, batch_loss=14, batch_i

Validation:  83%|▊| 613/743 [2:53:51<31:10, 14.39s/batch, batch_loss=14, batch_i

Validation:  83%|▊| 613/743 [2:54:05<31:10, 14.39s/batch, batch_loss=5.63e+3, ba

Validation:  83%|▊| 614/743 [2:54:05<30:41, 14.28s/batch, batch_loss=5.63e+3, ba

Validation:  83%|▊| 614/743 [2:54:19<30:41, 14.28s/batch, batch_loss=13, batch_i

Validation:  83%|▊| 615/743 [2:54:19<30:12, 14.16s/batch, batch_loss=13, batch_i

Validation:  83%|▊| 615/743 [2:54:33<30:12, 14.16s/batch, batch_loss=13.7, batch

Validation:  83%|▊| 616/743 [2:54:33<30:03, 14.20s/batch, batch_loss=13.7, batch

Validation:  83%|▊| 616/743 [2:54:49<30:03, 14.20s/batch, batch_loss=6.96, batch

Validation:  83%|▊| 617/743 [2:54:49<31:01, 14.78s/batch, batch_loss=6.96, batch

Validation:  83%|▊| 617/743 [2:55:03<31:01, 14.78s/batch, batch_loss=10, batch_i

Validation:  83%|▊| 618/743 [2:55:03<30:05, 14.44s/batch, batch_loss=10, batch_i

Validation:  83%|▊| 618/743 [2:55:17<30:05, 14.44s/batch, batch_loss=340, batch_

Validation:  83%|▊| 619/743 [2:55:17<29:44, 14.39s/batch, batch_loss=340, batch_

Validation:  83%|▊| 619/743 [2:55:31<29:44, 14.39s/batch, batch_loss=14.5, batch

Validation:  83%|▊| 620/743 [2:55:31<28:47, 14.04s/batch, batch_loss=14.5, batch

Validation:  83%|▊| 620/743 [2:55:45<28:47, 14.04s/batch, batch_loss=8.32, batch

Validation:  84%|▊| 621/743 [2:55:45<28:41, 14.11s/batch, batch_loss=8.32, batch

Validation:  84%|▊| 621/743 [2:56:00<28:41, 14.11s/batch, batch_loss=12.3, batch

Validation:  84%|▊| 622/743 [2:56:00<28:54, 14.33s/batch, batch_loss=12.3, batch

Validation:  84%|▊| 622/743 [2:56:14<28:54, 14.33s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:56:14<28:43, 14.36s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:56:28<28:43, 14.36s/batch, batch_loss=13.8, batch

Validation:  84%|▊| 624/743 [2:56:28<28:03, 14.15s/batch, batch_loss=13.8, batch

Validation:  84%|▊| 624/743 [2:56:45<28:03, 14.15s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:56:45<29:27, 14.98s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:56:59<29:27, 14.98s/batch, batch_loss=19.4, batch

Validation:  84%|▊| 626/743 [2:56:59<28:31, 14.63s/batch, batch_loss=19.4, batch

Validation:  84%|▊| 626/743 [2:57:13<28:31, 14.63s/batch, batch_loss=16.1, batch

Validation:  84%|▊| 627/743 [2:57:13<27:56, 14.46s/batch, batch_loss=16.1, batch

Validation:  84%|▊| 627/743 [2:57:27<27:56, 14.46s/batch, batch_loss=16.6, batch

Validation:  85%|▊| 628/743 [2:57:27<27:56, 14.58s/batch, batch_loss=16.6, batch

Validation:  85%|▊| 628/743 [2:57:42<27:56, 14.58s/batch, batch_loss=12.8, batch

Validation:  85%|▊| 629/743 [2:57:42<27:31, 14.48s/batch, batch_loss=12.8, batch

Validation:  85%|▊| 629/743 [2:57:56<27:31, 14.48s/batch, batch_loss=16, batch_i

Validation:  85%|▊| 630/743 [2:57:56<27:14, 14.46s/batch, batch_loss=16, batch_i

Validation:  85%|▊| 630/743 [2:58:10<27:14, 14.46s/batch, batch_loss=241, batch_

Validation:  85%|▊| 631/743 [2:58:10<26:49, 14.37s/batch, batch_loss=241, batch_

Validation:  85%|▊| 631/743 [2:58:25<26:49, 14.37s/batch, batch_loss=17.7, batch

Validation:  85%|▊| 632/743 [2:58:25<26:35, 14.38s/batch, batch_loss=17.7, batch

Validation:  85%|▊| 632/743 [2:58:39<26:35, 14.38s/batch, batch_loss=13.7, batch

Validation:  85%|▊| 633/743 [2:58:39<26:20, 14.37s/batch, batch_loss=13.7, batch

Validation:  85%|▊| 633/743 [2:58:55<26:20, 14.37s/batch, batch_loss=11.1, batch

Validation:  85%|▊| 634/743 [2:58:55<27:08, 14.94s/batch, batch_loss=11.1, batch

Validation:  85%|▊| 634/743 [2:59:10<27:08, 14.94s/batch, batch_loss=7.96, batch

Validation:  85%|▊| 635/743 [2:59:10<26:37, 14.79s/batch, batch_loss=7.96, batch

Validation:  85%|▊| 635/743 [2:59:23<26:37, 14.79s/batch, batch_loss=800, batch_

Validation:  86%|▊| 636/743 [2:59:23<25:27, 14.28s/batch, batch_loss=800, batch_

Validation:  86%|▊| 636/743 [2:59:37<25:27, 14.28s/batch, batch_loss=713, batch_

Validation:  86%|▊| 637/743 [2:59:37<24:57, 14.13s/batch, batch_loss=713, batch_

Validation:  86%|▊| 637/743 [2:59:51<24:57, 14.13s/batch, batch_loss=18.2, batch

Validation:  86%|▊| 638/743 [2:59:51<24:53, 14.22s/batch, batch_loss=18.2, batch

Validation:  86%|▊| 638/743 [3:00:05<24:53, 14.22s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [3:00:05<24:29, 14.13s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [3:00:20<24:29, 14.13s/batch, batch_loss=21, batch_i

Validation:  86%|▊| 640/743 [3:00:20<24:28, 14.26s/batch, batch_loss=21, batch_i

Validation:  86%|▊| 640/743 [3:00:34<24:28, 14.26s/batch, batch_loss=29.3, batch

Validation:  86%|▊| 641/743 [3:00:34<24:09, 14.21s/batch, batch_loss=29.3, batch

Validation:  86%|▊| 641/743 [3:00:49<24:09, 14.21s/batch, batch_loss=30, batch_i

Validation:  86%|▊| 642/743 [3:00:49<24:30, 14.56s/batch, batch_loss=30, batch_i

Validation:  86%|▊| 642/743 [3:01:04<24:30, 14.56s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [3:01:04<24:22, 14.62s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [3:01:18<24:22, 14.62s/batch, batch_loss=18.2, batch

Validation:  87%|▊| 644/743 [3:01:18<24:03, 14.58s/batch, batch_loss=18.2, batch

Validation:  87%|▊| 644/743 [3:01:33<24:03, 14.58s/batch, batch_loss=18.7, batch

Validation:  87%|▊| 645/743 [3:01:33<23:42, 14.52s/batch, batch_loss=18.7, batch

Validation:  87%|▊| 645/743 [3:01:47<23:42, 14.52s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [3:01:47<23:22, 14.45s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [3:02:01<23:22, 14.45s/batch, batch_loss=17.8, batch

Validation:  87%|▊| 647/743 [3:02:01<23:06, 14.45s/batch, batch_loss=17.8, batch

Validation:  87%|▊| 647/743 [3:02:16<23:06, 14.45s/batch, batch_loss=5.87, batch

Validation:  87%|▊| 648/743 [3:02:16<22:56, 14.49s/batch, batch_loss=5.87, batch

Validation:  87%|▊| 648/743 [3:02:31<22:56, 14.49s/batch, batch_loss=10.3, batch

Validation:  87%|▊| 649/743 [3:02:31<22:48, 14.56s/batch, batch_loss=10.3, batch

Validation:  87%|▊| 649/743 [3:02:47<22:48, 14.56s/batch, batch_loss=14.6, batch

Validation:  87%|▊| 650/743 [3:02:47<23:34, 15.21s/batch, batch_loss=14.6, batch

Validation:  87%|▊| 650/743 [3:03:02<23:34, 15.21s/batch, batch_loss=22, batch_i

Validation:  88%|▉| 651/743 [3:03:02<23:02, 15.03s/batch, batch_loss=22, batch_i

Validation:  88%|▉| 651/743 [3:03:16<23:02, 15.03s/batch, batch_loss=23.3, batch

Validation:  88%|▉| 652/743 [3:03:16<22:26, 14.79s/batch, batch_loss=23.3, batch

Validation:  88%|▉| 652/743 [3:03:30<22:26, 14.79s/batch, batch_loss=16.2, batch

Validation:  88%|▉| 653/743 [3:03:30<21:56, 14.62s/batch, batch_loss=16.2, batch

Validation:  88%|▉| 653/743 [3:03:45<21:56, 14.62s/batch, batch_loss=20.5, batch

Validation:  88%|▉| 654/743 [3:03:45<21:49, 14.71s/batch, batch_loss=20.5, batch

Validation:  88%|▉| 654/743 [3:04:00<21:49, 14.71s/batch, batch_loss=24.6, batch

Validation:  88%|▉| 655/743 [3:04:00<21:24, 14.60s/batch, batch_loss=24.6, batch

Validation:  88%|▉| 655/743 [3:04:14<21:24, 14.60s/batch, batch_loss=19.4, batch

Validation:  88%|▉| 656/743 [3:04:14<20:48, 14.35s/batch, batch_loss=19.4, batch

Validation:  88%|▉| 656/743 [3:04:28<20:48, 14.35s/batch, batch_loss=13.4, batch

Validation:  88%|▉| 657/743 [3:04:28<20:37, 14.40s/batch, batch_loss=13.4, batch

Validation:  88%|▉| 657/743 [3:04:43<20:37, 14.40s/batch, batch_loss=15.3, batch

Validation:  89%|▉| 658/743 [3:04:43<20:31, 14.49s/batch, batch_loss=15.3, batch

Validation:  89%|▉| 658/743 [3:04:58<20:31, 14.49s/batch, batch_loss=23.2, batch

Validation:  89%|▉| 659/743 [3:04:58<20:28, 14.63s/batch, batch_loss=23.2, batch

Validation:  89%|▉| 659/743 [3:05:13<20:28, 14.63s/batch, batch_loss=21, batch_i

Validation:  89%|▉| 660/743 [3:05:13<20:24, 14.75s/batch, batch_loss=21, batch_i

Validation:  89%|▉| 660/743 [3:05:27<20:24, 14.75s/batch, batch_loss=17.8, batch

Validation:  89%|▉| 661/743 [3:05:27<20:06, 14.72s/batch, batch_loss=17.8, batch

Validation:  89%|▉| 661/743 [3:05:43<20:06, 14.72s/batch, batch_loss=7.58, batch

Validation:  89%|▉| 662/743 [3:05:43<20:11, 14.96s/batch, batch_loss=7.58, batch

Validation:  89%|▉| 662/743 [3:05:57<20:11, 14.96s/batch, batch_loss=3.58e+3, ba

Validation:  89%|▉| 663/743 [3:05:57<19:35, 14.70s/batch, batch_loss=3.58e+3, ba

Validation:  89%|▉| 663/743 [3:06:12<19:35, 14.70s/batch, batch_loss=13.9, batch

Validation:  89%|▉| 664/743 [3:06:12<19:25, 14.75s/batch, batch_loss=13.9, batch

Validation:  89%|▉| 664/743 [3:06:26<19:25, 14.75s/batch, batch_loss=19.5, batch

Validation:  90%|▉| 665/743 [3:06:26<19:04, 14.67s/batch, batch_loss=19.5, batch

Validation:  90%|▉| 665/743 [3:06:44<19:04, 14.67s/batch, batch_loss=14.3, batch

Validation:  90%|▉| 666/743 [3:06:44<19:54, 15.51s/batch, batch_loss=14.3, batch

Validation:  90%|▉| 666/743 [3:06:58<19:54, 15.51s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [3:06:58<19:12, 15.16s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [3:07:13<19:12, 15.16s/batch, batch_loss=20.1, batch

Validation:  90%|▉| 668/743 [3:07:13<18:57, 15.17s/batch, batch_loss=20.1, batch

Validation:  90%|▉| 668/743 [3:07:27<18:57, 15.17s/batch, batch_loss=23.8, batch

Validation:  90%|▉| 669/743 [3:07:27<18:18, 14.84s/batch, batch_loss=23.8, batch

Validation:  90%|▉| 669/743 [3:07:42<18:18, 14.84s/batch, batch_loss=23.1, batch

Validation:  90%|▉| 670/743 [3:07:42<18:08, 14.92s/batch, batch_loss=23.1, batch

Validation:  90%|▉| 670/743 [3:07:57<18:08, 14.92s/batch, batch_loss=3.11e+3, ba

Validation:  90%|▉| 671/743 [3:07:57<17:45, 14.79s/batch, batch_loss=3.11e+3, ba

Validation:  90%|▉| 671/743 [3:08:11<17:45, 14.79s/batch, batch_loss=20.7, batch

Validation:  90%|▉| 672/743 [3:08:11<17:06, 14.46s/batch, batch_loss=20.7, batch

Validation:  90%|▉| 672/743 [3:08:27<17:06, 14.46s/batch, batch_loss=15.6, batch

Validation:  91%|▉| 673/743 [3:08:27<17:40, 15.16s/batch, batch_loss=15.6, batch

Validation:  91%|▉| 673/743 [3:08:42<17:40, 15.16s/batch, batch_loss=11.9, batch

Validation:  91%|▉| 674/743 [3:08:42<17:07, 14.90s/batch, batch_loss=11.9, batch

Validation:  91%|▉| 674/743 [3:08:56<17:07, 14.90s/batch, batch_loss=22.5, batch

Validation:  91%|▉| 675/743 [3:08:56<16:46, 14.80s/batch, batch_loss=22.5, batch

Validation:  91%|▉| 675/743 [3:09:11<16:46, 14.80s/batch, batch_loss=18.2, batch

Validation:  91%|▉| 676/743 [3:09:11<16:27, 14.74s/batch, batch_loss=18.2, batch

Validation:  91%|▉| 676/743 [3:09:26<16:27, 14.74s/batch, batch_loss=22.1, batch

Validation:  91%|▉| 677/743 [3:09:26<16:19, 14.83s/batch, batch_loss=22.1, batch

Validation:  91%|▉| 677/743 [3:09:40<16:19, 14.83s/batch, batch_loss=15.1, batch

Validation:  91%|▉| 678/743 [3:09:40<15:49, 14.60s/batch, batch_loss=15.1, batch

Validation:  91%|▉| 678/743 [3:09:55<15:49, 14.60s/batch, batch_loss=14.9, batch

Validation:  91%|▉| 679/743 [3:09:55<15:38, 14.66s/batch, batch_loss=14.9, batch

Validation:  91%|▉| 679/743 [3:10:09<15:38, 14.66s/batch, batch_loss=20.3, batch

Validation:  92%|▉| 680/743 [3:10:09<15:07, 14.41s/batch, batch_loss=20.3, batch

Validation:  92%|▉| 680/743 [3:10:24<15:07, 14.41s/batch, batch_loss=19.6, batch

Validation:  92%|▉| 681/743 [3:10:24<15:01, 14.55s/batch, batch_loss=19.6, batch

Validation:  92%|▉| 681/743 [3:10:38<15:01, 14.55s/batch, batch_loss=25.7, batch

Validation:  92%|▉| 682/743 [3:10:38<14:45, 14.52s/batch, batch_loss=25.7, batch

Validation:  92%|▉| 682/743 [3:10:52<14:45, 14.52s/batch, batch_loss=19.7, batch

Validation:  92%|▉| 683/743 [3:10:52<14:25, 14.42s/batch, batch_loss=19.7, batch

Validation:  92%|▉| 683/743 [3:11:07<14:25, 14.42s/batch, batch_loss=15.8, batch

Validation:  92%|▉| 684/743 [3:11:07<14:13, 14.46s/batch, batch_loss=15.8, batch

Validation:  92%|▉| 684/743 [3:11:21<14:13, 14.46s/batch, batch_loss=14.3, batch

Validation:  92%|▉| 685/743 [3:11:21<13:53, 14.37s/batch, batch_loss=14.3, batch

Validation:  92%|▉| 685/743 [3:11:36<13:53, 14.37s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [3:11:36<13:54, 14.64s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [3:11:51<13:54, 14.64s/batch, batch_loss=22.4, batch

Validation:  92%|▉| 687/743 [3:11:51<13:44, 14.72s/batch, batch_loss=22.4, batch

Validation:  92%|▉| 687/743 [3:12:05<13:44, 14.72s/batch, batch_loss=13.9, batch

Validation:  93%|▉| 688/743 [3:12:05<13:19, 14.53s/batch, batch_loss=13.9, batch

Validation:  93%|▉| 688/743 [3:12:20<13:19, 14.53s/batch, batch_loss=15.8, batch

Validation:  93%|▉| 689/743 [3:12:20<13:03, 14.52s/batch, batch_loss=15.8, batch

Validation:  93%|▉| 689/743 [3:12:33<13:03, 14.52s/batch, batch_loss=19.2, batch

Validation:  93%|▉| 690/743 [3:12:33<12:32, 14.19s/batch, batch_loss=19.2, batch

Validation:  93%|▉| 690/743 [3:12:48<12:32, 14.19s/batch, batch_loss=12.7, batch

Validation:  93%|▉| 691/743 [3:12:48<12:34, 14.52s/batch, batch_loss=12.7, batch

Validation:  93%|▉| 691/743 [3:13:02<12:34, 14.52s/batch, batch_loss=19.2, batch

Validation:  93%|▉| 692/743 [3:13:02<12:14, 14.40s/batch, batch_loss=19.2, batch

Validation:  93%|▉| 692/743 [3:13:17<12:14, 14.40s/batch, batch_loss=20.5, batch

Validation:  93%|▉| 693/743 [3:13:17<11:54, 14.29s/batch, batch_loss=20.5, batch

Validation:  93%|▉| 693/743 [3:13:34<11:54, 14.29s/batch, batch_loss=24.9, batch

Validation:  93%|▉| 694/743 [3:13:34<12:25, 15.21s/batch, batch_loss=24.9, batch

Validation:  93%|▉| 694/743 [3:13:48<12:25, 15.21s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [3:13:48<11:48, 14.77s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [3:14:02<11:48, 14.77s/batch, batch_loss=7.69, batch

Validation:  94%|▉| 696/743 [3:14:02<11:27, 14.63s/batch, batch_loss=7.69, batch

Validation:  94%|▉| 696/743 [3:14:17<11:27, 14.63s/batch, batch_loss=35.2, batch

Validation:  94%|▉| 697/743 [3:14:17<11:17, 14.74s/batch, batch_loss=35.2, batch

Validation:  94%|▉| 697/743 [3:14:30<11:17, 14.74s/batch, batch_loss=756, batch_

Validation:  94%|▉| 698/743 [3:14:30<10:44, 14.32s/batch, batch_loss=756, batch_

Validation:  94%|▉| 698/743 [3:14:45<10:44, 14.32s/batch, batch_loss=6.6, batch_

Validation:  94%|▉| 699/743 [3:14:45<10:31, 14.35s/batch, batch_loss=6.6, batch_

Validation:  94%|▉| 699/743 [3:14:59<10:31, 14.35s/batch, batch_loss=945, batch_

Validation:  94%|▉| 700/743 [3:14:59<10:22, 14.47s/batch, batch_loss=945, batch_

Validation:  94%|▉| 700/743 [3:15:14<10:22, 14.47s/batch, batch_loss=5.94, batch

Validation:  94%|▉| 701/743 [3:15:14<10:08, 14.49s/batch, batch_loss=5.94, batch

Validation:  94%|▉| 701/743 [3:15:27<10:08, 14.49s/batch, batch_loss=7.21, batch

Validation:  94%|▉| 702/743 [3:15:27<09:39, 14.14s/batch, batch_loss=7.21, batch

Validation:  94%|▉| 702/743 [3:15:42<09:39, 14.14s/batch, batch_loss=176, batch_

Validation:  95%|▉| 703/743 [3:15:42<09:31, 14.29s/batch, batch_loss=176, batch_

Validation:  95%|▉| 703/743 [3:15:56<09:31, 14.29s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [3:15:56<09:09, 14.09s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [3:16:10<09:09, 14.09s/batch, batch_loss=9.52, batch

Validation:  95%|▉| 705/743 [3:16:10<09:01, 14.26s/batch, batch_loss=9.52, batch

Validation:  95%|▉| 705/743 [3:16:24<09:01, 14.26s/batch, batch_loss=16.3, batch

Validation:  95%|▉| 706/743 [3:16:24<08:45, 14.21s/batch, batch_loss=16.3, batch

Validation:  95%|▉| 706/743 [3:16:39<08:45, 14.21s/batch, batch_loss=416, batch_

Validation:  95%|▉| 707/743 [3:16:39<08:32, 14.24s/batch, batch_loss=416, batch_

Validation:  95%|▉| 707/743 [3:16:52<08:32, 14.24s/batch, batch_loss=16.4, batch

Validation:  95%|▉| 708/743 [3:16:52<08:13, 14.10s/batch, batch_loss=16.4, batch

Validation:  95%|▉| 708/743 [3:17:07<08:13, 14.10s/batch, batch_loss=24.2, batch

Validation:  95%|▉| 709/743 [3:17:07<08:03, 14.21s/batch, batch_loss=24.2, batch

Validation:  95%|▉| 709/743 [3:17:21<08:03, 14.21s/batch, batch_loss=17, batch_i

Validation:  96%|▉| 710/743 [3:17:21<07:48, 14.19s/batch, batch_loss=17, batch_i

Validation:  96%|▉| 710/743 [3:17:35<07:48, 14.19s/batch, batch_loss=16.1, batch

Validation:  96%|▉| 711/743 [3:17:35<07:34, 14.20s/batch, batch_loss=16.1, batch

Validation:  96%|▉| 711/743 [3:17:49<07:34, 14.20s/batch, batch_loss=20.6, batch

Validation:  96%|▉| 712/743 [3:17:49<07:19, 14.18s/batch, batch_loss=20.6, batch

Validation:  96%|▉| 712/743 [3:18:04<07:19, 14.18s/batch, batch_loss=15.1, batch

Validation:  96%|▉| 713/743 [3:18:04<07:07, 14.25s/batch, batch_loss=15.1, batch

Validation:  96%|▉| 713/743 [3:18:20<07:07, 14.25s/batch, batch_loss=6.65, batch

Validation:  96%|▉| 714/743 [3:18:20<07:10, 14.84s/batch, batch_loss=6.65, batch

Validation:  96%|▉| 714/743 [3:18:35<07:10, 14.84s/batch, batch_loss=10.6, batch

Validation:  96%|▉| 715/743 [3:18:35<06:53, 14.77s/batch, batch_loss=10.6, batch

Validation:  96%|▉| 715/743 [3:18:49<06:53, 14.77s/batch, batch_loss=19, batch_i

Validation:  96%|▉| 716/743 [3:18:49<06:32, 14.52s/batch, batch_loss=19, batch_i

Validation:  96%|▉| 716/743 [3:19:03<06:32, 14.52s/batch, batch_loss=386, batch_

Validation:  97%|▉| 717/743 [3:19:03<06:13, 14.38s/batch, batch_loss=386, batch_

Validation:  97%|▉| 717/743 [3:19:17<06:13, 14.38s/batch, batch_loss=18.8, batch

Validation:  97%|▉| 718/743 [3:19:17<06:02, 14.52s/batch, batch_loss=18.8, batch

Validation:  97%|▉| 718/743 [3:19:32<06:02, 14.52s/batch, batch_loss=15.9, batch

Validation:  97%|▉| 719/743 [3:19:32<05:47, 14.48s/batch, batch_loss=15.9, batch

Validation:  97%|▉| 719/743 [3:19:47<05:47, 14.48s/batch, batch_loss=16.7, batch

Validation:  97%|▉| 720/743 [3:19:47<05:35, 14.57s/batch, batch_loss=16.7, batch

Validation:  97%|▉| 720/743 [3:20:00<05:35, 14.57s/batch, batch_loss=11.6, batch

Validation:  97%|▉| 721/743 [3:20:00<05:10, 14.10s/batch, batch_loss=11.6, batch

Validation:  97%|▉| 721/743 [3:20:13<05:10, 14.10s/batch, batch_loss=23.7, batch

Validation:  97%|▉| 722/743 [3:20:13<04:53, 13.97s/batch, batch_loss=23.7, batch

Validation:  97%|▉| 722/743 [3:20:28<04:53, 13.97s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [3:20:28<04:43, 14.19s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [3:20:44<04:43, 14.19s/batch, batch_loss=17.5, batch

Validation:  97%|▉| 724/743 [3:20:44<04:42, 14.85s/batch, batch_loss=17.5, batch

Validation:  97%|▉| 724/743 [3:20:59<04:42, 14.85s/batch, batch_loss=14.5, batch

Validation:  98%|▉| 725/743 [3:20:59<04:23, 14.66s/batch, batch_loss=14.5, batch

Validation:  98%|▉| 725/743 [3:21:13<04:23, 14.66s/batch, batch_loss=20, batch_i

Validation:  98%|▉| 726/743 [3:21:13<04:07, 14.53s/batch, batch_loss=20, batch_i

Validation:  98%|▉| 726/743 [3:21:27<04:07, 14.53s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:21:27<03:48, 14.30s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:21:39<03:48, 14.30s/batch, batch_loss=28.7, batch

Validation:  98%|▉| 728/743 [3:21:39<03:26, 13.78s/batch, batch_loss=28.7, batch

Validation:  98%|▉| 728/743 [3:21:52<03:26, 13.78s/batch, batch_loss=28.7, batch

Validation:  98%|▉| 729/743 [3:21:52<03:09, 13.55s/batch, batch_loss=28.7, batch

Validation:  98%|▉| 729/743 [3:22:05<03:09, 13.55s/batch, batch_loss=19.7, batch

Validation:  98%|▉| 730/743 [3:22:05<02:54, 13.41s/batch, batch_loss=19.7, batch

Validation:  98%|▉| 730/743 [3:22:20<02:54, 13.41s/batch, batch_loss=13.5, batch

Validation:  98%|▉| 731/743 [3:22:20<02:47, 13.95s/batch, batch_loss=13.5, batch

Validation:  98%|▉| 731/743 [3:22:35<02:47, 13.95s/batch, batch_loss=9.92, batch

Validation:  99%|▉| 732/743 [3:22:35<02:35, 14.15s/batch, batch_loss=9.92, batch

Validation:  99%|▉| 732/743 [3:22:49<02:35, 14.15s/batch, batch_loss=26, batch_i

Validation:  99%|▉| 733/743 [3:22:49<02:20, 14.09s/batch, batch_loss=26, batch_i

Validation:  99%|▉| 733/743 [3:23:05<02:20, 14.09s/batch, batch_loss=3.31, batch

Validation:  99%|▉| 734/743 [3:23:05<02:13, 14.80s/batch, batch_loss=3.31, batch

Validation:  99%|▉| 734/743 [3:23:20<02:13, 14.80s/batch, batch_loss=7.04, batch

Validation:  99%|▉| 735/743 [3:23:20<01:58, 14.79s/batch, batch_loss=7.04, batch

Validation:  99%|▉| 735/743 [3:23:35<01:58, 14.79s/batch, batch_loss=1.14, batch

Validation:  99%|▉| 736/743 [3:23:35<01:42, 14.66s/batch, batch_loss=1.14, batch

Validation:  99%|▉| 736/743 [3:23:47<01:42, 14.66s/batch, batch_loss=0.0664, bat

Validation:  99%|▉| 737/743 [3:23:47<01:24, 14.06s/batch, batch_loss=0.0664, bat

Validation:  99%|▉| 737/743 [3:24:00<01:24, 14.06s/batch, batch_loss=0.0664, bat

Validation:  99%|▉| 738/743 [3:24:00<01:08, 13.73s/batch, batch_loss=0.0664, bat

Validation:  99%|▉| 738/743 [3:24:13<01:08, 13.73s/batch, batch_loss=0.0664, bat

Validation:  99%|▉| 739/743 [3:24:13<00:53, 13.50s/batch, batch_loss=0.0664, bat

Validation:  99%|▉| 739/743 [3:24:28<00:53, 13.50s/batch, batch_loss=0.0664, bat

Validation: 100%|▉| 740/743 [3:24:28<00:42, 14.03s/batch, batch_loss=0.0664, bat

Validation: 100%|▉| 740/743 [3:24:40<00:42, 14.03s/batch, batch_loss=0.0664, bat

Validation: 100%|▉| 741/743 [3:24:40<00:26, 13.41s/batch, batch_loss=0.0664, bat

Validation: 100%|▉| 741/743 [3:24:53<00:26, 13.41s/batch, batch_loss=0.0664, bat

Validation: 100%|▉| 742/743 [3:24:53<00:13, 13.06s/batch, batch_loss=0.0664, bat

Validation: 100%|▉| 742/743 [3:25:04<00:13, 13.06s/batch, batch_loss=0.0688, bat

Validation: 100%|█| 743/743 [3:25:04<00:00, 12.61s/batch, batch_loss=0.0688, bat

Validation: 100%|█| 743/743 [3:25:04<00:00, 16.56s/batch, batch_loss=0.0688, bat




Val Loss: 1296.4895


Epoch 9/10:   0%|                                    | 0/991 [00:00<?, ?batch/s]

Epoch 9/10:   0%| | 0/991 [00:15<?, ?batch/s, batch_loss=16.3, batch_index=1, ba

Epoch 9/10:   0%| | 1/991 [00:15<4:14:39, 15.43s/batch, batch_loss=16.3, batch_i

Epoch 9/10:   0%| | 1/991 [00:29<4:14:39, 15.43s/batch, batch_loss=17, batch_ind

Epoch 9/10:   0%| | 2/991 [00:29<4:00:40, 14.60s/batch, batch_loss=17, batch_ind

Epoch 9/10:   0%| | 2/991 [00:44<4:00:40, 14.60s/batch, batch_loss=11.5, batch_i

Epoch 9/10:   0%| | 3/991 [00:44<4:04:06, 14.82s/batch, batch_loss=11.5, batch_i

Epoch 9/10:   0%| | 3/991 [00:58<4:04:06, 14.82s/batch, batch_loss=6.6, batch_in

Epoch 9/10:   0%| | 4/991 [00:58<3:59:57, 14.59s/batch, batch_loss=6.6, batch_in

Epoch 9/10:   0%| | 4/991 [01:13<3:59:57, 14.59s/batch, batch_loss=20.6, batch_i

Epoch 9/10:   1%| | 5/991 [01:13<3:58:36, 14.52s/batch, batch_loss=20.6, batch_i

Epoch 9/10:   1%| | 5/991 [01:29<3:58:36, 14.52s/batch, batch_loss=23.6, batch_i

Epoch 9/10:   1%| | 6/991 [01:29<4:10:20, 15.25s/batch, batch_loss=23.6, batch_i

Epoch 9/10:   1%| | 6/991 [01:44<4:10:20, 15.25s/batch, batch_loss=16.8, batch_i

Epoch 9/10:   1%| | 7/991 [01:44<4:04:55, 14.93s/batch, batch_loss=16.8, batch_i

Epoch 9/10:   1%| | 7/991 [01:58<4:04:55, 14.93s/batch, batch_loss=604, batch_in

Epoch 9/10:   1%| | 8/991 [01:58<4:02:39, 14.81s/batch, batch_loss=604, batch_in

Epoch 9/10:   1%| | 8/991 [02:13<4:02:39, 14.81s/batch, batch_loss=16, batch_ind

Epoch 9/10:   1%| | 9/991 [02:13<4:02:35, 14.82s/batch, batch_loss=16, batch_ind

Epoch 9/10:   1%| | 9/991 [02:28<4:02:35, 14.82s/batch, batch_loss=15.9, batch_i

Epoch 9/10:   1%| | 10/991 [02:28<4:04:16, 14.94s/batch, batch_loss=15.9, batch_

Epoch 9/10:   1%| | 10/991 [02:45<4:04:16, 14.94s/batch, batch_loss=12.7, batch_

Epoch 9/10:   1%| | 11/991 [02:45<4:13:47, 15.54s/batch, batch_loss=12.7, batch_

Epoch 9/10:   1%| | 11/991 [03:00<4:13:47, 15.54s/batch, batch_loss=1.99e+3, bat

Epoch 9/10:   1%| | 12/991 [03:00<4:12:28, 15.47s/batch, batch_loss=1.99e+3, bat

Epoch 9/10:   1%| | 12/991 [03:16<4:12:28, 15.47s/batch, batch_loss=19.2, batch_

Epoch 9/10:   1%| | 13/991 [03:16<4:14:26, 15.61s/batch, batch_loss=19.2, batch_

Epoch 9/10:   1%| | 13/991 [03:34<4:14:26, 15.61s/batch, batch_loss=11.3, batch_

Epoch 9/10:   1%| | 14/991 [03:34<4:22:21, 16.11s/batch, batch_loss=11.3, batch_

Epoch 9/10:   1%| | 14/991 [03:49<4:22:21, 16.11s/batch, batch_loss=11.5, batch_

Epoch 9/10:   2%| | 15/991 [03:49<4:18:15, 15.88s/batch, batch_loss=11.5, batch_

Epoch 9/10:   2%| | 15/991 [04:03<4:18:15, 15.88s/batch, batch_loss=14.1, batch_

Epoch 9/10:   2%| | 16/991 [04:03<4:10:25, 15.41s/batch, batch_loss=14.1, batch_

Epoch 9/10:   2%| | 16/991 [04:19<4:10:25, 15.41s/batch, batch_loss=15.5, batch_

Epoch 9/10:   2%| | 17/991 [04:19<4:09:58, 15.40s/batch, batch_loss=15.5, batch_

Epoch 9/10:   2%| | 17/991 [04:33<4:09:58, 15.40s/batch, batch_loss=11.2, batch_

Epoch 9/10:   2%| | 18/991 [04:33<4:02:39, 14.96s/batch, batch_loss=11.2, batch_

Epoch 9/10:   2%| | 18/991 [04:47<4:02:39, 14.96s/batch, batch_loss=8.96e+3, bat

Epoch 9/10:   2%| | 19/991 [04:47<4:01:14, 14.89s/batch, batch_loss=8.96e+3, bat

Epoch 9/10:   2%| | 19/991 [05:02<4:01:14, 14.89s/batch, batch_loss=11.3, batch_

Epoch 9/10:   2%| | 20/991 [05:02<3:58:58, 14.77s/batch, batch_loss=11.3, batch_

Epoch 9/10:   2%| | 20/991 [05:16<3:58:58, 14.77s/batch, batch_loss=17.7, batch_

Epoch 9/10:   2%| | 21/991 [05:16<3:56:56, 14.66s/batch, batch_loss=17.7, batch_

Epoch 9/10:   2%| | 21/991 [05:31<3:56:56, 14.66s/batch, batch_loss=1.07e+3, bat

Epoch 9/10:   2%| | 22/991 [05:31<3:56:49, 14.66s/batch, batch_loss=1.07e+3, bat

Epoch 9/10:   2%| | 22/991 [05:47<3:56:49, 14.66s/batch, batch_loss=8.68, batch_

Epoch 9/10:   2%| | 23/991 [05:47<4:01:46, 14.99s/batch, batch_loss=8.68, batch_

Epoch 9/10:   2%| | 23/991 [06:02<4:01:46, 14.99s/batch, batch_loss=13.7, batch_

Epoch 9/10:   2%| | 24/991 [06:02<4:03:37, 15.12s/batch, batch_loss=13.7, batch_

Epoch 9/10:   2%| | 24/991 [06:17<4:03:37, 15.12s/batch, batch_loss=12.8, batch_

Epoch 9/10:   3%| | 25/991 [06:17<4:01:13, 14.98s/batch, batch_loss=12.8, batch_

Epoch 9/10:   3%| | 25/991 [06:33<4:01:13, 14.98s/batch, batch_loss=17.5, batch_

Epoch 9/10:   3%| | 26/991 [06:33<4:07:15, 15.37s/batch, batch_loss=17.5, batch_

Epoch 9/10:   3%| | 26/991 [06:47<4:07:15, 15.37s/batch, batch_loss=16, batch_in

Epoch 9/10:   3%| | 27/991 [06:47<3:59:20, 14.90s/batch, batch_loss=16, batch_in

Epoch 9/10:   3%| | 27/991 [07:02<3:59:20, 14.90s/batch, batch_loss=1.13e+3, bat

Epoch 9/10:   3%| | 28/991 [07:02<4:00:23, 14.98s/batch, batch_loss=1.13e+3, bat

Epoch 9/10:   3%| | 28/991 [07:16<4:00:23, 14.98s/batch, batch_loss=10.1, batch_

Epoch 9/10:   3%| | 29/991 [07:16<3:56:09, 14.73s/batch, batch_loss=10.1, batch_

Epoch 9/10:   3%| | 29/991 [07:31<3:56:09, 14.73s/batch, batch_loss=11.3, batch_

Epoch 9/10:   3%| | 30/991 [07:31<3:55:45, 14.72s/batch, batch_loss=11.3, batch_

Epoch 9/10:   3%| | 30/991 [07:46<3:55:45, 14.72s/batch, batch_loss=9.3, batch_i

Epoch 9/10:   3%| | 31/991 [07:46<3:55:47, 14.74s/batch, batch_loss=9.3, batch_i

Epoch 9/10:   3%| | 31/991 [08:00<3:55:47, 14.74s/batch, batch_loss=1.3e+4, batc

Epoch 9/10:   3%| | 32/991 [08:00<3:54:18, 14.66s/batch, batch_loss=1.3e+4, batc

Epoch 9/10:   3%| | 32/991 [08:15<3:54:18, 14.66s/batch, batch_loss=13, batch_in

Epoch 9/10:   3%| | 33/991 [08:15<3:57:42, 14.89s/batch, batch_loss=13, batch_in

Epoch 9/10:   3%| | 33/991 [08:31<3:57:42, 14.89s/batch, batch_loss=8.82, batch_

Epoch 9/10:   3%| | 34/991 [08:31<3:58:45, 14.97s/batch, batch_loss=8.82, batch_

Epoch 9/10:   3%| | 34/991 [08:46<3:58:45, 14.97s/batch, batch_loss=12.3, batch_

Epoch 9/10:   4%| | 35/991 [08:46<4:00:04, 15.07s/batch, batch_loss=12.3, batch_

Epoch 9/10:   4%| | 35/991 [09:01<4:00:04, 15.07s/batch, batch_loss=9.28, batch_

Epoch 9/10:   4%| | 36/991 [09:01<4:00:15, 15.09s/batch, batch_loss=9.28, batch_

Epoch 9/10:   4%| | 36/991 [09:17<4:00:15, 15.09s/batch, batch_loss=10.6, batch_

Epoch 9/10:   4%| | 37/991 [09:17<4:01:53, 15.21s/batch, batch_loss=10.6, batch_

Epoch 9/10:   4%| | 37/991 [09:34<4:01:53, 15.21s/batch, batch_loss=6.36, batch_

Epoch 9/10:   4%| | 38/991 [09:34<4:11:52, 15.86s/batch, batch_loss=6.36, batch_

Epoch 9/10:   4%| | 38/991 [09:49<4:11:52, 15.86s/batch, batch_loss=1.72e+3, bat

Epoch 9/10:   4%| | 39/991 [09:49<4:08:47, 15.68s/batch, batch_loss=1.72e+3, bat

Epoch 9/10:   4%| | 39/991 [10:04<4:08:47, 15.68s/batch, batch_loss=14.6, batch_

Epoch 9/10:   4%| | 40/991 [10:04<4:03:29, 15.36s/batch, batch_loss=14.6, batch_

Epoch 9/10:   4%| | 40/991 [10:18<4:03:29, 15.36s/batch, batch_loss=6.23e+3, bat

Epoch 9/10:   4%| | 41/991 [10:18<3:59:36, 15.13s/batch, batch_loss=6.23e+3, bat

Epoch 9/10:   4%| | 41/991 [10:33<3:59:36, 15.13s/batch, batch_loss=15.3, batch_

Epoch 9/10:   4%| | 42/991 [10:33<3:57:40, 15.03s/batch, batch_loss=15.3, batch_

Epoch 9/10:   4%| | 42/991 [10:49<3:57:40, 15.03s/batch, batch_loss=8.87, batch_

Epoch 9/10:   4%| | 43/991 [10:49<4:00:01, 15.19s/batch, batch_loss=8.87, batch_

Epoch 9/10:   4%| | 43/991 [11:03<4:00:01, 15.19s/batch, batch_loss=15.6, batch_

Epoch 9/10:   4%| | 44/991 [11:03<3:57:24, 15.04s/batch, batch_loss=15.6, batch_

Epoch 9/10:   4%| | 44/991 [11:20<3:57:24, 15.04s/batch, batch_loss=15.6, batch_

Epoch 9/10:   5%| | 45/991 [11:20<4:04:53, 15.53s/batch, batch_loss=15.6, batch_

Epoch 9/10:   5%| | 45/991 [11:35<4:04:53, 15.53s/batch, batch_loss=12.4, batch_

Epoch 9/10:   5%| | 46/991 [11:35<4:02:29, 15.40s/batch, batch_loss=12.4, batch_

Epoch 9/10:   5%| | 46/991 [11:50<4:02:29, 15.40s/batch, batch_loss=6.02, batch_

Epoch 9/10:   5%| | 47/991 [11:50<3:58:05, 15.13s/batch, batch_loss=6.02, batch_

Epoch 9/10:   5%| | 47/991 [12:04<3:58:05, 15.13s/batch, batch_loss=13.1, batch_

Epoch 9/10:   5%| | 48/991 [12:04<3:54:53, 14.94s/batch, batch_loss=13.1, batch_

Epoch 9/10:   5%| | 48/991 [12:19<3:54:53, 14.94s/batch, batch_loss=12.2, batch_

Epoch 9/10:   5%| | 49/991 [12:19<3:52:23, 14.80s/batch, batch_loss=12.2, batch_

Epoch 9/10:   5%| | 49/991 [12:34<3:52:23, 14.80s/batch, batch_loss=14.3, batch_

Epoch 9/10:   5%| | 50/991 [12:34<3:52:34, 14.83s/batch, batch_loss=14.3, batch_

Epoch 9/10:   5%| | 50/991 [12:48<3:52:34, 14.83s/batch, batch_loss=8.79, batch_

Epoch 9/10:   5%| | 51/991 [12:48<3:50:38, 14.72s/batch, batch_loss=8.79, batch_

Epoch 9/10:   5%| | 51/991 [13:03<3:50:38, 14.72s/batch, batch_loss=13.3, batch_

Epoch 9/10:   5%| | 52/991 [13:03<3:49:32, 14.67s/batch, batch_loss=13.3, batch_

Epoch 9/10:   5%| | 52/991 [13:17<3:49:32, 14.67s/batch, batch_loss=13.7, batch_

Epoch 9/10:   5%| | 53/991 [13:17<3:46:00, 14.46s/batch, batch_loss=13.7, batch_

Epoch 9/10:   5%| | 53/991 [13:31<3:46:00, 14.46s/batch, batch_loss=8.8, batch_i

Epoch 9/10:   5%| | 54/991 [13:31<3:47:51, 14.59s/batch, batch_loss=8.8, batch_i

Epoch 9/10:   5%| | 54/991 [13:46<3:47:51, 14.59s/batch, batch_loss=9.86, batch_

Epoch 9/10:   6%| | 55/991 [13:46<3:46:11, 14.50s/batch, batch_loss=9.86, batch_

Epoch 9/10:   6%| | 55/991 [14:01<3:46:11, 14.50s/batch, batch_loss=11.6, batch_

Epoch 9/10:   6%| | 56/991 [14:01<3:47:11, 14.58s/batch, batch_loss=11.6, batch_

Epoch 9/10:   6%| | 56/991 [14:15<3:47:11, 14.58s/batch, batch_loss=7.36, batch_

Epoch 9/10:   6%| | 57/991 [14:15<3:48:36, 14.69s/batch, batch_loss=7.36, batch_

Epoch 9/10:   6%| | 57/991 [14:31<3:48:36, 14.69s/batch, batch_loss=16.2, batch_

Epoch 9/10:   6%| | 58/991 [14:31<3:49:57, 14.79s/batch, batch_loss=16.2, batch_

Epoch 9/10:   6%| | 58/991 [14:46<3:49:57, 14.79s/batch, batch_loss=10.3, batch_

Epoch 9/10:   6%| | 59/991 [14:46<3:51:35, 14.91s/batch, batch_loss=10.3, batch_

Epoch 9/10:   6%| | 59/991 [15:00<3:51:35, 14.91s/batch, batch_loss=17.4, batch_

Epoch 9/10:   6%| | 60/991 [15:00<3:49:02, 14.76s/batch, batch_loss=17.4, batch_

Epoch 9/10:   6%| | 60/991 [15:14<3:49:02, 14.76s/batch, batch_loss=11.9, batch_

Epoch 9/10:   6%| | 61/991 [15:14<3:47:00, 14.65s/batch, batch_loss=11.9, batch_

Epoch 9/10:   6%| | 61/991 [15:29<3:47:00, 14.65s/batch, batch_loss=11.8, batch_

Epoch 9/10:   6%| | 62/991 [15:29<3:47:17, 14.68s/batch, batch_loss=11.8, batch_

Epoch 9/10:   6%| | 62/991 [15:44<3:47:17, 14.68s/batch, batch_loss=421, batch_i

Epoch 9/10:   6%| | 63/991 [15:44<3:45:52, 14.60s/batch, batch_loss=421, batch_i

Epoch 9/10:   6%| | 63/991 [15:58<3:45:52, 14.60s/batch, batch_loss=794, batch_i

Epoch 9/10:   6%| | 64/991 [15:58<3:44:51, 14.55s/batch, batch_loss=794, batch_i

Epoch 9/10:   6%| | 64/991 [16:14<3:44:51, 14.55s/batch, batch_loss=2.56e+3, bat

Epoch 9/10:   7%| | 65/991 [16:14<3:50:38, 14.94s/batch, batch_loss=2.56e+3, bat

Epoch 9/10:   7%| | 65/991 [16:29<3:50:38, 14.94s/batch, batch_loss=4.03, batch_

Epoch 9/10:   7%| | 66/991 [16:29<3:52:51, 15.10s/batch, batch_loss=4.03, batch_

Epoch 9/10:   7%| | 66/991 [16:45<3:52:51, 15.10s/batch, batch_loss=10, batch_in

Epoch 9/10:   7%| | 67/991 [16:45<3:55:00, 15.26s/batch, batch_loss=10, batch_in

Epoch 9/10:   7%| | 67/991 [17:01<3:55:00, 15.26s/batch, batch_loss=8.3, batch_i

Epoch 9/10:   7%| | 68/991 [17:01<3:55:30, 15.31s/batch, batch_loss=8.3, batch_i

Epoch 9/10:   7%| | 68/991 [17:15<3:55:30, 15.31s/batch, batch_loss=18.7, batch_

Epoch 9/10:   7%| | 69/991 [17:15<3:52:11, 15.11s/batch, batch_loss=18.7, batch_

Epoch 9/10:   7%| | 69/991 [17:32<3:52:11, 15.11s/batch, batch_loss=8.45, batch_

Epoch 9/10:   7%| | 70/991 [17:32<3:59:03, 15.57s/batch, batch_loss=8.45, batch_

Epoch 9/10:   7%| | 70/991 [17:47<3:59:03, 15.57s/batch, batch_loss=10.7, batch_

Epoch 9/10:   7%| | 71/991 [17:47<3:58:45, 15.57s/batch, batch_loss=10.7, batch_

Epoch 9/10:   7%| | 71/991 [18:02<3:58:45, 15.57s/batch, batch_loss=13, batch_in

Epoch 9/10:   7%| | 72/991 [18:02<3:56:07, 15.42s/batch, batch_loss=13, batch_in

Epoch 9/10:   7%| | 72/991 [18:18<3:56:07, 15.42s/batch, batch_loss=24.5, batch_

Epoch 9/10:   7%| | 73/991 [18:18<3:56:43, 15.47s/batch, batch_loss=24.5, batch_

Epoch 9/10:   7%| | 73/991 [18:33<3:56:43, 15.47s/batch, batch_loss=1.74e+3, bat

Epoch 9/10:   7%| | 74/991 [18:33<3:53:50, 15.30s/batch, batch_loss=1.74e+3, bat

Epoch 9/10:   7%| | 74/991 [18:48<3:53:50, 15.30s/batch, batch_loss=15.6, batch_

Epoch 9/10:   8%| | 75/991 [18:48<3:50:28, 15.10s/batch, batch_loss=15.6, batch_

Epoch 9/10:   8%| | 75/991 [19:03<3:50:28, 15.10s/batch, batch_loss=10.4, batch_

Epoch 9/10:   8%| | 76/991 [19:03<3:50:48, 15.14s/batch, batch_loss=10.4, batch_

Epoch 9/10:   8%| | 76/991 [19:18<3:50:48, 15.14s/batch, batch_loss=11.8, batch_

Epoch 9/10:   8%| | 77/991 [19:18<3:49:51, 15.09s/batch, batch_loss=11.8, batch_

Epoch 9/10:   8%| | 77/991 [19:31<3:49:51, 15.09s/batch, batch_loss=13, batch_in

Epoch 9/10:   8%| | 78/991 [19:31<3:43:02, 14.66s/batch, batch_loss=13, batch_in

Epoch 9/10:   8%| | 78/991 [19:50<3:43:02, 14.66s/batch, batch_loss=9.44, batch_

Epoch 9/10:   8%| | 79/991 [19:50<3:58:48, 15.71s/batch, batch_loss=9.44, batch_

Epoch 9/10:   8%| | 79/991 [20:05<3:58:48, 15.71s/batch, batch_loss=10.1, batch_

Epoch 9/10:   8%| | 80/991 [20:05<3:58:21, 15.70s/batch, batch_loss=10.1, batch_

Epoch 9/10:   8%| | 80/991 [20:21<3:58:21, 15.70s/batch, batch_loss=13.7, batch_

Epoch 9/10:   8%| | 81/991 [20:21<3:58:01, 15.69s/batch, batch_loss=13.7, batch_

Epoch 9/10:   8%| | 81/991 [20:36<3:58:01, 15.69s/batch, batch_loss=12.9, batch_

Epoch 9/10:   8%| | 82/991 [20:36<3:54:24, 15.47s/batch, batch_loss=12.9, batch_

Epoch 9/10:   8%| | 82/991 [20:51<3:54:24, 15.47s/batch, batch_loss=8.46, batch_

Epoch 9/10:   8%| | 83/991 [20:51<3:52:34, 15.37s/batch, batch_loss=8.46, batch_

Epoch 9/10:   8%| | 83/991 [21:05<3:52:34, 15.37s/batch, batch_loss=12.2, batch_

Epoch 9/10:   8%| | 84/991 [21:05<3:47:05, 15.02s/batch, batch_loss=12.2, batch_

Epoch 9/10:   8%| | 84/991 [21:21<3:47:05, 15.02s/batch, batch_loss=9.46, batch_

Epoch 9/10:   9%| | 85/991 [21:21<3:48:21, 15.12s/batch, batch_loss=9.46, batch_

Epoch 9/10:   9%| | 85/991 [21:39<3:48:21, 15.12s/batch, batch_loss=14.2, batch_

Epoch 9/10:   9%| | 86/991 [21:39<4:01:37, 16.02s/batch, batch_loss=14.2, batch_

Epoch 9/10:   9%| | 86/991 [21:54<4:01:37, 16.02s/batch, batch_loss=11.2, batch_

Epoch 9/10:   9%| | 87/991 [21:54<3:56:26, 15.69s/batch, batch_loss=11.2, batch_

Epoch 9/10:   9%| | 87/991 [22:09<3:56:26, 15.69s/batch, batch_loss=13.6, batch_

Epoch 9/10:   9%| | 88/991 [22:09<3:54:01, 15.55s/batch, batch_loss=13.6, batch_

Epoch 9/10:   9%| | 88/991 [22:24<3:54:01, 15.55s/batch, batch_loss=5.95, batch_

Epoch 9/10:   9%| | 89/991 [22:24<3:51:25, 15.39s/batch, batch_loss=5.95, batch_

Epoch 9/10:   9%| | 89/991 [22:38<3:51:25, 15.39s/batch, batch_loss=248, batch_i

Epoch 9/10:   9%| | 90/991 [22:38<3:47:30, 15.15s/batch, batch_loss=248, batch_i

Epoch 9/10:   9%| | 90/991 [22:54<3:47:30, 15.15s/batch, batch_loss=1.55e+3, bat

Epoch 9/10:   9%| | 91/991 [22:54<3:47:14, 15.15s/batch, batch_loss=1.55e+3, bat

Epoch 9/10:   9%| | 91/991 [23:08<3:47:14, 15.15s/batch, batch_loss=22.4, batch_

Epoch 9/10:   9%| | 92/991 [23:08<3:43:02, 14.89s/batch, batch_loss=22.4, batch_

Epoch 9/10:   9%| | 92/991 [23:24<3:43:02, 14.89s/batch, batch_loss=24.6, batch_

Epoch 9/10:   9%| | 93/991 [23:24<3:46:44, 15.15s/batch, batch_loss=24.6, batch_

Epoch 9/10:   9%| | 93/991 [23:39<3:46:44, 15.15s/batch, batch_loss=20.7, batch_

Epoch 9/10:   9%| | 94/991 [23:39<3:46:11, 15.13s/batch, batch_loss=20.7, batch_

Epoch 9/10:   9%| | 94/991 [23:53<3:46:11, 15.13s/batch, batch_loss=20.3, batch_

Epoch 9/10:  10%| | 95/991 [23:53<3:43:15, 14.95s/batch, batch_loss=20.3, batch_

Epoch 9/10:  10%| | 95/991 [24:07<3:43:15, 14.95s/batch, batch_loss=21.5, batch_

Epoch 9/10:  10%| | 96/991 [24:07<3:39:10, 14.69s/batch, batch_loss=21.5, batch_

Epoch 9/10:  10%| | 96/991 [24:22<3:39:10, 14.69s/batch, batch_loss=21.4, batch_

Epoch 9/10:  10%| | 97/991 [24:22<3:38:56, 14.69s/batch, batch_loss=21.4, batch_

Epoch 9/10:  10%| | 97/991 [24:37<3:38:56, 14.69s/batch, batch_loss=24.7, batch_

Epoch 9/10:  10%| | 98/991 [24:37<3:38:13, 14.66s/batch, batch_loss=24.7, batch_

Epoch 9/10:  10%| | 98/991 [24:52<3:38:13, 14.66s/batch, batch_loss=18.7, batch_

Epoch 9/10:  10%| | 99/991 [24:52<3:40:26, 14.83s/batch, batch_loss=18.7, batch_

Epoch 9/10:  10%| | 99/991 [25:07<3:40:26, 14.83s/batch, batch_loss=20.8, batch_

Epoch 9/10:  10%| | 100/991 [25:07<3:43:27, 15.05s/batch, batch_loss=20.8, batch

Epoch 9/10:  10%| | 100/991 [25:22<3:43:27, 15.05s/batch, batch_loss=20.2, batch

Epoch 9/10:  10%| | 101/991 [25:22<3:41:47, 14.95s/batch, batch_loss=20.2, batch

Epoch 9/10:  10%| | 101/991 [25:37<3:41:47, 14.95s/batch, batch_loss=20.6, batch

Epoch 9/10:  10%| | 102/991 [25:37<3:42:08, 14.99s/batch, batch_loss=20.6, batch

Epoch 9/10:  10%| | 102/991 [25:52<3:42:08, 14.99s/batch, batch_loss=918, batch_

Epoch 9/10:  10%| | 103/991 [25:52<3:41:06, 14.94s/batch, batch_loss=918, batch_

Epoch 9/10:  10%| | 103/991 [26:10<3:41:06, 14.94s/batch, batch_loss=20.9, batch

Epoch 9/10:  10%| | 104/991 [26:10<3:53:13, 15.78s/batch, batch_loss=20.9, batch

Epoch 9/10:  10%| | 104/991 [26:23<3:53:13, 15.78s/batch, batch_loss=10.7, batch

Epoch 9/10:  11%| | 105/991 [26:23<3:40:58, 14.96s/batch, batch_loss=10.7, batch

Epoch 9/10:  11%| | 105/991 [26:36<3:40:58, 14.96s/batch, batch_loss=14.6, batch

Epoch 9/10:  11%| | 106/991 [26:36<3:34:42, 14.56s/batch, batch_loss=14.6, batch

Epoch 9/10:  11%| | 106/991 [26:49<3:34:42, 14.56s/batch, batch_loss=22.3, batch

Epoch 9/10:  11%| | 107/991 [26:49<3:27:36, 14.09s/batch, batch_loss=22.3, batch

Epoch 9/10:  11%| | 107/991 [27:03<3:27:36, 14.09s/batch, batch_loss=28.4, batch

Epoch 9/10:  11%| | 108/991 [27:03<3:26:37, 14.04s/batch, batch_loss=28.4, batch

Epoch 9/10:  11%| | 108/991 [27:17<3:26:37, 14.04s/batch, batch_loss=15.4, batch

Epoch 9/10:  11%| | 109/991 [27:17<3:25:43, 13.99s/batch, batch_loss=15.4, batch

Epoch 9/10:  11%| | 109/991 [27:32<3:25:43, 13.99s/batch, batch_loss=14.5, batch

Epoch 9/10:  11%| | 110/991 [27:32<3:26:36, 14.07s/batch, batch_loss=14.5, batch

Epoch 9/10:  11%| | 110/991 [27:47<3:26:36, 14.07s/batch, batch_loss=18.3, batch

Epoch 9/10:  11%| | 111/991 [27:47<3:33:32, 14.56s/batch, batch_loss=18.3, batch

Epoch 9/10:  11%| | 111/991 [28:02<3:33:32, 14.56s/batch, batch_loss=22.5, batch

Epoch 9/10:  11%| | 112/991 [28:02<3:35:57, 14.74s/batch, batch_loss=22.5, batch

Epoch 9/10:  11%| | 112/991 [28:18<3:35:57, 14.74s/batch, batch_loss=12.5, batch

Epoch 9/10:  11%| | 113/991 [28:18<3:37:39, 14.87s/batch, batch_loss=12.5, batch

Epoch 9/10:  11%| | 113/991 [28:33<3:37:39, 14.87s/batch, batch_loss=17.3, batch

Epoch 9/10:  12%| | 114/991 [28:33<3:37:45, 14.90s/batch, batch_loss=17.3, batch

Epoch 9/10:  12%| | 114/991 [28:48<3:37:45, 14.90s/batch, batch_loss=17.6, batch

Epoch 9/10:  12%| | 115/991 [28:48<3:38:01, 14.93s/batch, batch_loss=17.6, batch

Epoch 9/10:  12%| | 115/991 [29:02<3:38:01, 14.93s/batch, batch_loss=11.6, batch

Epoch 9/10:  12%| | 116/991 [29:02<3:36:45, 14.86s/batch, batch_loss=11.6, batch

Epoch 9/10:  12%| | 116/991 [29:17<3:36:45, 14.86s/batch, batch_loss=17.9, batch

Epoch 9/10:  12%| | 117/991 [29:17<3:37:24, 14.93s/batch, batch_loss=17.9, batch

Epoch 9/10:  12%| | 117/991 [29:32<3:37:24, 14.93s/batch, batch_loss=16.7, batch

Epoch 9/10:  12%| | 118/991 [29:32<3:38:10, 14.99s/batch, batch_loss=16.7, batch

Epoch 9/10:  12%| | 118/991 [29:49<3:38:10, 14.99s/batch, batch_loss=22.7, batch

Epoch 9/10:  12%| | 119/991 [29:49<3:46:37, 15.59s/batch, batch_loss=22.7, batch

Epoch 9/10:  12%| | 119/991 [30:04<3:46:37, 15.59s/batch, batch_loss=17.9, batch

Epoch 9/10:  12%| | 120/991 [30:04<3:43:49, 15.42s/batch, batch_loss=17.9, batch

Epoch 9/10:  12%| | 120/991 [30:19<3:43:49, 15.42s/batch, batch_loss=23.8, batch

Epoch 9/10:  12%| | 121/991 [30:19<3:40:54, 15.24s/batch, batch_loss=23.8, batch

Epoch 9/10:  12%| | 121/991 [30:34<3:40:54, 15.24s/batch, batch_loss=8.73, batch

Epoch 9/10:  12%| | 122/991 [30:34<3:39:01, 15.12s/batch, batch_loss=8.73, batch

Epoch 9/10:  12%| | 122/991 [30:49<3:39:01, 15.12s/batch, batch_loss=15, batch_i

Epoch 9/10:  12%| | 123/991 [30:49<3:37:08, 15.01s/batch, batch_loss=15, batch_i

Epoch 9/10:  12%| | 123/991 [31:03<3:37:08, 15.01s/batch, batch_loss=3.47e+3, ba

Epoch 9/10:  13%|▏| 124/991 [31:03<3:34:35, 14.85s/batch, batch_loss=3.47e+3, ba

Epoch 9/10:  13%|▏| 124/991 [31:18<3:34:35, 14.85s/batch, batch_loss=7.13, batch

Epoch 9/10:  13%|▏| 125/991 [31:18<3:32:18, 14.71s/batch, batch_loss=7.13, batch

Epoch 9/10:  13%|▏| 125/991 [31:33<3:32:18, 14.71s/batch, batch_loss=13, batch_i

Epoch 9/10:  13%|▏| 126/991 [31:33<3:33:44, 14.83s/batch, batch_loss=13, batch_i

Epoch 9/10:  13%|▏| 126/991 [31:51<3:33:44, 14.83s/batch, batch_loss=1.88e+3, ba

Epoch 9/10:  13%|▏| 127/991 [31:51<3:46:49, 15.75s/batch, batch_loss=1.88e+3, ba

Epoch 9/10:  13%|▏| 127/991 [32:06<3:46:49, 15.75s/batch, batch_loss=1.56e+3, ba

Epoch 9/10:  13%|▏| 128/991 [32:06<3:44:00, 15.57s/batch, batch_loss=1.56e+3, ba

Epoch 9/10:  13%|▏| 128/991 [32:20<3:44:00, 15.57s/batch, batch_loss=226, batch_

Epoch 9/10:  13%|▏| 129/991 [32:20<3:38:53, 15.24s/batch, batch_loss=226, batch_

Epoch 9/10:  13%|▏| 129/991 [32:36<3:38:53, 15.24s/batch, batch_loss=992, batch_

Epoch 9/10:  13%|▏| 130/991 [32:36<3:39:02, 15.26s/batch, batch_loss=992, batch_

Epoch 9/10:  13%|▏| 130/991 [32:51<3:39:02, 15.26s/batch, batch_loss=8.33e+3, ba

Epoch 9/10:  13%|▏| 131/991 [32:51<3:36:57, 15.14s/batch, batch_loss=8.33e+3, ba

Epoch 9/10:  13%|▏| 131/991 [33:05<3:36:57, 15.14s/batch, batch_loss=18.3, batch

Epoch 9/10:  13%|▏| 132/991 [33:05<3:32:16, 14.83s/batch, batch_loss=18.3, batch

Epoch 9/10:  13%|▏| 132/991 [33:19<3:32:16, 14.83s/batch, batch_loss=8.1, batch_

Epoch 9/10:  13%|▏| 133/991 [33:19<3:28:36, 14.59s/batch, batch_loss=8.1, batch_

Epoch 9/10:  13%|▏| 133/991 [33:33<3:28:36, 14.59s/batch, batch_loss=11.3, batch

Epoch 9/10:  14%|▏| 134/991 [33:33<3:27:44, 14.54s/batch, batch_loss=11.3, batch

Epoch 9/10:  14%|▏| 134/991 [33:50<3:27:44, 14.54s/batch, batch_loss=15.6, batch

Epoch 9/10:  14%|▏| 135/991 [33:50<3:37:28, 15.24s/batch, batch_loss=15.6, batch

Epoch 9/10:  14%|▏| 135/991 [34:04<3:37:28, 15.24s/batch, batch_loss=7.21, batch

Epoch 9/10:  14%|▏| 136/991 [34:04<3:31:48, 14.86s/batch, batch_loss=7.21, batch

Epoch 9/10:  14%|▏| 136/991 [34:18<3:31:48, 14.86s/batch, batch_loss=12.8, batch

Epoch 9/10:  14%|▏| 137/991 [34:18<3:29:19, 14.71s/batch, batch_loss=12.8, batch

Epoch 9/10:  14%|▏| 137/991 [34:33<3:29:19, 14.71s/batch, batch_loss=15, batch_i

Epoch 9/10:  14%|▏| 138/991 [34:33<3:28:20, 14.65s/batch, batch_loss=15, batch_i

Epoch 9/10:  14%|▏| 138/991 [34:48<3:28:20, 14.65s/batch, batch_loss=6.45, batch

Epoch 9/10:  14%|▏| 139/991 [34:48<3:29:53, 14.78s/batch, batch_loss=6.45, batch

Epoch 9/10:  14%|▏| 139/991 [35:02<3:29:53, 14.78s/batch, batch_loss=10.9, batch

Epoch 9/10:  14%|▏| 140/991 [35:02<3:28:13, 14.68s/batch, batch_loss=10.9, batch

Epoch 9/10:  14%|▏| 140/991 [35:17<3:28:13, 14.68s/batch, batch_loss=5.15, batch

Epoch 9/10:  14%|▏| 141/991 [35:17<3:29:00, 14.75s/batch, batch_loss=5.15, batch

Epoch 9/10:  14%|▏| 141/991 [35:32<3:29:00, 14.75s/batch, batch_loss=6.8, batch_

Epoch 9/10:  14%|▏| 142/991 [35:32<3:27:29, 14.66s/batch, batch_loss=6.8, batch_

Epoch 9/10:  14%|▏| 142/991 [35:47<3:27:29, 14.66s/batch, batch_loss=13.7, batch

Epoch 9/10:  14%|▏| 143/991 [35:47<3:30:42, 14.91s/batch, batch_loss=13.7, batch

Epoch 9/10:  14%|▏| 143/991 [36:01<3:30:42, 14.91s/batch, batch_loss=14.2, batch

Epoch 9/10:  15%|▏| 144/991 [36:01<3:26:37, 14.64s/batch, batch_loss=14.2, batch

Epoch 9/10:  15%|▏| 144/991 [36:15<3:26:37, 14.64s/batch, batch_loss=17, batch_i

Epoch 9/10:  15%|▏| 145/991 [36:15<3:24:51, 14.53s/batch, batch_loss=17, batch_i

Epoch 9/10:  15%|▏| 145/991 [36:30<3:24:51, 14.53s/batch, batch_loss=15, batch_i

Epoch 9/10:  15%|▏| 146/991 [36:30<3:25:33, 14.60s/batch, batch_loss=15, batch_i

Epoch 9/10:  15%|▏| 146/991 [36:45<3:25:33, 14.60s/batch, batch_loss=7.8, batch_

Epoch 9/10:  15%|▏| 147/991 [36:45<3:24:57, 14.57s/batch, batch_loss=7.8, batch_

Epoch 9/10:  15%|▏| 147/991 [37:00<3:24:57, 14.57s/batch, batch_loss=19.7, batch

Epoch 9/10:  15%|▏| 148/991 [37:00<3:27:54, 14.80s/batch, batch_loss=19.7, batch

Epoch 9/10:  15%|▏| 148/991 [37:15<3:27:54, 14.80s/batch, batch_loss=10.3, batch

Epoch 9/10:  15%|▏| 149/991 [37:15<3:29:10, 14.91s/batch, batch_loss=10.3, batch

Epoch 9/10:  15%|▏| 149/991 [37:29<3:29:10, 14.91s/batch, batch_loss=8.99, batch

Epoch 9/10:  15%|▏| 150/991 [37:29<3:25:51, 14.69s/batch, batch_loss=8.99, batch

Epoch 9/10:  15%|▏| 150/991 [37:46<3:25:51, 14.69s/batch, batch_loss=18.4, batch

Epoch 9/10:  15%|▏| 151/991 [37:46<3:34:14, 15.30s/batch, batch_loss=18.4, batch

Epoch 9/10:  15%|▏| 151/991 [38:01<3:34:14, 15.30s/batch, batch_loss=15.3, batch

Epoch 9/10:  15%|▏| 152/991 [38:01<3:31:30, 15.13s/batch, batch_loss=15.3, batch

Epoch 9/10:  15%|▏| 152/991 [38:16<3:31:30, 15.13s/batch, batch_loss=18.8, batch

Epoch 9/10:  15%|▏| 153/991 [38:16<3:30:36, 15.08s/batch, batch_loss=18.8, batch

Epoch 9/10:  15%|▏| 153/991 [38:31<3:30:36, 15.08s/batch, batch_loss=21.2, batch

Epoch 9/10:  16%|▏| 154/991 [38:31<3:32:10, 15.21s/batch, batch_loss=21.2, batch

Epoch 9/10:  16%|▏| 154/991 [38:47<3:32:10, 15.21s/batch, batch_loss=20.1, batch

Epoch 9/10:  16%|▏| 155/991 [38:47<3:35:13, 15.45s/batch, batch_loss=20.1, batch

Epoch 9/10:  16%|▏| 155/991 [39:02<3:35:13, 15.45s/batch, batch_loss=9.62, batch

Epoch 9/10:  16%|▏| 156/991 [39:02<3:30:21, 15.12s/batch, batch_loss=9.62, batch

Epoch 9/10:  16%|▏| 156/991 [39:16<3:30:21, 15.12s/batch, batch_loss=22.6, batch

Epoch 9/10:  16%|▏| 157/991 [39:16<3:26:30, 14.86s/batch, batch_loss=22.6, batch

Epoch 9/10:  16%|▏| 157/991 [39:31<3:26:30, 14.86s/batch, batch_loss=10.3, batch

Epoch 9/10:  16%|▏| 158/991 [39:31<3:25:11, 14.78s/batch, batch_loss=10.3, batch

Epoch 9/10:  16%|▏| 158/991 [39:46<3:25:11, 14.78s/batch, batch_loss=6.8, batch_

Epoch 9/10:  16%|▏| 159/991 [39:46<3:26:31, 14.89s/batch, batch_loss=6.8, batch_

Epoch 9/10:  16%|▏| 159/991 [40:00<3:26:31, 14.89s/batch, batch_loss=12.4, batch

Epoch 9/10:  16%|▏| 160/991 [40:00<3:25:12, 14.82s/batch, batch_loss=12.4, batch

Epoch 9/10:  16%|▏| 160/991 [40:15<3:25:12, 14.82s/batch, batch_loss=459, batch_

Epoch 9/10:  16%|▏| 161/991 [40:15<3:24:07, 14.76s/batch, batch_loss=459, batch_

Epoch 9/10:  16%|▏| 161/991 [40:30<3:24:07, 14.76s/batch, batch_loss=15.1, batch

Epoch 9/10:  16%|▏| 162/991 [40:30<3:23:05, 14.70s/batch, batch_loss=15.1, batch

Epoch 9/10:  16%|▏| 162/991 [40:44<3:23:05, 14.70s/batch, batch_loss=8.58, batch

Epoch 9/10:  16%|▏| 163/991 [40:44<3:22:45, 14.69s/batch, batch_loss=8.58, batch

Epoch 9/10:  16%|▏| 163/991 [40:59<3:22:45, 14.69s/batch, batch_loss=11.8, batch

Epoch 9/10:  17%|▏| 164/991 [40:59<3:22:15, 14.67s/batch, batch_loss=11.8, batch

Epoch 9/10:  17%|▏| 164/991 [41:13<3:22:15, 14.67s/batch, batch_loss=10.8, batch

Epoch 9/10:  17%|▏| 165/991 [41:13<3:21:24, 14.63s/batch, batch_loss=10.8, batch

Epoch 9/10:  17%|▏| 165/991 [41:28<3:21:24, 14.63s/batch, batch_loss=11.3, batch

Epoch 9/10:  17%|▏| 166/991 [41:28<3:23:10, 14.78s/batch, batch_loss=11.3, batch

Epoch 9/10:  17%|▏| 166/991 [41:44<3:23:10, 14.78s/batch, batch_loss=16.7, batch

Epoch 9/10:  17%|▏| 167/991 [41:44<3:26:20, 15.03s/batch, batch_loss=16.7, batch

Epoch 9/10:  17%|▏| 167/991 [42:02<3:26:20, 15.03s/batch, batch_loss=13, batch_i

Epoch 9/10:  17%|▏| 168/991 [42:02<3:36:03, 15.75s/batch, batch_loss=13, batch_i

Epoch 9/10:  17%|▏| 168/991 [42:17<3:36:03, 15.75s/batch, batch_loss=13.3, batch

Epoch 9/10:  17%|▏| 169/991 [42:17<3:34:53, 15.69s/batch, batch_loss=13.3, batch

Epoch 9/10:  17%|▏| 169/991 [42:31<3:34:53, 15.69s/batch, batch_loss=8.94, batch

Epoch 9/10:  17%|▏| 170/991 [42:31<3:28:24, 15.23s/batch, batch_loss=8.94, batch

Epoch 9/10:  17%|▏| 170/991 [42:46<3:28:24, 15.23s/batch, batch_loss=4.94, batch

Epoch 9/10:  17%|▏| 171/991 [42:46<3:24:45, 14.98s/batch, batch_loss=4.94, batch

Epoch 9/10:  17%|▏| 171/991 [43:01<3:24:45, 14.98s/batch, batch_loss=8.77, batch

Epoch 9/10:  17%|▏| 172/991 [43:01<3:25:47, 15.08s/batch, batch_loss=8.77, batch

Epoch 9/10:  17%|▏| 172/991 [43:18<3:25:47, 15.08s/batch, batch_loss=7.36, batch

Epoch 9/10:  17%|▏| 173/991 [43:18<3:35:13, 15.79s/batch, batch_loss=7.36, batch

Epoch 9/10:  17%|▏| 173/991 [43:34<3:35:13, 15.79s/batch, batch_loss=3.02e+4, ba

Epoch 9/10:  18%|▏| 174/991 [43:34<3:33:05, 15.65s/batch, batch_loss=3.02e+4, ba

Epoch 9/10:  18%|▏| 174/991 [43:48<3:33:05, 15.65s/batch, batch_loss=19.5, batch

Epoch 9/10:  18%|▏| 175/991 [43:48<3:28:38, 15.34s/batch, batch_loss=19.5, batch

Epoch 9/10:  18%|▏| 175/991 [44:03<3:28:38, 15.34s/batch, batch_loss=18.3, batch

Epoch 9/10:  18%|▏| 176/991 [44:03<3:24:41, 15.07s/batch, batch_loss=18.3, batch

Epoch 9/10:  18%|▏| 176/991 [44:18<3:24:41, 15.07s/batch, batch_loss=22.4, batch

Epoch 9/10:  18%|▏| 177/991 [44:18<3:26:05, 15.19s/batch, batch_loss=22.4, batch

Epoch 9/10:  18%|▏| 177/991 [44:33<3:26:05, 15.19s/batch, batch_loss=22.5, batch

Epoch 9/10:  18%|▏| 178/991 [44:33<3:25:11, 15.14s/batch, batch_loss=22.5, batch

Epoch 9/10:  18%|▏| 178/991 [44:49<3:25:11, 15.14s/batch, batch_loss=13, batch_i

Epoch 9/10:  18%|▏| 179/991 [44:49<3:27:12, 15.31s/batch, batch_loss=13, batch_i

Epoch 9/10:  18%|▏| 179/991 [45:04<3:27:12, 15.31s/batch, batch_loss=7.14, batch

Epoch 9/10:  18%|▏| 180/991 [45:04<3:25:52, 15.23s/batch, batch_loss=7.14, batch

Epoch 9/10:  18%|▏| 180/991 [45:19<3:25:52, 15.23s/batch, batch_loss=2.52e+4, ba

Epoch 9/10:  18%|▏| 181/991 [45:19<3:23:27, 15.07s/batch, batch_loss=2.52e+4, ba

Epoch 9/10:  18%|▏| 181/991 [45:33<3:23:27, 15.07s/batch, batch_loss=12.9, batch

Epoch 9/10:  18%|▏| 182/991 [45:33<3:20:31, 14.87s/batch, batch_loss=12.9, batch

Epoch 9/10:  18%|▏| 182/991 [45:49<3:20:31, 14.87s/batch, batch_loss=19.3, batch

Epoch 9/10:  18%|▏| 183/991 [45:49<3:22:34, 15.04s/batch, batch_loss=19.3, batch

Epoch 9/10:  18%|▏| 183/991 [46:04<3:22:34, 15.04s/batch, batch_loss=19.2, batch

Epoch 9/10:  19%|▏| 184/991 [46:04<3:23:15, 15.11s/batch, batch_loss=19.2, batch

Epoch 9/10:  19%|▏| 184/991 [46:19<3:23:15, 15.11s/batch, batch_loss=13, batch_i

Epoch 9/10:  19%|▏| 185/991 [46:19<3:24:33, 15.23s/batch, batch_loss=13, batch_i

Epoch 9/10:  19%|▏| 185/991 [46:34<3:24:33, 15.23s/batch, batch_loss=18.5, batch

Epoch 9/10:  19%|▏| 186/991 [46:34<3:22:51, 15.12s/batch, batch_loss=18.5, batch

Epoch 9/10:  19%|▏| 186/991 [46:49<3:22:51, 15.12s/batch, batch_loss=14.5, batch

Epoch 9/10:  19%|▏| 187/991 [46:49<3:22:14, 15.09s/batch, batch_loss=14.5, batch

Epoch 9/10:  19%|▏| 187/991 [47:04<3:22:14, 15.09s/batch, batch_loss=17, batch_i

Epoch 9/10:  19%|▏| 188/991 [47:04<3:19:49, 14.93s/batch, batch_loss=17, batch_i

Epoch 9/10:  19%|▏| 188/991 [47:19<3:19:49, 14.93s/batch, batch_loss=18.1, batch

Epoch 9/10:  19%|▏| 189/991 [47:19<3:20:08, 14.97s/batch, batch_loss=18.1, batch

Epoch 9/10:  19%|▏| 189/991 [47:34<3:20:08, 14.97s/batch, batch_loss=21.1, batch

Epoch 9/10:  19%|▏| 190/991 [47:34<3:21:48, 15.12s/batch, batch_loss=21.1, batch

Epoch 9/10:  19%|▏| 190/991 [47:49<3:21:48, 15.12s/batch, batch_loss=17.6, batch

Epoch 9/10:  19%|▏| 191/991 [47:49<3:20:25, 15.03s/batch, batch_loss=17.6, batch

Epoch 9/10:  19%|▏| 191/991 [48:04<3:20:25, 15.03s/batch, batch_loss=12.3, batch

Epoch 9/10:  19%|▏| 192/991 [48:04<3:18:53, 14.94s/batch, batch_loss=12.3, batch

Epoch 9/10:  19%|▏| 192/991 [48:18<3:18:53, 14.94s/batch, batch_loss=20.6, batch

Epoch 9/10:  19%|▏| 193/991 [48:19<3:17:34, 14.85s/batch, batch_loss=20.6, batch

Epoch 9/10:  19%|▏| 193/991 [48:34<3:17:34, 14.85s/batch, batch_loss=8.86, batch

Epoch 9/10:  20%|▏| 194/991 [48:34<3:20:13, 15.07s/batch, batch_loss=8.86, batch

Epoch 9/10:  20%|▏| 194/991 [48:49<3:20:13, 15.07s/batch, batch_loss=3.34, batch

Epoch 9/10:  20%|▏| 195/991 [48:49<3:19:37, 15.05s/batch, batch_loss=3.34, batch

Epoch 9/10:  20%|▏| 195/991 [49:05<3:19:37, 15.05s/batch, batch_loss=5.65, batch

Epoch 9/10:  20%|▏| 196/991 [49:05<3:22:28, 15.28s/batch, batch_loss=5.65, batch

Epoch 9/10:  20%|▏| 196/991 [49:20<3:22:28, 15.28s/batch, batch_loss=12.9, batch

Epoch 9/10:  20%|▏| 197/991 [49:20<3:22:22, 15.29s/batch, batch_loss=12.9, batch

Epoch 9/10:  20%|▏| 197/991 [49:37<3:22:22, 15.29s/batch, batch_loss=9.39, batch

Epoch 9/10:  20%|▏| 198/991 [49:37<3:27:34, 15.71s/batch, batch_loss=9.39, batch

Epoch 9/10:  20%|▏| 198/991 [49:53<3:27:34, 15.71s/batch, batch_loss=18.7, batch

Epoch 9/10:  20%|▏| 199/991 [49:53<3:27:00, 15.68s/batch, batch_loss=18.7, batch

Epoch 9/10:  20%|▏| 199/991 [50:08<3:27:00, 15.68s/batch, batch_loss=9.29, batch

Epoch 9/10:  20%|▏| 200/991 [50:08<3:26:54, 15.69s/batch, batch_loss=9.29, batch

Epoch 9/10:  20%|▏| 200/991 [50:23<3:26:54, 15.69s/batch, batch_loss=14.5, batch

Epoch 9/10:  20%|▏| 201/991 [50:24<3:24:56, 15.57s/batch, batch_loss=14.5, batch

Epoch 9/10:  20%|▏| 201/991 [50:38<3:24:56, 15.57s/batch, batch_loss=11.5, batch

Epoch 9/10:  20%|▏| 202/991 [50:38<3:20:39, 15.26s/batch, batch_loss=11.5, batch

Epoch 9/10:  20%|▏| 202/991 [50:54<3:20:39, 15.26s/batch, batch_loss=15.5, batch

Epoch 9/10:  20%|▏| 203/991 [50:54<3:22:40, 15.43s/batch, batch_loss=15.5, batch

Epoch 9/10:  20%|▏| 203/991 [51:10<3:22:40, 15.43s/batch, batch_loss=22.2, batch

Epoch 9/10:  21%|▏| 204/991 [51:10<3:23:46, 15.54s/batch, batch_loss=22.2, batch

Epoch 9/10:  21%|▏| 204/991 [51:25<3:23:46, 15.54s/batch, batch_loss=16.3, batch

Epoch 9/10:  21%|▏| 205/991 [51:25<3:23:50, 15.56s/batch, batch_loss=16.3, batch

Epoch 9/10:  21%|▏| 205/991 [51:40<3:23:50, 15.56s/batch, batch_loss=7.23, batch

Epoch 9/10:  21%|▏| 206/991 [51:40<3:21:07, 15.37s/batch, batch_loss=7.23, batch

Epoch 9/10:  21%|▏| 206/991 [51:55<3:21:07, 15.37s/batch, batch_loss=8.93, batch

Epoch 9/10:  21%|▏| 207/991 [51:55<3:18:39, 15.20s/batch, batch_loss=8.93, batch

Epoch 9/10:  21%|▏| 207/991 [52:10<3:18:39, 15.20s/batch, batch_loss=12.1, batch

Epoch 9/10:  21%|▏| 208/991 [52:10<3:17:52, 15.16s/batch, batch_loss=12.1, batch

Epoch 9/10:  21%|▏| 208/991 [52:27<3:17:52, 15.16s/batch, batch_loss=9.36, batch

Epoch 9/10:  21%|▏| 209/991 [52:27<3:23:34, 15.62s/batch, batch_loss=9.36, batch

Epoch 9/10:  21%|▏| 209/991 [52:43<3:23:34, 15.62s/batch, batch_loss=19.7, batch

Epoch 9/10:  21%|▏| 210/991 [52:43<3:25:09, 15.76s/batch, batch_loss=19.7, batch

Epoch 9/10:  21%|▏| 210/991 [52:59<3:25:09, 15.76s/batch, batch_loss=12.2, batch

Epoch 9/10:  21%|▏| 211/991 [52:59<3:25:31, 15.81s/batch, batch_loss=12.2, batch

Epoch 9/10:  21%|▏| 211/991 [53:15<3:25:31, 15.81s/batch, batch_loss=15.1, batch

Epoch 9/10:  21%|▏| 212/991 [53:15<3:27:25, 15.98s/batch, batch_loss=15.1, batch

Epoch 9/10:  21%|▏| 212/991 [53:31<3:27:25, 15.98s/batch, batch_loss=3.14, batch

Epoch 9/10:  21%|▏| 213/991 [53:31<3:26:03, 15.89s/batch, batch_loss=3.14, batch

Epoch 9/10:  21%|▏| 213/991 [53:46<3:26:03, 15.89s/batch, batch_loss=12.9, batch

Epoch 9/10:  22%|▏| 214/991 [53:46<3:22:33, 15.64s/batch, batch_loss=12.9, batch

Epoch 9/10:  22%|▏| 214/991 [54:01<3:22:33, 15.64s/batch, batch_loss=14.5, batch

Epoch 9/10:  22%|▏| 215/991 [54:01<3:21:03, 15.55s/batch, batch_loss=14.5, batch

Epoch 9/10:  22%|▏| 215/991 [54:17<3:21:03, 15.55s/batch, batch_loss=10.2, batch

Epoch 9/10:  22%|▏| 216/991 [54:17<3:20:38, 15.53s/batch, batch_loss=10.2, batch

Epoch 9/10:  22%|▏| 216/991 [54:32<3:20:38, 15.53s/batch, batch_loss=11.1, batch

Epoch 9/10:  22%|▏| 217/991 [54:32<3:18:32, 15.39s/batch, batch_loss=11.1, batch

Epoch 9/10:  22%|▏| 217/991 [54:47<3:18:32, 15.39s/batch, batch_loss=18.7, batch

Epoch 9/10:  22%|▏| 218/991 [54:47<3:19:10, 15.46s/batch, batch_loss=18.7, batch

Epoch 9/10:  22%|▏| 218/991 [55:02<3:19:10, 15.46s/batch, batch_loss=18.3, batch

Epoch 9/10:  22%|▏| 219/991 [55:02<3:17:07, 15.32s/batch, batch_loss=18.3, batch

Epoch 9/10:  22%|▏| 219/991 [55:19<3:17:07, 15.32s/batch, batch_loss=21.2, batch

Epoch 9/10:  22%|▏| 220/991 [55:19<3:20:09, 15.58s/batch, batch_loss=21.2, batch

Epoch 9/10:  22%|▏| 220/991 [55:35<3:20:09, 15.58s/batch, batch_loss=20, batch_i

Epoch 9/10:  22%|▏| 221/991 [55:35<3:23:17, 15.84s/batch, batch_loss=20, batch_i

Epoch 9/10:  22%|▏| 221/991 [55:51<3:23:17, 15.84s/batch, batch_loss=15.8, batch

Epoch 9/10:  22%|▏| 222/991 [55:51<3:22:47, 15.82s/batch, batch_loss=15.8, batch

Epoch 9/10:  22%|▏| 222/991 [56:10<3:22:47, 15.82s/batch, batch_loss=19.4, batch

Epoch 9/10:  23%|▏| 223/991 [56:10<3:33:56, 16.71s/batch, batch_loss=19.4, batch

Epoch 9/10:  23%|▏| 223/991 [56:26<3:33:56, 16.71s/batch, batch_loss=14.3, batch

Epoch 9/10:  23%|▏| 224/991 [56:26<3:30:30, 16.47s/batch, batch_loss=14.3, batch

Epoch 9/10:  23%|▏| 224/991 [56:40<3:30:30, 16.47s/batch, batch_loss=10.8, batch

Epoch 9/10:  23%|▏| 225/991 [56:40<3:21:43, 15.80s/batch, batch_loss=10.8, batch

Epoch 9/10:  23%|▏| 225/991 [56:53<3:21:43, 15.80s/batch, batch_loss=21.5, batch

Epoch 9/10:  23%|▏| 226/991 [56:53<3:12:38, 15.11s/batch, batch_loss=21.5, batch

Epoch 9/10:  23%|▏| 226/991 [57:09<3:12:38, 15.11s/batch, batch_loss=2.41e+3, ba

Epoch 9/10:  23%|▏| 227/991 [57:09<3:14:49, 15.30s/batch, batch_loss=2.41e+3, ba

Epoch 9/10:  23%|▏| 227/991 [57:26<3:14:49, 15.30s/batch, batch_loss=3.61e+3, ba

Epoch 9/10:  23%|▏| 228/991 [57:26<3:22:58, 15.96s/batch, batch_loss=3.61e+3, ba

Epoch 9/10:  23%|▏| 228/991 [57:42<3:22:58, 15.96s/batch, batch_loss=13.4, batch

Epoch 9/10:  23%|▏| 229/991 [57:42<3:21:17, 15.85s/batch, batch_loss=13.4, batch

Epoch 9/10:  23%|▏| 229/991 [57:58<3:21:17, 15.85s/batch, batch_loss=9.1, batch_

Epoch 9/10:  23%|▏| 230/991 [57:58<3:19:23, 15.72s/batch, batch_loss=9.1, batch_

Epoch 9/10:  23%|▏| 230/991 [58:14<3:19:23, 15.72s/batch, batch_loss=13.8, batch

Epoch 9/10:  23%|▏| 231/991 [58:14<3:21:17, 15.89s/batch, batch_loss=13.8, batch

Epoch 9/10:  23%|▏| 231/991 [58:30<3:21:17, 15.89s/batch, batch_loss=8.57, batch

Epoch 9/10:  23%|▏| 232/991 [58:30<3:22:57, 16.04s/batch, batch_loss=8.57, batch

Epoch 9/10:  23%|▏| 232/991 [58:46<3:22:57, 16.04s/batch, batch_loss=7.4, batch_

Epoch 9/10:  24%|▏| 233/991 [58:46<3:22:42, 16.05s/batch, batch_loss=7.4, batch_

Epoch 9/10:  24%|▏| 233/991 [59:02<3:22:42, 16.05s/batch, batch_loss=13.7, batch

Epoch 9/10:  24%|▏| 234/991 [59:02<3:19:55, 15.85s/batch, batch_loss=13.7, batch

Epoch 9/10:  24%|▏| 234/991 [59:20<3:19:55, 15.85s/batch, batch_loss=14.9, batch

Epoch 9/10:  24%|▏| 235/991 [59:20<3:29:02, 16.59s/batch, batch_loss=14.9, batch

Epoch 9/10:  24%|▏| 235/991 [59:35<3:29:02, 16.59s/batch, batch_loss=24.2, batch

Epoch 9/10:  24%|▏| 236/991 [59:35<3:24:11, 16.23s/batch, batch_loss=24.2, batch

Epoch 9/10:  24%|▏| 236/991 [59:52<3:24:11, 16.23s/batch, batch_loss=24.1, batch

Epoch 9/10:  24%|▏| 237/991 [59:52<3:24:30, 16.27s/batch, batch_loss=24.1, batch

Epoch 9/10:  24%|▏| 237/991 [1:00:08<3:24:30, 16.27s/batch, batch_loss=17.7, bat

Epoch 9/10:  24%|▏| 238/991 [1:00:08<3:25:41, 16.39s/batch, batch_loss=17.7, bat

Epoch 9/10:  24%|▏| 238/991 [1:00:24<3:25:41, 16.39s/batch, batch_loss=5.94, bat

Epoch 9/10:  24%|▏| 239/991 [1:00:24<3:20:58, 16.03s/batch, batch_loss=5.94, bat

Epoch 9/10:  24%|▏| 239/991 [1:00:39<3:20:58, 16.03s/batch, batch_loss=6.63, bat

Epoch 9/10:  24%|▏| 240/991 [1:00:39<3:20:11, 15.99s/batch, batch_loss=6.63, bat

Epoch 9/10:  24%|▏| 240/991 [1:00:55<3:20:11, 15.99s/batch, batch_loss=9.9, batc

Epoch 9/10:  24%|▏| 241/991 [1:00:55<3:19:03, 15.93s/batch, batch_loss=9.9, batc

Epoch 9/10:  24%|▏| 241/991 [1:01:12<3:19:03, 15.93s/batch, batch_loss=21.1, bat

Epoch 9/10:  24%|▏| 242/991 [1:01:12<3:20:46, 16.08s/batch, batch_loss=21.1, bat

Epoch 9/10:  24%|▏| 242/991 [1:01:27<3:20:46, 16.08s/batch, batch_loss=268, batc

Epoch 9/10:  25%|▏| 243/991 [1:01:27<3:17:48, 15.87s/batch, batch_loss=268, batc

Epoch 9/10:  25%|▏| 243/991 [1:01:43<3:17:48, 15.87s/batch, batch_loss=17.9, bat

Epoch 9/10:  25%|▏| 244/991 [1:01:43<3:16:20, 15.77s/batch, batch_loss=17.9, bat

Epoch 9/10:  25%|▏| 244/991 [1:02:01<3:16:20, 15.77s/batch, batch_loss=6.04, bat

Epoch 9/10:  25%|▏| 245/991 [1:02:01<3:24:12, 16.42s/batch, batch_loss=6.04, bat

Epoch 9/10:  25%|▏| 245/991 [1:02:16<3:24:12, 16.42s/batch, batch_loss=6.78, bat

Epoch 9/10:  25%|▏| 246/991 [1:02:16<3:19:28, 16.07s/batch, batch_loss=6.78, bat

Epoch 9/10:  25%|▏| 246/991 [1:02:31<3:19:28, 16.07s/batch, batch_loss=14.8, bat

Epoch 9/10:  25%|▏| 247/991 [1:02:31<3:14:46, 15.71s/batch, batch_loss=14.8, bat

Epoch 9/10:  25%|▏| 247/991 [1:02:45<3:14:46, 15.71s/batch, batch_loss=4.53, bat

Epoch 9/10:  25%|▎| 248/991 [1:02:45<3:11:07, 15.43s/batch, batch_loss=4.53, bat

Epoch 9/10:  25%|▎| 248/991 [1:03:01<3:11:07, 15.43s/batch, batch_loss=12.8, bat

Epoch 9/10:  25%|▎| 249/991 [1:03:01<3:09:29, 15.32s/batch, batch_loss=12.8, bat

Epoch 9/10:  25%|▎| 249/991 [1:03:16<3:09:29, 15.32s/batch, batch_loss=9.58, bat

Epoch 9/10:  25%|▎| 250/991 [1:03:16<3:11:07, 15.48s/batch, batch_loss=9.58, bat

Epoch 9/10:  25%|▎| 250/991 [1:03:32<3:11:07, 15.48s/batch, batch_loss=6.58, bat

Epoch 9/10:  25%|▎| 251/991 [1:03:32<3:11:41, 15.54s/batch, batch_loss=6.58, bat

Epoch 9/10:  25%|▎| 251/991 [1:03:50<3:11:41, 15.54s/batch, batch_loss=15.1, bat

Epoch 9/10:  25%|▎| 252/991 [1:03:50<3:21:20, 16.35s/batch, batch_loss=15.1, bat

Epoch 9/10:  25%|▎| 252/991 [1:04:06<3:21:20, 16.35s/batch, batch_loss=7.18, bat

Epoch 9/10:  26%|▎| 253/991 [1:04:06<3:18:01, 16.10s/batch, batch_loss=7.18, bat

Epoch 9/10:  26%|▎| 253/991 [1:04:21<3:18:01, 16.10s/batch, batch_loss=19.2, bat

Epoch 9/10:  26%|▎| 254/991 [1:04:21<3:16:02, 15.96s/batch, batch_loss=19.2, bat

Epoch 9/10:  26%|▎| 254/991 [1:04:37<3:16:02, 15.96s/batch, batch_loss=15.9, bat

Epoch 9/10:  26%|▎| 255/991 [1:04:37<3:13:22, 15.76s/batch, batch_loss=15.9, bat

Epoch 9/10:  26%|▎| 255/991 [1:04:51<3:13:22, 15.76s/batch, batch_loss=696, batc

Epoch 9/10:  26%|▎| 256/991 [1:04:51<3:07:47, 15.33s/batch, batch_loss=696, batc

Epoch 9/10:  26%|▎| 256/991 [1:05:06<3:07:47, 15.33s/batch, batch_loss=19, batch

Epoch 9/10:  26%|▎| 257/991 [1:05:06<3:05:30, 15.16s/batch, batch_loss=19, batch

Epoch 9/10:  26%|▎| 257/991 [1:05:21<3:05:30, 15.16s/batch, batch_loss=204, batc

Epoch 9/10:  26%|▎| 258/991 [1:05:21<3:05:34, 15.19s/batch, batch_loss=204, batc

Epoch 9/10:  26%|▎| 258/991 [1:05:36<3:05:34, 15.19s/batch, batch_loss=15.9, bat

Epoch 9/10:  26%|▎| 259/991 [1:05:36<3:03:57, 15.08s/batch, batch_loss=15.9, bat

Epoch 9/10:  26%|▎| 259/991 [1:05:52<3:03:57, 15.08s/batch, batch_loss=16.5, bat

Epoch 9/10:  26%|▎| 260/991 [1:05:52<3:06:52, 15.34s/batch, batch_loss=16.5, bat

Epoch 9/10:  26%|▎| 260/991 [1:06:07<3:06:52, 15.34s/batch, batch_loss=14.8, bat

Epoch 9/10:  26%|▎| 261/991 [1:06:07<3:04:48, 15.19s/batch, batch_loss=14.8, bat

Epoch 9/10:  26%|▎| 261/991 [1:06:21<3:04:48, 15.19s/batch, batch_loss=10.2, bat

Epoch 9/10:  26%|▎| 262/991 [1:06:21<3:00:50, 14.88s/batch, batch_loss=10.2, bat

Epoch 9/10:  26%|▎| 262/991 [1:06:36<3:00:50, 14.88s/batch, batch_loss=12.6, bat

Epoch 9/10:  27%|▎| 263/991 [1:06:36<3:01:40, 14.97s/batch, batch_loss=12.6, bat

Epoch 9/10:  27%|▎| 263/991 [1:06:51<3:01:40, 14.97s/batch, batch_loss=15.7, bat

Epoch 9/10:  27%|▎| 264/991 [1:06:51<3:01:40, 14.99s/batch, batch_loss=15.7, bat

Epoch 9/10:  27%|▎| 264/991 [1:07:05<3:01:40, 14.99s/batch, batch_loss=16.5, bat

Epoch 9/10:  27%|▎| 265/991 [1:07:05<2:59:16, 14.82s/batch, batch_loss=16.5, bat

Epoch 9/10:  27%|▎| 265/991 [1:07:19<2:59:16, 14.82s/batch, batch_loss=14, batch

Epoch 9/10:  27%|▎| 266/991 [1:07:19<2:56:02, 14.57s/batch, batch_loss=14, batch

Epoch 9/10:  27%|▎| 266/991 [1:07:34<2:56:02, 14.57s/batch, batch_loss=11.6, bat

Epoch 9/10:  27%|▎| 267/991 [1:07:34<2:55:48, 14.57s/batch, batch_loss=11.6, bat

Epoch 9/10:  27%|▎| 267/991 [1:07:49<2:55:48, 14.57s/batch, batch_loss=7.61, bat

Epoch 9/10:  27%|▎| 268/991 [1:07:49<2:57:59, 14.77s/batch, batch_loss=7.61, bat

Epoch 9/10:  27%|▎| 268/991 [1:08:05<2:57:59, 14.77s/batch, batch_loss=12.4, bat

Epoch 9/10:  27%|▎| 269/991 [1:08:05<3:00:10, 14.97s/batch, batch_loss=12.4, bat

Epoch 9/10:  27%|▎| 269/991 [1:08:20<3:00:10, 14.97s/batch, batch_loss=1.72, bat

Epoch 9/10:  27%|▎| 270/991 [1:08:20<3:01:37, 15.11s/batch, batch_loss=1.72, bat

Epoch 9/10:  27%|▎| 270/991 [1:08:35<3:01:37, 15.11s/batch, batch_loss=12.1, bat

Epoch 9/10:  27%|▎| 271/991 [1:08:35<3:00:07, 15.01s/batch, batch_loss=12.1, bat

Epoch 9/10:  27%|▎| 271/991 [1:08:50<3:00:07, 15.01s/batch, batch_loss=11.6, bat

Epoch 9/10:  27%|▎| 272/991 [1:08:50<3:00:31, 15.06s/batch, batch_loss=11.6, bat

Epoch 9/10:  27%|▎| 272/991 [1:09:05<3:00:31, 15.06s/batch, batch_loss=18.6, bat

Epoch 9/10:  28%|▎| 273/991 [1:09:05<2:59:58, 15.04s/batch, batch_loss=18.6, bat

Epoch 9/10:  28%|▎| 273/991 [1:09:20<2:59:58, 15.04s/batch, batch_loss=11.5, bat

Epoch 9/10:  28%|▎| 274/991 [1:09:20<3:00:21, 15.09s/batch, batch_loss=11.5, bat

Epoch 9/10:  28%|▎| 274/991 [1:09:35<3:00:21, 15.09s/batch, batch_loss=3.3e+3, b

Epoch 9/10:  28%|▎| 275/991 [1:09:35<2:58:52, 14.99s/batch, batch_loss=3.3e+3, b

Epoch 9/10:  28%|▎| 275/991 [1:09:53<2:58:52, 14.99s/batch, batch_loss=13.6, bat

Epoch 9/10:  28%|▎| 276/991 [1:09:53<3:08:11, 15.79s/batch, batch_loss=13.6, bat

Epoch 9/10:  28%|▎| 276/991 [1:10:08<3:08:11, 15.79s/batch, batch_loss=4.79e+3, 

Epoch 9/10:  28%|▎| 277/991 [1:10:08<3:06:46, 15.70s/batch, batch_loss=4.79e+3, 

Epoch 9/10:  28%|▎| 277/991 [1:10:23<3:06:46, 15.70s/batch, batch_loss=11.1, bat

Epoch 9/10:  28%|▎| 278/991 [1:10:23<3:04:25, 15.52s/batch, batch_loss=11.1, bat

Epoch 9/10:  28%|▎| 278/991 [1:10:38<3:04:25, 15.52s/batch, batch_loss=17.1, bat

Epoch 9/10:  28%|▎| 279/991 [1:10:38<3:02:10, 15.35s/batch, batch_loss=17.1, bat

Epoch 9/10:  28%|▎| 279/991 [1:10:54<3:02:10, 15.35s/batch, batch_loss=10.9, bat

Epoch 9/10:  28%|▎| 280/991 [1:10:54<3:02:11, 15.38s/batch, batch_loss=10.9, bat

Epoch 9/10:  28%|▎| 280/991 [1:11:08<3:02:11, 15.38s/batch, batch_loss=11.4, bat

Epoch 9/10:  28%|▎| 281/991 [1:11:08<2:59:15, 15.15s/batch, batch_loss=11.4, bat

Epoch 9/10:  28%|▎| 281/991 [1:11:25<2:59:15, 15.15s/batch, batch_loss=6.32, bat

Epoch 9/10:  28%|▎| 282/991 [1:11:25<3:04:59, 15.66s/batch, batch_loss=6.32, bat

Epoch 9/10:  28%|▎| 282/991 [1:11:40<3:04:59, 15.66s/batch, batch_loss=15.1, bat

Epoch 9/10:  29%|▎| 283/991 [1:11:40<3:02:54, 15.50s/batch, batch_loss=15.1, bat

Epoch 9/10:  29%|▎| 283/991 [1:11:56<3:02:54, 15.50s/batch, batch_loss=11, batch

Epoch 9/10:  29%|▎| 284/991 [1:11:56<3:02:07, 15.46s/batch, batch_loss=11, batch

Epoch 9/10:  29%|▎| 284/991 [1:12:11<3:02:07, 15.46s/batch, batch_loss=11.7, bat

Epoch 9/10:  29%|▎| 285/991 [1:12:11<3:01:49, 15.45s/batch, batch_loss=11.7, bat

Epoch 9/10:  29%|▎| 285/991 [1:12:26<3:01:49, 15.45s/batch, batch_loss=7.61, bat

Epoch 9/10:  29%|▎| 286/991 [1:12:26<3:01:02, 15.41s/batch, batch_loss=7.61, bat

Epoch 9/10:  29%|▎| 286/991 [1:12:41<3:01:02, 15.41s/batch, batch_loss=6.68, bat

Epoch 9/10:  29%|▎| 287/991 [1:12:41<2:57:40, 15.14s/batch, batch_loss=6.68, bat

Epoch 9/10:  29%|▎| 287/991 [1:12:57<2:57:40, 15.14s/batch, batch_loss=2.58e+3, 

Epoch 9/10:  29%|▎| 288/991 [1:12:57<3:00:42, 15.42s/batch, batch_loss=2.58e+3, 

Epoch 9/10:  29%|▎| 288/991 [1:13:12<3:00:42, 15.42s/batch, batch_loss=1.24e+3, 

Epoch 9/10:  29%|▎| 289/991 [1:13:12<2:59:08, 15.31s/batch, batch_loss=1.24e+3, 

Epoch 9/10:  29%|▎| 289/991 [1:13:28<2:59:08, 15.31s/batch, batch_loss=11.6, bat

Epoch 9/10:  29%|▎| 290/991 [1:13:28<3:01:04, 15.50s/batch, batch_loss=11.6, bat

Epoch 9/10:  29%|▎| 290/991 [1:13:43<3:01:04, 15.50s/batch, batch_loss=5.43, bat

Epoch 9/10:  29%|▎| 291/991 [1:13:43<2:59:55, 15.42s/batch, batch_loss=5.43, bat

Epoch 9/10:  29%|▎| 291/991 [1:13:59<2:59:55, 15.42s/batch, batch_loss=11, batch

Epoch 9/10:  29%|▎| 292/991 [1:13:59<3:01:22, 15.57s/batch, batch_loss=11, batch

Epoch 9/10:  29%|▎| 292/991 [1:14:15<3:01:22, 15.57s/batch, batch_loss=15.7, bat

Epoch 9/10:  30%|▎| 293/991 [1:14:15<3:03:09, 15.74s/batch, batch_loss=15.7, bat

Epoch 9/10:  30%|▎| 293/991 [1:14:31<3:03:09, 15.74s/batch, batch_loss=14.6, bat

Epoch 9/10:  30%|▎| 294/991 [1:14:31<3:02:39, 15.72s/batch, batch_loss=14.6, bat

Epoch 9/10:  30%|▎| 294/991 [1:14:47<3:02:39, 15.72s/batch, batch_loss=10.2, bat

Epoch 9/10:  30%|▎| 295/991 [1:14:47<3:01:59, 15.69s/batch, batch_loss=10.2, bat

Epoch 9/10:  30%|▎| 295/991 [1:15:02<3:01:59, 15.69s/batch, batch_loss=17.1, bat

Epoch 9/10:  30%|▎| 296/991 [1:15:02<3:00:47, 15.61s/batch, batch_loss=17.1, bat

Epoch 9/10:  30%|▎| 296/991 [1:15:18<3:00:47, 15.61s/batch, batch_loss=12, batch

Epoch 9/10:  30%|▎| 297/991 [1:15:18<3:02:03, 15.74s/batch, batch_loss=12, batch

Epoch 9/10:  30%|▎| 297/991 [1:15:33<3:02:03, 15.74s/batch, batch_loss=3.24e+4, 

Epoch 9/10:  30%|▎| 298/991 [1:15:33<2:59:01, 15.50s/batch, batch_loss=3.24e+4, 

Epoch 9/10:  30%|▎| 298/991 [1:15:49<2:59:01, 15.50s/batch, batch_loss=12.9, bat

Epoch 9/10:  30%|▎| 299/991 [1:15:49<3:00:17, 15.63s/batch, batch_loss=12.9, bat

Epoch 9/10:  30%|▎| 299/991 [1:16:04<3:00:17, 15.63s/batch, batch_loss=6.13, bat

Epoch 9/10:  30%|▎| 300/991 [1:16:04<2:58:46, 15.52s/batch, batch_loss=6.13, bat

Epoch 9/10:  30%|▎| 300/991 [1:16:21<2:58:46, 15.52s/batch, batch_loss=10.1, bat

Epoch 9/10:  30%|▎| 301/991 [1:16:21<3:01:47, 15.81s/batch, batch_loss=10.1, bat

Epoch 9/10:  30%|▎| 301/991 [1:16:37<3:01:47, 15.81s/batch, batch_loss=11.3, bat

Epoch 9/10:  30%|▎| 302/991 [1:16:37<3:02:40, 15.91s/batch, batch_loss=11.3, bat

Epoch 9/10:  30%|▎| 302/991 [1:16:53<3:02:40, 15.91s/batch, batch_loss=10.6, bat

Epoch 9/10:  31%|▎| 303/991 [1:16:53<3:02:04, 15.88s/batch, batch_loss=10.6, bat

Epoch 9/10:  31%|▎| 303/991 [1:17:08<3:02:04, 15.88s/batch, batch_loss=3.14, bat

Epoch 9/10:  31%|▎| 304/991 [1:17:08<2:58:30, 15.59s/batch, batch_loss=3.14, bat

Epoch 9/10:  31%|▎| 304/991 [1:17:24<2:58:30, 15.59s/batch, batch_loss=15, batch

Epoch 9/10:  31%|▎| 305/991 [1:17:24<3:00:04, 15.75s/batch, batch_loss=15, batch

Epoch 9/10:  31%|▎| 305/991 [1:17:39<3:00:04, 15.75s/batch, batch_loss=7.55, bat

Epoch 9/10:  31%|▎| 306/991 [1:17:39<2:58:14, 15.61s/batch, batch_loss=7.55, bat

Epoch 9/10:  31%|▎| 306/991 [1:17:55<2:58:14, 15.61s/batch, batch_loss=6.29e+3, 

Epoch 9/10:  31%|▎| 307/991 [1:17:55<3:00:23, 15.82s/batch, batch_loss=6.29e+3, 

Epoch 9/10:  31%|▎| 307/991 [1:18:11<3:00:23, 15.82s/batch, batch_loss=12.7, bat

Epoch 9/10:  31%|▎| 308/991 [1:18:11<2:58:40, 15.70s/batch, batch_loss=12.7, bat

Epoch 9/10:  31%|▎| 308/991 [1:18:27<2:58:40, 15.70s/batch, batch_loss=18.9, bat

Epoch 9/10:  31%|▎| 309/991 [1:18:27<2:58:56, 15.74s/batch, batch_loss=18.9, bat

Epoch 9/10:  31%|▎| 309/991 [1:18:43<2:58:56, 15.74s/batch, batch_loss=12.3, bat

Epoch 9/10:  31%|▎| 310/991 [1:18:43<3:01:32, 15.99s/batch, batch_loss=12.3, bat

Epoch 9/10:  31%|▎| 310/991 [1:18:58<3:01:32, 15.99s/batch, batch_loss=14.3, bat

Epoch 9/10:  31%|▎| 311/991 [1:18:58<2:58:15, 15.73s/batch, batch_loss=14.3, bat

Epoch 9/10:  31%|▎| 311/991 [1:19:13<2:58:15, 15.73s/batch, batch_loss=12.2, bat

Epoch 9/10:  31%|▎| 312/991 [1:19:13<2:55:47, 15.53s/batch, batch_loss=12.2, bat

Epoch 9/10:  31%|▎| 312/991 [1:19:29<2:55:47, 15.53s/batch, batch_loss=1.06e+4, 

Epoch 9/10:  32%|▎| 313/991 [1:19:29<2:54:43, 15.46s/batch, batch_loss=1.06e+4, 

Epoch 9/10:  32%|▎| 313/991 [1:19:44<2:54:43, 15.46s/batch, batch_loss=12.4, bat

Epoch 9/10:  32%|▎| 314/991 [1:19:44<2:53:46, 15.40s/batch, batch_loss=12.4, bat

Epoch 9/10:  32%|▎| 314/991 [1:20:02<2:53:46, 15.40s/batch, batch_loss=13.8, bat

Epoch 9/10:  32%|▎| 315/991 [1:20:02<3:02:25, 16.19s/batch, batch_loss=13.8, bat

Epoch 9/10:  32%|▎| 315/991 [1:20:18<3:02:25, 16.19s/batch, batch_loss=21.9, bat

Epoch 9/10:  32%|▎| 316/991 [1:20:18<3:00:28, 16.04s/batch, batch_loss=21.9, bat

Epoch 9/10:  32%|▎| 316/991 [1:20:33<3:00:28, 16.04s/batch, batch_loss=19.1, bat

Epoch 9/10:  32%|▎| 317/991 [1:20:33<2:59:07, 15.95s/batch, batch_loss=19.1, bat

Epoch 9/10:  32%|▎| 317/991 [1:20:48<2:59:07, 15.95s/batch, batch_loss=21.5, bat

Epoch 9/10:  32%|▎| 318/991 [1:20:48<2:54:56, 15.60s/batch, batch_loss=21.5, bat

Epoch 9/10:  32%|▎| 318/991 [1:21:02<2:54:56, 15.60s/batch, batch_loss=15, batch

Epoch 9/10:  32%|▎| 319/991 [1:21:02<2:49:21, 15.12s/batch, batch_loss=15, batch

Epoch 9/10:  32%|▎| 319/991 [1:21:16<2:49:21, 15.12s/batch, batch_loss=13.7, bat

Epoch 9/10:  32%|▎| 320/991 [1:21:16<2:46:13, 14.86s/batch, batch_loss=13.7, bat

Epoch 9/10:  32%|▎| 320/991 [1:21:32<2:46:13, 14.86s/batch, batch_loss=22.4, bat

Epoch 9/10:  32%|▎| 321/991 [1:21:32<2:47:23, 14.99s/batch, batch_loss=22.4, bat

Epoch 9/10:  32%|▎| 321/991 [1:21:47<2:47:23, 14.99s/batch, batch_loss=6.93, bat

Epoch 9/10:  32%|▎| 322/991 [1:21:47<2:48:56, 15.15s/batch, batch_loss=6.93, bat

Epoch 9/10:  32%|▎| 322/991 [1:22:05<2:48:56, 15.15s/batch, batch_loss=9.6, batc

Epoch 9/10:  33%|▎| 323/991 [1:22:05<2:58:49, 16.06s/batch, batch_loss=9.6, batc

Epoch 9/10:  33%|▎| 323/991 [1:22:20<2:58:49, 16.06s/batch, batch_loss=19.7, bat

Epoch 9/10:  33%|▎| 324/991 [1:22:20<2:53:52, 15.64s/batch, batch_loss=19.7, bat

Epoch 9/10:  33%|▎| 324/991 [1:22:35<2:53:52, 15.64s/batch, batch_loss=8.48, bat

Epoch 9/10:  33%|▎| 325/991 [1:22:35<2:51:02, 15.41s/batch, batch_loss=8.48, bat

Epoch 9/10:  33%|▎| 325/991 [1:22:50<2:51:02, 15.41s/batch, batch_loss=20.9, bat

Epoch 9/10:  33%|▎| 326/991 [1:22:50<2:50:12, 15.36s/batch, batch_loss=20.9, bat

Epoch 9/10:  33%|▎| 326/991 [1:23:05<2:50:12, 15.36s/batch, batch_loss=3.04e+3, 

Epoch 9/10:  33%|▎| 327/991 [1:23:05<2:48:41, 15.24s/batch, batch_loss=3.04e+3, 

Epoch 9/10:  33%|▎| 327/991 [1:23:20<2:48:41, 15.24s/batch, batch_loss=10.2, bat

Epoch 9/10:  33%|▎| 328/991 [1:23:20<2:46:10, 15.04s/batch, batch_loss=10.2, bat

Epoch 9/10:  33%|▎| 328/991 [1:23:34<2:46:10, 15.04s/batch, batch_loss=16.4, bat

Epoch 9/10:  33%|▎| 329/991 [1:23:34<2:44:35, 14.92s/batch, batch_loss=16.4, bat

Epoch 9/10:  33%|▎| 329/991 [1:23:49<2:44:35, 14.92s/batch, batch_loss=13.5, bat

Epoch 9/10:  33%|▎| 330/991 [1:23:49<2:44:48, 14.96s/batch, batch_loss=13.5, bat

Epoch 9/10:  33%|▎| 330/991 [1:24:04<2:44:48, 14.96s/batch, batch_loss=13.3, bat

Epoch 9/10:  33%|▎| 331/991 [1:24:04<2:42:27, 14.77s/batch, batch_loss=13.3, bat

Epoch 9/10:  33%|▎| 331/991 [1:24:18<2:42:27, 14.77s/batch, batch_loss=14.8, bat

Epoch 9/10:  34%|▎| 332/991 [1:24:18<2:42:14, 14.77s/batch, batch_loss=14.8, bat

Epoch 9/10:  34%|▎| 332/991 [1:24:33<2:42:14, 14.77s/batch, batch_loss=11.4, bat

Epoch 9/10:  34%|▎| 333/991 [1:24:33<2:40:49, 14.66s/batch, batch_loss=11.4, bat

Epoch 9/10:  34%|▎| 333/991 [1:24:47<2:40:49, 14.66s/batch, batch_loss=14.3, bat

Epoch 9/10:  34%|▎| 334/991 [1:24:47<2:39:13, 14.54s/batch, batch_loss=14.3, bat

Epoch 9/10:  34%|▎| 334/991 [1:25:02<2:39:13, 14.54s/batch, batch_loss=3.91, bat

Epoch 9/10:  34%|▎| 335/991 [1:25:02<2:38:57, 14.54s/batch, batch_loss=3.91, bat

Epoch 9/10:  34%|▎| 335/991 [1:25:19<2:38:57, 14.54s/batch, batch_loss=8.47e+3, 

Epoch 9/10:  34%|▎| 336/991 [1:25:19<2:48:15, 15.41s/batch, batch_loss=8.47e+3, 

Epoch 9/10:  34%|▎| 336/991 [1:25:33<2:48:15, 15.41s/batch, batch_loss=2.35e+3, 

Epoch 9/10:  34%|▎| 337/991 [1:25:33<2:44:30, 15.09s/batch, batch_loss=2.35e+3, 

Epoch 9/10:  34%|▎| 337/991 [1:25:48<2:44:30, 15.09s/batch, batch_loss=8.15, bat

Epoch 9/10:  34%|▎| 338/991 [1:25:48<2:41:15, 14.82s/batch, batch_loss=8.15, bat

Epoch 9/10:  34%|▎| 338/991 [1:26:03<2:41:15, 14.82s/batch, batch_loss=23, batch

Epoch 9/10:  34%|▎| 339/991 [1:26:03<2:41:44, 14.88s/batch, batch_loss=23, batch

Epoch 9/10:  34%|▎| 339/991 [1:26:17<2:41:44, 14.88s/batch, batch_loss=9.65, bat

Epoch 9/10:  34%|▎| 340/991 [1:26:17<2:40:18, 14.77s/batch, batch_loss=9.65, bat

Epoch 9/10:  34%|▎| 340/991 [1:26:30<2:40:18, 14.77s/batch, batch_loss=9.8, batc

Epoch 9/10:  34%|▎| 341/991 [1:26:30<2:34:36, 14.27s/batch, batch_loss=9.8, batc

Epoch 9/10:  34%|▎| 341/991 [1:26:42<2:34:36, 14.27s/batch, batch_loss=0.751, ba

Epoch 9/10:  35%|▎| 342/991 [1:26:42<2:27:30, 13.64s/batch, batch_loss=0.751, ba

Epoch 9/10:  35%|▎| 342/991 [1:26:55<2:27:30, 13.64s/batch, batch_loss=7.19, bat

Epoch 9/10:  35%|▎| 343/991 [1:26:55<2:24:55, 13.42s/batch, batch_loss=7.19, bat

Epoch 9/10:  35%|▎| 343/991 [1:27:10<2:24:55, 13.42s/batch, batch_loss=15.4, bat

Epoch 9/10:  35%|▎| 344/991 [1:27:10<2:28:03, 13.73s/batch, batch_loss=15.4, bat

Epoch 9/10:  35%|▎| 344/991 [1:27:25<2:28:03, 13.73s/batch, batch_loss=113, batc

Epoch 9/10:  35%|▎| 345/991 [1:27:25<2:31:20, 14.06s/batch, batch_loss=113, batc

Epoch 9/10:  35%|▎| 345/991 [1:27:39<2:31:20, 14.06s/batch, batch_loss=12.9, bat

Epoch 9/10:  35%|▎| 346/991 [1:27:39<2:31:58, 14.14s/batch, batch_loss=12.9, bat

Epoch 9/10:  35%|▎| 346/991 [1:27:53<2:31:58, 14.14s/batch, batch_loss=12.1, bat

Epoch 9/10:  35%|▎| 347/991 [1:27:53<2:29:57, 13.97s/batch, batch_loss=12.1, bat

Epoch 9/10:  35%|▎| 347/991 [1:28:07<2:29:57, 13.97s/batch, batch_loss=11.5, bat

Epoch 9/10:  35%|▎| 348/991 [1:28:07<2:31:27, 14.13s/batch, batch_loss=11.5, bat

Epoch 9/10:  35%|▎| 348/991 [1:28:22<2:31:27, 14.13s/batch, batch_loss=9.38, bat

Epoch 9/10:  35%|▎| 349/991 [1:28:22<2:32:55, 14.29s/batch, batch_loss=9.38, bat

Epoch 9/10:  35%|▎| 349/991 [1:28:36<2:32:55, 14.29s/batch, batch_loss=13, batch

Epoch 9/10:  35%|▎| 350/991 [1:28:36<2:32:00, 14.23s/batch, batch_loss=13, batch

Epoch 9/10:  35%|▎| 350/991 [1:28:51<2:32:00, 14.23s/batch, batch_loss=8.43, bat

Epoch 9/10:  35%|▎| 351/991 [1:28:51<2:33:22, 14.38s/batch, batch_loss=8.43, bat

Epoch 9/10:  35%|▎| 351/991 [1:29:05<2:33:22, 14.38s/batch, batch_loss=14.8, bat

Epoch 9/10:  36%|▎| 352/991 [1:29:05<2:33:46, 14.44s/batch, batch_loss=14.8, bat

Epoch 9/10:  36%|▎| 352/991 [1:29:21<2:33:46, 14.44s/batch, batch_loss=14.1, bat

Epoch 9/10:  36%|▎| 353/991 [1:29:21<2:36:41, 14.74s/batch, batch_loss=14.1, bat

Epoch 9/10:  36%|▎| 353/991 [1:29:39<2:36:41, 14.74s/batch, batch_loss=21.5, bat

Epoch 9/10:  36%|▎| 354/991 [1:29:39<2:47:36, 15.79s/batch, batch_loss=21.5, bat

Epoch 9/10:  36%|▎| 354/991 [1:29:54<2:47:36, 15.79s/batch, batch_loss=8.44, bat

Epoch 9/10:  36%|▎| 355/991 [1:29:54<2:45:08, 15.58s/batch, batch_loss=8.44, bat

Epoch 9/10:  36%|▎| 355/991 [1:30:09<2:45:08, 15.58s/batch, batch_loss=17, batch

Epoch 9/10:  36%|▎| 356/991 [1:30:09<2:43:43, 15.47s/batch, batch_loss=17, batch

Epoch 9/10:  36%|▎| 356/991 [1:30:24<2:43:43, 15.47s/batch, batch_loss=11.1, bat

Epoch 9/10:  36%|▎| 357/991 [1:30:24<2:42:43, 15.40s/batch, batch_loss=11.1, bat

Epoch 9/10:  36%|▎| 357/991 [1:30:39<2:42:43, 15.40s/batch, batch_loss=13, batch

Epoch 9/10:  36%|▎| 358/991 [1:30:39<2:40:19, 15.20s/batch, batch_loss=13, batch

Epoch 9/10:  36%|▎| 358/991 [1:30:53<2:40:19, 15.20s/batch, batch_loss=4.81, bat

Epoch 9/10:  36%|▎| 359/991 [1:30:53<2:37:34, 14.96s/batch, batch_loss=4.81, bat

Epoch 9/10:  36%|▎| 359/991 [1:31:08<2:37:34, 14.96s/batch, batch_loss=8.73, bat

Epoch 9/10:  36%|▎| 360/991 [1:31:08<2:37:03, 14.93s/batch, batch_loss=8.73, bat

Epoch 9/10:  36%|▎| 360/991 [1:31:23<2:37:03, 14.93s/batch, batch_loss=26.8, bat

Epoch 9/10:  36%|▎| 361/991 [1:31:23<2:35:17, 14.79s/batch, batch_loss=26.8, bat

Epoch 9/10:  36%|▎| 361/991 [1:31:39<2:35:17, 14.79s/batch, batch_loss=18.9, bat

Epoch 9/10:  37%|▎| 362/991 [1:31:39<2:38:19, 15.10s/batch, batch_loss=18.9, bat

Epoch 9/10:  37%|▎| 362/991 [1:31:56<2:38:19, 15.10s/batch, batch_loss=13.5, bat

Epoch 9/10:  37%|▎| 363/991 [1:31:56<2:44:58, 15.76s/batch, batch_loss=13.5, bat

Epoch 9/10:  37%|▎| 363/991 [1:32:10<2:44:58, 15.76s/batch, batch_loss=15.4, bat

Epoch 9/10:  37%|▎| 364/991 [1:32:10<2:41:04, 15.41s/batch, batch_loss=15.4, bat

Epoch 9/10:  37%|▎| 364/991 [1:32:26<2:41:04, 15.41s/batch, batch_loss=10.3, bat

Epoch 9/10:  37%|▎| 365/991 [1:32:26<2:39:41, 15.31s/batch, batch_loss=10.3, bat

Epoch 9/10:  37%|▎| 365/991 [1:32:40<2:39:41, 15.31s/batch, batch_loss=13.5, bat

Epoch 9/10:  37%|▎| 366/991 [1:32:40<2:36:17, 15.00s/batch, batch_loss=13.5, bat

Epoch 9/10:  37%|▎| 366/991 [1:32:55<2:36:17, 15.00s/batch, batch_loss=12.1, bat

Epoch 9/10:  37%|▎| 367/991 [1:32:55<2:36:34, 15.05s/batch, batch_loss=12.1, bat

Epoch 9/10:  37%|▎| 367/991 [1:33:10<2:36:34, 15.05s/batch, batch_loss=11.5, bat

Epoch 9/10:  37%|▎| 368/991 [1:33:10<2:34:51, 14.91s/batch, batch_loss=11.5, bat

Epoch 9/10:  37%|▎| 368/991 [1:33:24<2:34:51, 14.91s/batch, batch_loss=14.7, bat

Epoch 9/10:  37%|▎| 369/991 [1:33:24<2:33:40, 14.82s/batch, batch_loss=14.7, bat

Epoch 9/10:  37%|▎| 369/991 [1:33:41<2:33:40, 14.82s/batch, batch_loss=1.21e+4, 

Epoch 9/10:  37%|▎| 370/991 [1:33:41<2:39:56, 15.45s/batch, batch_loss=1.21e+4, 

Epoch 9/10:  37%|▎| 370/991 [1:33:55<2:39:56, 15.45s/batch, batch_loss=23, batch

Epoch 9/10:  37%|▎| 371/991 [1:33:55<2:35:23, 15.04s/batch, batch_loss=23, batch

Epoch 9/10:  37%|▎| 371/991 [1:34:10<2:35:23, 15.04s/batch, batch_loss=14.2, bat

Epoch 9/10:  38%|▍| 372/991 [1:34:10<2:32:52, 14.82s/batch, batch_loss=14.2, bat

Epoch 9/10:  38%|▍| 372/991 [1:34:25<2:32:52, 14.82s/batch, batch_loss=23.3, bat

Epoch 9/10:  38%|▍| 373/991 [1:34:25<2:33:34, 14.91s/batch, batch_loss=23.3, bat

Epoch 9/10:  38%|▍| 373/991 [1:34:39<2:33:34, 14.91s/batch, batch_loss=470, batc

Epoch 9/10:  38%|▍| 374/991 [1:34:39<2:31:38, 14.75s/batch, batch_loss=470, batc

Epoch 9/10:  38%|▍| 374/991 [1:34:54<2:31:38, 14.75s/batch, batch_loss=1.43e+3, 

Epoch 9/10:  38%|▍| 375/991 [1:34:54<2:30:42, 14.68s/batch, batch_loss=1.43e+3, 

Epoch 9/10:  38%|▍| 375/991 [1:35:08<2:30:42, 14.68s/batch, batch_loss=1.22e+3, 

Epoch 9/10:  38%|▍| 376/991 [1:35:08<2:30:40, 14.70s/batch, batch_loss=1.22e+3, 

Epoch 9/10:  38%|▍| 376/991 [1:35:22<2:30:40, 14.70s/batch, batch_loss=18.8, bat

Epoch 9/10:  38%|▍| 377/991 [1:35:22<2:28:56, 14.55s/batch, batch_loss=18.8, bat

Epoch 9/10:  38%|▍| 377/991 [1:35:37<2:28:56, 14.55s/batch, batch_loss=1.18e+3, 

Epoch 9/10:  38%|▍| 378/991 [1:35:37<2:28:10, 14.50s/batch, batch_loss=1.18e+3, 

Epoch 9/10:  38%|▍| 378/991 [1:35:51<2:28:10, 14.50s/batch, batch_loss=9.87, bat

Epoch 9/10:  38%|▍| 379/991 [1:35:51<2:27:57, 14.51s/batch, batch_loss=9.87, bat

Epoch 9/10:  38%|▍| 379/991 [1:36:06<2:27:57, 14.51s/batch, batch_loss=11.6, bat

Epoch 9/10:  38%|▍| 380/991 [1:36:06<2:28:18, 14.56s/batch, batch_loss=11.6, bat

Epoch 9/10:  38%|▍| 380/991 [1:36:20<2:28:18, 14.56s/batch, batch_loss=17.6, bat

Epoch 9/10:  38%|▍| 381/991 [1:36:20<2:27:05, 14.47s/batch, batch_loss=17.6, bat

Epoch 9/10:  38%|▍| 381/991 [1:36:35<2:27:05, 14.47s/batch, batch_loss=10.2, bat

Epoch 9/10:  39%|▍| 382/991 [1:36:35<2:26:52, 14.47s/batch, batch_loss=10.2, bat

Epoch 9/10:  39%|▍| 382/991 [1:36:49<2:26:52, 14.47s/batch, batch_loss=10.2, bat

Epoch 9/10:  39%|▍| 383/991 [1:36:49<2:26:41, 14.48s/batch, batch_loss=10.2, bat

Epoch 9/10:  39%|▍| 383/991 [1:37:03<2:26:41, 14.48s/batch, batch_loss=20.2, bat

Epoch 9/10:  39%|▍| 384/991 [1:37:03<2:25:11, 14.35s/batch, batch_loss=20.2, bat

Epoch 9/10:  39%|▍| 384/991 [1:37:18<2:25:11, 14.35s/batch, batch_loss=9.09, bat

Epoch 9/10:  39%|▍| 385/991 [1:37:18<2:25:59, 14.46s/batch, batch_loss=9.09, bat

Epoch 9/10:  39%|▍| 385/991 [1:37:32<2:25:59, 14.46s/batch, batch_loss=16.1, bat

Epoch 9/10:  39%|▍| 386/991 [1:37:32<2:24:53, 14.37s/batch, batch_loss=16.1, bat

Epoch 9/10:  39%|▍| 386/991 [1:37:49<2:24:53, 14.37s/batch, batch_loss=22.1, bat

Epoch 9/10:  39%|▍| 387/991 [1:37:49<2:32:20, 15.13s/batch, batch_loss=22.1, bat

Epoch 9/10:  39%|▍| 387/991 [1:38:03<2:32:20, 15.13s/batch, batch_loss=785, batc

Epoch 9/10:  39%|▍| 388/991 [1:38:03<2:27:47, 14.71s/batch, batch_loss=785, batc

Epoch 9/10:  39%|▍| 388/991 [1:38:17<2:27:47, 14.71s/batch, batch_loss=13.9, bat

Epoch 9/10:  39%|▍| 389/991 [1:38:17<2:26:25, 14.59s/batch, batch_loss=13.9, bat

Epoch 9/10:  39%|▍| 389/991 [1:38:31<2:26:25, 14.59s/batch, batch_loss=866, batc

Epoch 9/10:  39%|▍| 390/991 [1:38:31<2:24:09, 14.39s/batch, batch_loss=866, batc

Epoch 9/10:  39%|▍| 390/991 [1:38:45<2:24:09, 14.39s/batch, batch_loss=18.6, bat

Epoch 9/10:  39%|▍| 391/991 [1:38:45<2:22:54, 14.29s/batch, batch_loss=18.6, bat

Epoch 9/10:  39%|▍| 391/991 [1:39:01<2:22:54, 14.29s/batch, batch_loss=13.8, bat

Epoch 9/10:  40%|▍| 392/991 [1:39:01<2:26:08, 14.64s/batch, batch_loss=13.8, bat

Epoch 9/10:  40%|▍| 392/991 [1:39:15<2:26:08, 14.64s/batch, batch_loss=19.5, bat

Epoch 9/10:  40%|▍| 393/991 [1:39:15<2:26:28, 14.70s/batch, batch_loss=19.5, bat

Epoch 9/10:  40%|▍| 393/991 [1:39:30<2:26:28, 14.70s/batch, batch_loss=605, batc

Epoch 9/10:  40%|▍| 394/991 [1:39:30<2:27:05, 14.78s/batch, batch_loss=605, batc

Epoch 9/10:  40%|▍| 394/991 [1:39:45<2:27:05, 14.78s/batch, batch_loss=17.9, bat

Epoch 9/10:  40%|▍| 395/991 [1:39:45<2:26:42, 14.77s/batch, batch_loss=17.9, bat

Epoch 9/10:  40%|▍| 395/991 [1:40:00<2:26:42, 14.77s/batch, batch_loss=11.7, bat

Epoch 9/10:  40%|▍| 396/991 [1:40:00<2:25:51, 14.71s/batch, batch_loss=11.7, bat

Epoch 9/10:  40%|▍| 396/991 [1:40:15<2:25:51, 14.71s/batch, batch_loss=15.6, bat

Epoch 9/10:  40%|▍| 397/991 [1:40:15<2:27:21, 14.88s/batch, batch_loss=15.6, bat

Epoch 9/10:  40%|▍| 397/991 [1:40:29<2:27:21, 14.88s/batch, batch_loss=13.8, bat

Epoch 9/10:  40%|▍| 398/991 [1:40:29<2:25:45, 14.75s/batch, batch_loss=13.8, bat

Epoch 9/10:  40%|▍| 398/991 [1:40:44<2:25:45, 14.75s/batch, batch_loss=21.8, bat

Epoch 9/10:  40%|▍| 399/991 [1:40:44<2:26:22, 14.83s/batch, batch_loss=21.8, bat

Epoch 9/10:  40%|▍| 399/991 [1:41:00<2:26:22, 14.83s/batch, batch_loss=10.6, bat

Epoch 9/10:  40%|▍| 400/991 [1:41:00<2:27:18, 14.96s/batch, batch_loss=10.6, bat

Epoch 9/10:  40%|▍| 400/991 [1:41:14<2:27:18, 14.96s/batch, batch_loss=8.65, bat

Epoch 9/10:  40%|▍| 401/991 [1:41:14<2:24:06, 14.65s/batch, batch_loss=8.65, bat

Epoch 9/10:  40%|▍| 401/991 [1:41:28<2:24:06, 14.65s/batch, batch_loss=17.9, bat

Epoch 9/10:  41%|▍| 402/991 [1:41:28<2:22:02, 14.47s/batch, batch_loss=17.9, bat

Epoch 9/10:  41%|▍| 402/991 [1:41:44<2:22:02, 14.47s/batch, batch_loss=17.4, bat

Epoch 9/10:  41%|▍| 403/991 [1:41:44<2:27:41, 15.07s/batch, batch_loss=17.4, bat

Epoch 9/10:  41%|▍| 403/991 [1:41:59<2:27:41, 15.07s/batch, batch_loss=11.6, bat

Epoch 9/10:  41%|▍| 404/991 [1:41:59<2:25:42, 14.89s/batch, batch_loss=11.6, bat

Epoch 9/10:  41%|▍| 404/991 [1:42:13<2:25:42, 14.89s/batch, batch_loss=12, batch

Epoch 9/10:  41%|▍| 405/991 [1:42:13<2:24:37, 14.81s/batch, batch_loss=12, batch

Epoch 9/10:  41%|▍| 405/991 [1:42:28<2:24:37, 14.81s/batch, batch_loss=5.3, batc

Epoch 9/10:  41%|▍| 406/991 [1:42:28<2:22:42, 14.64s/batch, batch_loss=5.3, batc

Epoch 9/10:  41%|▍| 406/991 [1:42:41<2:22:42, 14.64s/batch, batch_loss=23.1, bat

Epoch 9/10:  41%|▍| 407/991 [1:42:41<2:19:41, 14.35s/batch, batch_loss=23.1, bat

Epoch 9/10:  41%|▍| 407/991 [1:42:56<2:19:41, 14.35s/batch, batch_loss=7.89, bat

Epoch 9/10:  41%|▍| 408/991 [1:42:56<2:19:56, 14.40s/batch, batch_loss=7.89, bat

Epoch 9/10:  41%|▍| 408/991 [1:43:11<2:19:56, 14.40s/batch, batch_loss=24.8, bat

Epoch 9/10:  41%|▍| 409/991 [1:43:11<2:22:00, 14.64s/batch, batch_loss=24.8, bat

Epoch 9/10:  41%|▍| 409/991 [1:43:27<2:22:00, 14.64s/batch, batch_loss=23.8, bat

Epoch 9/10:  41%|▍| 410/991 [1:43:27<2:26:20, 15.11s/batch, batch_loss=23.8, bat

Epoch 9/10:  41%|▍| 410/991 [1:43:42<2:26:20, 15.11s/batch, batch_loss=20.8, bat

Epoch 9/10:  41%|▍| 411/991 [1:43:42<2:26:08, 15.12s/batch, batch_loss=20.8, bat

Epoch 9/10:  41%|▍| 411/991 [1:43:57<2:26:08, 15.12s/batch, batch_loss=12.1, bat

Epoch 9/10:  42%|▍| 412/991 [1:43:57<2:24:17, 14.95s/batch, batch_loss=12.1, bat

Epoch 9/10:  42%|▍| 412/991 [1:44:10<2:24:17, 14.95s/batch, batch_loss=20.7, bat

Epoch 9/10:  42%|▍| 413/991 [1:44:10<2:20:12, 14.55s/batch, batch_loss=20.7, bat

Epoch 9/10:  42%|▍| 413/991 [1:44:24<2:20:12, 14.55s/batch, batch_loss=14.4, bat

Epoch 9/10:  42%|▍| 414/991 [1:44:24<2:18:24, 14.39s/batch, batch_loss=14.4, bat

Epoch 9/10:  42%|▍| 414/991 [1:44:40<2:18:24, 14.39s/batch, batch_loss=9.25, bat

Epoch 9/10:  42%|▍| 415/991 [1:44:40<2:20:25, 14.63s/batch, batch_loss=9.25, bat

Epoch 9/10:  42%|▍| 415/991 [1:44:54<2:20:25, 14.63s/batch, batch_loss=11.6, bat

Epoch 9/10:  42%|▍| 416/991 [1:44:54<2:19:03, 14.51s/batch, batch_loss=11.6, bat

Epoch 9/10:  42%|▍| 416/991 [1:45:09<2:19:03, 14.51s/batch, batch_loss=7.8, batc

Epoch 9/10:  42%|▍| 417/991 [1:45:09<2:20:55, 14.73s/batch, batch_loss=7.8, batc

Epoch 9/10:  42%|▍| 417/991 [1:45:24<2:20:55, 14.73s/batch, batch_loss=11.7, bat

Epoch 9/10:  42%|▍| 418/991 [1:45:24<2:20:23, 14.70s/batch, batch_loss=11.7, bat

Epoch 9/10:  42%|▍| 418/991 [1:45:39<2:20:23, 14.70s/batch, batch_loss=1.3e+3, b

Epoch 9/10:  42%|▍| 419/991 [1:45:39<2:20:34, 14.75s/batch, batch_loss=1.3e+3, b

Epoch 9/10:  42%|▍| 419/991 [1:45:54<2:20:34, 14.75s/batch, batch_loss=15.6, bat

Epoch 9/10:  42%|▍| 420/991 [1:45:54<2:21:08, 14.83s/batch, batch_loss=15.6, bat

Epoch 9/10:  42%|▍| 420/991 [1:46:09<2:21:08, 14.83s/batch, batch_loss=12.8, bat

Epoch 9/10:  42%|▍| 421/991 [1:46:09<2:21:08, 14.86s/batch, batch_loss=12.8, bat

Epoch 9/10:  42%|▍| 421/991 [1:46:23<2:21:08, 14.86s/batch, batch_loss=9.08, bat

Epoch 9/10:  43%|▍| 422/991 [1:46:23<2:20:50, 14.85s/batch, batch_loss=9.08, bat

Epoch 9/10:  43%|▍| 422/991 [1:46:38<2:20:50, 14.85s/batch, batch_loss=9.1, batc

Epoch 9/10:  43%|▍| 423/991 [1:46:38<2:20:06, 14.80s/batch, batch_loss=9.1, batc

Epoch 9/10:  43%|▍| 423/991 [1:46:52<2:20:06, 14.80s/batch, batch_loss=10.1, bat

Epoch 9/10:  43%|▍| 424/991 [1:46:52<2:16:24, 14.43s/batch, batch_loss=10.1, bat

Epoch 9/10:  43%|▍| 424/991 [1:47:07<2:16:24, 14.43s/batch, batch_loss=6.53, bat

Epoch 9/10:  43%|▍| 425/991 [1:47:07<2:17:28, 14.57s/batch, batch_loss=6.53, bat

Epoch 9/10:  43%|▍| 425/991 [1:47:21<2:17:28, 14.57s/batch, batch_loss=2.17, bat

Epoch 9/10:  43%|▍| 426/991 [1:47:21<2:16:05, 14.45s/batch, batch_loss=2.17, bat

Epoch 9/10:  43%|▍| 426/991 [1:47:36<2:16:05, 14.45s/batch, batch_loss=12.3, bat

Epoch 9/10:  43%|▍| 427/991 [1:47:36<2:17:00, 14.58s/batch, batch_loss=12.3, bat

Epoch 9/10:  43%|▍| 427/991 [1:47:51<2:17:00, 14.58s/batch, batch_loss=19.7, bat

Epoch 9/10:  43%|▍| 428/991 [1:47:51<2:18:48, 14.79s/batch, batch_loss=19.7, bat

Epoch 9/10:  43%|▍| 428/991 [1:48:06<2:18:48, 14.79s/batch, batch_loss=21.4, bat

Epoch 9/10:  43%|▍| 429/991 [1:48:06<2:18:59, 14.84s/batch, batch_loss=21.4, bat

Epoch 9/10:  43%|▍| 429/991 [1:48:20<2:18:59, 14.84s/batch, batch_loss=9.33e+3, 

Epoch 9/10:  43%|▍| 430/991 [1:48:20<2:17:17, 14.68s/batch, batch_loss=9.33e+3, 

Epoch 9/10:  43%|▍| 430/991 [1:48:34<2:17:17, 14.68s/batch, batch_loss=24.3, bat

Epoch 9/10:  43%|▍| 431/991 [1:48:34<2:15:56, 14.57s/batch, batch_loss=24.3, bat

Epoch 9/10:  43%|▍| 431/991 [1:48:50<2:15:56, 14.57s/batch, batch_loss=20.6, bat

Epoch 9/10:  44%|▍| 432/991 [1:48:50<2:17:19, 14.74s/batch, batch_loss=20.6, bat

Epoch 9/10:  44%|▍| 432/991 [1:49:04<2:17:19, 14.74s/batch, batch_loss=9.7, batc

Epoch 9/10:  44%|▍| 433/991 [1:49:04<2:16:14, 14.65s/batch, batch_loss=9.7, batc

Epoch 9/10:  44%|▍| 433/991 [1:49:19<2:16:14, 14.65s/batch, batch_loss=17.7, bat

Epoch 9/10:  44%|▍| 434/991 [1:49:19<2:18:04, 14.87s/batch, batch_loss=17.7, bat

Epoch 9/10:  44%|▍| 434/991 [1:49:34<2:18:04, 14.87s/batch, batch_loss=13.9, bat

Epoch 9/10:  44%|▍| 435/991 [1:49:34<2:17:12, 14.81s/batch, batch_loss=13.9, bat

Epoch 9/10:  44%|▍| 435/991 [1:49:49<2:17:12, 14.81s/batch, batch_loss=11.6, bat

Epoch 9/10:  44%|▍| 436/991 [1:49:49<2:16:01, 14.71s/batch, batch_loss=11.6, bat

Epoch 9/10:  44%|▍| 436/991 [1:50:03<2:16:01, 14.71s/batch, batch_loss=17.5, bat

Epoch 9/10:  44%|▍| 437/991 [1:50:03<2:14:20, 14.55s/batch, batch_loss=17.5, bat

Epoch 9/10:  44%|▍| 437/991 [1:50:18<2:14:20, 14.55s/batch, batch_loss=20.8, bat

Epoch 9/10:  44%|▍| 438/991 [1:50:18<2:14:56, 14.64s/batch, batch_loss=20.8, bat

Epoch 9/10:  44%|▍| 438/991 [1:50:31<2:14:56, 14.64s/batch, batch_loss=13.8, bat

Epoch 9/10:  44%|▍| 439/991 [1:50:31<2:11:59, 14.35s/batch, batch_loss=13.8, bat

Epoch 9/10:  44%|▍| 439/991 [1:50:46<2:11:59, 14.35s/batch, batch_loss=23.2, bat

Epoch 9/10:  44%|▍| 440/991 [1:50:46<2:13:41, 14.56s/batch, batch_loss=23.2, bat

Epoch 9/10:  44%|▍| 440/991 [1:51:04<2:13:41, 14.56s/batch, batch_loss=21.2, bat

Epoch 9/10:  45%|▍| 441/991 [1:51:04<2:21:33, 15.44s/batch, batch_loss=21.2, bat

Epoch 9/10:  45%|▍| 441/991 [1:51:19<2:21:33, 15.44s/batch, batch_loss=14.2, bat

Epoch 9/10:  45%|▍| 442/991 [1:51:19<2:19:38, 15.26s/batch, batch_loss=14.2, bat

Epoch 9/10:  45%|▍| 442/991 [1:51:33<2:19:38, 15.26s/batch, batch_loss=20.9, bat

Epoch 9/10:  45%|▍| 443/991 [1:51:33<2:15:40, 14.86s/batch, batch_loss=20.9, bat

Epoch 9/10:  45%|▍| 443/991 [1:51:47<2:15:40, 14.86s/batch, batch_loss=17, batch

Epoch 9/10:  45%|▍| 444/991 [1:51:47<2:13:40, 14.66s/batch, batch_loss=17, batch

Epoch 9/10:  45%|▍| 444/991 [1:52:02<2:13:40, 14.66s/batch, batch_loss=17.4, bat

Epoch 9/10:  45%|▍| 445/991 [1:52:02<2:14:07, 14.74s/batch, batch_loss=17.4, bat

Epoch 9/10:  45%|▍| 445/991 [1:52:16<2:14:07, 14.74s/batch, batch_loss=27.5, bat

Epoch 9/10:  45%|▍| 446/991 [1:52:16<2:13:24, 14.69s/batch, batch_loss=27.5, bat

Epoch 9/10:  45%|▍| 446/991 [1:52:32<2:13:24, 14.69s/batch, batch_loss=13.5, bat

Epoch 9/10:  45%|▍| 447/991 [1:52:32<2:15:04, 14.90s/batch, batch_loss=13.5, bat

Epoch 9/10:  45%|▍| 447/991 [1:52:47<2:15:04, 14.90s/batch, batch_loss=18.4, bat

Epoch 9/10:  45%|▍| 448/991 [1:52:47<2:15:06, 14.93s/batch, batch_loss=18.4, bat

Epoch 9/10:  45%|▍| 448/991 [1:53:01<2:15:06, 14.93s/batch, batch_loss=16.1, bat

Epoch 9/10:  45%|▍| 449/991 [1:53:01<2:14:12, 14.86s/batch, batch_loss=16.1, bat

Epoch 9/10:  45%|▍| 449/991 [1:53:16<2:14:12, 14.86s/batch, batch_loss=24.7, bat

Epoch 9/10:  45%|▍| 450/991 [1:53:16<2:13:06, 14.76s/batch, batch_loss=24.7, bat

Epoch 9/10:  45%|▍| 450/991 [1:53:30<2:13:06, 14.76s/batch, batch_loss=18.7, bat

Epoch 9/10:  46%|▍| 451/991 [1:53:30<2:11:38, 14.63s/batch, batch_loss=18.7, bat

Epoch 9/10:  46%|▍| 451/991 [1:53:45<2:11:38, 14.63s/batch, batch_loss=16.4, bat

Epoch 9/10:  46%|▍| 452/991 [1:53:45<2:11:27, 14.63s/batch, batch_loss=16.4, bat

Epoch 9/10:  46%|▍| 452/991 [1:53:59<2:11:27, 14.63s/batch, batch_loss=19.9, bat

Epoch 9/10:  46%|▍| 453/991 [1:53:59<2:09:24, 14.43s/batch, batch_loss=19.9, bat

Epoch 9/10:  46%|▍| 453/991 [1:54:14<2:09:24, 14.43s/batch, batch_loss=7.24e+3, 

Epoch 9/10:  46%|▍| 454/991 [1:54:14<2:10:46, 14.61s/batch, batch_loss=7.24e+3, 

Epoch 9/10:  46%|▍| 454/991 [1:54:29<2:10:46, 14.61s/batch, batch_loss=21, batch

Epoch 9/10:  46%|▍| 455/991 [1:54:29<2:11:20, 14.70s/batch, batch_loss=21, batch

Epoch 9/10:  46%|▍| 455/991 [1:54:43<2:11:20, 14.70s/batch, batch_loss=25.7, bat

Epoch 9/10:  46%|▍| 456/991 [1:54:43<2:11:05, 14.70s/batch, batch_loss=25.7, bat

Epoch 9/10:  46%|▍| 456/991 [1:54:58<2:11:05, 14.70s/batch, batch_loss=12.8, bat

Epoch 9/10:  46%|▍| 457/991 [1:54:58<2:11:33, 14.78s/batch, batch_loss=12.8, bat

Epoch 9/10:  46%|▍| 457/991 [1:55:13<2:11:33, 14.78s/batch, batch_loss=16.1, bat

Epoch 9/10:  46%|▍| 458/991 [1:55:13<2:10:31, 14.69s/batch, batch_loss=16.1, bat

Epoch 9/10:  46%|▍| 458/991 [1:55:30<2:10:31, 14.69s/batch, batch_loss=25.7, bat

Epoch 9/10:  46%|▍| 459/991 [1:55:30<2:16:11, 15.36s/batch, batch_loss=25.7, bat

Epoch 9/10:  46%|▍| 459/991 [1:55:45<2:16:11, 15.36s/batch, batch_loss=22.8, bat

Epoch 9/10:  46%|▍| 460/991 [1:55:45<2:14:26, 15.19s/batch, batch_loss=22.8, bat

Epoch 9/10:  46%|▍| 460/991 [1:56:00<2:14:26, 15.19s/batch, batch_loss=47.3, bat

Epoch 9/10:  47%|▍| 461/991 [1:56:00<2:14:40, 15.25s/batch, batch_loss=47.3, bat

Epoch 9/10:  47%|▍| 461/991 [1:56:16<2:14:40, 15.25s/batch, batch_loss=14.5, bat

Epoch 9/10:  47%|▍| 462/991 [1:56:16<2:15:23, 15.36s/batch, batch_loss=14.5, bat

Epoch 9/10:  47%|▍| 462/991 [1:56:29<2:15:23, 15.36s/batch, batch_loss=6.21e+4, 

Epoch 9/10:  47%|▍| 463/991 [1:56:29<2:09:55, 14.76s/batch, batch_loss=6.21e+4, 

Epoch 9/10:  47%|▍| 463/991 [1:56:42<2:09:55, 14.76s/batch, batch_loss=15.4, bat

Epoch 9/10:  47%|▍| 464/991 [1:56:42<2:04:27, 14.17s/batch, batch_loss=15.4, bat

Epoch 9/10:  47%|▍| 464/991 [1:56:55<2:04:27, 14.17s/batch, batch_loss=15.7, bat

Epoch 9/10:  47%|▍| 465/991 [1:56:55<2:01:45, 13.89s/batch, batch_loss=15.7, bat

Epoch 9/10:  47%|▍| 465/991 [1:57:10<2:01:45, 13.89s/batch, batch_loss=15.5, bat

Epoch 9/10:  47%|▍| 466/991 [1:57:10<2:03:36, 14.13s/batch, batch_loss=15.5, bat

Epoch 9/10:  47%|▍| 466/991 [1:57:25<2:03:36, 14.13s/batch, batch_loss=11.8, bat

Epoch 9/10:  47%|▍| 467/991 [1:57:25<2:05:31, 14.37s/batch, batch_loss=11.8, bat

Epoch 9/10:  47%|▍| 467/991 [1:57:42<2:05:31, 14.37s/batch, batch_loss=18.8, bat

Epoch 9/10:  47%|▍| 468/991 [1:57:42<2:13:17, 15.29s/batch, batch_loss=18.8, bat

Epoch 9/10:  47%|▍| 468/991 [1:57:57<2:13:17, 15.29s/batch, batch_loss=14, batch

Epoch 9/10:  47%|▍| 469/991 [1:57:57<2:10:56, 15.05s/batch, batch_loss=14, batch

Epoch 9/10:  47%|▍| 469/991 [1:58:11<2:10:56, 15.05s/batch, batch_loss=15.7, bat

Epoch 9/10:  47%|▍| 470/991 [1:58:11<2:09:24, 14.90s/batch, batch_loss=15.7, bat

Epoch 9/10:  47%|▍| 470/991 [1:58:26<2:09:24, 14.90s/batch, batch_loss=23.2, bat

Epoch 9/10:  48%|▍| 471/991 [1:58:26<2:09:29, 14.94s/batch, batch_loss=23.2, bat

Epoch 9/10:  48%|▍| 471/991 [1:58:41<2:09:29, 14.94s/batch, batch_loss=19.1, bat

Epoch 9/10:  48%|▍| 472/991 [1:58:41<2:08:20, 14.84s/batch, batch_loss=19.1, bat

Epoch 9/10:  48%|▍| 472/991 [1:58:55<2:08:20, 14.84s/batch, batch_loss=17.4, bat

Epoch 9/10:  48%|▍| 473/991 [1:58:55<2:06:22, 14.64s/batch, batch_loss=17.4, bat

Epoch 9/10:  48%|▍| 473/991 [1:59:10<2:06:22, 14.64s/batch, batch_loss=15.8, bat

Epoch 9/10:  48%|▍| 474/991 [1:59:10<2:07:33, 14.80s/batch, batch_loss=15.8, bat

Epoch 9/10:  48%|▍| 474/991 [1:59:27<2:07:33, 14.80s/batch, batch_loss=2.4e+3, b

Epoch 9/10:  48%|▍| 475/991 [1:59:27<2:13:03, 15.47s/batch, batch_loss=2.4e+3, b

Epoch 9/10:  48%|▍| 475/991 [1:59:41<2:13:03, 15.47s/batch, batch_loss=15.5, bat

Epoch 9/10:  48%|▍| 476/991 [1:59:41<2:08:42, 14.99s/batch, batch_loss=15.5, bat

Epoch 9/10:  48%|▍| 476/991 [1:59:56<2:08:42, 14.99s/batch, batch_loss=17.5, bat

Epoch 9/10:  48%|▍| 477/991 [1:59:56<2:08:18, 14.98s/batch, batch_loss=17.5, bat

Epoch 9/10:  48%|▍| 477/991 [2:00:11<2:08:18, 14.98s/batch, batch_loss=16.6, bat

Epoch 9/10:  48%|▍| 478/991 [2:00:11<2:07:45, 14.94s/batch, batch_loss=16.6, bat

Epoch 9/10:  48%|▍| 478/991 [2:00:26<2:07:45, 14.94s/batch, batch_loss=20.6, bat

Epoch 9/10:  48%|▍| 479/991 [2:00:26<2:08:05, 15.01s/batch, batch_loss=20.6, bat

Epoch 9/10:  48%|▍| 479/991 [2:00:42<2:08:05, 15.01s/batch, batch_loss=19.1, bat

Epoch 9/10:  48%|▍| 480/991 [2:00:42<2:10:13, 15.29s/batch, batch_loss=19.1, bat

Epoch 9/10:  48%|▍| 480/991 [2:00:57<2:10:13, 15.29s/batch, batch_loss=25.8, bat

Epoch 9/10:  49%|▍| 481/991 [2:00:57<2:09:50, 15.28s/batch, batch_loss=25.8, bat

Epoch 9/10:  49%|▍| 481/991 [2:01:12<2:09:50, 15.28s/batch, batch_loss=19.4, bat

Epoch 9/10:  49%|▍| 482/991 [2:01:12<2:08:55, 15.20s/batch, batch_loss=19.4, bat

Epoch 9/10:  49%|▍| 482/991 [2:01:29<2:08:55, 15.20s/batch, batch_loss=16.7, bat

Epoch 9/10:  49%|▍| 483/991 [2:01:29<2:12:58, 15.71s/batch, batch_loss=16.7, bat

Epoch 9/10:  49%|▍| 483/991 [2:01:44<2:12:58, 15.71s/batch, batch_loss=22.1, bat

Epoch 9/10:  49%|▍| 484/991 [2:01:44<2:10:53, 15.49s/batch, batch_loss=22.1, bat

Epoch 9/10:  49%|▍| 484/991 [2:01:58<2:10:53, 15.49s/batch, batch_loss=10.3, bat

Epoch 9/10:  49%|▍| 485/991 [2:01:58<2:07:06, 15.07s/batch, batch_loss=10.3, bat

Epoch 9/10:  49%|▍| 485/991 [2:02:13<2:07:06, 15.07s/batch, batch_loss=26.7, bat

Epoch 9/10:  49%|▍| 486/991 [2:02:13<2:07:03, 15.10s/batch, batch_loss=26.7, bat

Epoch 9/10:  49%|▍| 486/991 [2:02:28<2:07:03, 15.10s/batch, batch_loss=14.6, bat

Epoch 9/10:  49%|▍| 487/991 [2:02:28<2:06:14, 15.03s/batch, batch_loss=14.6, bat

Epoch 9/10:  49%|▍| 487/991 [2:02:44<2:06:14, 15.03s/batch, batch_loss=9.6, batc

Epoch 9/10:  49%|▍| 488/991 [2:02:44<2:07:10, 15.17s/batch, batch_loss=9.6, batc

Epoch 9/10:  49%|▍| 488/991 [2:02:58<2:07:10, 15.17s/batch, batch_loss=9.97, bat

Epoch 9/10:  49%|▍| 489/991 [2:02:58<2:06:01, 15.06s/batch, batch_loss=9.97, bat

Epoch 9/10:  49%|▍| 489/991 [2:03:14<2:06:01, 15.06s/batch, batch_loss=9.16, bat

Epoch 9/10:  49%|▍| 490/991 [2:03:14<2:05:50, 15.07s/batch, batch_loss=9.16, bat

Epoch 9/10:  49%|▍| 490/991 [2:03:31<2:05:50, 15.07s/batch, batch_loss=19.3, bat

Epoch 9/10:  50%|▍| 491/991 [2:03:31<2:10:30, 15.66s/batch, batch_loss=19.3, bat

Epoch 9/10:  50%|▍| 491/991 [2:03:46<2:10:30, 15.66s/batch, batch_loss=17.8, bat

Epoch 9/10:  50%|▍| 492/991 [2:03:46<2:09:36, 15.58s/batch, batch_loss=17.8, bat

Epoch 9/10:  50%|▍| 492/991 [2:04:01<2:09:36, 15.58s/batch, batch_loss=19.6, bat

Epoch 9/10:  50%|▍| 493/991 [2:04:01<2:07:03, 15.31s/batch, batch_loss=19.6, bat

Epoch 9/10:  50%|▍| 493/991 [2:04:15<2:07:03, 15.31s/batch, batch_loss=9.27, bat

Epoch 9/10:  50%|▍| 494/991 [2:04:15<2:03:13, 14.88s/batch, batch_loss=9.27, bat

Epoch 9/10:  50%|▍| 494/991 [2:04:29<2:03:13, 14.88s/batch, batch_loss=8.54e+4, 

Epoch 9/10:  50%|▍| 495/991 [2:04:29<2:01:27, 14.69s/batch, batch_loss=8.54e+4, 

Epoch 9/10:  50%|▍| 495/991 [2:04:44<2:01:27, 14.69s/batch, batch_loss=20.2, bat

Epoch 9/10:  50%|▌| 496/991 [2:04:44<2:02:42, 14.87s/batch, batch_loss=20.2, bat

Epoch 9/10:  50%|▌| 496/991 [2:04:59<2:02:42, 14.87s/batch, batch_loss=177, batc

Epoch 9/10:  50%|▌| 497/991 [2:04:59<2:01:54, 14.81s/batch, batch_loss=177, batc

Epoch 9/10:  50%|▌| 497/991 [2:05:13<2:01:54, 14.81s/batch, batch_loss=14.4, bat

Epoch 9/10:  50%|▌| 498/991 [2:05:13<2:00:00, 14.61s/batch, batch_loss=14.4, bat

Epoch 9/10:  50%|▌| 498/991 [2:05:27<2:00:00, 14.61s/batch, batch_loss=407, batc

Epoch 9/10:  50%|▌| 499/991 [2:05:27<1:59:13, 14.54s/batch, batch_loss=407, batc

Epoch 9/10:  50%|▌| 499/991 [2:05:44<1:59:13, 14.54s/batch, batch_loss=16, batch

Epoch 9/10:  50%|▌| 500/991 [2:05:44<2:03:43, 15.12s/batch, batch_loss=16, batch

Epoch 9/10:  50%|▌| 500/991 [2:05:59<2:03:43, 15.12s/batch, batch_loss=8.09, bat

Epoch 9/10:  51%|▌| 501/991 [2:05:59<2:04:43, 15.27s/batch, batch_loss=8.09, bat

Epoch 9/10:  51%|▌| 501/991 [2:06:15<2:04:43, 15.27s/batch, batch_loss=9.86, bat

Epoch 9/10:  51%|▌| 502/991 [2:06:15<2:04:15, 15.25s/batch, batch_loss=9.86, bat

Epoch 9/10:  51%|▌| 502/991 [2:06:29<2:04:15, 15.25s/batch, batch_loss=18.2, bat

Epoch 9/10:  51%|▌| 503/991 [2:06:29<2:01:12, 14.90s/batch, batch_loss=18.2, bat

Epoch 9/10:  51%|▌| 503/991 [2:06:44<2:01:12, 14.90s/batch, batch_loss=10.6, bat

Epoch 9/10:  51%|▌| 504/991 [2:06:44<2:02:25, 15.08s/batch, batch_loss=10.6, bat

Epoch 9/10:  51%|▌| 504/991 [2:06:58<2:02:25, 15.08s/batch, batch_loss=7.85, bat

Epoch 9/10:  51%|▌| 505/991 [2:06:58<2:00:05, 14.83s/batch, batch_loss=7.85, bat

Epoch 9/10:  51%|▌| 505/991 [2:07:13<2:00:05, 14.83s/batch, batch_loss=14.4, bat

Epoch 9/10:  51%|▌| 506/991 [2:07:13<1:58:44, 14.69s/batch, batch_loss=14.4, bat

Epoch 9/10:  51%|▌| 506/991 [2:07:27<1:58:44, 14.69s/batch, batch_loss=10.8, bat

Epoch 9/10:  51%|▌| 507/991 [2:07:27<1:56:18, 14.42s/batch, batch_loss=10.8, bat

Epoch 9/10:  51%|▌| 507/991 [2:07:41<1:56:18, 14.42s/batch, batch_loss=16, batch

Epoch 9/10:  51%|▌| 508/991 [2:07:41<1:55:45, 14.38s/batch, batch_loss=16, batch

Epoch 9/10:  51%|▌| 508/991 [2:07:55<1:55:45, 14.38s/batch, batch_loss=16.9, bat

Epoch 9/10:  51%|▌| 509/991 [2:07:55<1:55:17, 14.35s/batch, batch_loss=16.9, bat

Epoch 9/10:  51%|▌| 509/991 [2:08:11<1:55:17, 14.35s/batch, batch_loss=14, batch

Epoch 9/10:  51%|▌| 510/991 [2:08:11<1:57:37, 14.67s/batch, batch_loss=14, batch

Epoch 9/10:  51%|▌| 510/991 [2:08:25<1:57:37, 14.67s/batch, batch_loss=14.3, bat

Epoch 9/10:  52%|▌| 511/991 [2:08:25<1:57:47, 14.72s/batch, batch_loss=14.3, bat

Epoch 9/10:  52%|▌| 511/991 [2:08:41<1:57:47, 14.72s/batch, batch_loss=9.96, bat

Epoch 9/10:  52%|▌| 512/991 [2:08:41<1:59:51, 15.01s/batch, batch_loss=9.96, bat

Epoch 9/10:  52%|▌| 512/991 [2:08:56<1:59:51, 15.01s/batch, batch_loss=10.3, bat

Epoch 9/10:  52%|▌| 513/991 [2:08:56<2:00:09, 15.08s/batch, batch_loss=10.3, bat

Epoch 9/10:  52%|▌| 513/991 [2:09:11<2:00:09, 15.08s/batch, batch_loss=16.8, bat

Epoch 9/10:  52%|▌| 514/991 [2:09:11<1:58:55, 14.96s/batch, batch_loss=16.8, bat

Epoch 9/10:  52%|▌| 514/991 [2:09:25<1:58:55, 14.96s/batch, batch_loss=12.7, bat

Epoch 9/10:  52%|▌| 515/991 [2:09:25<1:57:09, 14.77s/batch, batch_loss=12.7, bat

Epoch 9/10:  52%|▌| 515/991 [2:09:43<1:57:09, 14.77s/batch, batch_loss=15.1, bat

Epoch 9/10:  52%|▌| 516/991 [2:09:43<2:04:41, 15.75s/batch, batch_loss=15.1, bat

Epoch 9/10:  52%|▌| 516/991 [2:09:58<2:04:41, 15.75s/batch, batch_loss=13.4, bat

Epoch 9/10:  52%|▌| 517/991 [2:09:58<2:01:56, 15.44s/batch, batch_loss=13.4, bat

Epoch 9/10:  52%|▌| 517/991 [2:10:13<2:01:56, 15.44s/batch, batch_loss=22.5, bat

Epoch 9/10:  52%|▌| 518/991 [2:10:13<2:00:48, 15.32s/batch, batch_loss=22.5, bat

Epoch 9/10:  52%|▌| 518/991 [2:10:28<2:00:48, 15.32s/batch, batch_loss=14, batch

Epoch 9/10:  52%|▌| 519/991 [2:10:28<2:00:10, 15.28s/batch, batch_loss=14, batch

Epoch 9/10:  52%|▌| 519/991 [2:10:44<2:00:10, 15.28s/batch, batch_loss=11.7, bat

Epoch 9/10:  52%|▌| 520/991 [2:10:44<2:00:22, 15.33s/batch, batch_loss=11.7, bat

Epoch 9/10:  52%|▌| 520/991 [2:10:59<2:00:22, 15.33s/batch, batch_loss=7.6, batc

Epoch 9/10:  53%|▌| 521/991 [2:10:59<1:59:11, 15.22s/batch, batch_loss=7.6, batc

Epoch 9/10:  53%|▌| 521/991 [2:11:14<1:59:11, 15.22s/batch, batch_loss=10.1, bat

Epoch 9/10:  53%|▌| 522/991 [2:11:14<1:58:22, 15.14s/batch, batch_loss=10.1, bat

Epoch 9/10:  53%|▌| 522/991 [2:11:31<1:58:22, 15.14s/batch, batch_loss=2.99, bat

Epoch 9/10:  53%|▌| 523/991 [2:11:31<2:03:20, 15.81s/batch, batch_loss=2.99, bat

Epoch 9/10:  53%|▌| 523/991 [2:11:46<2:03:20, 15.81s/batch, batch_loss=8.73, bat

Epoch 9/10:  53%|▌| 524/991 [2:11:46<2:01:56, 15.67s/batch, batch_loss=8.73, bat

Epoch 9/10:  53%|▌| 524/991 [2:12:02<2:01:56, 15.67s/batch, batch_loss=6.86, bat

Epoch 9/10:  53%|▌| 525/991 [2:12:02<2:00:52, 15.56s/batch, batch_loss=6.86, bat

Epoch 9/10:  53%|▌| 525/991 [2:12:18<2:00:52, 15.56s/batch, batch_loss=7.25, bat

Epoch 9/10:  53%|▌| 526/991 [2:12:18<2:01:50, 15.72s/batch, batch_loss=7.25, bat

Epoch 9/10:  53%|▌| 526/991 [2:12:33<2:01:50, 15.72s/batch, batch_loss=16.6, bat

Epoch 9/10:  53%|▌| 527/991 [2:12:33<2:01:05, 15.66s/batch, batch_loss=16.6, bat

Epoch 9/10:  53%|▌| 527/991 [2:12:48<2:01:05, 15.66s/batch, batch_loss=13.5, bat

Epoch 9/10:  53%|▌| 528/991 [2:12:48<1:59:12, 15.45s/batch, batch_loss=13.5, bat

Epoch 9/10:  53%|▌| 528/991 [2:13:03<1:59:12, 15.45s/batch, batch_loss=9.4, batc

Epoch 9/10:  53%|▌| 529/991 [2:13:03<1:58:04, 15.33s/batch, batch_loss=9.4, batc

Epoch 9/10:  53%|▌| 529/991 [2:13:18<1:58:04, 15.33s/batch, batch_loss=17.7, bat

Epoch 9/10:  53%|▌| 530/991 [2:13:18<1:56:13, 15.13s/batch, batch_loss=17.7, bat

Epoch 9/10:  53%|▌| 530/991 [2:13:32<1:56:13, 15.13s/batch, batch_loss=15.6, bat

Epoch 9/10:  54%|▌| 531/991 [2:13:32<1:52:50, 14.72s/batch, batch_loss=15.6, bat

Epoch 9/10:  54%|▌| 531/991 [2:13:47<1:52:50, 14.72s/batch, batch_loss=15.3, bat

Epoch 9/10:  54%|▌| 532/991 [2:13:47<1:53:59, 14.90s/batch, batch_loss=15.3, bat

Epoch 9/10:  54%|▌| 532/991 [2:14:03<1:53:59, 14.90s/batch, batch_loss=13, batch

Epoch 9/10:  54%|▌| 533/991 [2:14:03<1:55:27, 15.13s/batch, batch_loss=13, batch

Epoch 9/10:  54%|▌| 533/991 [2:14:18<1:55:27, 15.13s/batch, batch_loss=13.7, bat

Epoch 9/10:  54%|▌| 534/991 [2:14:18<1:54:34, 15.04s/batch, batch_loss=13.7, bat

Epoch 9/10:  54%|▌| 534/991 [2:14:32<1:54:34, 15.04s/batch, batch_loss=17.1, bat

Epoch 9/10:  54%|▌| 535/991 [2:14:32<1:52:52, 14.85s/batch, batch_loss=17.1, bat

Epoch 9/10:  54%|▌| 535/991 [2:14:47<1:52:52, 14.85s/batch, batch_loss=11.2, bat

Epoch 9/10:  54%|▌| 536/991 [2:14:47<1:52:13, 14.80s/batch, batch_loss=11.2, bat

Epoch 9/10:  54%|▌| 536/991 [2:15:01<1:52:13, 14.80s/batch, batch_loss=9.49, bat

Epoch 9/10:  54%|▌| 537/991 [2:15:01<1:50:21, 14.59s/batch, batch_loss=9.49, bat

Epoch 9/10:  54%|▌| 537/991 [2:15:15<1:50:21, 14.59s/batch, batch_loss=1.79e+3, 

Epoch 9/10:  54%|▌| 538/991 [2:15:15<1:50:25, 14.63s/batch, batch_loss=1.79e+3, 

Epoch 9/10:  54%|▌| 538/991 [2:15:30<1:50:25, 14.63s/batch, batch_loss=28.8, bat

Epoch 9/10:  54%|▌| 539/991 [2:15:30<1:49:21, 14.52s/batch, batch_loss=28.8, bat

Epoch 9/10:  54%|▌| 539/991 [2:15:44<1:49:21, 14.52s/batch, batch_loss=24, batch

Epoch 9/10:  54%|▌| 540/991 [2:15:44<1:49:19, 14.54s/batch, batch_loss=24, batch

Epoch 9/10:  54%|▌| 540/991 [2:15:59<1:49:19, 14.54s/batch, batch_loss=1.3e+4, b

Epoch 9/10:  55%|▌| 541/991 [2:15:59<1:48:43, 14.50s/batch, batch_loss=1.3e+4, b

Epoch 9/10:  55%|▌| 541/991 [2:16:14<1:48:43, 14.50s/batch, batch_loss=2.84e+3, 

Epoch 9/10:  55%|▌| 542/991 [2:16:14<1:49:53, 14.69s/batch, batch_loss=2.84e+3, 

Epoch 9/10:  55%|▌| 542/991 [2:16:28<1:49:53, 14.69s/batch, batch_loss=16.7, bat

Epoch 9/10:  55%|▌| 543/991 [2:16:28<1:48:57, 14.59s/batch, batch_loss=16.7, bat

Epoch 9/10:  55%|▌| 543/991 [2:16:43<1:48:57, 14.59s/batch, batch_loss=22.5, bat

Epoch 9/10:  55%|▌| 544/991 [2:16:43<1:48:25, 14.55s/batch, batch_loss=22.5, bat

Epoch 9/10:  55%|▌| 544/991 [2:16:58<1:48:25, 14.55s/batch, batch_loss=16.1, bat

Epoch 9/10:  55%|▌| 545/991 [2:16:58<1:50:17, 14.84s/batch, batch_loss=16.1, bat

Epoch 9/10:  55%|▌| 545/991 [2:17:14<1:50:17, 14.84s/batch, batch_loss=299, batc

Epoch 9/10:  55%|▌| 546/991 [2:17:14<1:51:23, 15.02s/batch, batch_loss=299, batc

Epoch 9/10:  55%|▌| 546/991 [2:17:30<1:51:23, 15.02s/batch, batch_loss=18.8, bat

Epoch 9/10:  55%|▌| 547/991 [2:17:30<1:53:51, 15.39s/batch, batch_loss=18.8, bat

Epoch 9/10:  55%|▌| 547/991 [2:17:46<1:53:51, 15.39s/batch, batch_loss=15.2, bat

Epoch 9/10:  55%|▌| 548/991 [2:17:46<1:54:23, 15.49s/batch, batch_loss=15.2, bat

Epoch 9/10:  55%|▌| 548/991 [2:18:00<1:54:23, 15.49s/batch, batch_loss=9.44, bat

Epoch 9/10:  55%|▌| 549/991 [2:18:00<1:52:33, 15.28s/batch, batch_loss=9.44, bat

Epoch 9/10:  55%|▌| 549/991 [2:18:16<1:52:33, 15.28s/batch, batch_loss=19.8, bat

Epoch 9/10:  55%|▌| 550/991 [2:18:16<1:53:01, 15.38s/batch, batch_loss=19.8, bat

Epoch 9/10:  55%|▌| 550/991 [2:18:32<1:53:01, 15.38s/batch, batch_loss=16.5, bat

Epoch 9/10:  56%|▌| 551/991 [2:18:32<1:54:58, 15.68s/batch, batch_loss=16.5, bat

Epoch 9/10:  56%|▌| 551/991 [2:18:48<1:54:58, 15.68s/batch, batch_loss=14.6, bat

Epoch 9/10:  56%|▌| 552/991 [2:18:48<1:54:57, 15.71s/batch, batch_loss=14.6, bat

Epoch 9/10:  56%|▌| 552/991 [2:19:04<1:54:57, 15.71s/batch, batch_loss=15.9, bat

Epoch 9/10:  56%|▌| 553/991 [2:19:04<1:54:20, 15.66s/batch, batch_loss=15.9, bat

Epoch 9/10:  56%|▌| 553/991 [2:19:19<1:54:20, 15.66s/batch, batch_loss=5.74e+3, 

Epoch 9/10:  56%|▌| 554/991 [2:19:19<1:53:53, 15.64s/batch, batch_loss=5.74e+3, 

Epoch 9/10:  56%|▌| 554/991 [2:19:36<1:53:53, 15.64s/batch, batch_loss=2.57e+3, 

Epoch 9/10:  56%|▌| 555/991 [2:19:36<1:54:58, 15.82s/batch, batch_loss=2.57e+3, 

Epoch 9/10:  56%|▌| 555/991 [2:19:54<1:54:58, 15.82s/batch, batch_loss=17.4, bat

Epoch 9/10:  56%|▌| 556/991 [2:19:54<1:59:41, 16.51s/batch, batch_loss=17.4, bat

Epoch 9/10:  56%|▌| 556/991 [2:20:08<1:59:41, 16.51s/batch, batch_loss=1.27e+4, 

Epoch 9/10:  56%|▌| 557/991 [2:20:08<1:55:17, 15.94s/batch, batch_loss=1.27e+4, 

Epoch 9/10:  56%|▌| 557/991 [2:20:23<1:55:17, 15.94s/batch, batch_loss=8.95, bat

Epoch 9/10:  56%|▌| 558/991 [2:20:23<1:52:12, 15.55s/batch, batch_loss=8.95, bat

Epoch 9/10:  56%|▌| 558/991 [2:20:38<1:52:12, 15.55s/batch, batch_loss=18.8, bat

Epoch 9/10:  56%|▌| 559/991 [2:20:38<1:50:07, 15.29s/batch, batch_loss=18.8, bat

Epoch 9/10:  56%|▌| 559/991 [2:20:53<1:50:07, 15.29s/batch, batch_loss=7.55, bat

Epoch 9/10:  57%|▌| 560/991 [2:20:53<1:49:55, 15.30s/batch, batch_loss=7.55, bat

Epoch 9/10:  57%|▌| 560/991 [2:21:08<1:49:55, 15.30s/batch, batch_loss=8.12, bat

Epoch 9/10:  57%|▌| 561/991 [2:21:08<1:50:03, 15.36s/batch, batch_loss=8.12, bat

Epoch 9/10:  57%|▌| 561/991 [2:21:24<1:50:03, 15.36s/batch, batch_loss=15.1, bat

Epoch 9/10:  57%|▌| 562/991 [2:21:24<1:50:25, 15.44s/batch, batch_loss=15.1, bat

Epoch 9/10:  57%|▌| 562/991 [2:21:42<1:50:25, 15.44s/batch, batch_loss=7.51, bat

Epoch 9/10:  57%|▌| 563/991 [2:21:42<1:55:43, 16.22s/batch, batch_loss=7.51, bat

Epoch 9/10:  57%|▌| 563/991 [2:21:57<1:55:43, 16.22s/batch, batch_loss=11.7, bat

Epoch 9/10:  57%|▌| 564/991 [2:21:57<1:52:18, 15.78s/batch, batch_loss=11.7, bat

Epoch 9/10:  57%|▌| 564/991 [2:22:12<1:52:18, 15.78s/batch, batch_loss=498, batc

Epoch 9/10:  57%|▌| 565/991 [2:22:12<1:49:51, 15.47s/batch, batch_loss=498, batc

Epoch 9/10:  57%|▌| 565/991 [2:22:27<1:49:51, 15.47s/batch, batch_loss=11, batch

Epoch 9/10:  57%|▌| 566/991 [2:22:27<1:48:55, 15.38s/batch, batch_loss=11, batch

Epoch 9/10:  57%|▌| 566/991 [2:22:42<1:48:55, 15.38s/batch, batch_loss=21.5, bat

Epoch 9/10:  57%|▌| 567/991 [2:22:42<1:48:24, 15.34s/batch, batch_loss=21.5, bat

Epoch 9/10:  57%|▌| 567/991 [2:22:58<1:48:24, 15.34s/batch, batch_loss=296, batc

Epoch 9/10:  57%|▌| 568/991 [2:22:58<1:48:32, 15.40s/batch, batch_loss=296, batc

Epoch 9/10:  57%|▌| 568/991 [2:23:12<1:48:32, 15.40s/batch, batch_loss=25.5, bat

Epoch 9/10:  57%|▌| 569/991 [2:23:12<1:46:25, 15.13s/batch, batch_loss=25.5, bat

Epoch 9/10:  57%|▌| 569/991 [2:23:27<1:46:25, 15.13s/batch, batch_loss=8.5e+3, b

Epoch 9/10:  58%|▌| 570/991 [2:23:27<1:44:54, 14.95s/batch, batch_loss=8.5e+3, b

Epoch 9/10:  58%|▌| 570/991 [2:23:42<1:44:54, 14.95s/batch, batch_loss=11.7, bat

Epoch 9/10:  58%|▌| 571/991 [2:23:42<1:44:41, 14.96s/batch, batch_loss=11.7, bat

Epoch 9/10:  58%|▌| 571/991 [2:23:55<1:44:41, 14.96s/batch, batch_loss=13, batch

Epoch 9/10:  58%|▌| 572/991 [2:23:55<1:41:45, 14.57s/batch, batch_loss=13, batch

Epoch 9/10:  58%|▌| 572/991 [2:24:11<1:41:45, 14.57s/batch, batch_loss=10, batch

Epoch 9/10:  58%|▌| 573/991 [2:24:11<1:43:01, 14.79s/batch, batch_loss=10, batch

Epoch 9/10:  58%|▌| 573/991 [2:24:25<1:43:01, 14.79s/batch, batch_loss=12.1, bat

Epoch 9/10:  58%|▌| 574/991 [2:24:25<1:42:13, 14.71s/batch, batch_loss=12.1, bat

Epoch 9/10:  58%|▌| 574/991 [2:24:40<1:42:13, 14.71s/batch, batch_loss=19.3, bat

Epoch 9/10:  58%|▌| 575/991 [2:24:40<1:42:06, 14.73s/batch, batch_loss=19.3, bat

Epoch 9/10:  58%|▌| 575/991 [2:24:54<1:42:06, 14.73s/batch, batch_loss=24.1, bat

Epoch 9/10:  58%|▌| 576/991 [2:24:54<1:40:49, 14.58s/batch, batch_loss=24.1, bat

Epoch 9/10:  58%|▌| 576/991 [2:25:09<1:40:49, 14.58s/batch, batch_loss=13.6, bat

Epoch 9/10:  58%|▌| 577/991 [2:25:09<1:40:47, 14.61s/batch, batch_loss=13.6, bat

Epoch 9/10:  58%|▌| 577/991 [2:25:26<1:40:47, 14.61s/batch, batch_loss=10.7, bat

Epoch 9/10:  58%|▌| 578/991 [2:25:26<1:46:01, 15.40s/batch, batch_loss=10.7, bat

Epoch 9/10:  58%|▌| 578/991 [2:25:41<1:46:01, 15.40s/batch, batch_loss=12.2, bat

Epoch 9/10:  58%|▌| 579/991 [2:25:41<1:44:13, 15.18s/batch, batch_loss=12.2, bat

Epoch 9/10:  58%|▌| 579/991 [2:25:55<1:44:13, 15.18s/batch, batch_loss=18.2, bat

Epoch 9/10:  59%|▌| 580/991 [2:25:55<1:42:45, 15.00s/batch, batch_loss=18.2, bat

Epoch 9/10:  59%|▌| 580/991 [2:26:10<1:42:45, 15.00s/batch, batch_loss=9.26, bat

Epoch 9/10:  59%|▌| 581/991 [2:26:10<1:42:01, 14.93s/batch, batch_loss=9.26, bat

Epoch 9/10:  59%|▌| 581/991 [2:26:24<1:42:01, 14.93s/batch, batch_loss=3.48, bat

Epoch 9/10:  59%|▌| 582/991 [2:26:24<1:39:36, 14.61s/batch, batch_loss=3.48, bat

Epoch 9/10:  59%|▌| 582/991 [2:26:37<1:39:36, 14.61s/batch, batch_loss=6.62e+3, 

Epoch 9/10:  59%|▌| 583/991 [2:26:37<1:35:21, 14.02s/batch, batch_loss=6.62e+3, 

Epoch 9/10:  59%|▌| 583/991 [2:26:50<1:35:21, 14.02s/batch, batch_loss=13.1, bat

Epoch 9/10:  59%|▌| 584/991 [2:26:50<1:33:26, 13.78s/batch, batch_loss=13.1, bat

Epoch 9/10:  59%|▌| 584/991 [2:27:06<1:33:26, 13.78s/batch, batch_loss=9.91, bat

Epoch 9/10:  59%|▌| 585/991 [2:27:06<1:39:19, 14.68s/batch, batch_loss=9.91, bat

Epoch 9/10:  59%|▌| 585/991 [2:27:22<1:39:19, 14.68s/batch, batch_loss=24.9, bat

Epoch 9/10:  59%|▌| 586/991 [2:27:22<1:40:25, 14.88s/batch, batch_loss=24.9, bat

Epoch 9/10:  59%|▌| 586/991 [2:27:36<1:40:25, 14.88s/batch, batch_loss=23.3, bat

Epoch 9/10:  59%|▌| 587/991 [2:27:36<1:39:08, 14.72s/batch, batch_loss=23.3, bat

Epoch 9/10:  59%|▌| 587/991 [2:27:51<1:39:08, 14.72s/batch, batch_loss=17.7, bat

Epoch 9/10:  59%|▌| 588/991 [2:27:51<1:38:10, 14.62s/batch, batch_loss=17.7, bat

Epoch 9/10:  59%|▌| 588/991 [2:28:05<1:38:10, 14.62s/batch, batch_loss=10.3, bat

Epoch 9/10:  59%|▌| 589/991 [2:28:05<1:38:05, 14.64s/batch, batch_loss=10.3, bat

Epoch 9/10:  59%|▌| 589/991 [2:28:20<1:38:05, 14.64s/batch, batch_loss=17.2, bat

Epoch 9/10:  60%|▌| 590/991 [2:28:20<1:38:47, 14.78s/batch, batch_loss=17.2, bat

Epoch 9/10:  60%|▌| 590/991 [2:28:35<1:38:47, 14.78s/batch, batch_loss=15.6, bat

Epoch 9/10:  60%|▌| 591/991 [2:28:35<1:38:31, 14.78s/batch, batch_loss=15.6, bat

Epoch 9/10:  60%|▌| 591/991 [2:28:50<1:38:31, 14.78s/batch, batch_loss=8.67, bat

Epoch 9/10:  60%|▌| 592/991 [2:28:50<1:37:48, 14.71s/batch, batch_loss=8.67, bat

Epoch 9/10:  60%|▌| 592/991 [2:29:04<1:37:48, 14.71s/batch, batch_loss=11.2, bat

Epoch 9/10:  60%|▌| 593/991 [2:29:04<1:37:29, 14.70s/batch, batch_loss=11.2, bat

Epoch 9/10:  60%|▌| 593/991 [2:29:19<1:37:29, 14.70s/batch, batch_loss=12.2, bat

Epoch 9/10:  60%|▌| 594/991 [2:29:19<1:37:04, 14.67s/batch, batch_loss=12.2, bat

Epoch 9/10:  60%|▌| 594/991 [2:29:33<1:37:04, 14.67s/batch, batch_loss=8.47, bat

Epoch 9/10:  60%|▌| 595/991 [2:29:33<1:35:37, 14.49s/batch, batch_loss=8.47, bat

Epoch 9/10:  60%|▌| 595/991 [2:29:50<1:35:37, 14.49s/batch, batch_loss=8.42, bat

Epoch 9/10:  60%|▌| 596/991 [2:29:50<1:40:13, 15.22s/batch, batch_loss=8.42, bat

Epoch 9/10:  60%|▌| 596/991 [2:30:05<1:40:13, 15.22s/batch, batch_loss=23.6, bat

Epoch 9/10:  60%|▌| 597/991 [2:30:05<1:38:59, 15.08s/batch, batch_loss=23.6, bat

Epoch 9/10:  60%|▌| 597/991 [2:30:20<1:38:59, 15.08s/batch, batch_loss=9.59, bat

Epoch 9/10:  60%|▌| 598/991 [2:30:20<1:38:14, 15.00s/batch, batch_loss=9.59, bat

Epoch 9/10:  60%|▌| 598/991 [2:30:35<1:38:14, 15.00s/batch, batch_loss=17.6, bat

Epoch 9/10:  60%|▌| 599/991 [2:30:35<1:38:47, 15.12s/batch, batch_loss=17.6, bat

Epoch 9/10:  60%|▌| 599/991 [2:30:50<1:38:47, 15.12s/batch, batch_loss=14.3, bat

Epoch 9/10:  61%|▌| 600/991 [2:30:50<1:37:59, 15.04s/batch, batch_loss=14.3, bat

Epoch 9/10:  61%|▌| 600/991 [2:31:05<1:37:59, 15.04s/batch, batch_loss=17.2, bat

Epoch 9/10:  61%|▌| 601/991 [2:31:05<1:37:24, 14.99s/batch, batch_loss=17.2, bat

Epoch 9/10:  61%|▌| 601/991 [2:31:19<1:37:24, 14.99s/batch, batch_loss=12, batch

Epoch 9/10:  61%|▌| 602/991 [2:31:19<1:36:55, 14.95s/batch, batch_loss=12, batch

Epoch 9/10:  61%|▌| 602/991 [2:31:37<1:36:55, 14.95s/batch, batch_loss=8.06, bat

Epoch 9/10:  61%|▌| 603/991 [2:31:37<1:40:52, 15.60s/batch, batch_loss=8.06, bat

Epoch 9/10:  61%|▌| 603/991 [2:31:51<1:40:52, 15.60s/batch, batch_loss=1.01e+4, 

Epoch 9/10:  61%|▌| 604/991 [2:31:51<1:38:01, 15.20s/batch, batch_loss=1.01e+4, 

Epoch 9/10:  61%|▌| 604/991 [2:32:06<1:38:01, 15.20s/batch, batch_loss=11.6, bat

Epoch 9/10:  61%|▌| 605/991 [2:32:06<1:37:03, 15.09s/batch, batch_loss=11.6, bat

Epoch 9/10:  61%|▌| 605/991 [2:32:20<1:37:03, 15.09s/batch, batch_loss=10, batch

Epoch 9/10:  61%|▌| 606/991 [2:32:20<1:36:07, 14.98s/batch, batch_loss=10, batch

Epoch 9/10:  61%|▌| 606/991 [2:32:36<1:36:07, 14.98s/batch, batch_loss=12.1, bat

Epoch 9/10:  61%|▌| 607/991 [2:32:36<1:36:12, 15.03s/batch, batch_loss=12.1, bat

Epoch 9/10:  61%|▌| 607/991 [2:32:49<1:36:12, 15.03s/batch, batch_loss=15.2, bat

Epoch 9/10:  61%|▌| 608/991 [2:32:49<1:33:47, 14.69s/batch, batch_loss=15.2, bat

Epoch 9/10:  61%|▌| 608/991 [2:33:04<1:33:47, 14.69s/batch, batch_loss=16.8, bat

Epoch 9/10:  61%|▌| 609/991 [2:33:04<1:34:02, 14.77s/batch, batch_loss=16.8, bat

Epoch 9/10:  61%|▌| 609/991 [2:33:19<1:34:02, 14.77s/batch, batch_loss=16.3, bat

Epoch 9/10:  62%|▌| 610/991 [2:33:19<1:33:52, 14.78s/batch, batch_loss=16.3, bat

Epoch 9/10:  62%|▌| 610/991 [2:33:34<1:33:52, 14.78s/batch, batch_loss=26.3, bat

Epoch 9/10:  62%|▌| 611/991 [2:33:34<1:33:37, 14.78s/batch, batch_loss=26.3, bat

Epoch 9/10:  62%|▌| 611/991 [2:33:52<1:33:37, 14.78s/batch, batch_loss=8.44, bat

Epoch 9/10:  62%|▌| 612/991 [2:33:52<1:38:37, 15.61s/batch, batch_loss=8.44, bat

Epoch 9/10:  62%|▌| 612/991 [2:34:07<1:38:37, 15.61s/batch, batch_loss=14.7, bat

Epoch 9/10:  62%|▌| 613/991 [2:34:07<1:37:14, 15.43s/batch, batch_loss=14.7, bat

Epoch 9/10:  62%|▌| 613/991 [2:34:22<1:37:14, 15.43s/batch, batch_loss=1.73e+4, 

Epoch 9/10:  62%|▌| 614/991 [2:34:22<1:36:00, 15.28s/batch, batch_loss=1.73e+4, 

Epoch 9/10:  62%|▌| 614/991 [2:34:37<1:36:00, 15.28s/batch, batch_loss=987, batc

Epoch 9/10:  62%|▌| 615/991 [2:34:37<1:36:13, 15.35s/batch, batch_loss=987, batc

Epoch 9/10:  62%|▌| 615/991 [2:34:51<1:36:13, 15.35s/batch, batch_loss=10.3, bat

Epoch 9/10:  62%|▌| 616/991 [2:34:51<1:33:40, 14.99s/batch, batch_loss=10.3, bat

Epoch 9/10:  62%|▌| 616/991 [2:35:06<1:33:40, 14.99s/batch, batch_loss=19.8, bat

Epoch 9/10:  62%|▌| 617/991 [2:35:06<1:33:12, 14.95s/batch, batch_loss=19.8, bat

Epoch 9/10:  62%|▌| 617/991 [2:35:21<1:33:12, 14.95s/batch, batch_loss=16.2, bat

Epoch 9/10:  62%|▌| 618/991 [2:35:21<1:32:24, 14.86s/batch, batch_loss=16.2, bat

Epoch 9/10:  62%|▌| 618/991 [2:35:35<1:32:24, 14.86s/batch, batch_loss=17.5, bat

Epoch 9/10:  62%|▌| 619/991 [2:35:35<1:31:59, 14.84s/batch, batch_loss=17.5, bat

Epoch 9/10:  62%|▌| 619/991 [2:35:51<1:31:59, 14.84s/batch, batch_loss=12.2, bat

Epoch 9/10:  63%|▋| 620/991 [2:35:51<1:32:39, 14.99s/batch, batch_loss=12.2, bat

Epoch 9/10:  63%|▋| 620/991 [2:36:06<1:32:39, 14.99s/batch, batch_loss=10.2, bat

Epoch 9/10:  63%|▋| 621/991 [2:36:06<1:32:03, 14.93s/batch, batch_loss=10.2, bat

Epoch 9/10:  63%|▋| 621/991 [2:36:20<1:32:03, 14.93s/batch, batch_loss=5.5e+3, b

Epoch 9/10:  63%|▋| 622/991 [2:36:20<1:31:01, 14.80s/batch, batch_loss=5.5e+3, b

Epoch 9/10:  63%|▋| 622/991 [2:36:35<1:31:01, 14.80s/batch, batch_loss=17.6, bat

Epoch 9/10:  63%|▋| 623/991 [2:36:35<1:30:41, 14.79s/batch, batch_loss=17.6, bat

Epoch 9/10:  63%|▋| 623/991 [2:36:49<1:30:41, 14.79s/batch, batch_loss=1.6e+4, b

Epoch 9/10:  63%|▋| 624/991 [2:36:49<1:30:03, 14.72s/batch, batch_loss=1.6e+4, b

Epoch 9/10:  63%|▋| 624/991 [2:37:04<1:30:03, 14.72s/batch, batch_loss=9.72, bat

Epoch 9/10:  63%|▋| 625/991 [2:37:04<1:29:43, 14.71s/batch, batch_loss=9.72, bat

Epoch 9/10:  63%|▋| 625/991 [2:37:19<1:29:43, 14.71s/batch, batch_loss=7.59, bat

Epoch 9/10:  63%|▋| 626/991 [2:37:19<1:30:03, 14.80s/batch, batch_loss=7.59, bat

Epoch 9/10:  63%|▋| 626/991 [2:37:33<1:30:03, 14.80s/batch, batch_loss=4.31e+3, 

Epoch 9/10:  63%|▋| 627/991 [2:37:33<1:28:41, 14.62s/batch, batch_loss=4.31e+3, 

Epoch 9/10:  63%|▋| 627/991 [2:37:47<1:28:41, 14.62s/batch, batch_loss=1.05e+3, 

Epoch 9/10:  63%|▋| 628/991 [2:37:47<1:27:23, 14.45s/batch, batch_loss=1.05e+3, 

Epoch 9/10:  63%|▋| 628/991 [2:38:01<1:27:23, 14.45s/batch, batch_loss=13.6, bat

Epoch 9/10:  63%|▋| 629/991 [2:38:01<1:26:06, 14.27s/batch, batch_loss=13.6, bat

Epoch 9/10:  63%|▋| 629/991 [2:38:18<1:26:06, 14.27s/batch, batch_loss=19.3, bat

Epoch 9/10:  64%|▋| 630/991 [2:38:18<1:30:35, 15.06s/batch, batch_loss=19.3, bat

Epoch 9/10:  64%|▋| 630/991 [2:38:33<1:30:35, 15.06s/batch, batch_loss=17.1, bat

Epoch 9/10:  64%|▋| 631/991 [2:38:33<1:29:57, 14.99s/batch, batch_loss=17.1, bat

Epoch 9/10:  64%|▋| 631/991 [2:38:47<1:29:57, 14.99s/batch, batch_loss=3.98, bat

Epoch 9/10:  64%|▋| 632/991 [2:38:47<1:27:44, 14.66s/batch, batch_loss=3.98, bat

Epoch 9/10:  64%|▋| 632/991 [2:39:01<1:27:44, 14.66s/batch, batch_loss=19, batch

Epoch 9/10:  64%|▋| 633/991 [2:39:01<1:27:21, 14.64s/batch, batch_loss=19, batch

Epoch 9/10:  64%|▋| 633/991 [2:39:15<1:27:21, 14.64s/batch, batch_loss=30.3, bat

Epoch 9/10:  64%|▋| 634/991 [2:39:15<1:24:50, 14.26s/batch, batch_loss=30.3, bat

Epoch 9/10:  64%|▋| 634/991 [2:39:29<1:24:50, 14.26s/batch, batch_loss=24.9, bat

Epoch 9/10:  64%|▋| 635/991 [2:39:29<1:23:59, 14.16s/batch, batch_loss=24.9, bat

Epoch 9/10:  64%|▋| 635/991 [2:39:44<1:23:59, 14.16s/batch, batch_loss=18.6, bat

Epoch 9/10:  64%|▋| 636/991 [2:39:44<1:25:46, 14.50s/batch, batch_loss=18.6, bat

Epoch 9/10:  64%|▋| 636/991 [2:39:59<1:25:46, 14.50s/batch, batch_loss=17.9, bat

Epoch 9/10:  64%|▋| 637/991 [2:39:59<1:25:57, 14.57s/batch, batch_loss=17.9, bat

Epoch 9/10:  64%|▋| 637/991 [2:40:13<1:25:57, 14.57s/batch, batch_loss=17.6, bat

Epoch 9/10:  64%|▋| 638/991 [2:40:13<1:25:53, 14.60s/batch, batch_loss=17.6, bat

Epoch 9/10:  64%|▋| 638/991 [2:40:29<1:25:53, 14.60s/batch, batch_loss=14.4, bat

Epoch 9/10:  64%|▋| 639/991 [2:40:29<1:26:30, 14.74s/batch, batch_loss=14.4, bat

Epoch 9/10:  64%|▋| 639/991 [2:40:44<1:26:30, 14.74s/batch, batch_loss=672, batc

Epoch 9/10:  65%|▋| 640/991 [2:40:44<1:27:41, 14.99s/batch, batch_loss=672, batc

Epoch 9/10:  65%|▋| 640/991 [2:41:00<1:27:41, 14.99s/batch, batch_loss=14, batch

Epoch 9/10:  65%|▋| 641/991 [2:41:00<1:28:15, 15.13s/batch, batch_loss=14, batch

Epoch 9/10:  65%|▋| 641/991 [2:41:14<1:28:15, 15.13s/batch, batch_loss=9.57, bat

Epoch 9/10:  65%|▋| 642/991 [2:41:14<1:26:55, 14.94s/batch, batch_loss=9.57, bat

Epoch 9/10:  65%|▋| 642/991 [2:41:29<1:26:55, 14.94s/batch, batch_loss=2.12e+4, 

Epoch 9/10:  65%|▋| 643/991 [2:41:29<1:26:38, 14.94s/batch, batch_loss=2.12e+4, 

Epoch 9/10:  65%|▋| 643/991 [2:41:46<1:26:38, 14.94s/batch, batch_loss=1.76e+4, 

Epoch 9/10:  65%|▋| 644/991 [2:41:46<1:30:40, 15.68s/batch, batch_loss=1.76e+4, 

Epoch 9/10:  65%|▋| 644/991 [2:42:01<1:30:40, 15.68s/batch, batch_loss=2.2e+3, b

Epoch 9/10:  65%|▋| 645/991 [2:42:01<1:28:19, 15.32s/batch, batch_loss=2.2e+3, b

Epoch 9/10:  65%|▋| 645/991 [2:42:16<1:28:19, 15.32s/batch, batch_loss=11.7, bat

Epoch 9/10:  65%|▋| 646/991 [2:42:16<1:28:18, 15.36s/batch, batch_loss=11.7, bat

Epoch 9/10:  65%|▋| 646/991 [2:42:32<1:28:18, 15.36s/batch, batch_loss=11.9, bat

Epoch 9/10:  65%|▋| 647/991 [2:42:32<1:27:48, 15.31s/batch, batch_loss=11.9, bat

Epoch 9/10:  65%|▋| 647/991 [2:42:46<1:27:48, 15.31s/batch, batch_loss=16, batch

Epoch 9/10:  65%|▋| 648/991 [2:42:46<1:25:22, 14.93s/batch, batch_loss=16, batch

Epoch 9/10:  65%|▋| 648/991 [2:43:01<1:25:22, 14.93s/batch, batch_loss=15.8, bat

Epoch 9/10:  65%|▋| 649/991 [2:43:01<1:25:35, 15.02s/batch, batch_loss=15.8, bat

Epoch 9/10:  65%|▋| 649/991 [2:43:18<1:25:35, 15.02s/batch, batch_loss=1.35e+4, 

Epoch 9/10:  66%|▋| 650/991 [2:43:18<1:29:39, 15.78s/batch, batch_loss=1.35e+4, 

Epoch 9/10:  66%|▋| 650/991 [2:43:33<1:29:39, 15.78s/batch, batch_loss=9.45, bat

Epoch 9/10:  66%|▋| 651/991 [2:43:33<1:27:21, 15.42s/batch, batch_loss=9.45, bat

Epoch 9/10:  66%|▋| 651/991 [2:43:47<1:27:21, 15.42s/batch, batch_loss=13.9, bat

Epoch 9/10:  66%|▋| 652/991 [2:43:47<1:24:55, 15.03s/batch, batch_loss=13.9, bat

Epoch 9/10:  66%|▋| 652/991 [2:44:02<1:24:55, 15.03s/batch, batch_loss=19, batch

Epoch 9/10:  66%|▋| 653/991 [2:44:02<1:23:53, 14.89s/batch, batch_loss=19, batch

Epoch 9/10:  66%|▋| 653/991 [2:44:16<1:23:53, 14.89s/batch, batch_loss=17.8, bat

Epoch 9/10:  66%|▋| 654/991 [2:44:16<1:22:47, 14.74s/batch, batch_loss=17.8, bat

Epoch 9/10:  66%|▋| 654/991 [2:44:31<1:22:47, 14.74s/batch, batch_loss=3.83e+3, 

Epoch 9/10:  66%|▋| 655/991 [2:44:31<1:22:13, 14.68s/batch, batch_loss=3.83e+3, 

Epoch 9/10:  66%|▋| 655/991 [2:44:46<1:22:13, 14.68s/batch, batch_loss=5.19e+3, 

Epoch 9/10:  66%|▋| 656/991 [2:44:46<1:22:30, 14.78s/batch, batch_loss=5.19e+3, 

Epoch 9/10:  66%|▋| 656/991 [2:45:00<1:22:30, 14.78s/batch, batch_loss=4.21e+3, 

Epoch 9/10:  66%|▋| 657/991 [2:45:00<1:22:12, 14.77s/batch, batch_loss=4.21e+3, 

Epoch 9/10:  66%|▋| 657/991 [2:45:15<1:22:12, 14.77s/batch, batch_loss=2.2e+4, b

Epoch 9/10:  66%|▋| 658/991 [2:45:15<1:22:31, 14.87s/batch, batch_loss=2.2e+4, b

Epoch 9/10:  66%|▋| 658/991 [2:45:30<1:22:31, 14.87s/batch, batch_loss=4.47, bat

Epoch 9/10:  66%|▋| 659/991 [2:45:30<1:22:21, 14.88s/batch, batch_loss=4.47, bat

Epoch 9/10:  66%|▋| 659/991 [2:45:45<1:22:21, 14.88s/batch, batch_loss=5.86, bat

Epoch 9/10:  67%|▋| 660/991 [2:45:45<1:21:43, 14.82s/batch, batch_loss=5.86, bat

Epoch 9/10:  67%|▋| 660/991 [2:46:00<1:21:43, 14.82s/batch, batch_loss=13.8, bat

Epoch 9/10:  67%|▋| 661/991 [2:46:00<1:21:55, 14.89s/batch, batch_loss=13.8, bat

Epoch 9/10:  67%|▋| 661/991 [2:46:16<1:21:55, 14.89s/batch, batch_loss=16.1, bat

Epoch 9/10:  67%|▋| 662/991 [2:46:16<1:23:11, 15.17s/batch, batch_loss=16.1, bat

Epoch 9/10:  67%|▋| 662/991 [2:46:31<1:23:11, 15.17s/batch, batch_loss=17.1, bat

Epoch 9/10:  67%|▋| 663/991 [2:46:31<1:22:10, 15.03s/batch, batch_loss=17.1, bat

Epoch 9/10:  67%|▋| 663/991 [2:46:45<1:22:10, 15.03s/batch, batch_loss=3.05e+3, 

Epoch 9/10:  67%|▋| 664/991 [2:46:45<1:21:27, 14.95s/batch, batch_loss=3.05e+3, 

Epoch 9/10:  67%|▋| 664/991 [2:47:00<1:21:27, 14.95s/batch, batch_loss=14.7, bat

Epoch 9/10:  67%|▋| 665/991 [2:47:00<1:20:57, 14.90s/batch, batch_loss=14.7, bat

Epoch 9/10:  67%|▋| 665/991 [2:47:15<1:20:57, 14.90s/batch, batch_loss=3.07e+3, 

Epoch 9/10:  67%|▋| 666/991 [2:47:15<1:20:28, 14.86s/batch, batch_loss=3.07e+3, 

Epoch 9/10:  67%|▋| 666/991 [2:47:30<1:20:28, 14.86s/batch, batch_loss=19.3, bat

Epoch 9/10:  67%|▋| 667/991 [2:47:30<1:20:21, 14.88s/batch, batch_loss=19.3, bat

Epoch 9/10:  67%|▋| 667/991 [2:47:44<1:20:21, 14.88s/batch, batch_loss=374, batc

Epoch 9/10:  67%|▋| 668/991 [2:47:44<1:19:02, 14.68s/batch, batch_loss=374, batc

Epoch 9/10:  67%|▋| 668/991 [2:47:59<1:19:02, 14.68s/batch, batch_loss=2.96e+3, 

Epoch 9/10:  68%|▋| 669/991 [2:47:59<1:19:30, 14.82s/batch, batch_loss=2.96e+3, 

Epoch 9/10:  68%|▋| 669/991 [2:48:13<1:19:30, 14.82s/batch, batch_loss=1.01e+3, 

Epoch 9/10:  68%|▋| 670/991 [2:48:13<1:18:20, 14.64s/batch, batch_loss=1.01e+3, 

Epoch 9/10:  68%|▋| 670/991 [2:48:29<1:18:20, 14.64s/batch, batch_loss=11.3, bat

Epoch 9/10:  68%|▋| 671/991 [2:48:29<1:18:57, 14.81s/batch, batch_loss=11.3, bat

Epoch 9/10:  68%|▋| 671/991 [2:48:46<1:18:57, 14.81s/batch, batch_loss=14.4, bat

Epoch 9/10:  68%|▋| 672/991 [2:48:46<1:22:14, 15.47s/batch, batch_loss=14.4, bat

Epoch 9/10:  68%|▋| 672/991 [2:49:01<1:22:14, 15.47s/batch, batch_loss=17.3, bat

Epoch 9/10:  68%|▋| 673/991 [2:49:01<1:21:36, 15.40s/batch, batch_loss=17.3, bat

Epoch 9/10:  68%|▋| 673/991 [2:49:16<1:21:36, 15.40s/batch, batch_loss=15.8, bat

Epoch 9/10:  68%|▋| 674/991 [2:49:16<1:20:26, 15.22s/batch, batch_loss=15.8, bat

Epoch 9/10:  68%|▋| 674/991 [2:49:31<1:20:26, 15.22s/batch, batch_loss=4.56, bat

Epoch 9/10:  68%|▋| 675/991 [2:49:31<1:19:38, 15.12s/batch, batch_loss=4.56, bat

Epoch 9/10:  68%|▋| 675/991 [2:49:46<1:19:38, 15.12s/batch, batch_loss=8.87, bat

Epoch 9/10:  68%|▋| 676/991 [2:49:46<1:19:37, 15.17s/batch, batch_loss=8.87, bat

Epoch 9/10:  68%|▋| 676/991 [2:50:00<1:19:37, 15.17s/batch, batch_loss=15.7, bat

Epoch 9/10:  68%|▋| 677/991 [2:50:00<1:18:30, 15.00s/batch, batch_loss=15.7, bat

Epoch 9/10:  68%|▋| 677/991 [2:50:16<1:18:30, 15.00s/batch, batch_loss=7.72, bat

Epoch 9/10:  68%|▋| 678/991 [2:50:16<1:18:34, 15.06s/batch, batch_loss=7.72, bat

Epoch 9/10:  68%|▋| 678/991 [2:50:30<1:18:34, 15.06s/batch, batch_loss=3.81e+3, 

Epoch 9/10:  69%|▋| 679/991 [2:50:30<1:17:39, 14.94s/batch, batch_loss=3.81e+3, 

Epoch 9/10:  69%|▋| 679/991 [2:50:44<1:17:39, 14.94s/batch, batch_loss=6.1e+3, b

Epoch 9/10:  69%|▋| 680/991 [2:50:44<1:16:13, 14.71s/batch, batch_loss=6.1e+3, b

Epoch 9/10:  69%|▋| 680/991 [2:50:59<1:16:13, 14.71s/batch, batch_loss=7.24e+4, 

Epoch 9/10:  69%|▋| 681/991 [2:50:59<1:15:22, 14.59s/batch, batch_loss=7.24e+4, 

Epoch 9/10:  69%|▋| 681/991 [2:51:14<1:15:22, 14.59s/batch, batch_loss=17.2, bat

Epoch 9/10:  69%|▋| 682/991 [2:51:14<1:16:24, 14.84s/batch, batch_loss=17.2, bat

Epoch 9/10:  69%|▋| 682/991 [2:51:28<1:16:24, 14.84s/batch, batch_loss=375, batc

Epoch 9/10:  69%|▋| 683/991 [2:51:28<1:15:12, 14.65s/batch, batch_loss=375, batc

Epoch 9/10:  69%|▋| 683/991 [2:51:43<1:15:12, 14.65s/batch, batch_loss=6.09, bat

Epoch 9/10:  69%|▋| 684/991 [2:51:43<1:14:16, 14.52s/batch, batch_loss=6.09, bat

Epoch 9/10:  69%|▋| 684/991 [2:51:58<1:14:16, 14.52s/batch, batch_loss=13.9, bat

Epoch 9/10:  69%|▋| 685/991 [2:51:58<1:15:01, 14.71s/batch, batch_loss=13.9, bat

Epoch 9/10:  69%|▋| 685/991 [2:52:12<1:15:01, 14.71s/batch, batch_loss=11.8, bat

Epoch 9/10:  69%|▋| 686/991 [2:52:12<1:14:31, 14.66s/batch, batch_loss=11.8, bat

Epoch 9/10:  69%|▋| 686/991 [2:52:27<1:14:31, 14.66s/batch, batch_loss=538, batc

Epoch 9/10:  69%|▋| 687/991 [2:52:27<1:14:11, 14.64s/batch, batch_loss=538, batc

Epoch 9/10:  69%|▋| 687/991 [2:52:42<1:14:11, 14.64s/batch, batch_loss=4.93, bat

Epoch 9/10:  69%|▋| 688/991 [2:52:42<1:14:42, 14.79s/batch, batch_loss=4.93, bat

Epoch 9/10:  69%|▋| 688/991 [2:52:57<1:14:42, 14.79s/batch, batch_loss=7.59, bat

Epoch 9/10:  70%|▋| 689/991 [2:52:57<1:14:47, 14.86s/batch, batch_loss=7.59, bat

Epoch 9/10:  70%|▋| 689/991 [2:53:12<1:14:47, 14.86s/batch, batch_loss=13.5, bat

Epoch 9/10:  70%|▋| 690/991 [2:53:12<1:14:33, 14.86s/batch, batch_loss=13.5, bat

Epoch 9/10:  70%|▋| 690/991 [2:53:26<1:14:33, 14.86s/batch, batch_loss=17.1, bat

Epoch 9/10:  70%|▋| 691/991 [2:53:26<1:13:08, 14.63s/batch, batch_loss=17.1, bat

Epoch 9/10:  70%|▋| 691/991 [2:53:41<1:13:08, 14.63s/batch, batch_loss=5.91, bat

Epoch 9/10:  70%|▋| 692/991 [2:53:41<1:13:42, 14.79s/batch, batch_loss=5.91, bat

Epoch 9/10:  70%|▋| 692/991 [2:53:56<1:13:42, 14.79s/batch, batch_loss=4.66e+3, 

Epoch 9/10:  70%|▋| 693/991 [2:53:56<1:14:12, 14.94s/batch, batch_loss=4.66e+3, 

Epoch 9/10:  70%|▋| 693/991 [2:54:12<1:14:12, 14.94s/batch, batch_loss=459, batc

Epoch 9/10:  70%|▋| 694/991 [2:54:12<1:14:15, 15.00s/batch, batch_loss=459, batc

Epoch 9/10:  70%|▋| 694/991 [2:54:26<1:14:15, 15.00s/batch, batch_loss=773, batc

Epoch 9/10:  70%|▋| 695/991 [2:54:26<1:13:21, 14.87s/batch, batch_loss=773, batc

Epoch 9/10:  70%|▋| 695/991 [2:54:41<1:13:21, 14.87s/batch, batch_loss=10.3, bat

Epoch 9/10:  70%|▋| 696/991 [2:54:41<1:12:50, 14.82s/batch, batch_loss=10.3, bat

Epoch 9/10:  70%|▋| 696/991 [2:54:56<1:12:50, 14.82s/batch, batch_loss=6.8e+3, b

Epoch 9/10:  70%|▋| 697/991 [2:54:56<1:13:29, 15.00s/batch, batch_loss=6.8e+3, b

Epoch 9/10:  70%|▋| 697/991 [2:55:11<1:13:29, 15.00s/batch, batch_loss=11.8, bat

Epoch 9/10:  70%|▋| 698/991 [2:55:11<1:13:10, 14.98s/batch, batch_loss=11.8, bat

Epoch 9/10:  70%|▋| 698/991 [2:55:26<1:13:10, 14.98s/batch, batch_loss=8.97, bat

Epoch 9/10:  71%|▋| 699/991 [2:55:26<1:12:53, 14.98s/batch, batch_loss=8.97, bat

Epoch 9/10:  71%|▋| 699/991 [2:55:44<1:12:53, 14.98s/batch, batch_loss=10.1, bat

Epoch 9/10:  71%|▋| 700/991 [2:55:44<1:16:40, 15.81s/batch, batch_loss=10.1, bat

Epoch 9/10:  71%|▋| 700/991 [2:55:59<1:16:40, 15.81s/batch, batch_loss=213, batc

Epoch 9/10:  71%|▋| 701/991 [2:55:59<1:14:37, 15.44s/batch, batch_loss=213, batc

Epoch 9/10:  71%|▋| 701/991 [2:56:14<1:14:37, 15.44s/batch, batch_loss=18.1, bat

Epoch 9/10:  71%|▋| 702/991 [2:56:14<1:14:00, 15.37s/batch, batch_loss=18.1, bat

Epoch 9/10:  71%|▋| 702/991 [2:56:27<1:14:00, 15.37s/batch, batch_loss=275, batc

Epoch 9/10:  71%|▋| 703/991 [2:56:27<1:10:40, 14.73s/batch, batch_loss=275, batc

Epoch 9/10:  71%|▋| 703/991 [2:56:40<1:10:40, 14.73s/batch, batch_loss=8.63, bat

Epoch 9/10:  71%|▋| 704/991 [2:56:40<1:08:00, 14.22s/batch, batch_loss=8.63, bat

Epoch 9/10:  71%|▋| 704/991 [2:56:53<1:08:00, 14.22s/batch, batch_loss=11.8, bat

Epoch 9/10:  71%|▋| 705/991 [2:56:53<1:06:05, 13.87s/batch, batch_loss=11.8, bat

Epoch 9/10:  71%|▋| 705/991 [2:57:08<1:06:05, 13.87s/batch, batch_loss=17, batch

Epoch 9/10:  71%|▋| 706/991 [2:57:08<1:07:03, 14.12s/batch, batch_loss=17, batch

Epoch 9/10:  71%|▋| 706/991 [2:57:22<1:07:03, 14.12s/batch, batch_loss=15, batch

Epoch 9/10:  71%|▋| 707/991 [2:57:22<1:07:39, 14.30s/batch, batch_loss=15, batch

Epoch 9/10:  71%|▋| 707/991 [2:57:37<1:07:39, 14.30s/batch, batch_loss=9.06, bat

Epoch 9/10:  71%|▋| 708/991 [2:57:37<1:07:54, 14.40s/batch, batch_loss=9.06, bat

Epoch 9/10:  71%|▋| 708/991 [2:57:55<1:07:54, 14.40s/batch, batch_loss=7.47, bat

Epoch 9/10:  72%|▋| 709/991 [2:57:55<1:12:00, 15.32s/batch, batch_loss=7.47, bat

Epoch 9/10:  72%|▋| 709/991 [2:58:09<1:12:00, 15.32s/batch, batch_loss=31.4, bat

Epoch 9/10:  72%|▋| 710/991 [2:58:09<1:10:19, 15.02s/batch, batch_loss=31.4, bat

Epoch 9/10:  72%|▋| 710/991 [2:58:24<1:10:19, 15.02s/batch, batch_loss=98.2, bat

Epoch 9/10:  72%|▋| 711/991 [2:58:24<1:09:47, 14.96s/batch, batch_loss=98.2, bat

Epoch 9/10:  72%|▋| 711/991 [2:58:38<1:09:47, 14.96s/batch, batch_loss=14.2, bat

Epoch 9/10:  72%|▋| 712/991 [2:58:38<1:08:03, 14.64s/batch, batch_loss=14.2, bat

Epoch 9/10:  72%|▋| 712/991 [2:58:53<1:08:03, 14.64s/batch, batch_loss=75.9, bat

Epoch 9/10:  72%|▋| 713/991 [2:58:53<1:08:35, 14.80s/batch, batch_loss=75.9, bat

Epoch 9/10:  72%|▋| 713/991 [2:59:08<1:08:35, 14.80s/batch, batch_loss=20.4, bat

Epoch 9/10:  72%|▋| 714/991 [2:59:08<1:09:07, 14.97s/batch, batch_loss=20.4, bat

Epoch 9/10:  72%|▋| 714/991 [2:59:26<1:09:07, 14.97s/batch, batch_loss=16.1, bat

Epoch 9/10:  72%|▋| 715/991 [2:59:26<1:13:10, 15.91s/batch, batch_loss=16.1, bat

Epoch 9/10:  72%|▋| 715/991 [2:59:42<1:13:10, 15.91s/batch, batch_loss=15.2, bat

Epoch 9/10:  72%|▋| 716/991 [2:59:42<1:12:37, 15.85s/batch, batch_loss=15.2, bat

Epoch 9/10:  72%|▋| 716/991 [2:59:58<1:12:37, 15.85s/batch, batch_loss=15.5, bat

Epoch 9/10:  72%|▋| 717/991 [2:59:58<1:12:14, 15.82s/batch, batch_loss=15.5, bat

Epoch 9/10:  72%|▋| 717/991 [3:00:13<1:12:14, 15.82s/batch, batch_loss=21.6, bat

Epoch 9/10:  72%|▋| 718/991 [3:00:13<1:11:32, 15.72s/batch, batch_loss=21.6, bat

Epoch 9/10:  72%|▋| 718/991 [3:00:27<1:11:32, 15.72s/batch, batch_loss=12.2, bat

Epoch 9/10:  73%|▋| 719/991 [3:00:27<1:09:16, 15.28s/batch, batch_loss=12.2, bat

Epoch 9/10:  73%|▋| 719/991 [3:00:43<1:09:16, 15.28s/batch, batch_loss=12.4, bat

Epoch 9/10:  73%|▋| 720/991 [3:00:43<1:08:52, 15.25s/batch, batch_loss=12.4, bat

Epoch 9/10:  73%|▋| 720/991 [3:00:57<1:08:52, 15.25s/batch, batch_loss=18.1, bat

Epoch 9/10:  73%|▋| 721/991 [3:00:57<1:07:52, 15.08s/batch, batch_loss=18.1, bat

Epoch 9/10:  73%|▋| 721/991 [3:01:12<1:07:52, 15.08s/batch, batch_loss=20.4, bat

Epoch 9/10:  73%|▋| 722/991 [3:01:12<1:07:41, 15.10s/batch, batch_loss=20.4, bat

Epoch 9/10:  73%|▋| 722/991 [3:01:30<1:07:41, 15.10s/batch, batch_loss=7.22e+3, 

Epoch 9/10:  73%|▋| 723/991 [3:01:30<1:10:59, 15.90s/batch, batch_loss=7.22e+3, 

Epoch 9/10:  73%|▋| 723/991 [3:01:45<1:10:59, 15.90s/batch, batch_loss=4.18, bat

Epoch 9/10:  73%|▋| 724/991 [3:01:45<1:08:59, 15.50s/batch, batch_loss=4.18, bat

Epoch 9/10:  73%|▋| 724/991 [3:02:00<1:08:59, 15.50s/batch, batch_loss=15.4, bat

Epoch 9/10:  73%|▋| 725/991 [3:02:00<1:08:06, 15.36s/batch, batch_loss=15.4, bat

Epoch 9/10:  73%|▋| 725/991 [3:02:15<1:08:06, 15.36s/batch, batch_loss=11.2, bat

Epoch 9/10:  73%|▋| 726/991 [3:02:15<1:07:36, 15.31s/batch, batch_loss=11.2, bat

Epoch 9/10:  73%|▋| 726/991 [3:02:31<1:07:36, 15.31s/batch, batch_loss=1.3e+4, b

Epoch 9/10:  73%|▋| 727/991 [3:02:31<1:07:38, 15.37s/batch, batch_loss=1.3e+4, b

Epoch 9/10:  73%|▋| 727/991 [3:02:45<1:07:38, 15.37s/batch, batch_loss=12.1, bat

Epoch 9/10:  73%|▋| 728/991 [3:02:45<1:06:31, 15.18s/batch, batch_loss=12.1, bat

Epoch 9/10:  73%|▋| 728/991 [3:03:00<1:06:31, 15.18s/batch, batch_loss=129, batc

Epoch 9/10:  74%|▋| 729/991 [3:03:00<1:05:49, 15.07s/batch, batch_loss=129, batc

Epoch 9/10:  74%|▋| 729/991 [3:03:15<1:05:49, 15.07s/batch, batch_loss=10.9, bat

Epoch 9/10:  74%|▋| 730/991 [3:03:15<1:05:39, 15.09s/batch, batch_loss=10.9, bat

Epoch 9/10:  74%|▋| 730/991 [3:03:33<1:05:39, 15.09s/batch, batch_loss=106, batc

Epoch 9/10:  74%|▋| 731/991 [3:03:33<1:08:24, 15.79s/batch, batch_loss=106, batc

Epoch 9/10:  74%|▋| 731/991 [3:03:48<1:08:24, 15.79s/batch, batch_loss=1.39e+4, 

Epoch 9/10:  74%|▋| 732/991 [3:03:48<1:07:02, 15.53s/batch, batch_loss=1.39e+4, 

Epoch 9/10:  74%|▋| 732/991 [3:04:03<1:07:02, 15.53s/batch, batch_loss=17.4, bat

Epoch 9/10:  74%|▋| 733/991 [3:04:03<1:06:14, 15.40s/batch, batch_loss=17.4, bat

Epoch 9/10:  74%|▋| 733/991 [3:04:18<1:06:14, 15.40s/batch, batch_loss=6.82e+3, 

Epoch 9/10:  74%|▋| 734/991 [3:04:18<1:06:24, 15.50s/batch, batch_loss=6.82e+3, 

Epoch 9/10:  74%|▋| 734/991 [3:04:34<1:06:24, 15.50s/batch, batch_loss=16.8, bat

Epoch 9/10:  74%|▋| 735/991 [3:04:34<1:05:45, 15.41s/batch, batch_loss=16.8, bat

Epoch 9/10:  74%|▋| 735/991 [3:04:49<1:05:45, 15.41s/batch, batch_loss=11.9, bat

Epoch 9/10:  74%|▋| 736/991 [3:04:49<1:05:04, 15.31s/batch, batch_loss=11.9, bat

Epoch 9/10:  74%|▋| 736/991 [3:05:03<1:05:04, 15.31s/batch, batch_loss=9.18, bat

Epoch 9/10:  74%|▋| 737/991 [3:05:03<1:03:34, 15.02s/batch, batch_loss=9.18, bat

Epoch 9/10:  74%|▋| 737/991 [3:05:18<1:03:34, 15.02s/batch, batch_loss=1.47e+3, 

Epoch 9/10:  74%|▋| 738/991 [3:05:18<1:03:22, 15.03s/batch, batch_loss=1.47e+3, 

Epoch 9/10:  74%|▋| 738/991 [3:05:35<1:03:22, 15.03s/batch, batch_loss=27.8, bat

Epoch 9/10:  75%|▋| 739/991 [3:05:35<1:06:09, 15.75s/batch, batch_loss=27.8, bat

Epoch 9/10:  75%|▋| 739/991 [3:05:51<1:06:09, 15.75s/batch, batch_loss=9.54, bat

Epoch 9/10:  75%|▋| 740/991 [3:05:51<1:05:11, 15.59s/batch, batch_loss=9.54, bat

Epoch 9/10:  75%|▋| 740/991 [3:06:05<1:05:11, 15.59s/batch, batch_loss=1.81e+4, 

Epoch 9/10:  75%|▋| 741/991 [3:06:05<1:03:30, 15.24s/batch, batch_loss=1.81e+4, 

Epoch 9/10:  75%|▋| 741/991 [3:06:21<1:03:30, 15.24s/batch, batch_loss=2.27e+3, 

Epoch 9/10:  75%|▋| 742/991 [3:06:21<1:03:32, 15.31s/batch, batch_loss=2.27e+3, 

Epoch 9/10:  75%|▋| 742/991 [3:06:36<1:03:32, 15.31s/batch, batch_loss=9.48, bat

Epoch 9/10:  75%|▋| 743/991 [3:06:36<1:02:57, 15.23s/batch, batch_loss=9.48, bat

Epoch 9/10:  75%|▋| 743/991 [3:06:50<1:02:57, 15.23s/batch, batch_loss=12.3, bat

Epoch 9/10:  75%|▊| 744/991 [3:06:50<1:01:49, 15.02s/batch, batch_loss=12.3, bat

Epoch 9/10:  75%|▊| 744/991 [3:07:05<1:01:49, 15.02s/batch, batch_loss=15, batch

Epoch 9/10:  75%|▊| 745/991 [3:07:05<1:00:50, 14.84s/batch, batch_loss=15, batch

Epoch 9/10:  75%|▊| 745/991 [3:07:20<1:00:50, 14.84s/batch, batch_loss=1.15e+3, 

Epoch 9/10:  75%|▊| 746/991 [3:07:20<1:00:54, 14.92s/batch, batch_loss=1.15e+3, 

Epoch 9/10:  75%|▊| 746/991 [3:07:34<1:00:54, 14.92s/batch, batch_loss=3.87e+3, 

Epoch 9/10:  75%|▊| 747/991 [3:07:34<1:00:08, 14.79s/batch, batch_loss=3.87e+3, 

Epoch 9/10:  75%|▊| 747/991 [3:07:49<1:00:08, 14.79s/batch, batch_loss=12.9, bat

Epoch 9/10:  75%|▊| 748/991 [3:07:49<1:00:21, 14.90s/batch, batch_loss=12.9, bat

Epoch 9/10:  75%|▊| 748/991 [3:08:04<1:00:21, 14.90s/batch, batch_loss=12.4, bat

Epoch 9/10:  76%|▊| 749/991 [3:08:04<59:41, 14.80s/batch, batch_loss=12.4, batch

Epoch 9/10:  76%|▊| 749/991 [3:08:19<59:41, 14.80s/batch, batch_loss=9.53, batch

Epoch 9/10:  76%|▊| 750/991 [3:08:19<59:36, 14.84s/batch, batch_loss=9.53, batch

Epoch 9/10:  76%|▊| 750/991 [3:08:33<59:36, 14.84s/batch, batch_loss=10.5, batch

Epoch 9/10:  76%|▊| 751/991 [3:08:33<58:51, 14.71s/batch, batch_loss=10.5, batch

Epoch 9/10:  76%|▊| 751/991 [3:08:48<58:51, 14.71s/batch, batch_loss=6.32, batch

Epoch 9/10:  76%|▊| 752/991 [3:08:48<58:42, 14.74s/batch, batch_loss=6.32, batch

Epoch 9/10:  76%|▊| 752/991 [3:09:03<58:42, 14.74s/batch, batch_loss=7.19, batch

Epoch 9/10:  76%|▊| 753/991 [3:09:03<58:08, 14.66s/batch, batch_loss=7.19, batch

Epoch 9/10:  76%|▊| 753/991 [3:09:18<58:08, 14.66s/batch, batch_loss=4.8, batch_

Epoch 9/10:  76%|▊| 754/991 [3:09:18<59:18, 15.01s/batch, batch_loss=4.8, batch_

Epoch 9/10:  76%|▊| 754/991 [3:09:36<59:18, 15.01s/batch, batch_loss=12.9, batch

Epoch 9/10:  76%|▊| 755/991 [3:09:36<1:02:06, 15.79s/batch, batch_loss=12.9, bat

Epoch 9/10:  76%|▊| 755/991 [3:09:50<1:02:06, 15.79s/batch, batch_loss=13.6, bat

Epoch 9/10:  76%|▊| 756/991 [3:09:50<1:00:16, 15.39s/batch, batch_loss=13.6, bat

Epoch 9/10:  76%|▊| 756/991 [3:10:05<1:00:16, 15.39s/batch, batch_loss=4.75, bat

Epoch 9/10:  76%|▊| 757/991 [3:10:05<58:51, 15.09s/batch, batch_loss=4.75, batch

Epoch 9/10:  76%|▊| 757/991 [3:10:20<58:51, 15.09s/batch, batch_loss=14.9, batch

Epoch 9/10:  76%|▊| 758/991 [3:10:20<59:18, 15.27s/batch, batch_loss=14.9, batch

Epoch 9/10:  76%|▊| 758/991 [3:10:35<59:18, 15.27s/batch, batch_loss=15.5, batch

Epoch 9/10:  77%|▊| 759/991 [3:10:35<58:35, 15.15s/batch, batch_loss=15.5, batch

Epoch 9/10:  77%|▊| 759/991 [3:10:51<58:35, 15.15s/batch, batch_loss=16.7, batch

Epoch 9/10:  77%|▊| 760/991 [3:10:51<58:27, 15.18s/batch, batch_loss=16.7, batch

Epoch 9/10:  77%|▊| 760/991 [3:11:06<58:27, 15.18s/batch, batch_loss=16.1, batch

Epoch 9/10:  77%|▊| 761/991 [3:11:06<58:45, 15.33s/batch, batch_loss=16.1, batch

Epoch 9/10:  77%|▊| 761/991 [3:11:21<58:45, 15.33s/batch, batch_loss=23.7, batch

Epoch 9/10:  77%|▊| 762/991 [3:11:21<57:22, 15.03s/batch, batch_loss=23.7, batch

Epoch 9/10:  77%|▊| 762/991 [3:11:35<57:22, 15.03s/batch, batch_loss=514, batch_

Epoch 9/10:  77%|▊| 763/991 [3:11:35<56:16, 14.81s/batch, batch_loss=514, batch_

Epoch 9/10:  77%|▊| 763/991 [3:11:52<56:16, 14.81s/batch, batch_loss=9.31, batch

Epoch 9/10:  77%|▊| 764/991 [3:11:52<58:25, 15.44s/batch, batch_loss=9.31, batch

Epoch 9/10:  77%|▊| 764/991 [3:12:07<58:25, 15.44s/batch, batch_loss=3.25, batch

Epoch 9/10:  77%|▊| 765/991 [3:12:07<57:44, 15.33s/batch, batch_loss=3.25, batch

Epoch 9/10:  77%|▊| 765/991 [3:12:22<57:44, 15.33s/batch, batch_loss=12.6, batch

Epoch 9/10:  77%|▊| 766/991 [3:12:22<57:00, 15.20s/batch, batch_loss=12.6, batch

Epoch 9/10:  77%|▊| 766/991 [3:12:38<57:00, 15.20s/batch, batch_loss=13.6, batch

Epoch 9/10:  77%|▊| 767/991 [3:12:38<57:28, 15.40s/batch, batch_loss=13.6, batch

Epoch 9/10:  77%|▊| 767/991 [3:12:52<57:28, 15.40s/batch, batch_loss=3.75, batch

Epoch 9/10:  77%|▊| 768/991 [3:12:52<56:35, 15.23s/batch, batch_loss=3.75, batch

Epoch 9/10:  77%|▊| 768/991 [3:13:07<56:35, 15.23s/batch, batch_loss=2.4, batch_

Epoch 9/10:  78%|▊| 769/991 [3:13:07<55:59, 15.13s/batch, batch_loss=2.4, batch_

Epoch 9/10:  78%|▊| 769/991 [3:13:22<55:59, 15.13s/batch, batch_loss=11.3, batch

Epoch 9/10:  78%|▊| 770/991 [3:13:22<54:42, 14.85s/batch, batch_loss=11.3, batch

Epoch 9/10:  78%|▊| 770/991 [3:13:36<54:42, 14.85s/batch, batch_loss=2.73e+3, ba

Epoch 9/10:  78%|▊| 771/991 [3:13:36<54:15, 14.80s/batch, batch_loss=2.73e+3, ba

Epoch 9/10:  78%|▊| 771/991 [3:13:51<54:15, 14.80s/batch, batch_loss=4.87, batch

Epoch 9/10:  78%|▊| 772/991 [3:13:51<54:10, 14.84s/batch, batch_loss=4.87, batch

Epoch 9/10:  78%|▊| 772/991 [3:14:05<54:10, 14.84s/batch, batch_loss=1.38, batch

Epoch 9/10:  78%|▊| 773/991 [3:14:05<52:45, 14.52s/batch, batch_loss=1.38, batch

Epoch 9/10:  78%|▊| 773/991 [3:14:20<52:45, 14.52s/batch, batch_loss=7.72, batch

Epoch 9/10:  78%|▊| 774/991 [3:14:20<52:46, 14.59s/batch, batch_loss=7.72, batch

Epoch 9/10:  78%|▊| 774/991 [3:14:34<52:46, 14.59s/batch, batch_loss=7.52, batch

Epoch 9/10:  78%|▊| 775/991 [3:14:34<52:36, 14.61s/batch, batch_loss=7.52, batch

Epoch 9/10:  78%|▊| 775/991 [3:14:49<52:36, 14.61s/batch, batch_loss=255, batch_

Epoch 9/10:  78%|▊| 776/991 [3:14:49<52:23, 14.62s/batch, batch_loss=255, batch_

Epoch 9/10:  78%|▊| 776/991 [3:15:04<52:23, 14.62s/batch, batch_loss=0.622, batc

Epoch 9/10:  78%|▊| 777/991 [3:15:04<52:34, 14.74s/batch, batch_loss=0.622, batc

Epoch 9/10:  78%|▊| 777/991 [3:15:19<52:34, 14.74s/batch, batch_loss=0.903, batc

Epoch 9/10:  79%|▊| 778/991 [3:15:19<52:20, 14.74s/batch, batch_loss=0.903, batc

Epoch 9/10:  79%|▊| 778/991 [3:15:34<52:20, 14.74s/batch, batch_loss=5.8, batch_

Epoch 9/10:  79%|▊| 779/991 [3:15:34<52:36, 14.89s/batch, batch_loss=5.8, batch_

Epoch 9/10:  79%|▊| 779/991 [3:15:49<52:36, 14.89s/batch, batch_loss=3.03, batch

Epoch 9/10:  79%|▊| 780/991 [3:15:49<52:10, 14.84s/batch, batch_loss=3.03, batch

Epoch 9/10:  79%|▊| 780/991 [3:16:04<52:10, 14.84s/batch, batch_loss=3.35, batch

Epoch 9/10:  79%|▊| 781/991 [3:16:04<52:02, 14.87s/batch, batch_loss=3.35, batch

Epoch 9/10:  79%|▊| 781/991 [3:16:18<52:02, 14.87s/batch, batch_loss=2.51e+4, ba

Epoch 9/10:  79%|▊| 782/991 [3:16:18<51:10, 14.69s/batch, batch_loss=2.51e+4, ba

Epoch 9/10:  79%|▊| 782/991 [3:16:33<51:10, 14.69s/batch, batch_loss=17.1, batch

Epoch 9/10:  79%|▊| 783/991 [3:16:33<51:11, 14.77s/batch, batch_loss=17.1, batch

Epoch 9/10:  79%|▊| 783/991 [3:16:47<51:11, 14.77s/batch, batch_loss=14.6, batch

Epoch 9/10:  79%|▊| 784/991 [3:16:47<50:25, 14.61s/batch, batch_loss=14.6, batch

Epoch 9/10:  79%|▊| 784/991 [3:17:03<50:25, 14.61s/batch, batch_loss=12.8, batch

Epoch 9/10:  79%|▊| 785/991 [3:17:03<50:54, 14.83s/batch, batch_loss=12.8, batch

Epoch 9/10:  79%|▊| 785/991 [3:17:20<50:54, 14.83s/batch, batch_loss=7.94, batch

Epoch 9/10:  79%|▊| 786/991 [3:17:20<53:19, 15.61s/batch, batch_loss=7.94, batch

Epoch 9/10:  79%|▊| 786/991 [3:17:37<53:19, 15.61s/batch, batch_loss=2.48e+4, ba

Epoch 9/10:  79%|▊| 787/991 [3:17:37<54:48, 16.12s/batch, batch_loss=2.48e+4, ba

Epoch 9/10:  79%|▊| 787/991 [3:17:53<54:48, 16.12s/batch, batch_loss=686, batch_

Epoch 9/10:  80%|▊| 788/991 [3:17:53<53:49, 15.91s/batch, batch_loss=686, batch_

Epoch 9/10:  80%|▊| 788/991 [3:18:07<53:49, 15.91s/batch, batch_loss=17.9, batch

Epoch 9/10:  80%|▊| 789/991 [3:18:07<52:04, 15.47s/batch, batch_loss=17.9, batch

Epoch 9/10:  80%|▊| 789/991 [3:18:23<52:04, 15.47s/batch, batch_loss=12.7, batch

Epoch 9/10:  80%|▊| 790/991 [3:18:23<52:03, 15.54s/batch, batch_loss=12.7, batch

Epoch 9/10:  80%|▊| 790/991 [3:18:39<52:03, 15.54s/batch, batch_loss=13.5, batch

Epoch 9/10:  80%|▊| 791/991 [3:18:39<52:04, 15.62s/batch, batch_loss=13.5, batch

Epoch 9/10:  80%|▊| 791/991 [3:18:54<52:04, 15.62s/batch, batch_loss=1.04e+4, ba

Epoch 9/10:  80%|▊| 792/991 [3:18:54<51:30, 15.53s/batch, batch_loss=1.04e+4, ba

Epoch 9/10:  80%|▊| 792/991 [3:19:09<51:30, 15.53s/batch, batch_loss=8.2, batch_

Epoch 9/10:  80%|▊| 793/991 [3:19:09<51:01, 15.46s/batch, batch_loss=8.2, batch_

Epoch 9/10:  80%|▊| 793/991 [3:19:25<51:01, 15.46s/batch, batch_loss=1.96, batch

Epoch 9/10:  80%|▊| 794/991 [3:19:25<50:44, 15.45s/batch, batch_loss=1.96, batch

Epoch 9/10:  80%|▊| 794/991 [3:19:43<50:44, 15.45s/batch, batch_loss=7.66, batch

Epoch 9/10:  80%|▊| 795/991 [3:19:43<53:28, 16.37s/batch, batch_loss=7.66, batch

Epoch 9/10:  80%|▊| 795/991 [3:19:59<53:28, 16.37s/batch, batch_loss=10.9, batch

Epoch 9/10:  80%|▊| 796/991 [3:19:59<52:25, 16.13s/batch, batch_loss=10.9, batch

Epoch 9/10:  80%|▊| 796/991 [3:20:14<52:25, 16.13s/batch, batch_loss=18.6, batch

Epoch 9/10:  80%|▊| 797/991 [3:20:14<51:25, 15.90s/batch, batch_loss=18.6, batch

Epoch 9/10:  80%|▊| 797/991 [3:20:29<51:25, 15.90s/batch, batch_loss=334, batch_

Epoch 9/10:  81%|▊| 798/991 [3:20:29<50:28, 15.69s/batch, batch_loss=334, batch_

Epoch 9/10:  81%|▊| 798/991 [3:20:44<50:28, 15.69s/batch, batch_loss=9.7, batch_

Epoch 9/10:  81%|▊| 799/991 [3:20:44<48:47, 15.25s/batch, batch_loss=9.7, batch_

Epoch 9/10:  81%|▊| 799/991 [3:20:59<48:47, 15.25s/batch, batch_loss=14.1, batch

Epoch 9/10:  81%|▊| 800/991 [3:20:59<48:22, 15.20s/batch, batch_loss=14.1, batch

Epoch 9/10:  81%|▊| 800/991 [3:21:13<48:22, 15.20s/batch, batch_loss=10.8, batch

Epoch 9/10:  81%|▊| 801/991 [3:21:13<47:28, 14.99s/batch, batch_loss=10.8, batch

Epoch 9/10:  81%|▊| 801/991 [3:21:28<47:28, 14.99s/batch, batch_loss=14.8, batch

Epoch 9/10:  81%|▊| 802/991 [3:21:28<47:31, 15.09s/batch, batch_loss=14.8, batch

Epoch 9/10:  81%|▊| 802/991 [3:21:45<47:31, 15.09s/batch, batch_loss=6.04, batch

Epoch 9/10:  81%|▊| 803/991 [3:21:45<48:51, 15.59s/batch, batch_loss=6.04, batch

Epoch 9/10:  81%|▊| 803/991 [3:22:00<48:51, 15.59s/batch, batch_loss=11.8, batch

Epoch 9/10:  81%|▊| 804/991 [3:22:00<48:05, 15.43s/batch, batch_loss=11.8, batch

Epoch 9/10:  81%|▊| 804/991 [3:22:15<48:05, 15.43s/batch, batch_loss=5.8, batch_

Epoch 9/10:  81%|▊| 805/991 [3:22:15<47:33, 15.34s/batch, batch_loss=5.8, batch_

Epoch 9/10:  81%|▊| 805/991 [3:22:31<47:33, 15.34s/batch, batch_loss=9.86, batch

Epoch 9/10:  81%|▊| 806/991 [3:22:31<47:29, 15.40s/batch, batch_loss=9.86, batch

Epoch 9/10:  81%|▊| 806/991 [3:22:46<47:29, 15.40s/batch, batch_loss=8.89, batch

Epoch 9/10:  81%|▊| 807/991 [3:22:46<46:44, 15.24s/batch, batch_loss=8.89, batch

Epoch 9/10:  81%|▊| 807/991 [3:23:00<46:44, 15.24s/batch, batch_loss=16.8, batch

Epoch 9/10:  82%|▊| 808/991 [3:23:00<45:53, 15.05s/batch, batch_loss=16.8, batch

Epoch 9/10:  82%|▊| 808/991 [3:23:15<45:53, 15.05s/batch, batch_loss=1.21e+4, ba

Epoch 9/10:  82%|▊| 809/991 [3:23:15<44:52, 14.80s/batch, batch_loss=1.21e+4, ba

Epoch 9/10:  82%|▊| 809/991 [3:23:31<44:52, 14.80s/batch, batch_loss=13.3, batch

Epoch 9/10:  82%|▊| 810/991 [3:23:31<46:12, 15.32s/batch, batch_loss=13.3, batch

Epoch 9/10:  82%|▊| 810/991 [3:23:47<46:12, 15.32s/batch, batch_loss=6.94, batch

Epoch 9/10:  82%|▊| 811/991 [3:23:47<46:23, 15.46s/batch, batch_loss=6.94, batch

Epoch 9/10:  82%|▊| 811/991 [3:24:02<46:23, 15.46s/batch, batch_loss=6.34, batch

Epoch 9/10:  82%|▊| 812/991 [3:24:02<45:49, 15.36s/batch, batch_loss=6.34, batch

Epoch 9/10:  82%|▊| 812/991 [3:24:17<45:49, 15.36s/batch, batch_loss=7.31, batch

Epoch 9/10:  82%|▊| 813/991 [3:24:17<44:56, 15.15s/batch, batch_loss=7.31, batch

Epoch 9/10:  82%|▊| 813/991 [3:24:32<44:56, 15.15s/batch, batch_loss=11.3, batch

Epoch 9/10:  82%|▊| 814/991 [3:24:32<44:43, 15.16s/batch, batch_loss=11.3, batch

Epoch 9/10:  82%|▊| 814/991 [3:24:47<44:43, 15.16s/batch, batch_loss=6.96, batch

Epoch 9/10:  82%|▊| 815/991 [3:24:47<44:06, 15.04s/batch, batch_loss=6.96, batch

Epoch 9/10:  82%|▊| 815/991 [3:25:02<44:06, 15.04s/batch, batch_loss=90.6, batch

Epoch 9/10:  82%|▊| 816/991 [3:25:02<43:54, 15.06s/batch, batch_loss=90.6, batch

Epoch 9/10:  82%|▊| 816/991 [3:25:18<43:54, 15.06s/batch, batch_loss=357, batch_

Epoch 9/10:  82%|▊| 817/991 [3:25:18<44:52, 15.48s/batch, batch_loss=357, batch_

Epoch 9/10:  82%|▊| 817/991 [3:25:32<44:52, 15.48s/batch, batch_loss=359, batch_

Epoch 9/10:  83%|▊| 818/991 [3:25:32<43:01, 14.92s/batch, batch_loss=359, batch_

Epoch 9/10:  83%|▊| 818/991 [3:25:47<43:01, 14.92s/batch, batch_loss=12.6, batch

Epoch 9/10:  83%|▊| 819/991 [3:25:47<43:00, 15.00s/batch, batch_loss=12.6, batch

Epoch 9/10:  83%|▊| 819/991 [3:26:02<43:00, 15.00s/batch, batch_loss=6.92, batch

Epoch 9/10:  83%|▊| 820/991 [3:26:02<42:49, 15.03s/batch, batch_loss=6.92, batch

Epoch 9/10:  83%|▊| 820/991 [3:26:16<42:49, 15.03s/batch, batch_loss=6.73, batch

Epoch 9/10:  83%|▊| 821/991 [3:26:16<41:43, 14.73s/batch, batch_loss=6.73, batch

Epoch 9/10:  83%|▊| 821/991 [3:26:33<41:43, 14.73s/batch, batch_loss=10, batch_i

Epoch 9/10:  83%|▊| 822/991 [3:26:33<42:59, 15.27s/batch, batch_loss=10, batch_i

Epoch 9/10:  83%|▊| 822/991 [3:26:47<42:59, 15.27s/batch, batch_loss=157, batch_

Epoch 9/10:  83%|▊| 823/991 [3:26:47<41:36, 14.86s/batch, batch_loss=157, batch_

Epoch 9/10:  83%|▊| 823/991 [3:27:01<41:36, 14.86s/batch, batch_loss=6.84, batch

Epoch 9/10:  83%|▊| 824/991 [3:27:01<40:38, 14.60s/batch, batch_loss=6.84, batch

Epoch 9/10:  83%|▊| 824/991 [3:27:16<40:38, 14.60s/batch, batch_loss=14.4, batch

Epoch 9/10:  83%|▊| 825/991 [3:27:16<41:03, 14.84s/batch, batch_loss=14.4, batch

Epoch 9/10:  83%|▊| 825/991 [3:27:31<41:03, 14.84s/batch, batch_loss=2.6e+3, bat

Epoch 9/10:  83%|▊| 826/991 [3:27:31<40:43, 14.81s/batch, batch_loss=2.6e+3, bat

Epoch 9/10:  83%|▊| 826/991 [3:27:46<40:43, 14.81s/batch, batch_loss=23.9, batch

Epoch 9/10:  83%|▊| 827/991 [3:27:46<40:29, 14.81s/batch, batch_loss=23.9, batch

Epoch 9/10:  83%|▊| 827/991 [3:28:00<40:29, 14.81s/batch, batch_loss=19.8, batch

Epoch 9/10:  84%|▊| 828/991 [3:28:00<39:42, 14.62s/batch, batch_loss=19.8, batch

Epoch 9/10:  84%|▊| 828/991 [3:28:14<39:42, 14.62s/batch, batch_loss=7.58, batch

Epoch 9/10:  84%|▊| 829/991 [3:28:14<39:18, 14.56s/batch, batch_loss=7.58, batch

Epoch 9/10:  84%|▊| 829/991 [3:28:29<39:18, 14.56s/batch, batch_loss=14.8, batch

Epoch 9/10:  84%|▊| 830/991 [3:28:29<39:28, 14.71s/batch, batch_loss=14.8, batch

Epoch 9/10:  84%|▊| 830/991 [3:28:42<39:28, 14.71s/batch, batch_loss=10.2, batch

Epoch 9/10:  84%|▊| 831/991 [3:28:42<37:58, 14.24s/batch, batch_loss=10.2, batch

Epoch 9/10:  84%|▊| 831/991 [3:28:57<37:58, 14.24s/batch, batch_loss=20.6, batch

Epoch 9/10:  84%|▊| 832/991 [3:28:57<37:50, 14.28s/batch, batch_loss=20.6, batch

Epoch 9/10:  84%|▊| 832/991 [3:29:12<37:50, 14.28s/batch, batch_loss=216, batch_

Epoch 9/10:  84%|▊| 833/991 [3:29:12<38:27, 14.60s/batch, batch_loss=216, batch_

Epoch 9/10:  84%|▊| 833/991 [3:29:28<38:27, 14.60s/batch, batch_loss=20.7, batch

Epoch 9/10:  84%|▊| 834/991 [3:29:28<38:53, 14.86s/batch, batch_loss=20.7, batch

Epoch 9/10:  84%|▊| 834/991 [3:29:45<38:53, 14.86s/batch, batch_loss=16.9, batch

Epoch 9/10:  84%|▊| 835/991 [3:29:45<40:31, 15.59s/batch, batch_loss=16.9, batch

Epoch 9/10:  84%|▊| 835/991 [3:29:59<40:31, 15.59s/batch, batch_loss=3.27e+3, ba

Epoch 9/10:  84%|▊| 836/991 [3:29:59<39:25, 15.26s/batch, batch_loss=3.27e+3, ba

Epoch 9/10:  84%|▊| 836/991 [3:30:15<39:25, 15.26s/batch, batch_loss=4.91e+3, ba

Epoch 9/10:  84%|▊| 837/991 [3:30:15<39:32, 15.41s/batch, batch_loss=4.91e+3, ba

Epoch 9/10:  84%|▊| 837/991 [3:30:30<39:32, 15.41s/batch, batch_loss=18.3, batch

Epoch 9/10:  85%|▊| 838/991 [3:30:30<38:56, 15.27s/batch, batch_loss=18.3, batch

Epoch 9/10:  85%|▊| 838/991 [3:30:44<38:56, 15.27s/batch, batch_loss=4.68, batch

Epoch 9/10:  85%|▊| 839/991 [3:30:44<37:57, 14.98s/batch, batch_loss=4.68, batch

Epoch 9/10:  85%|▊| 839/991 [3:30:59<37:57, 14.98s/batch, batch_loss=4.5, batch_

Epoch 9/10:  85%|▊| 840/991 [3:30:59<37:23, 14.85s/batch, batch_loss=4.5, batch_

Epoch 9/10:  85%|▊| 840/991 [3:31:14<37:23, 14.85s/batch, batch_loss=16.8, batch

Epoch 9/10:  85%|▊| 841/991 [3:31:14<37:01, 14.81s/batch, batch_loss=16.8, batch

Epoch 9/10:  85%|▊| 841/991 [3:31:28<37:01, 14.81s/batch, batch_loss=16.7, batch

Epoch 9/10:  85%|▊| 842/991 [3:31:28<36:38, 14.76s/batch, batch_loss=16.7, batch

Epoch 9/10:  85%|▊| 842/991 [3:31:46<36:38, 14.76s/batch, batch_loss=9.39, batch

Epoch 9/10:  85%|▊| 843/991 [3:31:46<38:26, 15.58s/batch, batch_loss=9.39, batch

Epoch 9/10:  85%|▊| 843/991 [3:32:00<38:26, 15.58s/batch, batch_loss=1.69e+3, ba

Epoch 9/10:  85%|▊| 844/991 [3:32:00<37:12, 15.19s/batch, batch_loss=1.69e+3, ba

Epoch 9/10:  85%|▊| 844/991 [3:32:15<37:12, 15.19s/batch, batch_loss=16.6, batch

Epoch 9/10:  85%|▊| 845/991 [3:32:15<37:10, 15.28s/batch, batch_loss=16.6, batch

Epoch 9/10:  85%|▊| 845/991 [3:32:31<37:10, 15.28s/batch, batch_loss=1.18e+4, ba

Epoch 9/10:  85%|▊| 846/991 [3:32:31<36:51, 15.25s/batch, batch_loss=1.18e+4, ba

Epoch 9/10:  85%|▊| 846/991 [3:32:46<36:51, 15.25s/batch, batch_loss=20.4, batch

Epoch 9/10:  85%|▊| 847/991 [3:32:46<36:29, 15.20s/batch, batch_loss=20.4, batch

Epoch 9/10:  85%|▊| 847/991 [3:33:00<36:29, 15.20s/batch, batch_loss=28.5, batch

Epoch 9/10:  86%|▊| 848/991 [3:33:00<35:42, 14.98s/batch, batch_loss=28.5, batch

Epoch 9/10:  86%|▊| 848/991 [3:33:15<35:42, 14.98s/batch, batch_loss=1e+3, batch

Epoch 9/10:  86%|▊| 849/991 [3:33:15<35:35, 15.04s/batch, batch_loss=1e+3, batch

Epoch 9/10:  86%|▊| 849/991 [3:33:30<35:35, 15.04s/batch, batch_loss=7.81, batch

Epoch 9/10:  86%|▊| 850/991 [3:33:30<35:06, 14.94s/batch, batch_loss=7.81, batch

Epoch 9/10:  86%|▊| 850/991 [3:33:47<35:06, 14.94s/batch, batch_loss=17.6, batch

Epoch 9/10:  86%|▊| 851/991 [3:33:47<36:02, 15.45s/batch, batch_loss=17.6, batch

Epoch 9/10:  86%|▊| 851/991 [3:34:01<36:02, 15.45s/batch, batch_loss=14.9, batch

Epoch 9/10:  86%|▊| 852/991 [3:34:01<35:02, 15.13s/batch, batch_loss=14.9, batch

Epoch 9/10:  86%|▊| 852/991 [3:34:16<35:02, 15.13s/batch, batch_loss=7.65e+3, ba

Epoch 9/10:  86%|▊| 853/991 [3:34:16<34:53, 15.17s/batch, batch_loss=7.65e+3, ba

Epoch 9/10:  86%|▊| 853/991 [3:34:32<34:53, 15.17s/batch, batch_loss=18.8, batch

Epoch 9/10:  86%|▊| 854/991 [3:34:32<35:00, 15.33s/batch, batch_loss=18.8, batch

Epoch 9/10:  86%|▊| 854/991 [3:34:46<35:00, 15.33s/batch, batch_loss=7.76, batch

Epoch 9/10:  86%|▊| 855/991 [3:34:46<34:02, 15.02s/batch, batch_loss=7.76, batch

Epoch 9/10:  86%|▊| 855/991 [3:35:02<34:02, 15.02s/batch, batch_loss=8.56, batch

Epoch 9/10:  86%|▊| 856/991 [3:35:02<33:52, 15.05s/batch, batch_loss=8.56, batch

Epoch 9/10:  86%|▊| 856/991 [3:35:17<33:52, 15.05s/batch, batch_loss=8.85, batch

Epoch 9/10:  86%|▊| 857/991 [3:35:17<33:37, 15.05s/batch, batch_loss=8.85, batch

Epoch 9/10:  86%|▊| 857/991 [3:35:31<33:37, 15.05s/batch, batch_loss=19.1, batch

Epoch 9/10:  87%|▊| 858/991 [3:35:31<33:03, 14.91s/batch, batch_loss=19.1, batch

Epoch 9/10:  87%|▊| 858/991 [3:35:46<33:03, 14.91s/batch, batch_loss=13.1, batch

Epoch 9/10:  87%|▊| 859/991 [3:35:46<32:53, 14.95s/batch, batch_loss=13.1, batch

Epoch 9/10:  87%|▊| 859/991 [3:36:01<32:53, 14.95s/batch, batch_loss=18.3, batch

Epoch 9/10:  87%|▊| 860/991 [3:36:01<32:42, 14.98s/batch, batch_loss=18.3, batch

Epoch 9/10:  87%|▊| 860/991 [3:36:17<32:42, 14.98s/batch, batch_loss=7.63, batch

Epoch 9/10:  87%|▊| 861/991 [3:36:17<32:41, 15.08s/batch, batch_loss=7.63, batch

Epoch 9/10:  87%|▊| 861/991 [3:36:32<32:41, 15.08s/batch, batch_loss=15.3, batch

Epoch 9/10:  87%|▊| 862/991 [3:36:32<32:52, 15.29s/batch, batch_loss=15.3, batch

Epoch 9/10:  87%|▊| 862/991 [3:36:47<32:52, 15.29s/batch, batch_loss=25.9, batch

Epoch 9/10:  87%|▊| 863/991 [3:36:47<32:17, 15.14s/batch, batch_loss=25.9, batch

Epoch 9/10:  87%|▊| 863/991 [3:37:02<32:17, 15.14s/batch, batch_loss=9.69, batch

Epoch 9/10:  87%|▊| 864/991 [3:37:02<31:42, 14.98s/batch, batch_loss=9.69, batch

Epoch 9/10:  87%|▊| 864/991 [3:37:17<31:42, 14.98s/batch, batch_loss=15.3, batch

Epoch 9/10:  87%|▊| 865/991 [3:37:17<31:21, 14.94s/batch, batch_loss=15.3, batch

Epoch 9/10:  87%|▊| 865/991 [3:37:31<31:21, 14.94s/batch, batch_loss=17.9, batch

Epoch 9/10:  87%|▊| 866/991 [3:37:31<30:53, 14.83s/batch, batch_loss=17.9, batch

Epoch 9/10:  87%|▊| 866/991 [3:37:49<30:53, 14.83s/batch, batch_loss=19.8, batch

Epoch 9/10:  87%|▊| 867/991 [3:37:49<32:36, 15.77s/batch, batch_loss=19.8, batch

Epoch 9/10:  87%|▊| 867/991 [3:38:03<32:36, 15.77s/batch, batch_loss=18.9, batch

Epoch 9/10:  88%|▉| 868/991 [3:38:03<31:19, 15.28s/batch, batch_loss=18.9, batch

Epoch 9/10:  88%|▉| 868/991 [3:38:18<31:19, 15.28s/batch, batch_loss=10.6, batch

Epoch 9/10:  88%|▉| 869/991 [3:38:18<30:57, 15.22s/batch, batch_loss=10.6, batch

Epoch 9/10:  88%|▉| 869/991 [3:38:34<30:57, 15.22s/batch, batch_loss=13.7, batch

Epoch 9/10:  88%|▉| 870/991 [3:38:34<30:44, 15.24s/batch, batch_loss=13.7, batch

Epoch 9/10:  88%|▉| 870/991 [3:38:48<30:44, 15.24s/batch, batch_loss=8.65, batch

Epoch 9/10:  88%|▉| 871/991 [3:38:48<30:10, 15.09s/batch, batch_loss=8.65, batch

Epoch 9/10:  88%|▉| 871/991 [3:39:04<30:10, 15.09s/batch, batch_loss=17.2, batch

Epoch 9/10:  88%|▉| 872/991 [3:39:04<30:16, 15.26s/batch, batch_loss=17.2, batch

Epoch 9/10:  88%|▉| 872/991 [3:39:20<30:16, 15.26s/batch, batch_loss=14.3, batch

Epoch 9/10:  88%|▉| 873/991 [3:39:20<30:18, 15.41s/batch, batch_loss=14.3, batch

Epoch 9/10:  88%|▉| 873/991 [3:39:34<30:18, 15.41s/batch, batch_loss=8.79, batch

Epoch 9/10:  88%|▉| 874/991 [3:39:34<29:36, 15.18s/batch, batch_loss=8.79, batch

Epoch 9/10:  88%|▉| 874/991 [3:39:49<29:36, 15.18s/batch, batch_loss=12.7, batch

Epoch 9/10:  88%|▉| 875/991 [3:39:49<29:16, 15.14s/batch, batch_loss=12.7, batch

Epoch 9/10:  88%|▉| 875/991 [3:40:04<29:16, 15.14s/batch, batch_loss=22.7, batch

Epoch 9/10:  88%|▉| 876/991 [3:40:04<28:51, 15.06s/batch, batch_loss=22.7, batch

Epoch 9/10:  88%|▉| 876/991 [3:40:20<28:51, 15.06s/batch, batch_loss=18.5, batch

Epoch 9/10:  88%|▉| 877/991 [3:40:20<28:48, 15.17s/batch, batch_loss=18.5, batch

Epoch 9/10:  88%|▉| 877/991 [3:40:35<28:48, 15.17s/batch, batch_loss=24.8, batch

Epoch 9/10:  89%|▉| 878/991 [3:40:35<28:32, 15.16s/batch, batch_loss=24.8, batch

Epoch 9/10:  89%|▉| 878/991 [3:40:50<28:32, 15.16s/batch, batch_loss=17.7, batch

Epoch 9/10:  89%|▉| 879/991 [3:40:50<28:08, 15.07s/batch, batch_loss=17.7, batch

Epoch 9/10:  89%|▉| 879/991 [3:41:04<28:08, 15.07s/batch, batch_loss=11.5, batch

Epoch 9/10:  89%|▉| 880/991 [3:41:04<27:40, 14.96s/batch, batch_loss=11.5, batch

Epoch 9/10:  89%|▉| 880/991 [3:41:20<27:40, 14.96s/batch, batch_loss=5.12e+3, ba

Epoch 9/10:  89%|▉| 881/991 [3:41:20<27:41, 15.10s/batch, batch_loss=5.12e+3, ba

Epoch 9/10:  89%|▉| 881/991 [3:41:38<27:41, 15.10s/batch, batch_loss=16.1, batch

Epoch 9/10:  89%|▉| 882/991 [3:41:38<28:56, 15.93s/batch, batch_loss=16.1, batch

Epoch 9/10:  89%|▉| 882/991 [3:41:52<28:56, 15.93s/batch, batch_loss=15.2, batch

Epoch 9/10:  89%|▉| 883/991 [3:41:52<27:49, 15.46s/batch, batch_loss=15.2, batch

Epoch 9/10:  89%|▉| 883/991 [3:42:07<27:49, 15.46s/batch, batch_loss=9.21, batch

Epoch 9/10:  89%|▉| 884/991 [3:42:07<27:05, 15.19s/batch, batch_loss=9.21, batch

Epoch 9/10:  89%|▉| 884/991 [3:42:22<27:05, 15.19s/batch, batch_loss=13.9, batch

Epoch 9/10:  89%|▉| 885/991 [3:42:22<26:51, 15.20s/batch, batch_loss=13.9, batch

Epoch 9/10:  89%|▉| 885/991 [3:42:37<26:51, 15.20s/batch, batch_loss=17.1, batch

Epoch 9/10:  89%|▉| 886/991 [3:42:37<26:39, 15.24s/batch, batch_loss=17.1, batch

Epoch 9/10:  89%|▉| 886/991 [3:42:52<26:39, 15.24s/batch, batch_loss=1.93e+4, ba

Epoch 9/10:  90%|▉| 887/991 [3:42:52<26:12, 15.12s/batch, batch_loss=1.93e+4, ba

Epoch 9/10:  90%|▉| 887/991 [3:43:07<26:12, 15.12s/batch, batch_loss=17.6, batch

Epoch 9/10:  90%|▉| 888/991 [3:43:07<25:51, 15.06s/batch, batch_loss=17.6, batch

Epoch 9/10:  90%|▉| 888/991 [3:43:24<25:51, 15.06s/batch, batch_loss=18.6, batch

Epoch 9/10:  90%|▉| 889/991 [3:43:24<26:39, 15.69s/batch, batch_loss=18.6, batch

Epoch 9/10:  90%|▉| 889/991 [3:43:39<26:39, 15.69s/batch, batch_loss=11.6, batch

Epoch 9/10:  90%|▉| 890/991 [3:43:39<25:51, 15.36s/batch, batch_loss=11.6, batch

Epoch 9/10:  90%|▉| 890/991 [3:43:54<25:51, 15.36s/batch, batch_loss=13.5, batch

Epoch 9/10:  90%|▉| 891/991 [3:43:54<25:28, 15.29s/batch, batch_loss=13.5, batch

Epoch 9/10:  90%|▉| 891/991 [3:44:09<25:28, 15.29s/batch, batch_loss=18.3, batch

Epoch 9/10:  90%|▉| 892/991 [3:44:09<25:12, 15.28s/batch, batch_loss=18.3, batch

Epoch 9/10:  90%|▉| 892/991 [3:44:24<25:12, 15.28s/batch, batch_loss=3.7e+3, bat

Epoch 9/10:  90%|▉| 893/991 [3:44:24<24:49, 15.20s/batch, batch_loss=3.7e+3, bat

Epoch 9/10:  90%|▉| 893/991 [3:44:39<24:49, 15.20s/batch, batch_loss=10.9, batch

Epoch 9/10:  90%|▉| 894/991 [3:44:39<24:39, 15.25s/batch, batch_loss=10.9, batch

Epoch 9/10:  90%|▉| 894/991 [3:44:55<24:39, 15.25s/batch, batch_loss=16, batch_i

Epoch 9/10:  90%|▉| 895/991 [3:44:55<24:20, 15.21s/batch, batch_loss=16, batch_i

Epoch 9/10:  90%|▉| 895/991 [3:45:09<24:20, 15.21s/batch, batch_loss=12.7, batch

Epoch 9/10:  90%|▉| 896/991 [3:45:09<23:49, 15.05s/batch, batch_loss=12.7, batch

Epoch 9/10:  90%|▉| 896/991 [3:45:24<23:49, 15.05s/batch, batch_loss=14.7, batch

Epoch 9/10:  91%|▉| 897/991 [3:45:24<23:23, 14.93s/batch, batch_loss=14.7, batch

Epoch 9/10:  91%|▉| 897/991 [3:45:39<23:23, 14.93s/batch, batch_loss=22.3, batch

Epoch 9/10:  91%|▉| 898/991 [3:45:39<23:04, 14.89s/batch, batch_loss=22.3, batch

Epoch 9/10:  91%|▉| 898/991 [3:45:54<23:04, 14.89s/batch, batch_loss=15.4, batch

Epoch 9/10:  91%|▉| 899/991 [3:45:54<22:57, 14.98s/batch, batch_loss=15.4, batch

Epoch 9/10:  91%|▉| 899/991 [3:46:09<22:57, 14.98s/batch, batch_loss=18.5, batch

Epoch 9/10:  91%|▉| 900/991 [3:46:09<22:41, 14.96s/batch, batch_loss=18.5, batch

Epoch 9/10:  91%|▉| 900/991 [3:46:24<22:41, 14.96s/batch, batch_loss=11, batch_i

Epoch 9/10:  91%|▉| 901/991 [3:46:24<22:18, 14.87s/batch, batch_loss=11, batch_i

Epoch 9/10:  91%|▉| 901/991 [3:46:39<22:18, 14.87s/batch, batch_loss=12.5, batch

Epoch 9/10:  91%|▉| 902/991 [3:46:39<22:14, 14.99s/batch, batch_loss=12.5, batch

Epoch 9/10:  91%|▉| 902/991 [3:46:53<22:14, 14.99s/batch, batch_loss=8.07, batch

Epoch 9/10:  91%|▉| 903/991 [3:46:53<21:50, 14.89s/batch, batch_loss=8.07, batch

Epoch 9/10:  91%|▉| 903/991 [3:47:08<21:50, 14.89s/batch, batch_loss=7.31, batch

Epoch 9/10:  91%|▉| 904/991 [3:47:08<21:39, 14.93s/batch, batch_loss=7.31, batch

Epoch 9/10:  91%|▉| 904/991 [3:47:23<21:39, 14.93s/batch, batch_loss=24.1, batch

Epoch 9/10:  91%|▉| 905/991 [3:47:23<21:03, 14.70s/batch, batch_loss=24.1, batch

Epoch 9/10:  91%|▉| 905/991 [3:47:38<21:03, 14.70s/batch, batch_loss=17.9, batch

Epoch 9/10:  91%|▉| 906/991 [3:47:38<20:56, 14.79s/batch, batch_loss=17.9, batch

Epoch 9/10:  91%|▉| 906/991 [3:47:52<20:56, 14.79s/batch, batch_loss=18.1, batch

Epoch 9/10:  92%|▉| 907/991 [3:47:52<20:43, 14.80s/batch, batch_loss=18.1, batch

Epoch 9/10:  92%|▉| 907/991 [3:48:08<20:43, 14.80s/batch, batch_loss=12.3, batch

Epoch 9/10:  92%|▉| 908/991 [3:48:08<20:40, 14.95s/batch, batch_loss=12.3, batch

Epoch 9/10:  92%|▉| 908/991 [3:48:22<20:40, 14.95s/batch, batch_loss=6.27, batch

Epoch 9/10:  92%|▉| 909/991 [3:48:22<20:14, 14.81s/batch, batch_loss=6.27, batch

Epoch 9/10:  92%|▉| 909/991 [3:48:37<20:14, 14.81s/batch, batch_loss=684, batch_

Epoch 9/10:  92%|▉| 910/991 [3:48:37<19:55, 14.76s/batch, batch_loss=684, batch_

Epoch 9/10:  92%|▉| 910/991 [3:48:52<19:55, 14.76s/batch, batch_loss=1.02e+3, ba

Epoch 9/10:  92%|▉| 911/991 [3:48:52<19:39, 14.75s/batch, batch_loss=1.02e+3, ba

Epoch 9/10:  92%|▉| 911/991 [3:49:07<19:39, 14.75s/batch, batch_loss=24.2, batch

Epoch 9/10:  92%|▉| 912/991 [3:49:07<19:38, 14.92s/batch, batch_loss=24.2, batch

Epoch 9/10:  92%|▉| 912/991 [3:49:22<19:38, 14.92s/batch, batch_loss=20.7, batch

Epoch 9/10:  92%|▉| 913/991 [3:49:22<19:28, 14.98s/batch, batch_loss=20.7, batch

Epoch 9/10:  92%|▉| 913/991 [3:49:37<19:28, 14.98s/batch, batch_loss=19.6, batch

Epoch 9/10:  92%|▉| 914/991 [3:49:37<19:12, 14.97s/batch, batch_loss=19.6, batch

Epoch 9/10:  92%|▉| 914/991 [3:49:51<19:12, 14.97s/batch, batch_loss=17.7, batch

Epoch 9/10:  92%|▉| 915/991 [3:49:51<18:43, 14.78s/batch, batch_loss=17.7, batch

Epoch 9/10:  92%|▉| 915/991 [3:50:06<18:43, 14.78s/batch, batch_loss=16.1, batch

Epoch 9/10:  92%|▉| 916/991 [3:50:06<18:25, 14.74s/batch, batch_loss=16.1, batch

Epoch 9/10:  92%|▉| 916/991 [3:50:21<18:25, 14.74s/batch, batch_loss=7.31, batch

Epoch 9/10:  93%|▉| 917/991 [3:50:21<18:07, 14.70s/batch, batch_loss=7.31, batch

Epoch 9/10:  93%|▉| 917/991 [3:50:35<18:07, 14.70s/batch, batch_loss=11.7, batch

Epoch 9/10:  93%|▉| 918/991 [3:50:35<17:57, 14.76s/batch, batch_loss=11.7, batch

Epoch 9/10:  93%|▉| 918/991 [3:50:50<17:57, 14.76s/batch, batch_loss=13, batch_i

Epoch 9/10:  93%|▉| 919/991 [3:50:50<17:28, 14.57s/batch, batch_loss=13, batch_i

Epoch 9/10:  93%|▉| 919/991 [3:51:04<17:28, 14.57s/batch, batch_loss=13.7, batch

Epoch 9/10:  93%|▉| 920/991 [3:51:04<17:14, 14.56s/batch, batch_loss=13.7, batch

Epoch 9/10:  93%|▉| 920/991 [3:51:19<17:14, 14.56s/batch, batch_loss=17, batch_i

Epoch 9/10:  93%|▉| 921/991 [3:51:19<17:10, 14.72s/batch, batch_loss=17, batch_i

Epoch 9/10:  93%|▉| 921/991 [3:51:33<17:10, 14.72s/batch, batch_loss=20.7, batch

Epoch 9/10:  93%|▉| 922/991 [3:51:33<16:42, 14.53s/batch, batch_loss=20.7, batch

Epoch 9/10:  93%|▉| 922/991 [3:51:47<16:42, 14.53s/batch, batch_loss=6.16, batch

Epoch 9/10:  93%|▉| 923/991 [3:51:47<16:18, 14.40s/batch, batch_loss=6.16, batch

Epoch 9/10:  93%|▉| 923/991 [3:52:02<16:18, 14.40s/batch, batch_loss=11.7, batch

Epoch 9/10:  93%|▉| 924/991 [3:52:02<16:06, 14.43s/batch, batch_loss=11.7, batch

Epoch 9/10:  93%|▉| 924/991 [3:52:17<16:06, 14.43s/batch, batch_loss=10.3, batch

Epoch 9/10:  93%|▉| 925/991 [3:52:17<16:01, 14.57s/batch, batch_loss=10.3, batch

Epoch 9/10:  93%|▉| 925/991 [3:52:32<16:01, 14.57s/batch, batch_loss=3e+4, batch

Epoch 9/10:  93%|▉| 926/991 [3:52:32<16:00, 14.77s/batch, batch_loss=3e+4, batch

Epoch 9/10:  93%|▉| 926/991 [3:52:47<16:00, 14.77s/batch, batch_loss=6.54, batch

Epoch 9/10:  94%|▉| 927/991 [3:52:47<15:47, 14.81s/batch, batch_loss=6.54, batch

Epoch 9/10:  94%|▉| 927/991 [3:53:01<15:47, 14.81s/batch, batch_loss=853, batch_

Epoch 9/10:  94%|▉| 928/991 [3:53:01<15:22, 14.64s/batch, batch_loss=853, batch_

Epoch 9/10:  94%|▉| 928/991 [3:53:16<15:22, 14.64s/batch, batch_loss=10.8, batch

Epoch 9/10:  94%|▉| 929/991 [3:53:16<15:05, 14.60s/batch, batch_loss=10.8, batch

Epoch 9/10:  94%|▉| 929/991 [3:53:30<15:05, 14.60s/batch, batch_loss=8.75, batch

Epoch 9/10:  94%|▉| 930/991 [3:53:30<14:50, 14.61s/batch, batch_loss=8.75, batch

Epoch 9/10:  94%|▉| 930/991 [3:53:45<14:50, 14.61s/batch, batch_loss=11.8, batch

Epoch 9/10:  94%|▉| 931/991 [3:53:45<14:34, 14.58s/batch, batch_loss=11.8, batch

Epoch 9/10:  94%|▉| 931/991 [3:54:00<14:34, 14.58s/batch, batch_loss=11.3, batch

Epoch 9/10:  94%|▉| 932/991 [3:54:00<14:25, 14.67s/batch, batch_loss=11.3, batch

Epoch 9/10:  94%|▉| 932/991 [3:54:15<14:25, 14.67s/batch, batch_loss=11.9, batch

Epoch 9/10:  94%|▉| 933/991 [3:54:15<14:13, 14.71s/batch, batch_loss=11.9, batch

Epoch 9/10:  94%|▉| 933/991 [3:54:29<14:13, 14.71s/batch, batch_loss=2.06, batch

Epoch 9/10:  94%|▉| 934/991 [3:54:29<14:02, 14.78s/batch, batch_loss=2.06, batch

Epoch 9/10:  94%|▉| 934/991 [3:54:45<14:02, 14.78s/batch, batch_loss=2.17, batch

Epoch 9/10:  94%|▉| 935/991 [3:54:45<13:57, 14.95s/batch, batch_loss=2.17, batch

Epoch 9/10:  94%|▉| 935/991 [3:55:00<13:57, 14.95s/batch, batch_loss=164, batch_

Epoch 9/10:  94%|▉| 936/991 [3:55:00<13:43, 14.97s/batch, batch_loss=164, batch_

Epoch 9/10:  94%|▉| 936/991 [3:55:15<13:43, 14.97s/batch, batch_loss=36.6, batch

Epoch 9/10:  95%|▉| 937/991 [3:55:15<13:35, 15.10s/batch, batch_loss=36.6, batch

Epoch 9/10:  95%|▉| 937/991 [3:55:30<13:35, 15.10s/batch, batch_loss=10.5, batch

Epoch 9/10:  95%|▉| 938/991 [3:55:30<13:17, 15.05s/batch, batch_loss=10.5, batch

Epoch 9/10:  95%|▉| 938/991 [3:55:45<13:17, 15.05s/batch, batch_loss=9.32, batch

Epoch 9/10:  95%|▉| 939/991 [3:55:45<13:01, 15.03s/batch, batch_loss=9.32, batch

Epoch 9/10:  95%|▉| 939/991 [3:55:59<13:01, 15.03s/batch, batch_loss=416, batch_

Epoch 9/10:  95%|▉| 940/991 [3:55:59<12:28, 14.67s/batch, batch_loss=416, batch_

Epoch 9/10:  95%|▉| 940/991 [3:56:14<12:28, 14.67s/batch, batch_loss=16.2, batch

Epoch 9/10:  95%|▉| 941/991 [3:56:14<12:24, 14.88s/batch, batch_loss=16.2, batch

Epoch 9/10:  95%|▉| 941/991 [3:56:28<12:24, 14.88s/batch, batch_loss=12.6, batch

Epoch 9/10:  95%|▉| 942/991 [3:56:28<11:48, 14.46s/batch, batch_loss=12.6, batch

Epoch 9/10:  95%|▉| 942/991 [3:56:40<11:48, 14.46s/batch, batch_loss=9.73, batch

Epoch 9/10:  95%|▉| 943/991 [3:56:40<11:07, 13.91s/batch, batch_loss=9.73, batch

Epoch 9/10:  95%|▉| 943/991 [3:56:54<11:07, 13.91s/batch, batch_loss=15.1, batch

Epoch 9/10:  95%|▉| 944/991 [3:56:54<10:53, 13.91s/batch, batch_loss=15.1, batch

Epoch 9/10:  95%|▉| 944/991 [3:57:09<10:53, 13.91s/batch, batch_loss=1.62, batch

Epoch 9/10:  95%|▉| 945/991 [3:57:09<10:46, 14.05s/batch, batch_loss=1.62, batch

Epoch 9/10:  95%|▉| 945/991 [3:57:23<10:46, 14.05s/batch, batch_loss=12.3, batch

Epoch 9/10:  95%|▉| 946/991 [3:57:23<10:41, 14.26s/batch, batch_loss=12.3, batch

Epoch 9/10:  95%|▉| 946/991 [3:57:41<10:41, 14.26s/batch, batch_loss=13.5, batch

Epoch 9/10:  96%|▉| 947/991 [3:57:41<11:04, 15.10s/batch, batch_loss=13.5, batch

Epoch 9/10:  96%|▉| 947/991 [3:57:55<11:04, 15.10s/batch, batch_loss=11.2, batch

Epoch 9/10:  96%|▉| 948/991 [3:57:55<10:41, 14.93s/batch, batch_loss=11.2, batch

Epoch 9/10:  96%|▉| 948/991 [3:58:10<10:41, 14.93s/batch, batch_loss=5.85, batch

Epoch 9/10:  96%|▉| 949/991 [3:58:10<10:30, 15.01s/batch, batch_loss=5.85, batch

Epoch 9/10:  96%|▉| 949/991 [3:58:25<10:30, 15.01s/batch, batch_loss=8.42, batch

Epoch 9/10:  96%|▉| 950/991 [3:58:25<10:08, 14.85s/batch, batch_loss=8.42, batch

Epoch 9/10:  96%|▉| 950/991 [3:58:39<10:08, 14.85s/batch, batch_loss=15.8, batch

Epoch 9/10:  96%|▉| 951/991 [3:58:39<09:52, 14.81s/batch, batch_loss=15.8, batch

Epoch 9/10:  96%|▉| 951/991 [3:58:54<09:52, 14.81s/batch, batch_loss=14.4, batch

Epoch 9/10:  96%|▉| 952/991 [3:58:54<09:29, 14.59s/batch, batch_loss=14.4, batch

Epoch 9/10:  96%|▉| 952/991 [3:59:09<09:29, 14.59s/batch, batch_loss=7.81, batch

Epoch 9/10:  96%|▉| 953/991 [3:59:09<09:19, 14.74s/batch, batch_loss=7.81, batch

Epoch 9/10:  96%|▉| 953/991 [3:59:27<09:19, 14.74s/batch, batch_loss=331, batch_

Epoch 9/10:  96%|▉| 954/991 [3:59:27<09:42, 15.76s/batch, batch_loss=331, batch_

Epoch 9/10:  96%|▉| 954/991 [3:59:41<09:42, 15.76s/batch, batch_loss=12.8, batch

Epoch 9/10:  96%|▉| 955/991 [3:59:41<09:13, 15.38s/batch, batch_loss=12.8, batch

Epoch 9/10:  96%|▉| 955/991 [3:59:56<09:13, 15.38s/batch, batch_loss=14.7, batch

Epoch 9/10:  96%|▉| 956/991 [3:59:56<08:50, 15.17s/batch, batch_loss=14.7, batch

Epoch 9/10:  96%|▉| 956/991 [4:00:11<08:50, 15.17s/batch, batch_loss=14.9, batch

Epoch 9/10:  97%|▉| 957/991 [4:00:11<08:33, 15.11s/batch, batch_loss=14.9, batch

Epoch 9/10:  97%|▉| 957/991 [4:00:25<08:33, 15.11s/batch, batch_loss=12.6, batch

Epoch 9/10:  97%|▉| 958/991 [4:00:25<08:07, 14.76s/batch, batch_loss=12.6, batch

Epoch 9/10:  97%|▉| 958/991 [4:00:39<08:07, 14.76s/batch, batch_loss=7.43, batch

Epoch 9/10:  97%|▉| 959/991 [4:00:39<07:50, 14.70s/batch, batch_loss=7.43, batch

Epoch 9/10:  97%|▉| 959/991 [4:00:54<07:50, 14.70s/batch, batch_loss=10.5, batch

Epoch 9/10:  97%|▉| 960/991 [4:00:54<07:35, 14.68s/batch, batch_loss=10.5, batch

Epoch 9/10:  97%|▉| 960/991 [4:01:08<07:35, 14.68s/batch, batch_loss=14.1, batch

Epoch 9/10:  97%|▉| 961/991 [4:01:08<07:18, 14.61s/batch, batch_loss=14.1, batch

Epoch 9/10:  97%|▉| 961/991 [4:01:26<07:18, 14.61s/batch, batch_loss=5.05, batch

Epoch 9/10:  97%|▉| 962/991 [4:01:26<07:26, 15.40s/batch, batch_loss=5.05, batch

Epoch 9/10:  97%|▉| 962/991 [4:01:40<07:26, 15.40s/batch, batch_loss=6.6, batch_

Epoch 9/10:  97%|▉| 963/991 [4:01:40<07:02, 15.09s/batch, batch_loss=6.6, batch_

Epoch 9/10:  97%|▉| 963/991 [4:01:55<07:02, 15.09s/batch, batch_loss=9.45e+3, ba

Epoch 9/10:  97%|▉| 964/991 [4:01:55<06:42, 14.91s/batch, batch_loss=9.45e+3, ba

Epoch 9/10:  97%|▉| 964/991 [4:02:09<06:42, 14.91s/batch, batch_loss=19.8, batch

Epoch 9/10:  97%|▉| 965/991 [4:02:09<06:25, 14.81s/batch, batch_loss=19.8, batch

Epoch 9/10:  97%|▉| 965/991 [4:02:24<06:25, 14.81s/batch, batch_loss=16, batch_i

Epoch 9/10:  97%|▉| 966/991 [4:02:24<06:09, 14.76s/batch, batch_loss=16, batch_i

Epoch 9/10:  97%|▉| 966/991 [4:02:38<06:09, 14.76s/batch, batch_loss=2.41e+4, ba

Epoch 9/10:  98%|▉| 967/991 [4:02:38<05:50, 14.59s/batch, batch_loss=2.41e+4, ba

Epoch 9/10:  98%|▉| 967/991 [4:02:53<05:50, 14.59s/batch, batch_loss=409, batch_

Epoch 9/10:  98%|▉| 968/991 [4:02:53<05:35, 14.59s/batch, batch_loss=409, batch_

Epoch 9/10:  98%|▉| 968/991 [4:03:07<05:35, 14.59s/batch, batch_loss=20.4, batch

Epoch 9/10:  98%|▉| 969/991 [4:03:07<05:22, 14.66s/batch, batch_loss=20.4, batch

Epoch 9/10:  98%|▉| 969/991 [4:03:21<05:22, 14.66s/batch, batch_loss=1, batch_in

Epoch 9/10:  98%|▉| 970/991 [4:03:21<05:03, 14.46s/batch, batch_loss=1, batch_in

Epoch 9/10:  98%|▉| 970/991 [4:03:38<05:03, 14.46s/batch, batch_loss=10.5, batch

Epoch 9/10:  98%|▉| 971/991 [4:03:38<05:02, 15.14s/batch, batch_loss=10.5, batch

Epoch 9/10:  98%|▉| 971/991 [4:03:52<05:02, 15.14s/batch, batch_loss=27.2, batch

Epoch 9/10:  98%|▉| 972/991 [4:03:52<04:42, 14.86s/batch, batch_loss=27.2, batch

Epoch 9/10:  98%|▉| 972/991 [4:04:06<04:42, 14.86s/batch, batch_loss=21.7, batch

Epoch 9/10:  98%|▉| 973/991 [4:04:06<04:20, 14.50s/batch, batch_loss=21.7, batch

Epoch 9/10:  98%|▉| 973/991 [4:04:21<04:20, 14.50s/batch, batch_loss=19.5, batch

Epoch 9/10:  98%|▉| 974/991 [4:04:21<04:11, 14.77s/batch, batch_loss=19.5, batch

Epoch 9/10:  98%|▉| 974/991 [4:04:36<04:11, 14.77s/batch, batch_loss=11.6, batch

Epoch 9/10:  98%|▉| 975/991 [4:04:36<03:53, 14.62s/batch, batch_loss=11.6, batch

Epoch 9/10:  98%|▉| 975/991 [4:04:50<03:53, 14.62s/batch, batch_loss=31.9, batch

Epoch 9/10:  98%|▉| 976/991 [4:04:50<03:36, 14.46s/batch, batch_loss=31.9, batch

Epoch 9/10:  98%|▉| 976/991 [4:05:03<03:36, 14.46s/batch, batch_loss=6.41, batch

Epoch 9/10:  99%|▉| 977/991 [4:05:03<03:18, 14.19s/batch, batch_loss=6.41, batch

Epoch 9/10:  99%|▉| 977/991 [4:05:17<03:18, 14.19s/batch, batch_loss=4.99, batch

Epoch 9/10:  99%|▉| 978/991 [4:05:17<03:02, 14.01s/batch, batch_loss=4.99, batch

Epoch 9/10:  99%|▉| 978/991 [4:05:30<03:02, 14.01s/batch, batch_loss=2.86, batch

Epoch 9/10:  99%|▉| 979/991 [4:05:30<02:44, 13.74s/batch, batch_loss=2.86, batch

Epoch 9/10:  99%|▉| 979/991 [4:05:43<02:44, 13.74s/batch, batch_loss=0.815, batc

Epoch 9/10:  99%|▉| 980/991 [4:05:43<02:27, 13.39s/batch, batch_loss=0.815, batc

Epoch 9/10:  99%|▉| 980/991 [4:05:58<02:27, 13.39s/batch, batch_loss=0.156, batc

Epoch 9/10:  99%|▉| 981/991 [4:05:58<02:19, 13.91s/batch, batch_loss=0.156, batc

Epoch 9/10:  99%|▉| 981/991 [4:06:11<02:19, 13.91s/batch, batch_loss=0.0996, bat

Epoch 9/10:  99%|▉| 982/991 [4:06:11<02:03, 13.67s/batch, batch_loss=0.0996, bat

Epoch 9/10:  99%|▉| 982/991 [4:06:24<02:03, 13.67s/batch, batch_loss=0.14, batch

Epoch 9/10:  99%|▉| 983/991 [4:06:24<01:47, 13.40s/batch, batch_loss=0.14, batch

Epoch 9/10:  99%|▉| 983/991 [4:06:37<01:47, 13.40s/batch, batch_loss=0.254, batc

Epoch 9/10:  99%|▉| 984/991 [4:06:37<01:33, 13.35s/batch, batch_loss=0.254, batc

Epoch 9/10:  99%|▉| 984/991 [4:06:50<01:33, 13.35s/batch, batch_loss=0.319, batc

Epoch 9/10:  99%|▉| 985/991 [4:06:50<01:20, 13.34s/batch, batch_loss=0.319, batc

Epoch 9/10:  99%|▉| 985/991 [4:07:03<01:20, 13.34s/batch, batch_loss=0.304, batc

Epoch 9/10:  99%|▉| 986/991 [4:07:03<01:05, 13.16s/batch, batch_loss=0.304, batc

Epoch 9/10:  99%|▉| 986/991 [4:07:16<01:05, 13.16s/batch, batch_loss=0.286, batc

Epoch 9/10: 100%|▉| 987/991 [4:07:16<00:52, 13.06s/batch, batch_loss=0.286, batc

Epoch 9/10: 100%|▉| 987/991 [4:07:28<00:52, 13.06s/batch, batch_loss=0.251, batc

Epoch 9/10: 100%|▉| 988/991 [4:07:28<00:38, 12.92s/batch, batch_loss=0.251, batc

Epoch 9/10: 100%|▉| 988/991 [4:07:41<00:38, 12.92s/batch, batch_loss=0.212, batc

Epoch 9/10: 100%|▉| 989/991 [4:07:41<00:25, 12.95s/batch, batch_loss=0.212, batc

Epoch 9/10: 100%|▉| 989/991 [4:07:54<00:25, 12.95s/batch, batch_loss=0.177, batc

Epoch 9/10: 100%|▉| 990/991 [4:07:54<00:12, 12.84s/batch, batch_loss=0.177, batc

Epoch 9/10: 100%|▉| 990/991 [4:08:05<00:12, 12.84s/batch, batch_loss=0.149, batc

Epoch 9/10: 100%|█| 991/991 [4:08:05<00:00, 12.31s/batch, batch_loss=0.149, batc

Epoch 9/10: 100%|█| 991/991 [4:08:05<00:00, 15.02s/batch, batch_loss=0.149, batc




Epoch 9, Loss: 986.8134


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:14<?, ?batch/s, batch_loss=24.9, batch_index=1, ba

Validation:   0%| | 1/743 [00:14<3:05:04, 14.97s/batch, batch_loss=24.9, batch_i

Validation:   0%| | 1/743 [00:29<3:05:04, 14.97s/batch, batch_loss=23.8, batch_i

Validation:   0%| | 2/743 [00:29<2:59:34, 14.54s/batch, batch_loss=23.8, batch_i

Validation:   0%| | 2/743 [00:43<2:59:34, 14.54s/batch, batch_loss=21.6, batch_i

Validation:   0%| | 3/743 [00:43<3:00:16, 14.62s/batch, batch_loss=21.6, batch_i

Validation:   0%| | 3/743 [00:57<3:00:16, 14.62s/batch, batch_loss=11.6, batch_i

Validation:   1%| | 4/743 [00:57<2:56:10, 14.30s/batch, batch_loss=11.6, batch_i

Validation:   1%| | 4/743 [01:12<2:56:10, 14.30s/batch, batch_loss=40.9, batch_i

Validation:   1%| | 5/743 [01:12<2:56:29, 14.35s/batch, batch_loss=40.9, batch_i

Validation:   1%| | 5/743 [01:28<2:56:29, 14.35s/batch, batch_loss=33.5, batch_i

Validation:   1%| | 6/743 [01:28<3:04:26, 15.02s/batch, batch_loss=33.5, batch_i

Validation:   1%| | 6/743 [01:42<3:04:26, 15.02s/batch, batch_loss=567, batch_in

Validation:   1%| | 7/743 [01:42<3:01:55, 14.83s/batch, batch_loss=567, batch_in

Validation:   1%| | 7/743 [01:57<3:01:55, 14.83s/batch, batch_loss=22.1, batch_i

Validation:   1%| | 8/743 [01:57<2:59:57, 14.69s/batch, batch_loss=22.1, batch_i

Validation:   1%| | 8/743 [02:12<2:59:57, 14.69s/batch, batch_loss=16.9, batch_i

Validation:   1%| | 9/743 [02:12<3:00:19, 14.74s/batch, batch_loss=16.9, batch_i

Validation:   1%| | 9/743 [02:26<3:00:19, 14.74s/batch, batch_loss=16.6, batch_i

Validation:   1%| | 10/743 [02:26<2:59:34, 14.70s/batch, batch_loss=16.6, batch_

Validation:   1%| | 10/743 [02:41<2:59:34, 14.70s/batch, batch_loss=13.4, batch_

Validation:   1%| | 11/743 [02:41<2:59:52, 14.74s/batch, batch_loss=13.4, batch_

Validation:   1%| | 11/743 [02:55<2:59:52, 14.74s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [02:55<2:57:05, 14.54s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:10<2:57:05, 14.54s/batch, batch_loss=24.1, batch_

Validation:   2%| | 13/743 [03:10<2:57:16, 14.57s/batch, batch_loss=24.1, batch_

Validation:   2%| | 13/743 [03:24<2:57:16, 14.57s/batch, batch_loss=15.6, batch_

Validation:   2%| | 14/743 [03:24<2:55:03, 14.41s/batch, batch_loss=15.6, batch_

Validation:   2%| | 14/743 [03:38<2:55:03, 14.41s/batch, batch_loss=23.9, batch_

Validation:   2%| | 15/743 [03:38<2:55:19, 14.45s/batch, batch_loss=23.9, batch_

Validation:   2%| | 15/743 [03:55<2:55:19, 14.45s/batch, batch_loss=22.5, batch_

Validation:   2%| | 16/743 [03:55<3:03:00, 15.10s/batch, batch_loss=22.5, batch_

Validation:   2%| | 16/743 [04:10<3:03:00, 15.10s/batch, batch_loss=13.3, batch_

Validation:   2%| | 17/743 [04:10<3:01:33, 15.01s/batch, batch_loss=13.3, batch_

Validation:   2%| | 17/743 [04:24<3:01:33, 15.01s/batch, batch_loss=4.56e+3, bat

Validation:   2%| | 18/743 [04:24<2:58:17, 14.76s/batch, batch_loss=4.56e+3, bat

Validation:   2%| | 18/743 [04:39<2:58:17, 14.76s/batch, batch_loss=14.6, batch_

Validation:   3%| | 19/743 [04:39<2:57:47, 14.73s/batch, batch_loss=14.6, batch_

Validation:   3%| | 19/743 [04:54<2:57:47, 14.73s/batch, batch_loss=16.2, batch_

Validation:   3%| | 20/743 [04:54<2:58:17, 14.80s/batch, batch_loss=16.2, batch_

Validation:   3%| | 20/743 [05:08<2:58:17, 14.80s/batch, batch_loss=978, batch_i

Validation:   3%| | 21/743 [05:08<2:56:08, 14.64s/batch, batch_loss=978, batch_i

Validation:   3%| | 21/743 [05:22<2:56:08, 14.64s/batch, batch_loss=16.7, batch_

Validation:   3%| | 22/743 [05:22<2:53:33, 14.44s/batch, batch_loss=16.7, batch_

Validation:   3%| | 22/743 [05:36<2:53:33, 14.44s/batch, batch_loss=5.99, batch_

Validation:   3%| | 23/743 [05:36<2:52:03, 14.34s/batch, batch_loss=5.99, batch_

Validation:   3%| | 23/743 [05:50<2:52:03, 14.34s/batch, batch_loss=19.4, batch_

Validation:   3%| | 24/743 [05:51<2:52:33, 14.40s/batch, batch_loss=19.4, batch_

Validation:   3%| | 24/743 [06:05<2:52:33, 14.40s/batch, batch_loss=19.4, batch_

Validation:   3%| | 25/743 [06:05<2:53:47, 14.52s/batch, batch_loss=19.4, batch_

Validation:   3%| | 25/743 [06:20<2:53:47, 14.52s/batch, batch_loss=27.2, batch_

Validation:   3%| | 26/743 [06:20<2:53:14, 14.50s/batch, batch_loss=27.2, batch_

Validation:   3%| | 26/743 [06:34<2:53:14, 14.50s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [06:34<2:53:39, 14.55s/batch, batch_loss=1.64e+3, bat

Validation:   4%| | 27/743 [06:48<2:53:39, 14.55s/batch, batch_loss=14.2, batch_

Validation:   4%| | 28/743 [06:48<2:50:23, 14.30s/batch, batch_loss=14.2, batch_

Validation:   4%| | 28/743 [07:02<2:50:23, 14.30s/batch, batch_loss=21.3, batch_

Validation:   4%| | 29/743 [07:02<2:49:39, 14.26s/batch, batch_loss=21.3, batch_

Validation:   4%| | 29/743 [07:17<2:49:39, 14.26s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:17<2:49:54, 14.30s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:32<2:49:54, 14.30s/batch, batch_loss=23.8, batch_

Validation:   4%| | 31/743 [07:32<2:52:14, 14.51s/batch, batch_loss=23.8, batch_

Validation:   4%| | 31/743 [07:47<2:52:14, 14.51s/batch, batch_loss=20.9, batch_

Validation:   4%| | 32/743 [07:47<2:54:19, 14.71s/batch, batch_loss=20.9, batch_

Validation:   4%| | 32/743 [08:01<2:54:19, 14.71s/batch, batch_loss=20, batch_in

Validation:   4%| | 33/743 [08:01<2:52:45, 14.60s/batch, batch_loss=20, batch_in

Validation:   4%| | 33/743 [08:17<2:52:45, 14.60s/batch, batch_loss=22.5, batch_

Validation:   5%| | 34/743 [08:17<2:55:08, 14.82s/batch, batch_loss=22.5, batch_

Validation:   5%| | 34/743 [08:31<2:55:08, 14.82s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [08:31<2:52:52, 14.65s/batch, batch_loss=2.81e+3, bat

Validation:   5%| | 35/743 [08:45<2:52:52, 14.65s/batch, batch_loss=19.8, batch_

Validation:   5%| | 36/743 [08:45<2:49:38, 14.40s/batch, batch_loss=19.8, batch_

Validation:   5%| | 36/743 [08:59<2:49:38, 14.40s/batch, batch_loss=170, batch_i

Validation:   5%| | 37/743 [08:59<2:48:12, 14.29s/batch, batch_loss=170, batch_i

Validation:   5%| | 37/743 [09:16<2:48:12, 14.29s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:16<2:59:27, 15.27s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:32<2:59:27, 15.27s/batch, batch_loss=15.8, batch_

Validation:   5%| | 39/743 [09:32<3:00:22, 15.37s/batch, batch_loss=15.8, batch_

Validation:   5%| | 39/743 [09:47<3:00:22, 15.37s/batch, batch_loss=25.1, batch_

Validation:   5%| | 40/743 [09:47<2:58:26, 15.23s/batch, batch_loss=25.1, batch_

Validation:   5%| | 40/743 [10:02<2:58:26, 15.23s/batch, batch_loss=18.8, batch_

Validation:   6%| | 41/743 [10:02<2:58:19, 15.24s/batch, batch_loss=18.8, batch_

Validation:   6%| | 41/743 [10:16<2:58:19, 15.24s/batch, batch_loss=19.7, batch_

Validation:   6%| | 42/743 [10:16<2:54:29, 14.94s/batch, batch_loss=19.7, batch_

Validation:   6%| | 42/743 [10:32<2:54:29, 14.94s/batch, batch_loss=10.4, batch_

Validation:   6%| | 43/743 [10:32<2:55:41, 15.06s/batch, batch_loss=10.4, batch_

Validation:   6%| | 43/743 [10:47<2:55:41, 15.06s/batch, batch_loss=15.5, batch_

Validation:   6%| | 44/743 [10:47<2:56:13, 15.13s/batch, batch_loss=15.5, batch_

Validation:   6%| | 44/743 [11:02<2:56:13, 15.13s/batch, batch_loss=32.8, batch_

Validation:   6%| | 45/743 [11:02<2:55:20, 15.07s/batch, batch_loss=32.8, batch_

Validation:   6%| | 45/743 [11:17<2:55:20, 15.07s/batch, batch_loss=7.63, batch_

Validation:   6%| | 46/743 [11:17<2:53:53, 14.97s/batch, batch_loss=7.63, batch_

Validation:   6%| | 46/743 [11:34<2:53:53, 14.97s/batch, batch_loss=21.4, batch_

Validation:   6%| | 47/743 [11:34<3:02:39, 15.75s/batch, batch_loss=21.4, batch_

Validation:   6%| | 47/743 [11:50<3:02:39, 15.75s/batch, batch_loss=23.3, batch_

Validation:   6%| | 48/743 [11:50<3:01:37, 15.68s/batch, batch_loss=23.3, batch_

Validation:   6%| | 48/743 [12:05<3:01:37, 15.68s/batch, batch_loss=20.3, batch_

Validation:   7%| | 49/743 [12:05<3:00:29, 15.60s/batch, batch_loss=20.3, batch_

Validation:   7%| | 49/743 [12:21<3:00:29, 15.60s/batch, batch_loss=19.1, batch_

Validation:   7%| | 50/743 [12:21<3:01:31, 15.72s/batch, batch_loss=19.1, batch_

Validation:   7%| | 50/743 [12:36<3:01:31, 15.72s/batch, batch_loss=21.9, batch_

Validation:   7%| | 51/743 [12:36<2:58:33, 15.48s/batch, batch_loss=21.9, batch_

Validation:   7%| | 51/743 [12:51<2:58:33, 15.48s/batch, batch_loss=23.3, batch_

Validation:   7%| | 52/743 [12:51<2:55:22, 15.23s/batch, batch_loss=23.3, batch_

Validation:   7%| | 52/743 [13:05<2:55:22, 15.23s/batch, batch_loss=31.4, batch_

Validation:   7%| | 53/743 [13:05<2:52:30, 15.00s/batch, batch_loss=31.4, batch_

Validation:   7%| | 53/743 [13:19<2:52:30, 15.00s/batch, batch_loss=13.8, batch_

Validation:   7%| | 54/743 [13:19<2:48:20, 14.66s/batch, batch_loss=13.8, batch_

Validation:   7%| | 54/743 [13:36<2:48:20, 14.66s/batch, batch_loss=33.5, batch_

Validation:   7%| | 55/743 [13:36<2:55:46, 15.33s/batch, batch_loss=33.5, batch_

Validation:   7%| | 55/743 [13:51<2:55:46, 15.33s/batch, batch_loss=26.1, batch_

Validation:   8%| | 56/743 [13:51<2:54:12, 15.21s/batch, batch_loss=26.1, batch_

Validation:   8%| | 56/743 [14:06<2:54:12, 15.21s/batch, batch_loss=13, batch_in

Validation:   8%| | 57/743 [14:06<2:52:36, 15.10s/batch, batch_loss=13, batch_in

Validation:   8%| | 57/743 [14:20<2:52:36, 15.10s/batch, batch_loss=22.6, batch_

Validation:   8%| | 58/743 [14:20<2:48:54, 14.80s/batch, batch_loss=22.6, batch_

Validation:   8%| | 58/743 [14:35<2:48:54, 14.80s/batch, batch_loss=113, batch_i

Validation:   8%| | 59/743 [14:35<2:50:07, 14.92s/batch, batch_loss=113, batch_i

Validation:   8%| | 59/743 [14:49<2:50:07, 14.92s/batch, batch_loss=6.12e+3, bat

Validation:   8%| | 60/743 [14:49<2:45:48, 14.57s/batch, batch_loss=6.12e+3, bat

Validation:   8%| | 60/743 [15:03<2:45:48, 14.57s/batch, batch_loss=9.84, batch_

Validation:   8%| | 61/743 [15:03<2:43:30, 14.39s/batch, batch_loss=9.84, batch_

Validation:   8%| | 61/743 [15:16<2:43:30, 14.39s/batch, batch_loss=10.6, batch_

Validation:   8%| | 62/743 [15:16<2:41:05, 14.19s/batch, batch_loss=10.6, batch_

Validation:   8%| | 62/743 [15:31<2:41:05, 14.19s/batch, batch_loss=31.9, batch_

Validation:   8%| | 63/743 [15:31<2:42:50, 14.37s/batch, batch_loss=31.9, batch_

Validation:   8%| | 63/743 [15:46<2:42:50, 14.37s/batch, batch_loss=16.2, batch_

Validation:   9%| | 64/743 [15:46<2:43:43, 14.47s/batch, batch_loss=16.2, batch_

Validation:   9%| | 64/743 [16:00<2:43:43, 14.47s/batch, batch_loss=23, batch_in

Validation:   9%| | 65/743 [16:00<2:41:50, 14.32s/batch, batch_loss=23, batch_in

Validation:   9%| | 65/743 [16:14<2:41:50, 14.32s/batch, batch_loss=1.26e+3, bat

Validation:   9%| | 66/743 [16:14<2:41:34, 14.32s/batch, batch_loss=1.26e+3, bat

Validation:   9%| | 66/743 [16:29<2:41:34, 14.32s/batch, batch_loss=21, batch_in

Validation:   9%| | 67/743 [16:29<2:42:53, 14.46s/batch, batch_loss=21, batch_in

Validation:   9%| | 67/743 [16:43<2:42:53, 14.46s/batch, batch_loss=20.8, batch_

Validation:   9%| | 68/743 [16:43<2:41:29, 14.35s/batch, batch_loss=20.8, batch_

Validation:   9%| | 68/743 [16:57<2:41:29, 14.35s/batch, batch_loss=11.3, batch_

Validation:   9%| | 69/743 [16:57<2:39:13, 14.17s/batch, batch_loss=11.3, batch_

Validation:   9%| | 69/743 [17:11<2:39:13, 14.17s/batch, batch_loss=16.1, batch_

Validation:   9%| | 70/743 [17:11<2:40:46, 14.33s/batch, batch_loss=16.1, batch_

Validation:   9%| | 70/743 [17:26<2:40:46, 14.33s/batch, batch_loss=10.3, batch_

Validation:  10%| | 71/743 [17:26<2:40:18, 14.31s/batch, batch_loss=10.3, batch_

Validation:  10%| | 71/743 [17:43<2:40:18, 14.31s/batch, batch_loss=18.4, batch_

Validation:  10%| | 72/743 [17:43<2:51:21, 15.32s/batch, batch_loss=18.4, batch_

Validation:  10%| | 72/743 [17:59<2:51:21, 15.32s/batch, batch_loss=18.5, batch_

Validation:  10%| | 73/743 [17:59<2:50:57, 15.31s/batch, batch_loss=18.5, batch_

Validation:  10%| | 73/743 [18:12<2:50:57, 15.31s/batch, batch_loss=25.8, batch_

Validation:  10%| | 74/743 [18:12<2:45:02, 14.80s/batch, batch_loss=25.8, batch_

Validation:  10%| | 74/743 [18:25<2:45:02, 14.80s/batch, batch_loss=12.3, batch_

Validation:  10%| | 75/743 [18:25<2:38:43, 14.26s/batch, batch_loss=12.3, batch_

Validation:  10%| | 75/743 [18:38<2:38:43, 14.26s/batch, batch_loss=16.9, batch_

Validation:  10%| | 76/743 [18:38<2:33:41, 13.83s/batch, batch_loss=16.9, batch_

Validation:  10%| | 76/743 [18:52<2:33:41, 13.83s/batch, batch_loss=14.2, batch_

Validation:  10%| | 77/743 [18:52<2:32:37, 13.75s/batch, batch_loss=14.2, batch_

Validation:  10%| | 77/743 [19:06<2:32:37, 13.75s/batch, batch_loss=22.6, batch_

Validation:  10%| | 78/743 [19:06<2:34:22, 13.93s/batch, batch_loss=22.6, batch_

Validation:  10%| | 78/743 [19:21<2:34:22, 13.93s/batch, batch_loss=10.2, batch_

Validation:  11%| | 79/743 [19:21<2:36:11, 14.11s/batch, batch_loss=10.2, batch_

Validation:  11%| | 79/743 [19:35<2:36:11, 14.11s/batch, batch_loss=7.47, batch_

Validation:  11%| | 80/743 [19:35<2:37:36, 14.26s/batch, batch_loss=7.47, batch_

Validation:  11%| | 80/743 [19:50<2:37:36, 14.26s/batch, batch_loss=152, batch_i

Validation:  11%| | 81/743 [19:50<2:39:12, 14.43s/batch, batch_loss=152, batch_i

Validation:  11%| | 81/743 [20:04<2:39:12, 14.43s/batch, batch_loss=1.52e+3, bat

Validation:  11%| | 82/743 [20:04<2:36:48, 14.23s/batch, batch_loss=1.52e+3, bat

Validation:  11%| | 82/743 [20:18<2:36:48, 14.23s/batch, batch_loss=42.9, batch_

Validation:  11%| | 83/743 [20:18<2:36:01, 14.18s/batch, batch_loss=42.9, batch_

Validation:  11%| | 83/743 [20:32<2:36:01, 14.18s/batch, batch_loss=20.8, batch_

Validation:  11%| | 84/743 [20:32<2:34:37, 14.08s/batch, batch_loss=20.8, batch_

Validation:  11%| | 84/743 [20:46<2:34:37, 14.08s/batch, batch_loss=28.8, batch_

Validation:  11%| | 85/743 [20:46<2:36:28, 14.27s/batch, batch_loss=28.8, batch_

Validation:  11%| | 85/743 [21:00<2:36:28, 14.27s/batch, batch_loss=32.5, batch_

Validation:  12%| | 86/743 [21:00<2:33:22, 14.01s/batch, batch_loss=32.5, batch_

Validation:  12%| | 86/743 [21:14<2:33:22, 14.01s/batch, batch_loss=43.4, batch_

Validation:  12%| | 87/743 [21:14<2:35:05, 14.19s/batch, batch_loss=43.4, batch_

Validation:  12%| | 87/743 [21:31<2:35:05, 14.19s/batch, batch_loss=37.6, batch_

Validation:  12%| | 88/743 [21:31<2:41:47, 14.82s/batch, batch_loss=37.6, batch_

Validation:  12%| | 88/743 [21:46<2:41:47, 14.82s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [21:46<2:43:12, 14.97s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [22:00<2:43:12, 14.97s/batch, batch_loss=5.31, batch_

Validation:  12%| | 90/743 [22:00<2:40:00, 14.70s/batch, batch_loss=5.31, batch_

Validation:  12%| | 90/743 [22:15<2:40:00, 14.70s/batch, batch_loss=45.9, batch_

Validation:  12%| | 91/743 [22:15<2:41:07, 14.83s/batch, batch_loss=45.9, batch_

Validation:  12%| | 91/743 [22:31<2:41:07, 14.83s/batch, batch_loss=47.2, batch_

Validation:  12%| | 92/743 [22:31<2:42:52, 15.01s/batch, batch_loss=47.2, batch_

Validation:  12%| | 92/743 [22:45<2:42:52, 15.01s/batch, batch_loss=38.6, batch_

Validation:  13%|▏| 93/743 [22:45<2:40:39, 14.83s/batch, batch_loss=38.6, batch_

Validation:  13%|▏| 93/743 [22:59<2:40:39, 14.83s/batch, batch_loss=38.2, batch_

Validation:  13%|▏| 94/743 [22:59<2:38:53, 14.69s/batch, batch_loss=38.2, batch_

Validation:  13%|▏| 94/743 [23:14<2:38:53, 14.69s/batch, batch_loss=16, batch_in

Validation:  13%|▏| 95/743 [23:14<2:37:31, 14.59s/batch, batch_loss=16, batch_in

Validation:  13%|▏| 95/743 [23:29<2:37:31, 14.59s/batch, batch_loss=33.3, batch_

Validation:  13%|▏| 96/743 [23:29<2:40:23, 14.87s/batch, batch_loss=33.3, batch_

Validation:  13%|▏| 96/743 [23:47<2:40:23, 14.87s/batch, batch_loss=38.4, batch_

Validation:  13%|▏| 97/743 [23:47<2:48:05, 15.61s/batch, batch_loss=38.4, batch_

Validation:  13%|▏| 97/743 [24:01<2:48:05, 15.61s/batch, batch_loss=23, batch_in

Validation:  13%|▏| 98/743 [24:01<2:43:15, 15.19s/batch, batch_loss=23, batch_in

Validation:  13%|▏| 98/743 [24:16<2:43:15, 15.19s/batch, batch_loss=28.8, batch_

Validation:  13%|▏| 99/743 [24:16<2:41:30, 15.05s/batch, batch_loss=28.8, batch_

Validation:  13%|▏| 99/743 [24:30<2:41:30, 15.05s/batch, batch_loss=14.6, batch_

Validation:  13%|▏| 100/743 [24:30<2:40:18, 14.96s/batch, batch_loss=14.6, batch

Validation:  13%|▏| 100/743 [24:45<2:40:18, 14.96s/batch, batch_loss=24.5, batch

Validation:  14%|▏| 101/743 [24:45<2:39:20, 14.89s/batch, batch_loss=24.5, batch

Validation:  14%|▏| 101/743 [24:59<2:39:20, 14.89s/batch, batch_loss=17.1, batch

Validation:  14%|▏| 102/743 [24:59<2:35:41, 14.57s/batch, batch_loss=17.1, batch

Validation:  14%|▏| 102/743 [25:14<2:35:41, 14.57s/batch, batch_loss=3.43e+3, ba

Validation:  14%|▏| 103/743 [25:14<2:35:55, 14.62s/batch, batch_loss=3.43e+3, ba

Validation:  14%|▏| 103/743 [25:28<2:35:55, 14.62s/batch, batch_loss=20.4, batch

Validation:  14%|▏| 104/743 [25:28<2:34:45, 14.53s/batch, batch_loss=20.4, batch

Validation:  14%|▏| 104/743 [25:44<2:34:45, 14.53s/batch, batch_loss=6.3, batch_

Validation:  14%|▏| 105/743 [25:44<2:40:20, 15.08s/batch, batch_loss=6.3, batch_

Validation:  14%|▏| 105/743 [25:59<2:40:20, 15.08s/batch, batch_loss=16.9, batch

Validation:  14%|▏| 106/743 [25:59<2:38:34, 14.94s/batch, batch_loss=16.9, batch

Validation:  14%|▏| 106/743 [26:13<2:38:34, 14.94s/batch, batch_loss=721, batch_

Validation:  14%|▏| 107/743 [26:13<2:36:01, 14.72s/batch, batch_loss=721, batch_

Validation:  14%|▏| 107/743 [26:27<2:36:01, 14.72s/batch, batch_loss=1.61e+3, ba

Validation:  15%|▏| 108/743 [26:27<2:32:51, 14.44s/batch, batch_loss=1.61e+3, ba

Validation:  15%|▏| 108/743 [26:41<2:32:51, 14.44s/batch, batch_loss=209, batch_

Validation:  15%|▏| 109/743 [26:41<2:30:07, 14.21s/batch, batch_loss=209, batch_

Validation:  15%|▏| 109/743 [26:55<2:30:07, 14.21s/batch, batch_loss=41.7, batch

Validation:  15%|▏| 110/743 [26:55<2:29:07, 14.13s/batch, batch_loss=41.7, batch

Validation:  15%|▏| 110/743 [27:09<2:29:07, 14.13s/batch, batch_loss=13.8, batch

Validation:  15%|▏| 111/743 [27:09<2:28:53, 14.14s/batch, batch_loss=13.8, batch

Validation:  15%|▏| 111/743 [27:24<2:28:53, 14.14s/batch, batch_loss=38.2, batch

Validation:  15%|▏| 112/743 [27:24<2:31:58, 14.45s/batch, batch_loss=38.2, batch

Validation:  15%|▏| 112/743 [27:38<2:31:58, 14.45s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [27:38<2:31:29, 14.43s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [27:52<2:31:29, 14.43s/batch, batch_loss=21, batch_i

Validation:  15%|▏| 114/743 [27:52<2:30:41, 14.38s/batch, batch_loss=21, batch_i

Validation:  15%|▏| 114/743 [28:07<2:30:41, 14.38s/batch, batch_loss=27.8, batch

Validation:  15%|▏| 115/743 [28:07<2:31:33, 14.48s/batch, batch_loss=27.8, batch

Validation:  15%|▏| 115/743 [28:22<2:31:33, 14.48s/batch, batch_loss=25.3, batch

Validation:  16%|▏| 116/743 [28:22<2:33:32, 14.69s/batch, batch_loss=25.3, batch

Validation:  16%|▏| 116/743 [28:36<2:33:32, 14.69s/batch, batch_loss=41.1, batch

Validation:  16%|▏| 117/743 [28:36<2:30:44, 14.45s/batch, batch_loss=41.1, batch

Validation:  16%|▏| 117/743 [28:51<2:30:44, 14.45s/batch, batch_loss=33.4, batch

Validation:  16%|▏| 118/743 [28:51<2:30:04, 14.41s/batch, batch_loss=33.4, batch

Validation:  16%|▏| 118/743 [29:05<2:30:04, 14.41s/batch, batch_loss=19.9, batch

Validation:  16%|▏| 119/743 [29:05<2:29:12, 14.35s/batch, batch_loss=19.9, batch

Validation:  16%|▏| 119/743 [29:19<2:29:12, 14.35s/batch, batch_loss=26.8, batch

Validation:  16%|▏| 120/743 [29:19<2:29:10, 14.37s/batch, batch_loss=26.8, batch

Validation:  16%|▏| 120/743 [29:36<2:29:10, 14.37s/batch, batch_loss=17.4, batch

Validation:  16%|▏| 121/743 [29:36<2:37:10, 15.16s/batch, batch_loss=17.4, batch

Validation:  16%|▏| 121/743 [29:51<2:37:10, 15.16s/batch, batch_loss=3.93, batch

Validation:  16%|▏| 122/743 [29:51<2:34:44, 14.95s/batch, batch_loss=3.93, batch

Validation:  16%|▏| 122/743 [30:05<2:34:44, 14.95s/batch, batch_loss=8.47, batch

Validation:  17%|▏| 123/743 [30:05<2:33:29, 14.85s/batch, batch_loss=8.47, batch

Validation:  17%|▏| 123/743 [30:21<2:33:29, 14.85s/batch, batch_loss=10.8, batch

Validation:  17%|▏| 124/743 [30:21<2:35:34, 15.08s/batch, batch_loss=10.8, batch

Validation:  17%|▏| 124/743 [30:36<2:35:34, 15.08s/batch, batch_loss=39, batch_i

Validation:  17%|▏| 125/743 [30:36<2:34:42, 15.02s/batch, batch_loss=39, batch_i

Validation:  17%|▏| 125/743 [30:50<2:34:42, 15.02s/batch, batch_loss=18.9, batch

Validation:  17%|▏| 126/743 [30:50<2:30:48, 14.67s/batch, batch_loss=18.9, batch

Validation:  17%|▏| 126/743 [31:04<2:30:48, 14.67s/batch, batch_loss=13.2, batch

Validation:  17%|▏| 127/743 [31:04<2:30:43, 14.68s/batch, batch_loss=13.2, batch

Validation:  17%|▏| 127/743 [31:19<2:30:43, 14.68s/batch, batch_loss=31.8, batch

Validation:  17%|▏| 128/743 [31:19<2:28:56, 14.53s/batch, batch_loss=31.8, batch

Validation:  17%|▏| 128/743 [31:33<2:28:56, 14.53s/batch, batch_loss=16.8, batch

Validation:  17%|▏| 129/743 [31:33<2:29:03, 14.57s/batch, batch_loss=16.8, batch

Validation:  17%|▏| 129/743 [31:48<2:29:03, 14.57s/batch, batch_loss=30, batch_i

Validation:  17%|▏| 130/743 [31:48<2:29:31, 14.64s/batch, batch_loss=30, batch_i

Validation:  17%|▏| 130/743 [32:03<2:29:31, 14.64s/batch, batch_loss=41.1, batch

Validation:  18%|▏| 131/743 [32:03<2:29:52, 14.69s/batch, batch_loss=41.1, batch

Validation:  18%|▏| 131/743 [32:17<2:29:52, 14.69s/batch, batch_loss=27.4, batch

Validation:  18%|▏| 132/743 [32:17<2:28:09, 14.55s/batch, batch_loss=27.4, batch

Validation:  18%|▏| 132/743 [32:31<2:28:09, 14.55s/batch, batch_loss=50.8, batch

Validation:  18%|▏| 133/743 [32:31<2:27:43, 14.53s/batch, batch_loss=50.8, batch

Validation:  18%|▏| 133/743 [32:45<2:27:43, 14.53s/batch, batch_loss=41.5, batch

Validation:  18%|▏| 134/743 [32:45<2:25:13, 14.31s/batch, batch_loss=41.5, batch

Validation:  18%|▏| 134/743 [32:59<2:25:13, 14.31s/batch, batch_loss=43.3, batch

Validation:  18%|▏| 135/743 [32:59<2:22:55, 14.10s/batch, batch_loss=43.3, batch

Validation:  18%|▏| 135/743 [33:13<2:22:55, 14.10s/batch, batch_loss=25.8, batch

Validation:  18%|▏| 136/743 [33:13<2:22:29, 14.08s/batch, batch_loss=25.8, batch

Validation:  18%|▏| 136/743 [33:27<2:22:29, 14.08s/batch, batch_loss=25.8, batch

Validation:  18%|▏| 137/743 [33:27<2:20:41, 13.93s/batch, batch_loss=25.8, batch

Validation:  18%|▏| 137/743 [33:44<2:20:41, 13.93s/batch, batch_loss=6.65, batch

Validation:  19%|▏| 138/743 [33:44<2:30:37, 14.94s/batch, batch_loss=6.65, batch

Validation:  19%|▏| 138/743 [33:58<2:30:37, 14.94s/batch, batch_loss=255, batch_

Validation:  19%|▏| 139/743 [33:58<2:27:12, 14.62s/batch, batch_loss=255, batch_

Validation:  19%|▏| 139/743 [34:12<2:27:12, 14.62s/batch, batch_loss=28.8, batch

Validation:  19%|▏| 140/743 [34:12<2:26:45, 14.60s/batch, batch_loss=28.8, batch

Validation:  19%|▏| 140/743 [34:27<2:26:45, 14.60s/batch, batch_loss=13.2, batch

Validation:  19%|▏| 141/743 [34:27<2:26:12, 14.57s/batch, batch_loss=13.2, batch

Validation:  19%|▏| 141/743 [34:41<2:26:12, 14.57s/batch, batch_loss=20.1, batch

Validation:  19%|▏| 142/743 [34:41<2:23:44, 14.35s/batch, batch_loss=20.1, batch

Validation:  19%|▏| 142/743 [34:55<2:23:44, 14.35s/batch, batch_loss=18.8, batch

Validation:  19%|▏| 143/743 [34:55<2:24:17, 14.43s/batch, batch_loss=18.8, batch

Validation:  19%|▏| 143/743 [35:13<2:24:17, 14.43s/batch, batch_loss=30.7, batch

Validation:  19%|▏| 144/743 [35:13<2:33:42, 15.40s/batch, batch_loss=30.7, batch

Validation:  19%|▏| 144/743 [35:29<2:33:42, 15.40s/batch, batch_loss=16.4, batch

Validation:  20%|▏| 145/743 [35:29<2:34:43, 15.52s/batch, batch_loss=16.4, batch

Validation:  20%|▏| 145/743 [35:43<2:34:43, 15.52s/batch, batch_loss=22.4, batch

Validation:  20%|▏| 146/743 [35:43<2:31:57, 15.27s/batch, batch_loss=22.4, batch

Validation:  20%|▏| 146/743 [35:58<2:31:57, 15.27s/batch, batch_loss=18.5, batch

Validation:  20%|▏| 147/743 [35:58<2:28:45, 14.98s/batch, batch_loss=18.5, batch

Validation:  20%|▏| 147/743 [36:12<2:28:45, 14.98s/batch, batch_loss=3.19e+4, ba

Validation:  20%|▏| 148/743 [36:12<2:25:51, 14.71s/batch, batch_loss=3.19e+4, ba

Validation:  20%|▏| 148/743 [36:26<2:25:51, 14.71s/batch, batch_loss=35.7, batch

Validation:  20%|▏| 149/743 [36:26<2:22:54, 14.43s/batch, batch_loss=35.7, batch

Validation:  20%|▏| 149/743 [36:40<2:22:54, 14.43s/batch, batch_loss=34.6, batch

Validation:  20%|▏| 150/743 [36:40<2:23:31, 14.52s/batch, batch_loss=34.6, batch

Validation:  20%|▏| 150/743 [36:55<2:23:31, 14.52s/batch, batch_loss=19, batch_i

Validation:  20%|▏| 151/743 [36:55<2:23:45, 14.57s/batch, batch_loss=19, batch_i

Validation:  20%|▏| 151/743 [37:09<2:23:45, 14.57s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [37:09<2:22:29, 14.47s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [37:24<2:22:29, 14.47s/batch, batch_loss=19, batch_i

Validation:  21%|▏| 153/743 [37:24<2:23:08, 14.56s/batch, batch_loss=19, batch_i

Validation:  21%|▏| 153/743 [37:38<2:23:08, 14.56s/batch, batch_loss=18.2, batch

Validation:  21%|▏| 154/743 [37:38<2:22:03, 14.47s/batch, batch_loss=18.2, batch

Validation:  21%|▏| 154/743 [37:52<2:22:03, 14.47s/batch, batch_loss=25.2, batch

Validation:  21%|▏| 155/743 [37:52<2:20:01, 14.29s/batch, batch_loss=25.2, batch

Validation:  21%|▏| 155/743 [38:07<2:20:01, 14.29s/batch, batch_loss=22.7, batch

Validation:  21%|▏| 156/743 [38:07<2:21:08, 14.43s/batch, batch_loss=22.7, batch

Validation:  21%|▏| 156/743 [38:22<2:21:08, 14.43s/batch, batch_loss=26.9, batch

Validation:  21%|▏| 157/743 [38:22<2:22:19, 14.57s/batch, batch_loss=26.9, batch

Validation:  21%|▏| 157/743 [38:37<2:22:19, 14.57s/batch, batch_loss=26.5, batch

Validation:  21%|▏| 158/743 [38:37<2:23:53, 14.76s/batch, batch_loss=26.5, batch

Validation:  21%|▏| 158/743 [38:51<2:23:53, 14.76s/batch, batch_loss=30.2, batch

Validation:  21%|▏| 159/743 [38:51<2:22:28, 14.64s/batch, batch_loss=30.2, batch

Validation:  21%|▏| 159/743 [39:06<2:22:28, 14.64s/batch, batch_loss=18.3, batch

Validation:  22%|▏| 160/743 [39:06<2:23:08, 14.73s/batch, batch_loss=18.3, batch

Validation:  22%|▏| 160/743 [39:20<2:23:08, 14.73s/batch, batch_loss=28.1, batch

Validation:  22%|▏| 161/743 [39:20<2:20:46, 14.51s/batch, batch_loss=28.1, batch

Validation:  22%|▏| 161/743 [39:35<2:20:46, 14.51s/batch, batch_loss=30.2, batch

Validation:  22%|▏| 162/743 [39:35<2:22:43, 14.74s/batch, batch_loss=30.2, batch

Validation:  22%|▏| 162/743 [39:50<2:22:43, 14.74s/batch, batch_loss=14.9, batch

Validation:  22%|▏| 163/743 [39:50<2:22:33, 14.75s/batch, batch_loss=14.9, batch

Validation:  22%|▏| 163/743 [40:05<2:22:33, 14.75s/batch, batch_loss=9.87, batch

Validation:  22%|▏| 164/743 [40:05<2:22:16, 14.74s/batch, batch_loss=9.87, batch

Validation:  22%|▏| 164/743 [40:19<2:22:16, 14.74s/batch, batch_loss=19.6, batch

Validation:  22%|▏| 165/743 [40:19<2:21:10, 14.65s/batch, batch_loss=19.6, batch

Validation:  22%|▏| 165/743 [40:34<2:21:10, 14.65s/batch, batch_loss=13.2, batch

Validation:  22%|▏| 166/743 [40:34<2:20:29, 14.61s/batch, batch_loss=13.2, batch

Validation:  22%|▏| 166/743 [40:49<2:20:29, 14.61s/batch, batch_loss=13.7, batch

Validation:  22%|▏| 167/743 [40:49<2:20:49, 14.67s/batch, batch_loss=13.7, batch

Validation:  22%|▏| 167/743 [41:03<2:20:49, 14.67s/batch, batch_loss=26.6, batch

Validation:  23%|▏| 168/743 [41:03<2:19:52, 14.60s/batch, batch_loss=26.6, batch

Validation:  23%|▏| 168/743 [41:18<2:19:52, 14.60s/batch, batch_loss=27.2, batch

Validation:  23%|▏| 169/743 [41:18<2:19:30, 14.58s/batch, batch_loss=27.2, batch

Validation:  23%|▏| 169/743 [41:32<2:19:30, 14.58s/batch, batch_loss=29.6, batch

Validation:  23%|▏| 170/743 [41:32<2:18:43, 14.53s/batch, batch_loss=29.6, batch

Validation:  23%|▏| 170/743 [41:46<2:18:43, 14.53s/batch, batch_loss=29.3, batch

Validation:  23%|▏| 171/743 [41:46<2:15:51, 14.25s/batch, batch_loss=29.3, batch

Validation:  23%|▏| 171/743 [42:00<2:15:51, 14.25s/batch, batch_loss=32.6, batch

Validation:  23%|▏| 172/743 [42:00<2:15:05, 14.20s/batch, batch_loss=32.6, batch

Validation:  23%|▏| 172/743 [42:15<2:15:05, 14.20s/batch, batch_loss=26.5, batch

Validation:  23%|▏| 173/743 [42:15<2:17:13, 14.45s/batch, batch_loss=26.5, batch

Validation:  23%|▏| 173/743 [42:30<2:17:13, 14.45s/batch, batch_loss=26.3, batch

Validation:  23%|▏| 174/743 [42:30<2:19:08, 14.67s/batch, batch_loss=26.3, batch

Validation:  23%|▏| 174/743 [42:45<2:19:08, 14.67s/batch, batch_loss=28.5, batch

Validation:  24%|▏| 175/743 [42:45<2:18:54, 14.67s/batch, batch_loss=28.5, batch

Validation:  24%|▏| 175/743 [42:59<2:18:54, 14.67s/batch, batch_loss=18.6, batch

Validation:  24%|▏| 176/743 [42:59<2:16:28, 14.44s/batch, batch_loss=18.6, batch

Validation:  24%|▏| 176/743 [43:13<2:16:28, 14.44s/batch, batch_loss=21.5, batch

Validation:  24%|▏| 177/743 [43:13<2:15:03, 14.32s/batch, batch_loss=21.5, batch

Validation:  24%|▏| 177/743 [43:27<2:15:03, 14.32s/batch, batch_loss=43.5, batch

Validation:  24%|▏| 178/743 [43:27<2:15:30, 14.39s/batch, batch_loss=43.5, batch

Validation:  24%|▏| 178/743 [43:41<2:15:30, 14.39s/batch, batch_loss=31.9, batch

Validation:  24%|▏| 179/743 [43:41<2:14:37, 14.32s/batch, batch_loss=31.9, batch

Validation:  24%|▏| 179/743 [43:56<2:14:37, 14.32s/batch, batch_loss=7.25e+3, ba

Validation:  24%|▏| 180/743 [43:56<2:14:58, 14.39s/batch, batch_loss=7.25e+3, ba

Validation:  24%|▏| 180/743 [44:10<2:14:58, 14.39s/batch, batch_loss=32, batch_i

Validation:  24%|▏| 181/743 [44:10<2:14:59, 14.41s/batch, batch_loss=32, batch_i

Validation:  24%|▏| 181/743 [44:24<2:14:59, 14.41s/batch, batch_loss=24.7, batch

Validation:  24%|▏| 182/743 [44:24<2:13:43, 14.30s/batch, batch_loss=24.7, batch

Validation:  24%|▏| 182/743 [44:39<2:13:43, 14.30s/batch, batch_loss=26, batch_i

Validation:  25%|▏| 183/743 [44:39<2:14:26, 14.40s/batch, batch_loss=26, batch_i

Validation:  25%|▏| 183/743 [44:54<2:14:26, 14.40s/batch, batch_loss=14.1, batch

Validation:  25%|▏| 184/743 [44:54<2:15:56, 14.59s/batch, batch_loss=14.1, batch

Validation:  25%|▏| 184/743 [45:09<2:15:56, 14.59s/batch, batch_loss=29.5, batch

Validation:  25%|▏| 185/743 [45:09<2:17:18, 14.76s/batch, batch_loss=29.5, batch

Validation:  25%|▏| 185/743 [45:24<2:17:18, 14.76s/batch, batch_loss=26.2, batch

Validation:  25%|▎| 186/743 [45:24<2:17:09, 14.77s/batch, batch_loss=26.2, batch

Validation:  25%|▎| 186/743 [45:39<2:17:09, 14.77s/batch, batch_loss=49.5, batch

Validation:  25%|▎| 187/743 [45:39<2:17:21, 14.82s/batch, batch_loss=49.5, batch

Validation:  25%|▎| 187/743 [45:54<2:17:21, 14.82s/batch, batch_loss=23, batch_i

Validation:  25%|▎| 188/743 [45:54<2:16:22, 14.74s/batch, batch_loss=23, batch_i

Validation:  25%|▎| 188/743 [46:08<2:16:22, 14.74s/batch, batch_loss=21.4, batch

Validation:  25%|▎| 189/743 [46:08<2:14:26, 14.56s/batch, batch_loss=21.4, batch

Validation:  25%|▎| 189/743 [46:22<2:14:26, 14.56s/batch, batch_loss=984, batch_

Validation:  26%|▎| 190/743 [46:22<2:13:42, 14.51s/batch, batch_loss=984, batch_

Validation:  26%|▎| 190/743 [46:36<2:13:42, 14.51s/batch, batch_loss=32.4, batch

Validation:  26%|▎| 191/743 [46:36<2:13:06, 14.47s/batch, batch_loss=32.4, batch

Validation:  26%|▎| 191/743 [46:50<2:13:06, 14.47s/batch, batch_loss=18.3, batch

Validation:  26%|▎| 192/743 [46:50<2:09:10, 14.07s/batch, batch_loss=18.3, batch

Validation:  26%|▎| 192/743 [47:04<2:09:10, 14.07s/batch, batch_loss=22.4, batch

Validation:  26%|▎| 193/743 [47:04<2:10:01, 14.18s/batch, batch_loss=22.4, batch

Validation:  26%|▎| 193/743 [47:21<2:10:01, 14.18s/batch, batch_loss=23.5, batch

Validation:  26%|▎| 194/743 [47:21<2:16:45, 14.95s/batch, batch_loss=23.5, batch

Validation:  26%|▎| 194/743 [47:35<2:16:45, 14.95s/batch, batch_loss=11.7, batch

Validation:  26%|▎| 195/743 [47:35<2:13:32, 14.62s/batch, batch_loss=11.7, batch

Validation:  26%|▎| 195/743 [47:49<2:13:32, 14.62s/batch, batch_loss=22.9, batch

Validation:  26%|▎| 196/743 [47:49<2:13:55, 14.69s/batch, batch_loss=22.9, batch

Validation:  26%|▎| 196/743 [48:04<2:13:55, 14.69s/batch, batch_loss=10.8, batch

Validation:  27%|▎| 197/743 [48:04<2:12:32, 14.56s/batch, batch_loss=10.8, batch

Validation:  27%|▎| 197/743 [48:17<2:12:32, 14.56s/batch, batch_loss=22.6, batch

Validation:  27%|▎| 198/743 [48:17<2:09:32, 14.26s/batch, batch_loss=22.6, batch

Validation:  27%|▎| 198/743 [48:30<2:09:32, 14.26s/batch, batch_loss=21.2, batch

Validation:  27%|▎| 199/743 [48:30<2:04:39, 13.75s/batch, batch_loss=21.2, batch

Validation:  27%|▎| 199/743 [48:42<2:04:39, 13.75s/batch, batch_loss=282, batch_

Validation:  27%|▎| 200/743 [48:42<2:00:36, 13.33s/batch, batch_loss=282, batch_

Validation:  27%|▎| 200/743 [48:55<2:00:36, 13.33s/batch, batch_loss=57.1, batch

Validation:  27%|▎| 201/743 [48:55<1:59:23, 13.22s/batch, batch_loss=57.1, batch

Validation:  27%|▎| 201/743 [49:09<1:59:23, 13.22s/batch, batch_loss=39.5, batch

Validation:  27%|▎| 202/743 [49:09<2:01:31, 13.48s/batch, batch_loss=39.5, batch

Validation:  27%|▎| 202/743 [49:23<2:01:31, 13.48s/batch, batch_loss=24, batch_i

Validation:  27%|▎| 203/743 [49:23<2:01:42, 13.52s/batch, batch_loss=24, batch_i

Validation:  27%|▎| 203/743 [49:38<2:01:42, 13.52s/batch, batch_loss=20.6, batch

Validation:  27%|▎| 204/743 [49:38<2:05:33, 13.98s/batch, batch_loss=20.6, batch

Validation:  27%|▎| 204/743 [49:52<2:05:33, 13.98s/batch, batch_loss=31.8, batch

Validation:  28%|▎| 205/743 [49:52<2:06:15, 14.08s/batch, batch_loss=31.8, batch

Validation:  28%|▎| 205/743 [50:07<2:06:15, 14.08s/batch, batch_loss=18.8, batch

Validation:  28%|▎| 206/743 [50:07<2:06:35, 14.14s/batch, batch_loss=18.8, batch

Validation:  28%|▎| 206/743 [50:20<2:06:35, 14.14s/batch, batch_loss=25.4, batch

Validation:  28%|▎| 207/743 [50:20<2:05:18, 14.03s/batch, batch_loss=25.4, batch

Validation:  28%|▎| 207/743 [50:36<2:05:18, 14.03s/batch, batch_loss=22, batch_i

Validation:  28%|▎| 208/743 [50:36<2:08:46, 14.44s/batch, batch_loss=22, batch_i

Validation:  28%|▎| 208/743 [50:50<2:08:46, 14.44s/batch, batch_loss=12, batch_i

Validation:  28%|▎| 209/743 [50:50<2:08:28, 14.44s/batch, batch_loss=12, batch_i

Validation:  28%|▎| 209/743 [51:05<2:08:28, 14.44s/batch, batch_loss=12.4, batch

Validation:  28%|▎| 210/743 [51:05<2:08:24, 14.45s/batch, batch_loss=12.4, batch

Validation:  28%|▎| 210/743 [51:22<2:08:24, 14.45s/batch, batch_loss=18.9, batch

Validation:  28%|▎| 211/743 [51:22<2:15:36, 15.29s/batch, batch_loss=18.9, batch

Validation:  28%|▎| 211/743 [51:37<2:15:36, 15.29s/batch, batch_loss=18.5, batch

Validation:  29%|▎| 212/743 [51:37<2:14:01, 15.14s/batch, batch_loss=18.5, batch

Validation:  29%|▎| 212/743 [51:51<2:14:01, 15.14s/batch, batch_loss=540, batch_

Validation:  29%|▎| 213/743 [51:51<2:11:30, 14.89s/batch, batch_loss=540, batch_

Validation:  29%|▎| 213/743 [52:06<2:11:30, 14.89s/batch, batch_loss=12.7, batch

Validation:  29%|▎| 214/743 [52:06<2:11:43, 14.94s/batch, batch_loss=12.7, batch

Validation:  29%|▎| 214/743 [52:20<2:11:43, 14.94s/batch, batch_loss=24.9, batch

Validation:  29%|▎| 215/743 [52:20<2:10:01, 14.78s/batch, batch_loss=24.9, batch

Validation:  29%|▎| 215/743 [52:35<2:10:01, 14.78s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [52:35<2:08:48, 14.67s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [52:49<2:08:48, 14.67s/batch, batch_loss=35.6, batch

Validation:  29%|▎| 217/743 [52:49<2:07:55, 14.59s/batch, batch_loss=35.6, batch

Validation:  29%|▎| 217/743 [53:04<2:07:55, 14.59s/batch, batch_loss=15.7, batch

Validation:  29%|▎| 218/743 [53:04<2:07:25, 14.56s/batch, batch_loss=15.7, batch

Validation:  29%|▎| 218/743 [53:21<2:07:25, 14.56s/batch, batch_loss=39.1, batch

Validation:  29%|▎| 219/743 [53:21<2:14:51, 15.44s/batch, batch_loss=39.1, batch

Validation:  29%|▎| 219/743 [53:35<2:14:51, 15.44s/batch, batch_loss=51.3, batch

Validation:  30%|▎| 220/743 [53:35<2:10:32, 14.98s/batch, batch_loss=51.3, batch

Validation:  30%|▎| 220/743 [53:50<2:10:32, 14.98s/batch, batch_loss=23.4, batch

Validation:  30%|▎| 221/743 [53:50<2:09:49, 14.92s/batch, batch_loss=23.4, batch

Validation:  30%|▎| 221/743 [54:04<2:09:49, 14.92s/batch, batch_loss=11.3, batch

Validation:  30%|▎| 222/743 [54:04<2:07:16, 14.66s/batch, batch_loss=11.3, batch

Validation:  30%|▎| 222/743 [54:18<2:07:16, 14.66s/batch, batch_loss=9.98, batch

Validation:  30%|▎| 223/743 [54:18<2:06:07, 14.55s/batch, batch_loss=9.98, batch

Validation:  30%|▎| 223/743 [54:32<2:06:07, 14.55s/batch, batch_loss=14.7, batch

Validation:  30%|▎| 224/743 [54:32<2:04:34, 14.40s/batch, batch_loss=14.7, batch

Validation:  30%|▎| 224/743 [54:47<2:04:34, 14.40s/batch, batch_loss=4.95e+3, ba

Validation:  30%|▎| 225/743 [54:47<2:05:26, 14.53s/batch, batch_loss=4.95e+3, ba

Validation:  30%|▎| 225/743 [55:01<2:05:26, 14.53s/batch, batch_loss=21, batch_i

Validation:  30%|▎| 226/743 [55:01<2:03:43, 14.36s/batch, batch_loss=21, batch_i

Validation:  30%|▎| 226/743 [55:17<2:03:43, 14.36s/batch, batch_loss=20.6, batch

Validation:  31%|▎| 227/743 [55:17<2:08:13, 14.91s/batch, batch_loss=20.6, batch

Validation:  31%|▎| 227/743 [55:32<2:08:13, 14.91s/batch, batch_loss=20.7, batch

Validation:  31%|▎| 228/743 [55:32<2:06:20, 14.72s/batch, batch_loss=20.7, batch

Validation:  31%|▎| 228/743 [55:46<2:06:20, 14.72s/batch, batch_loss=25.2, batch

Validation:  31%|▎| 229/743 [55:46<2:05:47, 14.68s/batch, batch_loss=25.2, batch

Validation:  31%|▎| 229/743 [56:00<2:05:47, 14.68s/batch, batch_loss=25.4, batch

Validation:  31%|▎| 230/743 [56:00<2:02:41, 14.35s/batch, batch_loss=25.4, batch

Validation:  31%|▎| 230/743 [56:14<2:02:41, 14.35s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [56:14<2:02:02, 14.30s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [56:28<2:02:02, 14.30s/batch, batch_loss=22.1, batch

Validation:  31%|▎| 232/743 [56:28<2:01:22, 14.25s/batch, batch_loss=22.1, batch

Validation:  31%|▎| 232/743 [56:42<2:01:22, 14.25s/batch, batch_loss=10.3, batch

Validation:  31%|▎| 233/743 [56:42<2:00:18, 14.15s/batch, batch_loss=10.3, batch

Validation:  31%|▎| 233/743 [56:56<2:00:18, 14.15s/batch, batch_loss=15.7, batch

Validation:  31%|▎| 234/743 [56:56<1:59:23, 14.07s/batch, batch_loss=15.7, batch

Validation:  31%|▎| 234/743 [57:10<1:59:23, 14.07s/batch, batch_loss=21, batch_i

Validation:  32%|▎| 235/743 [57:10<1:59:55, 14.16s/batch, batch_loss=21, batch_i

Validation:  32%|▎| 235/743 [57:25<1:59:55, 14.16s/batch, batch_loss=2.16, batch

Validation:  32%|▎| 236/743 [57:25<2:01:58, 14.43s/batch, batch_loss=2.16, batch

Validation:  32%|▎| 236/743 [57:42<2:01:58, 14.43s/batch, batch_loss=23.3, batch

Validation:  32%|▎| 237/743 [57:42<2:07:27, 15.11s/batch, batch_loss=23.3, batch

Validation:  32%|▎| 237/743 [57:56<2:07:27, 15.11s/batch, batch_loss=18.1, batch

Validation:  32%|▎| 238/743 [57:56<2:04:53, 14.84s/batch, batch_loss=18.1, batch

Validation:  32%|▎| 238/743 [58:10<2:04:53, 14.84s/batch, batch_loss=4.49e+3, ba

Validation:  32%|▎| 239/743 [58:10<2:01:30, 14.47s/batch, batch_loss=4.49e+3, ba

Validation:  32%|▎| 239/743 [58:24<2:01:30, 14.47s/batch, batch_loss=29.2, batch

Validation:  32%|▎| 240/743 [58:24<2:01:31, 14.50s/batch, batch_loss=29.2, batch

Validation:  32%|▎| 240/743 [58:39<2:01:31, 14.50s/batch, batch_loss=25.6, batch

Validation:  32%|▎| 241/743 [58:39<2:01:09, 14.48s/batch, batch_loss=25.6, batch

Validation:  32%|▎| 241/743 [58:53<2:01:09, 14.48s/batch, batch_loss=246, batch_

Validation:  33%|▎| 242/743 [58:53<1:59:29, 14.31s/batch, batch_loss=246, batch_

Validation:  33%|▎| 242/743 [59:06<1:59:29, 14.31s/batch, batch_loss=9.57, batch

Validation:  33%|▎| 243/743 [59:06<1:57:18, 14.08s/batch, batch_loss=9.57, batch

Validation:  33%|▎| 243/743 [59:21<1:57:18, 14.08s/batch, batch_loss=14.9, batch

Validation:  33%|▎| 244/743 [59:21<1:58:39, 14.27s/batch, batch_loss=14.9, batch

Validation:  33%|▎| 244/743 [59:36<1:58:39, 14.27s/batch, batch_loss=30.7, batch

Validation:  33%|▎| 245/743 [59:36<1:59:12, 14.36s/batch, batch_loss=30.7, batch

Validation:  33%|▎| 245/743 [59:50<1:59:12, 14.36s/batch, batch_loss=6.31, batch

Validation:  33%|▎| 246/743 [59:50<1:59:27, 14.42s/batch, batch_loss=6.31, batch

Validation:  33%|▎| 246/743 [1:00:04<1:59:27, 14.42s/batch, batch_loss=19.2, bat

Validation:  33%|▎| 247/743 [1:00:04<1:57:35, 14.23s/batch, batch_loss=19.2, bat

Validation:  33%|▎| 247/743 [1:00:19<1:57:35, 14.23s/batch, batch_loss=58.9, bat

Validation:  33%|▎| 248/743 [1:00:19<1:58:45, 14.39s/batch, batch_loss=58.9, bat

Validation:  33%|▎| 248/743 [1:00:32<1:58:45, 14.39s/batch, batch_loss=10.8, bat

Validation:  34%|▎| 249/743 [1:00:32<1:56:25, 14.14s/batch, batch_loss=10.8, bat

Validation:  34%|▎| 249/743 [1:00:46<1:56:25, 14.14s/batch, batch_loss=27.4, bat

Validation:  34%|▎| 250/743 [1:00:46<1:55:24, 14.04s/batch, batch_loss=27.4, bat

Validation:  34%|▎| 250/743 [1:01:00<1:55:24, 14.04s/batch, batch_loss=24.4, bat

Validation:  34%|▎| 251/743 [1:01:00<1:55:04, 14.03s/batch, batch_loss=24.4, bat

Validation:  34%|▎| 251/743 [1:01:15<1:55:04, 14.03s/batch, batch_loss=26.4, bat

Validation:  34%|▎| 252/743 [1:01:15<1:57:23, 14.34s/batch, batch_loss=26.4, bat

Validation:  34%|▎| 252/743 [1:01:32<1:57:23, 14.34s/batch, batch_loss=35.7, bat

Validation:  34%|▎| 253/743 [1:01:32<2:02:23, 14.99s/batch, batch_loss=35.7, bat

Validation:  34%|▎| 253/743 [1:01:46<2:02:23, 14.99s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:01:46<2:01:59, 14.97s/batch, batch_loss=1.14e+4, 

Validation:  34%|▎| 254/743 [1:02:01<2:01:59, 14.97s/batch, batch_loss=2.46e+3, 

Validation:  34%|▎| 255/743 [1:02:01<2:01:06, 14.89s/batch, batch_loss=2.46e+3, 

Validation:  34%|▎| 255/743 [1:02:15<2:01:06, 14.89s/batch, batch_loss=21.5, bat

Validation:  34%|▎| 256/743 [1:02:15<1:58:32, 14.60s/batch, batch_loss=21.5, bat

Validation:  34%|▎| 256/743 [1:02:29<1:58:32, 14.60s/batch, batch_loss=37.8, bat

Validation:  35%|▎| 257/743 [1:02:29<1:56:57, 14.44s/batch, batch_loss=37.8, bat

Validation:  35%|▎| 257/743 [1:02:43<1:56:57, 14.44s/batch, batch_loss=12.1, bat

Validation:  35%|▎| 258/743 [1:02:43<1:55:19, 14.27s/batch, batch_loss=12.1, bat

Validation:  35%|▎| 258/743 [1:02:58<1:55:19, 14.27s/batch, batch_loss=3.54, bat

Validation:  35%|▎| 259/743 [1:02:58<1:55:33, 14.33s/batch, batch_loss=3.54, bat

Validation:  35%|▎| 259/743 [1:03:12<1:55:33, 14.33s/batch, batch_loss=1.6, batc

Validation:  35%|▎| 260/743 [1:03:12<1:55:15, 14.32s/batch, batch_loss=1.6, batc

Validation:  35%|▎| 260/743 [1:03:26<1:55:15, 14.32s/batch, batch_loss=11.5, bat

Validation:  35%|▎| 261/743 [1:03:26<1:53:37, 14.14s/batch, batch_loss=11.5, bat

Validation:  35%|▎| 261/743 [1:03:40<1:53:37, 14.14s/batch, batch_loss=37.1, bat

Validation:  35%|▎| 262/743 [1:03:40<1:54:43, 14.31s/batch, batch_loss=37.1, bat

Validation:  35%|▎| 262/743 [1:03:57<1:54:43, 14.31s/batch, batch_loss=2.72e+3, 

Validation:  35%|▎| 263/743 [1:03:57<2:00:37, 15.08s/batch, batch_loss=2.72e+3, 

Validation:  35%|▎| 263/743 [1:04:11<2:00:37, 15.08s/batch, batch_loss=18, batch

Validation:  36%|▎| 264/743 [1:04:11<1:58:08, 14.80s/batch, batch_loss=18, batch

Validation:  36%|▎| 264/743 [1:04:25<1:58:08, 14.80s/batch, batch_loss=26.6, bat

Validation:  36%|▎| 265/743 [1:04:25<1:55:44, 14.53s/batch, batch_loss=26.6, bat

Validation:  36%|▎| 265/743 [1:04:39<1:55:44, 14.53s/batch, batch_loss=42.1, bat

Validation:  36%|▎| 266/743 [1:04:39<1:53:49, 14.32s/batch, batch_loss=42.1, bat

Validation:  36%|▎| 266/743 [1:04:53<1:53:49, 14.32s/batch, batch_loss=25, batch

Validation:  36%|▎| 267/743 [1:04:53<1:52:52, 14.23s/batch, batch_loss=25, batch

Validation:  36%|▎| 267/743 [1:05:07<1:52:52, 14.23s/batch, batch_loss=3.03e+3, 

Validation:  36%|▎| 268/743 [1:05:07<1:51:28, 14.08s/batch, batch_loss=3.03e+3, 

Validation:  36%|▎| 268/743 [1:05:20<1:51:28, 14.08s/batch, batch_loss=51.9, bat

Validation:  36%|▎| 269/743 [1:05:20<1:49:07, 13.81s/batch, batch_loss=51.9, bat

Validation:  36%|▎| 269/743 [1:05:34<1:49:07, 13.81s/batch, batch_loss=46.7, bat

Validation:  36%|▎| 270/743 [1:05:34<1:49:35, 13.90s/batch, batch_loss=46.7, bat

Validation:  36%|▎| 270/743 [1:05:49<1:49:35, 13.90s/batch, batch_loss=39.3, bat

Validation:  36%|▎| 271/743 [1:05:49<1:50:47, 14.08s/batch, batch_loss=39.3, bat

Validation:  36%|▎| 271/743 [1:06:02<1:50:47, 14.08s/batch, batch_loss=1.06e+3, 

Validation:  37%|▎| 272/743 [1:06:02<1:49:25, 13.94s/batch, batch_loss=1.06e+3, 

Validation:  37%|▎| 272/743 [1:06:16<1:49:25, 13.94s/batch, batch_loss=20, batch

Validation:  37%|▎| 273/743 [1:06:16<1:49:42, 14.01s/batch, batch_loss=20, batch

Validation:  37%|▎| 273/743 [1:06:30<1:49:42, 14.01s/batch, batch_loss=26, batch

Validation:  37%|▎| 274/743 [1:06:30<1:49:39, 14.03s/batch, batch_loss=26, batch

Validation:  37%|▎| 274/743 [1:06:45<1:49:39, 14.03s/batch, batch_loss=24.6, bat

Validation:  37%|▎| 275/743 [1:06:45<1:50:25, 14.16s/batch, batch_loss=24.6, bat

Validation:  37%|▎| 275/743 [1:06:59<1:50:25, 14.16s/batch, batch_loss=19.7, bat

Validation:  37%|▎| 276/743 [1:06:59<1:49:02, 14.01s/batch, batch_loss=19.7, bat

Validation:  37%|▎| 276/743 [1:07:13<1:49:02, 14.01s/batch, batch_loss=32.7, bat

Validation:  37%|▎| 277/743 [1:07:13<1:49:43, 14.13s/batch, batch_loss=32.7, bat

Validation:  37%|▎| 277/743 [1:07:28<1:49:43, 14.13s/batch, batch_loss=26.7, bat

Validation:  37%|▎| 278/743 [1:07:28<1:51:36, 14.40s/batch, batch_loss=26.7, bat

Validation:  37%|▎| 278/743 [1:07:43<1:51:36, 14.40s/batch, batch_loss=19.7, bat

Validation:  38%|▍| 279/743 [1:07:43<1:52:52, 14.60s/batch, batch_loss=19.7, bat

Validation:  38%|▍| 279/743 [1:07:58<1:52:52, 14.60s/batch, batch_loss=20.2, bat

Validation:  38%|▍| 280/743 [1:07:58<1:52:55, 14.63s/batch, batch_loss=20.2, bat

Validation:  38%|▍| 280/743 [1:08:12<1:52:55, 14.63s/batch, batch_loss=23.1, bat

Validation:  38%|▍| 281/743 [1:08:12<1:51:51, 14.53s/batch, batch_loss=23.1, bat

Validation:  38%|▍| 281/743 [1:08:26<1:51:51, 14.53s/batch, batch_loss=23.3, bat

Validation:  38%|▍| 282/743 [1:08:26<1:51:13, 14.48s/batch, batch_loss=23.3, bat

Validation:  38%|▍| 282/743 [1:08:41<1:51:13, 14.48s/batch, batch_loss=18.9, bat

Validation:  38%|▍| 283/743 [1:08:41<1:50:44, 14.45s/batch, batch_loss=18.9, bat

Validation:  38%|▍| 283/743 [1:08:55<1:50:44, 14.45s/batch, batch_loss=30.4, bat

Validation:  38%|▍| 284/743 [1:08:55<1:50:46, 14.48s/batch, batch_loss=30.4, bat

Validation:  38%|▍| 284/743 [1:09:10<1:50:46, 14.48s/batch, batch_loss=20.1, bat

Validation:  38%|▍| 285/743 [1:09:10<1:50:32, 14.48s/batch, batch_loss=20.1, bat

Validation:  38%|▍| 285/743 [1:09:25<1:50:32, 14.48s/batch, batch_loss=19, batch

Validation:  38%|▍| 286/743 [1:09:25<1:51:29, 14.64s/batch, batch_loss=19, batch

Validation:  38%|▍| 286/743 [1:09:41<1:51:29, 14.64s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:09:41<1:54:47, 15.10s/batch, batch_loss=1.18e+4, 

Validation:  39%|▍| 287/743 [1:09:57<1:54:47, 15.10s/batch, batch_loss=35.5, bat

Validation:  39%|▍| 288/743 [1:09:57<1:56:06, 15.31s/batch, batch_loss=35.5, bat

Validation:  39%|▍| 288/743 [1:10:12<1:56:06, 15.31s/batch, batch_loss=32.1, bat

Validation:  39%|▍| 289/743 [1:10:12<1:56:20, 15.38s/batch, batch_loss=32.1, bat

Validation:  39%|▍| 289/743 [1:10:28<1:56:20, 15.38s/batch, batch_loss=493, batc

Validation:  39%|▍| 290/743 [1:10:28<1:56:12, 15.39s/batch, batch_loss=493, batc

Validation:  39%|▍| 290/743 [1:10:42<1:56:12, 15.39s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:10:42<1:53:56, 15.13s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:10:57<1:53:56, 15.13s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:10:57<1:53:23, 15.09s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:11:11<1:53:23, 15.09s/batch, batch_loss=35.2, bat

Validation:  39%|▍| 293/743 [1:11:11<1:50:16, 14.70s/batch, batch_loss=35.2, bat

Validation:  39%|▍| 293/743 [1:11:26<1:50:16, 14.70s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:11:26<1:49:36, 14.65s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:11:42<1:49:36, 14.65s/batch, batch_loss=29.6, bat

Validation:  40%|▍| 295/743 [1:11:42<1:54:25, 15.33s/batch, batch_loss=29.6, bat

Validation:  40%|▍| 295/743 [1:11:56<1:54:25, 15.33s/batch, batch_loss=22.5, bat

Validation:  40%|▍| 296/743 [1:11:56<1:51:04, 14.91s/batch, batch_loss=22.5, bat

Validation:  40%|▍| 296/743 [1:12:11<1:51:04, 14.91s/batch, batch_loss=13.6, bat

Validation:  40%|▍| 297/743 [1:12:11<1:49:36, 14.74s/batch, batch_loss=13.6, bat

Validation:  40%|▍| 297/743 [1:12:25<1:49:36, 14.74s/batch, batch_loss=38.1, bat

Validation:  40%|▍| 298/743 [1:12:25<1:48:39, 14.65s/batch, batch_loss=38.1, bat

Validation:  40%|▍| 298/743 [1:12:40<1:48:39, 14.65s/batch, batch_loss=46, batch

Validation:  40%|▍| 299/743 [1:12:40<1:48:15, 14.63s/batch, batch_loss=46, batch

Validation:  40%|▍| 299/743 [1:12:53<1:48:15, 14.63s/batch, batch_loss=48.7, bat

Validation:  40%|▍| 300/743 [1:12:53<1:45:42, 14.32s/batch, batch_loss=48.7, bat

Validation:  40%|▍| 300/743 [1:13:07<1:45:42, 14.32s/batch, batch_loss=849, batc

Validation:  41%|▍| 301/743 [1:13:07<1:43:57, 14.11s/batch, batch_loss=849, batc

Validation:  41%|▍| 301/743 [1:13:22<1:43:57, 14.11s/batch, batch_loss=10.3, bat

Validation:  41%|▍| 302/743 [1:13:22<1:45:08, 14.30s/batch, batch_loss=10.3, bat

Validation:  41%|▍| 302/743 [1:13:39<1:45:08, 14.30s/batch, batch_loss=18.1, bat

Validation:  41%|▍| 303/743 [1:13:39<1:50:32, 15.07s/batch, batch_loss=18.1, bat

Validation:  41%|▍| 303/743 [1:13:53<1:50:32, 15.07s/batch, batch_loss=21.2, bat

Validation:  41%|▍| 304/743 [1:13:53<1:48:04, 14.77s/batch, batch_loss=21.2, bat

Validation:  41%|▍| 304/743 [1:14:08<1:48:04, 14.77s/batch, batch_loss=13.3, bat

Validation:  41%|▍| 305/743 [1:14:08<1:48:13, 14.83s/batch, batch_loss=13.3, bat

Validation:  41%|▍| 305/743 [1:14:22<1:48:13, 14.83s/batch, batch_loss=28.4, bat

Validation:  41%|▍| 306/743 [1:14:22<1:47:37, 14.78s/batch, batch_loss=28.4, bat

Validation:  41%|▍| 306/743 [1:14:36<1:47:37, 14.78s/batch, batch_loss=42.4, bat

Validation:  41%|▍| 307/743 [1:14:36<1:45:59, 14.59s/batch, batch_loss=42.4, bat

Validation:  41%|▍| 307/743 [1:14:51<1:45:59, 14.59s/batch, batch_loss=892, batc

Validation:  41%|▍| 308/743 [1:14:51<1:45:31, 14.56s/batch, batch_loss=892, batc

Validation:  41%|▍| 308/743 [1:15:05<1:45:31, 14.56s/batch, batch_loss=34.9, bat

Validation:  42%|▍| 309/743 [1:15:05<1:43:43, 14.34s/batch, batch_loss=34.9, bat

Validation:  42%|▍| 309/743 [1:15:19<1:43:43, 14.34s/batch, batch_loss=28.1, bat

Validation:  42%|▍| 310/743 [1:15:19<1:43:50, 14.39s/batch, batch_loss=28.1, bat

Validation:  42%|▍| 310/743 [1:15:36<1:43:50, 14.39s/batch, batch_loss=26.3, bat

Validation:  42%|▍| 311/743 [1:15:36<1:49:09, 15.16s/batch, batch_loss=26.3, bat

Validation:  42%|▍| 311/743 [1:15:50<1:49:09, 15.16s/batch, batch_loss=16.6, bat

Validation:  42%|▍| 312/743 [1:15:50<1:46:36, 14.84s/batch, batch_loss=16.6, bat

Validation:  42%|▍| 312/743 [1:16:05<1:46:36, 14.84s/batch, batch_loss=10.1, bat

Validation:  42%|▍| 313/743 [1:16:05<1:46:17, 14.83s/batch, batch_loss=10.1, bat

Validation:  42%|▍| 313/743 [1:16:19<1:46:17, 14.83s/batch, batch_loss=20.7, bat

Validation:  42%|▍| 314/743 [1:16:19<1:43:07, 14.42s/batch, batch_loss=20.7, bat

Validation:  42%|▍| 314/743 [1:16:32<1:43:07, 14.42s/batch, batch_loss=23.5, bat

Validation:  42%|▍| 315/743 [1:16:32<1:41:42, 14.26s/batch, batch_loss=23.5, bat

Validation:  42%|▍| 315/743 [1:16:47<1:41:42, 14.26s/batch, batch_loss=21, batch

Validation:  43%|▍| 316/743 [1:16:47<1:41:09, 14.21s/batch, batch_loss=21, batch

Validation:  43%|▍| 316/743 [1:17:00<1:41:09, 14.21s/batch, batch_loss=41.6, bat

Validation:  43%|▍| 317/743 [1:17:00<1:40:05, 14.10s/batch, batch_loss=41.6, bat

Validation:  43%|▍| 317/743 [1:17:18<1:40:05, 14.10s/batch, batch_loss=25, batch

Validation:  43%|▍| 318/743 [1:17:18<1:46:50, 15.08s/batch, batch_loss=25, batch

Validation:  43%|▍| 318/743 [1:17:31<1:46:50, 15.08s/batch, batch_loss=28.8, bat

Validation:  43%|▍| 319/743 [1:17:31<1:43:01, 14.58s/batch, batch_loss=28.8, bat

Validation:  43%|▍| 319/743 [1:17:46<1:43:01, 14.58s/batch, batch_loss=27.2, bat

Validation:  43%|▍| 320/743 [1:17:46<1:43:20, 14.66s/batch, batch_loss=27.2, bat

Validation:  43%|▍| 320/743 [1:18:01<1:43:20, 14.66s/batch, batch_loss=23.9, bat

Validation:  43%|▍| 321/743 [1:18:01<1:43:56, 14.78s/batch, batch_loss=23.9, bat

Validation:  43%|▍| 321/743 [1:18:14<1:43:56, 14.78s/batch, batch_loss=22, batch

Validation:  43%|▍| 322/743 [1:18:14<1:39:53, 14.24s/batch, batch_loss=22, batch

Validation:  43%|▍| 322/743 [1:18:27<1:39:53, 14.24s/batch, batch_loss=27.7, bat

Validation:  43%|▍| 323/743 [1:18:27<1:36:35, 13.80s/batch, batch_loss=27.7, bat

Validation:  43%|▍| 323/743 [1:18:40<1:36:35, 13.80s/batch, batch_loss=293, batc

Validation:  44%|▍| 324/743 [1:18:40<1:34:17, 13.50s/batch, batch_loss=293, batc

Validation:  44%|▍| 324/743 [1:18:53<1:34:17, 13.50s/batch, batch_loss=34.5, bat

Validation:  44%|▍| 325/743 [1:18:53<1:32:57, 13.34s/batch, batch_loss=34.5, bat

Validation:  44%|▍| 325/743 [1:19:07<1:32:57, 13.34s/batch, batch_loss=30.3, bat

Validation:  44%|▍| 326/743 [1:19:07<1:35:47, 13.78s/batch, batch_loss=30.3, bat

Validation:  44%|▍| 326/743 [1:19:22<1:35:47, 13.78s/batch, batch_loss=27.7, bat

Validation:  44%|▍| 327/743 [1:19:22<1:37:34, 14.07s/batch, batch_loss=27.7, bat

Validation:  44%|▍| 327/743 [1:19:37<1:37:34, 14.07s/batch, batch_loss=24.9, bat

Validation:  44%|▍| 328/743 [1:19:37<1:38:45, 14.28s/batch, batch_loss=24.9, bat

Validation:  44%|▍| 328/743 [1:19:51<1:38:45, 14.28s/batch, batch_loss=6.21, bat

Validation:  44%|▍| 329/743 [1:19:51<1:38:16, 14.24s/batch, batch_loss=6.21, bat

Validation:  44%|▍| 329/743 [1:20:06<1:38:16, 14.24s/batch, batch_loss=24.4, bat

Validation:  44%|▍| 330/743 [1:20:06<1:38:36, 14.33s/batch, batch_loss=24.4, bat

Validation:  44%|▍| 330/743 [1:20:19<1:38:36, 14.33s/batch, batch_loss=39.1, bat

Validation:  45%|▍| 331/743 [1:20:19<1:37:00, 14.13s/batch, batch_loss=39.1, bat

Validation:  45%|▍| 331/743 [1:20:34<1:37:00, 14.13s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:20:34<1:38:09, 14.33s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:20:49<1:38:09, 14.33s/batch, batch_loss=36.7, bat

Validation:  45%|▍| 333/743 [1:20:49<1:39:46, 14.60s/batch, batch_loss=36.7, bat

Validation:  45%|▍| 333/743 [1:21:04<1:39:46, 14.60s/batch, batch_loss=34.4, bat

Validation:  45%|▍| 334/743 [1:21:04<1:39:33, 14.61s/batch, batch_loss=34.4, bat

Validation:  45%|▍| 334/743 [1:21:18<1:39:33, 14.61s/batch, batch_loss=47.1, bat

Validation:  45%|▍| 335/743 [1:21:18<1:38:40, 14.51s/batch, batch_loss=47.1, bat

Validation:  45%|▍| 335/743 [1:21:33<1:38:40, 14.51s/batch, batch_loss=15.2, bat

Validation:  45%|▍| 336/743 [1:21:33<1:38:07, 14.47s/batch, batch_loss=15.2, bat

Validation:  45%|▍| 336/743 [1:21:48<1:38:07, 14.47s/batch, batch_loss=46, batch

Validation:  45%|▍| 337/743 [1:21:48<1:38:45, 14.60s/batch, batch_loss=46, batch

Validation:  45%|▍| 337/743 [1:22:05<1:38:45, 14.60s/batch, batch_loss=56.6, bat

Validation:  45%|▍| 338/743 [1:22:05<1:44:23, 15.46s/batch, batch_loss=56.6, bat

Validation:  45%|▍| 338/743 [1:22:19<1:44:23, 15.46s/batch, batch_loss=42.3, bat

Validation:  46%|▍| 339/743 [1:22:19<1:41:50, 15.12s/batch, batch_loss=42.3, bat

Validation:  46%|▍| 339/743 [1:22:34<1:41:50, 15.12s/batch, batch_loss=48.7, bat

Validation:  46%|▍| 340/743 [1:22:34<1:40:14, 14.92s/batch, batch_loss=48.7, bat

Validation:  46%|▍| 340/743 [1:22:48<1:40:14, 14.92s/batch, batch_loss=27.9, bat

Validation:  46%|▍| 341/743 [1:22:48<1:39:05, 14.79s/batch, batch_loss=27.9, bat

Validation:  46%|▍| 341/743 [1:23:02<1:39:05, 14.79s/batch, batch_loss=32.8, bat

Validation:  46%|▍| 342/743 [1:23:02<1:36:49, 14.49s/batch, batch_loss=32.8, bat

Validation:  46%|▍| 342/743 [1:23:17<1:36:49, 14.49s/batch, batch_loss=32.5, bat

Validation:  46%|▍| 343/743 [1:23:17<1:36:34, 14.49s/batch, batch_loss=32.5, bat

Validation:  46%|▍| 343/743 [1:23:34<1:36:34, 14.49s/batch, batch_loss=31, batch

Validation:  46%|▍| 344/743 [1:23:34<1:42:28, 15.41s/batch, batch_loss=31, batch

Validation:  46%|▍| 344/743 [1:23:48<1:42:28, 15.41s/batch, batch_loss=27.6, bat

Validation:  46%|▍| 345/743 [1:23:48<1:40:08, 15.10s/batch, batch_loss=27.6, bat

Validation:  46%|▍| 345/743 [1:24:03<1:40:08, 15.10s/batch, batch_loss=41.6, bat

Validation:  47%|▍| 346/743 [1:24:03<1:38:22, 14.87s/batch, batch_loss=41.6, bat

Validation:  47%|▍| 346/743 [1:24:17<1:38:22, 14.87s/batch, batch_loss=40.7, bat

Validation:  47%|▍| 347/743 [1:24:17<1:37:19, 14.75s/batch, batch_loss=40.7, bat

Validation:  47%|▍| 347/743 [1:24:32<1:37:19, 14.75s/batch, batch_loss=47.1, bat

Validation:  47%|▍| 348/743 [1:24:32<1:36:18, 14.63s/batch, batch_loss=47.1, bat

Validation:  47%|▍| 348/743 [1:24:46<1:36:18, 14.63s/batch, batch_loss=40.1, bat

Validation:  47%|▍| 349/743 [1:24:46<1:36:09, 14.64s/batch, batch_loss=40.1, bat

Validation:  47%|▍| 349/743 [1:25:00<1:36:09, 14.64s/batch, batch_loss=28.3, bat

Validation:  47%|▍| 350/743 [1:25:00<1:34:36, 14.44s/batch, batch_loss=28.3, bat

Validation:  47%|▍| 350/743 [1:25:15<1:34:36, 14.44s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:25:15<1:35:24, 14.60s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:25:31<1:35:24, 14.60s/batch, batch_loss=46.9, bat

Validation:  47%|▍| 352/743 [1:25:31<1:37:26, 14.95s/batch, batch_loss=46.9, bat

Validation:  47%|▍| 352/743 [1:25:46<1:37:26, 14.95s/batch, batch_loss=32.5, bat

Validation:  48%|▍| 353/743 [1:25:46<1:36:42, 14.88s/batch, batch_loss=32.5, bat

Validation:  48%|▍| 353/743 [1:26:00<1:36:42, 14.88s/batch, batch_loss=38.7, bat

Validation:  48%|▍| 354/743 [1:26:00<1:35:11, 14.68s/batch, batch_loss=38.7, bat

Validation:  48%|▍| 354/743 [1:26:15<1:35:11, 14.68s/batch, batch_loss=49.8, bat

Validation:  48%|▍| 355/743 [1:26:15<1:34:51, 14.67s/batch, batch_loss=49.8, bat

Validation:  48%|▍| 355/743 [1:26:29<1:34:51, 14.67s/batch, batch_loss=57.4, bat

Validation:  48%|▍| 356/743 [1:26:29<1:34:20, 14.63s/batch, batch_loss=57.4, bat

Validation:  48%|▍| 356/743 [1:26:43<1:34:20, 14.63s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:26:43<1:33:09, 14.48s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:26:57<1:33:09, 14.48s/batch, batch_loss=23.4, bat

Validation:  48%|▍| 358/743 [1:26:57<1:31:36, 14.28s/batch, batch_loss=23.4, bat

Validation:  48%|▍| 358/743 [1:27:12<1:31:36, 14.28s/batch, batch_loss=14.8, bat

Validation:  48%|▍| 359/743 [1:27:12<1:31:55, 14.36s/batch, batch_loss=14.8, bat

Validation:  48%|▍| 359/743 [1:27:26<1:31:55, 14.36s/batch, batch_loss=33.4, bat

Validation:  48%|▍| 360/743 [1:27:26<1:32:26, 14.48s/batch, batch_loss=33.4, bat

Validation:  48%|▍| 360/743 [1:27:41<1:32:26, 14.48s/batch, batch_loss=23.3, bat

Validation:  49%|▍| 361/743 [1:27:41<1:31:43, 14.41s/batch, batch_loss=23.3, bat

Validation:  49%|▍| 361/743 [1:27:55<1:31:43, 14.41s/batch, batch_loss=49.8, bat

Validation:  49%|▍| 362/743 [1:27:55<1:31:40, 14.44s/batch, batch_loss=49.8, bat

Validation:  49%|▍| 362/743 [1:28:10<1:31:40, 14.44s/batch, batch_loss=31.8, bat

Validation:  49%|▍| 363/743 [1:28:10<1:31:21, 14.42s/batch, batch_loss=31.8, bat

Validation:  49%|▍| 363/743 [1:28:23<1:31:21, 14.42s/batch, batch_loss=23.2, bat

Validation:  49%|▍| 364/743 [1:28:23<1:30:16, 14.29s/batch, batch_loss=23.2, bat

Validation:  49%|▍| 364/743 [1:28:38<1:30:16, 14.29s/batch, batch_loss=21.7, bat

Validation:  49%|▍| 365/743 [1:28:38<1:29:44, 14.25s/batch, batch_loss=21.7, bat

Validation:  49%|▍| 365/743 [1:28:52<1:29:44, 14.25s/batch, batch_loss=25.9, bat

Validation:  49%|▍| 366/743 [1:28:52<1:29:45, 14.28s/batch, batch_loss=25.9, bat

Validation:  49%|▍| 366/743 [1:29:06<1:29:45, 14.28s/batch, batch_loss=18.5, bat

Validation:  49%|▍| 367/743 [1:29:06<1:28:36, 14.14s/batch, batch_loss=18.5, bat

Validation:  49%|▍| 367/743 [1:29:20<1:28:36, 14.14s/batch, batch_loss=4.82e+3, 

Validation:  50%|▍| 368/743 [1:29:20<1:29:16, 14.28s/batch, batch_loss=4.82e+3, 

Validation:  50%|▍| 368/743 [1:29:38<1:29:16, 14.28s/batch, batch_loss=27.2, bat

Validation:  50%|▍| 369/743 [1:29:38<1:34:58, 15.24s/batch, batch_loss=27.2, bat

Validation:  50%|▍| 369/743 [1:29:52<1:34:58, 15.24s/batch, batch_loss=38.2, bat

Validation:  50%|▍| 370/743 [1:29:52<1:33:13, 15.00s/batch, batch_loss=38.2, bat

Validation:  50%|▍| 370/743 [1:30:06<1:33:13, 15.00s/batch, batch_loss=31.6, bat

Validation:  50%|▍| 371/743 [1:30:06<1:31:22, 14.74s/batch, batch_loss=31.6, bat

Validation:  50%|▍| 371/743 [1:30:21<1:31:22, 14.74s/batch, batch_loss=28, batch

Validation:  50%|▌| 372/743 [1:30:21<1:30:41, 14.67s/batch, batch_loss=28, batch

Validation:  50%|▌| 372/743 [1:30:35<1:30:41, 14.67s/batch, batch_loss=31.6, bat

Validation:  50%|▌| 373/743 [1:30:35<1:29:20, 14.49s/batch, batch_loss=31.6, bat

Validation:  50%|▌| 373/743 [1:30:49<1:29:20, 14.49s/batch, batch_loss=23, batch

Validation:  50%|▌| 374/743 [1:30:49<1:28:28, 14.39s/batch, batch_loss=23, batch

Validation:  50%|▌| 374/743 [1:31:04<1:28:28, 14.39s/batch, batch_loss=10.9, bat

Validation:  50%|▌| 375/743 [1:31:04<1:28:23, 14.41s/batch, batch_loss=10.9, bat

Validation:  50%|▌| 375/743 [1:31:18<1:28:23, 14.41s/batch, batch_loss=56.5, bat

Validation:  51%|▌| 376/743 [1:31:18<1:28:23, 14.45s/batch, batch_loss=56.5, bat

Validation:  51%|▌| 376/743 [1:31:32<1:28:23, 14.45s/batch, batch_loss=14.4, bat

Validation:  51%|▌| 377/743 [1:31:32<1:27:22, 14.32s/batch, batch_loss=14.4, bat

Validation:  51%|▌| 377/743 [1:31:47<1:27:22, 14.32s/batch, batch_loss=29.8, bat

Validation:  51%|▌| 378/743 [1:31:47<1:27:25, 14.37s/batch, batch_loss=29.8, bat

Validation:  51%|▌| 378/743 [1:32:01<1:27:25, 14.37s/batch, batch_loss=17.4, bat

Validation:  51%|▌| 379/743 [1:32:01<1:27:21, 14.40s/batch, batch_loss=17.4, bat

Validation:  51%|▌| 379/743 [1:32:16<1:27:21, 14.40s/batch, batch_loss=9.4, batc

Validation:  51%|▌| 380/743 [1:32:16<1:27:40, 14.49s/batch, batch_loss=9.4, batc

Validation:  51%|▌| 380/743 [1:32:30<1:27:40, 14.49s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:32:30<1:27:15, 14.46s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:32:45<1:27:15, 14.46s/batch, batch_loss=908, batc

Validation:  51%|▌| 382/743 [1:32:45<1:26:53, 14.44s/batch, batch_loss=908, batc

Validation:  51%|▌| 382/743 [1:32:59<1:26:53, 14.44s/batch, batch_loss=212, batc

Validation:  52%|▌| 383/743 [1:32:59<1:26:32, 14.42s/batch, batch_loss=212, batc

Validation:  52%|▌| 383/743 [1:33:14<1:26:32, 14.42s/batch, batch_loss=293, batc

Validation:  52%|▌| 384/743 [1:33:14<1:27:18, 14.59s/batch, batch_loss=293, batc

Validation:  52%|▌| 384/743 [1:33:31<1:27:18, 14.59s/batch, batch_loss=29.7, bat

Validation:  52%|▌| 385/743 [1:33:31<1:31:15, 15.29s/batch, batch_loss=29.7, bat

Validation:  52%|▌| 385/743 [1:33:45<1:31:15, 15.29s/batch, batch_loss=9.99, bat

Validation:  52%|▌| 386/743 [1:33:45<1:29:06, 14.98s/batch, batch_loss=9.99, bat

Validation:  52%|▌| 386/743 [1:33:59<1:29:06, 14.98s/batch, batch_loss=8.3, batc

Validation:  52%|▌| 387/743 [1:33:59<1:27:01, 14.67s/batch, batch_loss=8.3, batc

Validation:  52%|▌| 387/743 [1:34:13<1:27:01, 14.67s/batch, batch_loss=24.5, bat

Validation:  52%|▌| 388/743 [1:34:13<1:25:29, 14.45s/batch, batch_loss=24.5, bat

Validation:  52%|▌| 388/743 [1:34:27<1:25:29, 14.45s/batch, batch_loss=17.2, bat

Validation:  52%|▌| 389/743 [1:34:27<1:24:36, 14.34s/batch, batch_loss=17.2, bat

Validation:  52%|▌| 389/743 [1:34:42<1:24:36, 14.34s/batch, batch_loss=22, batch

Validation:  52%|▌| 390/743 [1:34:42<1:24:36, 14.38s/batch, batch_loss=22, batch

Validation:  52%|▌| 390/743 [1:34:56<1:24:36, 14.38s/batch, batch_loss=27.1, bat

Validation:  53%|▌| 391/743 [1:34:56<1:24:24, 14.39s/batch, batch_loss=27.1, bat

Validation:  53%|▌| 391/743 [1:35:13<1:24:24, 14.39s/batch, batch_loss=24.8, bat

Validation:  53%|▌| 392/743 [1:35:13<1:28:28, 15.12s/batch, batch_loss=24.8, bat

Validation:  53%|▌| 392/743 [1:35:28<1:28:28, 15.12s/batch, batch_loss=19.5, bat

Validation:  53%|▌| 393/743 [1:35:28<1:28:08, 15.11s/batch, batch_loss=19.5, bat

Validation:  53%|▌| 393/743 [1:35:42<1:28:08, 15.11s/batch, batch_loss=23.6, bat

Validation:  53%|▌| 394/743 [1:35:42<1:26:05, 14.80s/batch, batch_loss=23.6, bat

Validation:  53%|▌| 394/743 [1:35:56<1:26:05, 14.80s/batch, batch_loss=15.4, bat

Validation:  53%|▌| 395/743 [1:35:56<1:23:43, 14.44s/batch, batch_loss=15.4, bat

Validation:  53%|▌| 395/743 [1:36:11<1:23:43, 14.44s/batch, batch_loss=20.7, bat

Validation:  53%|▌| 396/743 [1:36:11<1:25:25, 14.77s/batch, batch_loss=20.7, bat

Validation:  53%|▌| 396/743 [1:36:26<1:25:25, 14.77s/batch, batch_loss=13.1, bat

Validation:  53%|▌| 397/743 [1:36:26<1:25:18, 14.79s/batch, batch_loss=13.1, bat

Validation:  53%|▌| 397/743 [1:36:41<1:25:18, 14.79s/batch, batch_loss=36.1, bat

Validation:  54%|▌| 398/743 [1:36:41<1:24:57, 14.78s/batch, batch_loss=36.1, bat

Validation:  54%|▌| 398/743 [1:36:55<1:24:57, 14.78s/batch, batch_loss=24.4, bat

Validation:  54%|▌| 399/743 [1:36:55<1:23:55, 14.64s/batch, batch_loss=24.4, bat

Validation:  54%|▌| 399/743 [1:37:09<1:23:55, 14.64s/batch, batch_loss=26.3, bat

Validation:  54%|▌| 400/743 [1:37:09<1:22:39, 14.46s/batch, batch_loss=26.3, bat

Validation:  54%|▌| 400/743 [1:37:24<1:22:39, 14.46s/batch, batch_loss=23.7, bat

Validation:  54%|▌| 401/743 [1:37:24<1:23:39, 14.68s/batch, batch_loss=23.7, bat

Validation:  54%|▌| 401/743 [1:37:39<1:23:39, 14.68s/batch, batch_loss=6.92, bat

Validation:  54%|▌| 402/743 [1:37:39<1:22:41, 14.55s/batch, batch_loss=6.92, bat

Validation:  54%|▌| 402/743 [1:37:54<1:22:41, 14.55s/batch, batch_loss=23.7, bat

Validation:  54%|▌| 403/743 [1:37:54<1:23:22, 14.71s/batch, batch_loss=23.7, bat

Validation:  54%|▌| 403/743 [1:38:09<1:23:22, 14.71s/batch, batch_loss=16.8, bat

Validation:  54%|▌| 404/743 [1:38:09<1:23:32, 14.79s/batch, batch_loss=16.8, bat

Validation:  54%|▌| 404/743 [1:38:23<1:23:32, 14.79s/batch, batch_loss=12, batch

Validation:  55%|▌| 405/743 [1:38:23<1:22:47, 14.70s/batch, batch_loss=12, batch

Validation:  55%|▌| 405/743 [1:38:38<1:22:47, 14.70s/batch, batch_loss=20, batch

Validation:  55%|▌| 406/743 [1:38:38<1:22:54, 14.76s/batch, batch_loss=20, batch

Validation:  55%|▌| 406/743 [1:38:53<1:22:54, 14.76s/batch, batch_loss=18.7, bat

Validation:  55%|▌| 407/743 [1:38:53<1:22:42, 14.77s/batch, batch_loss=18.7, bat

Validation:  55%|▌| 407/743 [1:39:08<1:22:42, 14.77s/batch, batch_loss=36.6, bat

Validation:  55%|▌| 408/743 [1:39:08<1:22:28, 14.77s/batch, batch_loss=36.6, bat

Validation:  55%|▌| 408/743 [1:39:22<1:22:28, 14.77s/batch, batch_loss=11.1, bat

Validation:  55%|▌| 409/743 [1:39:22<1:21:56, 14.72s/batch, batch_loss=11.1, bat

Validation:  55%|▌| 409/743 [1:39:37<1:21:56, 14.72s/batch, batch_loss=24.2, bat

Validation:  55%|▌| 410/743 [1:39:37<1:22:37, 14.89s/batch, batch_loss=24.2, bat

Validation:  55%|▌| 410/743 [1:39:52<1:22:37, 14.89s/batch, batch_loss=24.8, bat

Validation:  55%|▌| 411/743 [1:39:52<1:21:29, 14.73s/batch, batch_loss=24.8, bat

Validation:  55%|▌| 411/743 [1:40:06<1:21:29, 14.73s/batch, batch_loss=20.4, bat

Validation:  55%|▌| 412/743 [1:40:06<1:20:22, 14.57s/batch, batch_loss=20.4, bat

Validation:  55%|▌| 412/743 [1:40:20<1:20:22, 14.57s/batch, batch_loss=1.92e+3, 

Validation:  56%|▌| 413/743 [1:40:20<1:18:38, 14.30s/batch, batch_loss=1.92e+3, 

Validation:  56%|▌| 413/743 [1:40:34<1:18:38, 14.30s/batch, batch_loss=42.5, bat

Validation:  56%|▌| 414/743 [1:40:34<1:18:20, 14.29s/batch, batch_loss=42.5, bat

Validation:  56%|▌| 414/743 [1:40:48<1:18:20, 14.29s/batch, batch_loss=39.4, bat

Validation:  56%|▌| 415/743 [1:40:48<1:18:11, 14.30s/batch, batch_loss=39.4, bat

Validation:  56%|▌| 415/743 [1:41:03<1:18:11, 14.30s/batch, batch_loss=6.46e+3, 

Validation:  56%|▌| 416/743 [1:41:03<1:18:19, 14.37s/batch, batch_loss=6.46e+3, 

Validation:  56%|▌| 416/743 [1:41:17<1:18:19, 14.37s/batch, batch_loss=19.7, bat

Validation:  56%|▌| 417/743 [1:41:17<1:18:17, 14.41s/batch, batch_loss=19.7, bat

Validation:  56%|▌| 417/743 [1:41:31<1:18:17, 14.41s/batch, batch_loss=20.2, bat

Validation:  56%|▌| 418/743 [1:41:31<1:17:09, 14.24s/batch, batch_loss=20.2, bat

Validation:  56%|▌| 418/743 [1:41:45<1:17:09, 14.24s/batch, batch_loss=19.1, bat

Validation:  56%|▌| 419/743 [1:41:45<1:17:00, 14.26s/batch, batch_loss=19.1, bat

Validation:  56%|▌| 419/743 [1:42:00<1:17:00, 14.26s/batch, batch_loss=16.5, bat

Validation:  57%|▌| 420/743 [1:42:00<1:17:30, 14.40s/batch, batch_loss=16.5, bat

Validation:  57%|▌| 420/743 [1:42:15<1:17:30, 14.40s/batch, batch_loss=42.3, bat

Validation:  57%|▌| 421/743 [1:42:15<1:18:08, 14.56s/batch, batch_loss=42.3, bat

Validation:  57%|▌| 421/743 [1:42:30<1:18:08, 14.56s/batch, batch_loss=9.81, bat

Validation:  57%|▌| 422/743 [1:42:30<1:17:41, 14.52s/batch, batch_loss=9.81, bat

Validation:  57%|▌| 422/743 [1:42:44<1:17:41, 14.52s/batch, batch_loss=37.5, bat

Validation:  57%|▌| 423/743 [1:42:44<1:17:48, 14.59s/batch, batch_loss=37.5, bat

Validation:  57%|▌| 423/743 [1:42:59<1:17:48, 14.59s/batch, batch_loss=333, batc

Validation:  57%|▌| 424/743 [1:42:59<1:17:54, 14.65s/batch, batch_loss=333, batc

Validation:  57%|▌| 424/743 [1:43:14<1:17:54, 14.65s/batch, batch_loss=46.2, bat

Validation:  57%|▌| 425/743 [1:43:14<1:17:35, 14.64s/batch, batch_loss=46.2, bat

Validation:  57%|▌| 425/743 [1:43:28<1:17:35, 14.64s/batch, batch_loss=32.3, bat

Validation:  57%|▌| 426/743 [1:43:28<1:17:35, 14.69s/batch, batch_loss=32.3, bat

Validation:  57%|▌| 426/743 [1:43:43<1:17:35, 14.69s/batch, batch_loss=33.8, bat

Validation:  57%|▌| 427/743 [1:43:43<1:16:53, 14.60s/batch, batch_loss=33.8, bat

Validation:  57%|▌| 427/743 [1:43:58<1:16:53, 14.60s/batch, batch_loss=5.26e+3, 

Validation:  58%|▌| 428/743 [1:43:58<1:17:09, 14.70s/batch, batch_loss=5.26e+3, 

Validation:  58%|▌| 428/743 [1:44:12<1:17:09, 14.70s/batch, batch_loss=26.8, bat

Validation:  58%|▌| 429/743 [1:44:12<1:16:39, 14.65s/batch, batch_loss=26.8, bat

Validation:  58%|▌| 429/743 [1:44:27<1:16:39, 14.65s/batch, batch_loss=5.38e+3, 

Validation:  58%|▌| 430/743 [1:44:27<1:16:30, 14.67s/batch, batch_loss=5.38e+3, 

Validation:  58%|▌| 430/743 [1:44:42<1:16:30, 14.67s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:44:42<1:16:01, 14.62s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [1:44:56<1:16:01, 14.62s/batch, batch_loss=973, batc

Validation:  58%|▌| 432/743 [1:44:56<1:15:52, 14.64s/batch, batch_loss=973, batc

Validation:  58%|▌| 432/743 [1:45:11<1:15:52, 14.64s/batch, batch_loss=19.7, bat

Validation:  58%|▌| 433/743 [1:45:11<1:15:47, 14.67s/batch, batch_loss=19.7, bat

Validation:  58%|▌| 433/743 [1:45:26<1:15:47, 14.67s/batch, batch_loss=15.4, bat

Validation:  58%|▌| 434/743 [1:45:26<1:16:47, 14.91s/batch, batch_loss=15.4, bat

Validation:  58%|▌| 434/743 [1:45:42<1:16:47, 14.91s/batch, batch_loss=18.9, bat

Validation:  59%|▌| 435/743 [1:45:42<1:16:52, 14.98s/batch, batch_loss=18.9, bat

Validation:  59%|▌| 435/743 [1:45:56<1:16:52, 14.98s/batch, batch_loss=20.1, bat

Validation:  59%|▌| 436/743 [1:45:56<1:15:41, 14.79s/batch, batch_loss=20.1, bat

Validation:  59%|▌| 436/743 [1:46:11<1:15:41, 14.79s/batch, batch_loss=29.5, bat

Validation:  59%|▌| 437/743 [1:46:11<1:15:25, 14.79s/batch, batch_loss=29.5, bat

Validation:  59%|▌| 437/743 [1:46:25<1:15:25, 14.79s/batch, batch_loss=1e+3, bat

Validation:  59%|▌| 438/743 [1:46:25<1:14:49, 14.72s/batch, batch_loss=1e+3, bat

Validation:  59%|▌| 438/743 [1:46:40<1:14:49, 14.72s/batch, batch_loss=917, batc

Validation:  59%|▌| 439/743 [1:46:40<1:14:42, 14.74s/batch, batch_loss=917, batc

Validation:  59%|▌| 439/743 [1:46:54<1:14:42, 14.74s/batch, batch_loss=21.9, bat

Validation:  59%|▌| 440/743 [1:46:54<1:13:04, 14.47s/batch, batch_loss=21.9, bat

Validation:  59%|▌| 440/743 [1:47:08<1:13:04, 14.47s/batch, batch_loss=17.8, bat

Validation:  59%|▌| 441/743 [1:47:08<1:11:43, 14.25s/batch, batch_loss=17.8, bat

Validation:  59%|▌| 441/743 [1:47:22<1:11:43, 14.25s/batch, batch_loss=24.1, bat

Validation:  59%|▌| 442/743 [1:47:22<1:11:07, 14.18s/batch, batch_loss=24.1, bat

Validation:  59%|▌| 442/743 [1:47:39<1:11:07, 14.18s/batch, batch_loss=11.5, bat

Validation:  60%|▌| 443/743 [1:47:39<1:15:27, 15.09s/batch, batch_loss=11.5, bat

Validation:  60%|▌| 443/743 [1:47:52<1:15:27, 15.09s/batch, batch_loss=20, batch

Validation:  60%|▌| 444/743 [1:47:52<1:12:54, 14.63s/batch, batch_loss=20, batch

Validation:  60%|▌| 444/743 [1:48:07<1:12:54, 14.63s/batch, batch_loss=8.93, bat

Validation:  60%|▌| 445/743 [1:48:07<1:12:26, 14.58s/batch, batch_loss=8.93, bat

Validation:  60%|▌| 445/743 [1:48:21<1:12:26, 14.58s/batch, batch_loss=19.5, bat

Validation:  60%|▌| 446/743 [1:48:21<1:10:53, 14.32s/batch, batch_loss=19.5, bat

Validation:  60%|▌| 446/743 [1:48:33<1:10:53, 14.32s/batch, batch_loss=6844.25, 

Validation:  60%|▌| 447/743 [1:48:33<1:08:18, 13.85s/batch, batch_loss=6844.25, 

Validation:  60%|▌| 447/743 [1:48:46<1:08:18, 13.85s/batch, batch_loss=5.3, batc

Validation:  60%|▌| 448/743 [1:48:46<1:06:23, 13.50s/batch, batch_loss=5.3, batc

Validation:  60%|▌| 448/743 [1:49:00<1:06:23, 13.50s/batch, batch_loss=22.4, bat

Validation:  60%|▌| 449/743 [1:49:00<1:06:41, 13.61s/batch, batch_loss=22.4, bat

Validation:  60%|▌| 449/743 [1:49:14<1:06:41, 13.61s/batch, batch_loss=18.6, bat

Validation:  61%|▌| 450/743 [1:49:14<1:07:20, 13.79s/batch, batch_loss=18.6, bat

Validation:  61%|▌| 450/743 [1:49:29<1:07:20, 13.79s/batch, batch_loss=15.9, bat

Validation:  61%|▌| 451/743 [1:49:29<1:09:08, 14.21s/batch, batch_loss=15.9, bat

Validation:  61%|▌| 451/743 [1:49:44<1:09:08, 14.21s/batch, batch_loss=31.5, bat

Validation:  61%|▌| 452/743 [1:49:44<1:09:42, 14.37s/batch, batch_loss=31.5, bat

Validation:  61%|▌| 452/743 [1:49:58<1:09:42, 14.37s/batch, batch_loss=18.7, bat

Validation:  61%|▌| 453/743 [1:49:58<1:09:22, 14.35s/batch, batch_loss=18.7, bat

Validation:  61%|▌| 453/743 [1:50:14<1:09:22, 14.35s/batch, batch_loss=8.18, bat

Validation:  61%|▌| 454/743 [1:50:14<1:11:30, 14.85s/batch, batch_loss=8.18, bat

Validation:  61%|▌| 454/743 [1:50:28<1:11:30, 14.85s/batch, batch_loss=12.5, bat

Validation:  61%|▌| 455/743 [1:50:28<1:09:47, 14.54s/batch, batch_loss=12.5, bat

Validation:  61%|▌| 455/743 [1:50:42<1:09:47, 14.54s/batch, batch_loss=14, batch

Validation:  61%|▌| 456/743 [1:50:42<1:08:59, 14.42s/batch, batch_loss=14, batch

Validation:  61%|▌| 456/743 [1:50:57<1:08:59, 14.42s/batch, batch_loss=22.3, bat

Validation:  62%|▌| 457/743 [1:50:57<1:08:23, 14.35s/batch, batch_loss=22.3, bat

Validation:  62%|▌| 457/743 [1:51:12<1:08:23, 14.35s/batch, batch_loss=47.2, bat

Validation:  62%|▌| 458/743 [1:51:12<1:10:21, 14.81s/batch, batch_loss=47.2, bat

Validation:  62%|▌| 458/743 [1:51:27<1:10:21, 14.81s/batch, batch_loss=19, batch

Validation:  62%|▌| 459/743 [1:51:27<1:10:15, 14.84s/batch, batch_loss=19, batch

Validation:  62%|▌| 459/743 [1:51:42<1:10:15, 14.84s/batch, batch_loss=28.3, bat

Validation:  62%|▌| 460/743 [1:51:42<1:09:51, 14.81s/batch, batch_loss=28.3, bat

Validation:  62%|▌| 460/743 [1:51:57<1:09:51, 14.81s/batch, batch_loss=21.1, bat

Validation:  62%|▌| 461/743 [1:51:57<1:09:18, 14.75s/batch, batch_loss=21.1, bat

Validation:  62%|▌| 461/743 [1:52:10<1:09:18, 14.75s/batch, batch_loss=15.5, bat

Validation:  62%|▌| 462/743 [1:52:10<1:07:17, 14.37s/batch, batch_loss=15.5, bat

Validation:  62%|▌| 462/743 [1:52:25<1:07:17, 14.37s/batch, batch_loss=15.1, bat

Validation:  62%|▌| 463/743 [1:52:25<1:07:03, 14.37s/batch, batch_loss=15.1, bat

Validation:  62%|▌| 463/743 [1:52:40<1:07:03, 14.37s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:52:40<1:07:44, 14.57s/batch, batch_loss=1.35e+4, 

Validation:  62%|▌| 464/743 [1:52:54<1:07:44, 14.57s/batch, batch_loss=22.3, bat

Validation:  63%|▋| 465/743 [1:52:54<1:07:23, 14.54s/batch, batch_loss=22.3, bat

Validation:  63%|▋| 465/743 [1:53:08<1:07:23, 14.54s/batch, batch_loss=17.2, bat

Validation:  63%|▋| 466/743 [1:53:08<1:06:56, 14.50s/batch, batch_loss=17.2, bat

Validation:  63%|▋| 466/743 [1:53:26<1:06:56, 14.50s/batch, batch_loss=31.3, bat

Validation:  63%|▋| 467/743 [1:53:26<1:10:12, 15.26s/batch, batch_loss=31.3, bat

Validation:  63%|▋| 467/743 [1:53:39<1:10:12, 15.26s/batch, batch_loss=22.7, bat

Validation:  63%|▋| 468/743 [1:53:39<1:08:02, 14.84s/batch, batch_loss=22.7, bat

Validation:  63%|▋| 468/743 [1:53:54<1:08:02, 14.84s/batch, batch_loss=30.7, bat

Validation:  63%|▋| 469/743 [1:53:54<1:07:46, 14.84s/batch, batch_loss=30.7, bat

Validation:  63%|▋| 469/743 [1:54:09<1:07:46, 14.84s/batch, batch_loss=6.36e+4, 

Validation:  63%|▋| 470/743 [1:54:09<1:07:18, 14.79s/batch, batch_loss=6.36e+4, 

Validation:  63%|▋| 470/743 [1:54:24<1:07:18, 14.79s/batch, batch_loss=16.9, bat

Validation:  63%|▋| 471/743 [1:54:24<1:07:39, 14.92s/batch, batch_loss=16.9, bat

Validation:  63%|▋| 471/743 [1:54:39<1:07:39, 14.92s/batch, batch_loss=38, batch

Validation:  64%|▋| 472/743 [1:54:39<1:06:53, 14.81s/batch, batch_loss=38, batch

Validation:  64%|▋| 472/743 [1:54:53<1:06:53, 14.81s/batch, batch_loss=604, batc

Validation:  64%|▋| 473/743 [1:54:53<1:06:35, 14.80s/batch, batch_loss=604, batc

Validation:  64%|▋| 473/743 [1:55:08<1:06:35, 14.80s/batch, batch_loss=29.6, bat

Validation:  64%|▋| 474/743 [1:55:08<1:05:58, 14.72s/batch, batch_loss=29.6, bat

Validation:  64%|▋| 474/743 [1:55:25<1:05:58, 14.72s/batch, batch_loss=25.1, bat

Validation:  64%|▋| 475/743 [1:55:25<1:08:46, 15.40s/batch, batch_loss=25.1, bat

Validation:  64%|▋| 475/743 [1:55:40<1:08:46, 15.40s/batch, batch_loss=8.6, batc

Validation:  64%|▋| 476/743 [1:55:40<1:07:52, 15.25s/batch, batch_loss=8.6, batc

Validation:  64%|▋| 476/743 [1:55:55<1:07:52, 15.25s/batch, batch_loss=13.3, bat

Validation:  64%|▋| 477/743 [1:55:55<1:06:49, 15.07s/batch, batch_loss=13.3, bat

Validation:  64%|▋| 477/743 [1:56:09<1:06:49, 15.07s/batch, batch_loss=2.46e+3, 

Validation:  64%|▋| 478/743 [1:56:09<1:05:20, 14.80s/batch, batch_loss=2.46e+3, 

Validation:  64%|▋| 478/743 [1:56:24<1:05:20, 14.80s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [1:56:24<1:05:24, 14.87s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [1:56:38<1:05:24, 14.87s/batch, batch_loss=11, batch

Validation:  65%|▋| 480/743 [1:56:38<1:04:29, 14.71s/batch, batch_loss=11, batch

Validation:  65%|▋| 480/743 [1:56:52<1:04:29, 14.71s/batch, batch_loss=13.7, bat

Validation:  65%|▋| 481/743 [1:56:52<1:03:17, 14.50s/batch, batch_loss=13.7, bat

Validation:  65%|▋| 481/743 [1:57:06<1:03:17, 14.50s/batch, batch_loss=6.95e+3, 

Validation:  65%|▋| 482/743 [1:57:06<1:02:19, 14.33s/batch, batch_loss=6.95e+3, 

Validation:  65%|▋| 482/743 [1:57:20<1:02:19, 14.33s/batch, batch_loss=23.9, bat

Validation:  65%|▋| 483/743 [1:57:20<1:01:55, 14.29s/batch, batch_loss=23.9, bat

Validation:  65%|▋| 483/743 [1:57:34<1:01:55, 14.29s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [1:57:34<1:01:31, 14.25s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [1:57:51<1:01:31, 14.25s/batch, batch_loss=3.12e+4, 

Validation:  65%|▋| 485/743 [1:57:51<1:04:53, 15.09s/batch, batch_loss=3.12e+4, 

Validation:  65%|▋| 485/743 [1:58:05<1:04:53, 15.09s/batch, batch_loss=20.4, bat

Validation:  65%|▋| 486/743 [1:58:05<1:03:03, 14.72s/batch, batch_loss=20.4, bat

Validation:  65%|▋| 486/743 [1:58:19<1:03:03, 14.72s/batch, batch_loss=49.4, bat

Validation:  66%|▋| 487/743 [1:58:19<1:02:06, 14.56s/batch, batch_loss=49.4, bat

Validation:  66%|▋| 487/743 [1:58:34<1:02:06, 14.56s/batch, batch_loss=40.3, bat

Validation:  66%|▋| 488/743 [1:58:34<1:02:11, 14.63s/batch, batch_loss=40.3, bat

Validation:  66%|▋| 488/743 [1:58:48<1:02:11, 14.63s/batch, batch_loss=16.6, bat

Validation:  66%|▋| 489/743 [1:58:48<1:01:03, 14.42s/batch, batch_loss=16.6, bat

Validation:  66%|▋| 489/743 [1:59:03<1:01:03, 14.42s/batch, batch_loss=33.8, bat

Validation:  66%|▋| 490/743 [1:59:03<1:01:01, 14.47s/batch, batch_loss=33.8, bat

Validation:  66%|▋| 490/743 [1:59:17<1:01:01, 14.47s/batch, batch_loss=29.4, bat

Validation:  66%|▋| 491/743 [1:59:17<1:00:58, 14.52s/batch, batch_loss=29.4, bat

Validation:  66%|▋| 491/743 [1:59:32<1:00:58, 14.52s/batch, batch_loss=1.04e+3, 

Validation:  66%|▋| 492/743 [1:59:32<1:00:50, 14.54s/batch, batch_loss=1.04e+3, 

Validation:  66%|▋| 492/743 [1:59:47<1:00:50, 14.54s/batch, batch_loss=1.44e+4, 

Validation:  66%|▋| 493/743 [1:59:47<1:01:07, 14.67s/batch, batch_loss=1.44e+4, 

Validation:  66%|▋| 493/743 [2:00:02<1:01:07, 14.67s/batch, batch_loss=11.7, bat

Validation:  66%|▋| 494/743 [2:00:02<1:01:23, 14.79s/batch, batch_loss=11.7, bat

Validation:  66%|▋| 494/743 [2:00:17<1:01:23, 14.79s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:00:17<1:00:52, 14.73s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:00:31<1:00:52, 14.73s/batch, batch_loss=24.8, bat

Validation:  67%|▋| 496/743 [2:00:31<59:59, 14.57s/batch, batch_loss=24.8, batch

Validation:  67%|▋| 496/743 [2:00:45<59:59, 14.57s/batch, batch_loss=17, batch_i

Validation:  67%|▋| 497/743 [2:00:45<59:36, 14.54s/batch, batch_loss=17, batch_i

Validation:  67%|▋| 497/743 [2:00:59<59:36, 14.54s/batch, batch_loss=18.5, batch

Validation:  67%|▋| 498/743 [2:00:59<58:13, 14.26s/batch, batch_loss=18.5, batch

Validation:  67%|▋| 498/743 [2:01:13<58:13, 14.26s/batch, batch_loss=4.43, batch

Validation:  67%|▋| 499/743 [2:01:13<57:43, 14.19s/batch, batch_loss=4.43, batch

Validation:  67%|▋| 499/743 [2:01:30<57:43, 14.19s/batch, batch_loss=2.52e+4, ba

Validation:  67%|▋| 500/743 [2:01:30<1:00:50, 15.02s/batch, batch_loss=2.52e+4, 

Validation:  67%|▋| 500/743 [2:01:45<1:00:50, 15.02s/batch, batch_loss=28.1, bat

Validation:  67%|▋| 501/743 [2:01:45<1:00:43, 15.05s/batch, batch_loss=28.1, bat

Validation:  67%|▋| 501/743 [2:02:00<1:00:43, 15.05s/batch, batch_loss=3.16e+3, 

Validation:  68%|▋| 502/743 [2:02:00<59:56, 14.92s/batch, batch_loss=3.16e+3, ba

Validation:  68%|▋| 502/743 [2:02:14<59:56, 14.92s/batch, batch_loss=24.6, batch

Validation:  68%|▋| 503/743 [2:02:14<58:42, 14.68s/batch, batch_loss=24.6, batch

Validation:  68%|▋| 503/743 [2:02:28<58:42, 14.68s/batch, batch_loss=14.5, batch

Validation:  68%|▋| 504/743 [2:02:28<58:21, 14.65s/batch, batch_loss=14.5, batch

Validation:  68%|▋| 504/743 [2:02:43<58:21, 14.65s/batch, batch_loss=35.6, batch

Validation:  68%|▋| 505/743 [2:02:43<58:00, 14.63s/batch, batch_loss=35.6, batch

Validation:  68%|▋| 505/743 [2:02:58<58:00, 14.63s/batch, batch_loss=2.86e+3, ba

Validation:  68%|▋| 506/743 [2:02:58<58:38, 14.84s/batch, batch_loss=2.86e+3, ba

Validation:  68%|▋| 506/743 [2:03:13<58:38, 14.84s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:03:13<58:31, 14.88s/batch, batch_loss=2e+3, batch

Validation:  68%|▋| 507/743 [2:03:28<58:31, 14.88s/batch, batch_loss=8.41e+3, ba

Validation:  68%|▋| 508/743 [2:03:28<58:13, 14.87s/batch, batch_loss=8.41e+3, ba

Validation:  68%|▋| 508/743 [2:03:43<58:13, 14.87s/batch, batch_loss=8.5e+3, bat

Validation:  69%|▋| 509/743 [2:03:43<57:56, 14.86s/batch, batch_loss=8.5e+3, bat

Validation:  69%|▋| 509/743 [2:03:57<57:56, 14.86s/batch, batch_loss=20.4, batch

Validation:  69%|▋| 510/743 [2:03:57<57:06, 14.71s/batch, batch_loss=20.4, batch

Validation:  69%|▋| 510/743 [2:04:14<57:06, 14.71s/batch, batch_loss=33, batch_i

Validation:  69%|▋| 511/743 [2:04:14<59:23, 15.36s/batch, batch_loss=33, batch_i

Validation:  69%|▋| 511/743 [2:04:29<59:23, 15.36s/batch, batch_loss=18.1, batch

Validation:  69%|▋| 512/743 [2:04:29<58:26, 15.18s/batch, batch_loss=18.1, batch

Validation:  69%|▋| 512/743 [2:04:44<58:26, 15.18s/batch, batch_loss=21.3, batch

Validation:  69%|▋| 513/743 [2:04:44<57:50, 15.09s/batch, batch_loss=21.3, batch

Validation:  69%|▋| 513/743 [2:04:58<57:50, 15.09s/batch, batch_loss=24.2, batch

Validation:  69%|▋| 514/743 [2:04:58<56:31, 14.81s/batch, batch_loss=24.2, batch

Validation:  69%|▋| 514/743 [2:05:12<56:31, 14.81s/batch, batch_loss=22, batch_i

Validation:  69%|▋| 515/743 [2:05:12<55:28, 14.60s/batch, batch_loss=22, batch_i

Validation:  69%|▋| 515/743 [2:05:26<55:28, 14.60s/batch, batch_loss=23.5, batch

Validation:  69%|▋| 516/743 [2:05:26<55:03, 14.55s/batch, batch_loss=23.5, batch

Validation:  69%|▋| 516/743 [2:05:41<55:03, 14.55s/batch, batch_loss=6.15e+4, ba

Validation:  70%|▋| 517/743 [2:05:41<55:09, 14.64s/batch, batch_loss=6.15e+4, ba

Validation:  70%|▋| 517/743 [2:05:56<55:09, 14.64s/batch, batch_loss=506, batch_

Validation:  70%|▋| 518/743 [2:05:56<54:23, 14.51s/batch, batch_loss=506, batch_

Validation:  70%|▋| 518/743 [2:06:10<54:23, 14.51s/batch, batch_loss=10.3, batch

Validation:  70%|▋| 519/743 [2:06:10<53:57, 14.45s/batch, batch_loss=10.3, batch

Validation:  70%|▋| 519/743 [2:06:25<53:57, 14.45s/batch, batch_loss=25.2, batch

Validation:  70%|▋| 520/743 [2:06:25<54:34, 14.69s/batch, batch_loss=25.2, batch

Validation:  70%|▋| 520/743 [2:06:40<54:34, 14.69s/batch, batch_loss=18.1, batch

Validation:  70%|▋| 521/743 [2:06:40<54:19, 14.68s/batch, batch_loss=18.1, batch

Validation:  70%|▋| 521/743 [2:06:55<54:19, 14.68s/batch, batch_loss=13.2, batch

Validation:  70%|▋| 522/743 [2:06:55<54:19, 14.75s/batch, batch_loss=13.2, batch

Validation:  70%|▋| 522/743 [2:07:09<54:19, 14.75s/batch, batch_loss=425, batch_

Validation:  70%|▋| 523/743 [2:07:09<53:45, 14.66s/batch, batch_loss=425, batch_

Validation:  70%|▋| 523/743 [2:07:23<53:45, 14.66s/batch, batch_loss=19.1, batch

Validation:  71%|▋| 524/743 [2:07:23<53:08, 14.56s/batch, batch_loss=19.1, batch

Validation:  71%|▋| 524/743 [2:07:38<53:08, 14.56s/batch, batch_loss=36.4, batch

Validation:  71%|▋| 525/743 [2:07:38<52:46, 14.52s/batch, batch_loss=36.4, batch

Validation:  71%|▋| 525/743 [2:07:52<52:46, 14.52s/batch, batch_loss=9.95, batch

Validation:  71%|▋| 526/743 [2:07:52<52:18, 14.46s/batch, batch_loss=9.95, batch

Validation:  71%|▋| 526/743 [2:08:07<52:18, 14.46s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:08:07<52:02, 14.46s/batch, batch_loss=3.75e+3, ba

Validation:  71%|▋| 527/743 [2:08:22<52:02, 14.46s/batch, batch_loss=511, batch_

Validation:  71%|▋| 528/743 [2:08:22<52:20, 14.61s/batch, batch_loss=511, batch_

Validation:  71%|▋| 528/743 [2:08:37<52:20, 14.61s/batch, batch_loss=6.49e+3, ba

Validation:  71%|▋| 529/743 [2:08:37<52:53, 14.83s/batch, batch_loss=6.49e+3, ba

Validation:  71%|▋| 529/743 [2:08:51<52:53, 14.83s/batch, batch_loss=215, batch_

Validation:  71%|▋| 530/743 [2:08:51<52:11, 14.70s/batch, batch_loss=215, batch_

Validation:  71%|▋| 530/743 [2:09:06<52:11, 14.70s/batch, batch_loss=51.8, batch

Validation:  71%|▋| 531/743 [2:09:06<52:02, 14.73s/batch, batch_loss=51.8, batch

Validation:  71%|▋| 531/743 [2:09:22<52:02, 14.73s/batch, batch_loss=278, batch_

Validation:  72%|▋| 532/743 [2:09:22<53:09, 15.11s/batch, batch_loss=278, batch_

Validation:  72%|▋| 532/743 [2:09:38<53:09, 15.11s/batch, batch_loss=11.4, batch

Validation:  72%|▋| 533/743 [2:09:38<53:48, 15.37s/batch, batch_loss=11.4, batch

Validation:  72%|▋| 533/743 [2:09:53<53:48, 15.37s/batch, batch_loss=15.6, batch

Validation:  72%|▋| 534/743 [2:09:53<52:45, 15.15s/batch, batch_loss=15.6, batch

Validation:  72%|▋| 534/743 [2:10:08<52:45, 15.15s/batch, batch_loss=41.1, batch

Validation:  72%|▋| 535/743 [2:10:08<52:31, 15.15s/batch, batch_loss=41.1, batch

Validation:  72%|▋| 535/743 [2:10:22<52:31, 15.15s/batch, batch_loss=29, batch_i

Validation:  72%|▋| 536/743 [2:10:22<51:20, 14.88s/batch, batch_loss=29, batch_i

Validation:  72%|▋| 536/743 [2:10:37<51:20, 14.88s/batch, batch_loss=16.8, batch

Validation:  72%|▋| 537/743 [2:10:37<50:54, 14.83s/batch, batch_loss=16.8, batch

Validation:  72%|▋| 537/743 [2:10:52<50:54, 14.83s/batch, batch_loss=24.4, batch

Validation:  72%|▋| 538/743 [2:10:52<50:29, 14.78s/batch, batch_loss=24.4, batch

Validation:  72%|▋| 538/743 [2:11:06<50:29, 14.78s/batch, batch_loss=267, batch_

Validation:  73%|▋| 539/743 [2:11:06<49:58, 14.70s/batch, batch_loss=267, batch_

Validation:  73%|▋| 539/743 [2:11:21<49:58, 14.70s/batch, batch_loss=25.2, batch

Validation:  73%|▋| 540/743 [2:11:21<49:57, 14.77s/batch, batch_loss=25.2, batch

Validation:  73%|▋| 540/743 [2:11:38<49:57, 14.77s/batch, batch_loss=40.8, batch

Validation:  73%|▋| 541/743 [2:11:38<52:18, 15.54s/batch, batch_loss=40.8, batch

Validation:  73%|▋| 541/743 [2:11:53<52:18, 15.54s/batch, batch_loss=1.97e+3, ba

Validation:  73%|▋| 542/743 [2:11:53<50:42, 15.14s/batch, batch_loss=1.97e+3, ba

Validation:  73%|▋| 542/743 [2:12:08<50:42, 15.14s/batch, batch_loss=29.5, batch

Validation:  73%|▋| 543/743 [2:12:08<50:21, 15.11s/batch, batch_loss=29.5, batch

Validation:  73%|▋| 543/743 [2:12:22<50:21, 15.11s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:12:22<49:52, 15.04s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:12:38<49:52, 15.04s/batch, batch_loss=2.76e+3, ba

Validation:  73%|▋| 545/743 [2:12:38<50:07, 15.19s/batch, batch_loss=2.76e+3, ba

Validation:  73%|▋| 545/743 [2:12:53<50:07, 15.19s/batch, batch_loss=10.4, batch

Validation:  73%|▋| 546/743 [2:12:53<49:29, 15.08s/batch, batch_loss=10.4, batch

Validation:  73%|▋| 546/743 [2:13:08<49:29, 15.08s/batch, batch_loss=279, batch_

Validation:  74%|▋| 547/743 [2:13:08<49:41, 15.21s/batch, batch_loss=279, batch_

Validation:  74%|▋| 547/743 [2:13:24<49:41, 15.21s/batch, batch_loss=37.2, batch

Validation:  74%|▋| 548/743 [2:13:24<49:43, 15.30s/batch, batch_loss=37.2, batch

Validation:  74%|▋| 548/743 [2:13:42<49:43, 15.30s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:13:42<52:17, 16.17s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:13:57<52:17, 16.17s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:13:57<50:29, 15.70s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:14:12<50:29, 15.70s/batch, batch_loss=18.6, batch

Validation:  74%|▋| 551/743 [2:14:12<49:59, 15.62s/batch, batch_loss=18.6, batch

Validation:  74%|▋| 551/743 [2:14:26<49:59, 15.62s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:14:26<48:32, 15.25s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:14:42<48:32, 15.25s/batch, batch_loss=46.1, batch

Validation:  74%|▋| 553/743 [2:14:42<48:11, 15.22s/batch, batch_loss=46.1, batch

Validation:  74%|▋| 553/743 [2:14:57<48:11, 15.22s/batch, batch_loss=29.9, batch

Validation:  75%|▋| 554/743 [2:14:57<48:31, 15.40s/batch, batch_loss=29.9, batch

Validation:  75%|▋| 554/743 [2:15:12<48:31, 15.40s/batch, batch_loss=2.48e+3, ba

Validation:  75%|▋| 555/743 [2:15:12<47:31, 15.17s/batch, batch_loss=2.48e+3, ba

Validation:  75%|▋| 555/743 [2:15:27<47:31, 15.17s/batch, batch_loss=60, batch_i

Validation:  75%|▋| 556/743 [2:15:27<47:03, 15.10s/batch, batch_loss=60, batch_i

Validation:  75%|▋| 556/743 [2:15:43<47:03, 15.10s/batch, batch_loss=9.63, batch

Validation:  75%|▋| 557/743 [2:15:43<47:16, 15.25s/batch, batch_loss=9.63, batch

Validation:  75%|▋| 557/743 [2:15:57<47:16, 15.25s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:15:57<46:28, 15.07s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:16:12<46:28, 15.07s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:16:12<45:53, 14.96s/batch, batch_loss=3.6e+3, bat

Validation:  75%|▊| 559/743 [2:16:27<45:53, 14.96s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:16:27<45:53, 15.05s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:16:42<45:53, 15.05s/batch, batch_loss=13.3, batch

Validation:  76%|▊| 561/743 [2:16:42<45:10, 14.89s/batch, batch_loss=13.3, batch

Validation:  76%|▊| 561/743 [2:16:56<45:10, 14.89s/batch, batch_loss=28.1, batch

Validation:  76%|▊| 562/743 [2:16:56<44:34, 14.78s/batch, batch_loss=28.1, batch

Validation:  76%|▊| 562/743 [2:17:11<44:34, 14.78s/batch, batch_loss=28.5, batch

Validation:  76%|▊| 563/743 [2:17:11<44:36, 14.87s/batch, batch_loss=28.5, batch

Validation:  76%|▊| 563/743 [2:17:26<44:36, 14.87s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:17:26<44:30, 14.92s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:17:41<44:30, 14.92s/batch, batch_loss=3.72e+3, ba

Validation:  76%|▊| 565/743 [2:17:41<44:18, 14.93s/batch, batch_loss=3.72e+3, ba

Validation:  76%|▊| 565/743 [2:17:55<44:18, 14.93s/batch, batch_loss=11.9, batch

Validation:  76%|▊| 566/743 [2:17:55<42:59, 14.57s/batch, batch_loss=11.9, batch

Validation:  76%|▊| 566/743 [2:18:10<42:59, 14.57s/batch, batch_loss=22.1, batch

Validation:  76%|▊| 567/743 [2:18:10<42:50, 14.60s/batch, batch_loss=22.1, batch

Validation:  76%|▊| 567/743 [2:18:23<42:50, 14.60s/batch, batch_loss=20.6, batch

Validation:  76%|▊| 568/743 [2:18:23<41:19, 14.17s/batch, batch_loss=20.6, batch

Validation:  76%|▊| 568/743 [2:18:36<41:19, 14.17s/batch, batch_loss=20.8, batch

Validation:  77%|▊| 569/743 [2:18:36<40:13, 13.87s/batch, batch_loss=20.8, batch

Validation:  77%|▊| 569/743 [2:18:49<40:13, 13.87s/batch, batch_loss=21, batch_i

Validation:  77%|▊| 570/743 [2:18:49<39:33, 13.72s/batch, batch_loss=21, batch_i

Validation:  77%|▊| 570/743 [2:19:04<39:33, 13.72s/batch, batch_loss=10.8, batch

Validation:  77%|▊| 571/743 [2:19:04<40:27, 14.11s/batch, batch_loss=10.8, batch

Validation:  77%|▊| 571/743 [2:19:19<40:27, 14.11s/batch, batch_loss=46.9, batch

Validation:  77%|▊| 572/743 [2:19:19<40:21, 14.16s/batch, batch_loss=46.9, batch

Validation:  77%|▊| 572/743 [2:19:34<40:21, 14.16s/batch, batch_loss=15.9, batch

Validation:  77%|▊| 573/743 [2:19:34<40:45, 14.38s/batch, batch_loss=15.9, batch

Validation:  77%|▊| 573/743 [2:19:47<40:45, 14.38s/batch, batch_loss=20.3, batch

Validation:  77%|▊| 574/743 [2:19:47<40:04, 14.23s/batch, batch_loss=20.3, batch

Validation:  77%|▊| 574/743 [2:20:02<40:04, 14.23s/batch, batch_loss=17.6, batch

Validation:  77%|▊| 575/743 [2:20:02<40:16, 14.38s/batch, batch_loss=17.6, batch

Validation:  77%|▊| 575/743 [2:20:18<40:16, 14.38s/batch, batch_loss=37.4, batch

Validation:  78%|▊| 576/743 [2:20:18<40:47, 14.65s/batch, batch_loss=37.4, batch

Validation:  78%|▊| 576/743 [2:20:33<40:47, 14.65s/batch, batch_loss=40.2, batch

Validation:  78%|▊| 577/743 [2:20:33<40:55, 14.79s/batch, batch_loss=40.2, batch

Validation:  78%|▊| 577/743 [2:20:48<40:55, 14.79s/batch, batch_loss=37.2, batch

Validation:  78%|▊| 578/743 [2:20:48<40:54, 14.88s/batch, batch_loss=37.2, batch

Validation:  78%|▊| 578/743 [2:21:02<40:54, 14.88s/batch, batch_loss=332, batch_

Validation:  78%|▊| 579/743 [2:21:02<40:27, 14.80s/batch, batch_loss=332, batch_

Validation:  78%|▊| 579/743 [2:21:17<40:27, 14.80s/batch, batch_loss=9.28, batch

Validation:  78%|▊| 580/743 [2:21:17<40:11, 14.79s/batch, batch_loss=9.28, batch

Validation:  78%|▊| 580/743 [2:21:34<40:11, 14.79s/batch, batch_loss=15.3, batch

Validation:  78%|▊| 581/743 [2:21:34<41:52, 15.51s/batch, batch_loss=15.3, batch

Validation:  78%|▊| 581/743 [2:21:49<41:52, 15.51s/batch, batch_loss=23.6, batch

Validation:  78%|▊| 582/743 [2:21:49<41:15, 15.38s/batch, batch_loss=23.6, batch

Validation:  78%|▊| 582/743 [2:22:04<41:15, 15.38s/batch, batch_loss=2.39e+3, ba

Validation:  78%|▊| 583/743 [2:22:04<40:34, 15.21s/batch, batch_loss=2.39e+3, ba

Validation:  78%|▊| 583/743 [2:22:18<40:34, 15.21s/batch, batch_loss=1.71, batch

Validation:  79%|▊| 584/743 [2:22:18<39:00, 14.72s/batch, batch_loss=1.71, batch

Validation:  79%|▊| 584/743 [2:22:33<39:00, 14.72s/batch, batch_loss=25.2, batch

Validation:  79%|▊| 585/743 [2:22:33<39:08, 14.86s/batch, batch_loss=25.2, batch

Validation:  79%|▊| 585/743 [2:22:47<39:08, 14.86s/batch, batch_loss=562, batch_

Validation:  79%|▊| 586/743 [2:22:47<38:22, 14.67s/batch, batch_loss=562, batch_

Validation:  79%|▊| 586/743 [2:23:02<38:22, 14.67s/batch, batch_loss=14.7, batch

Validation:  79%|▊| 587/743 [2:23:02<38:03, 14.64s/batch, batch_loss=14.7, batch

Validation:  79%|▊| 587/743 [2:23:16<38:03, 14.64s/batch, batch_loss=414, batch_

Validation:  79%|▊| 588/743 [2:23:16<37:48, 14.64s/batch, batch_loss=414, batch_

Validation:  79%|▊| 588/743 [2:23:31<37:48, 14.64s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:23:31<37:32, 14.63s/batch, batch_loss=2.5e+4, bat

Validation:  79%|▊| 589/743 [2:23:49<37:32, 14.63s/batch, batch_loss=25.7, batch

Validation:  79%|▊| 590/743 [2:23:49<39:43, 15.58s/batch, batch_loss=25.7, batch

Validation:  79%|▊| 590/743 [2:24:03<39:43, 15.58s/batch, batch_loss=11.7, batch

Validation:  80%|▊| 591/743 [2:24:03<38:38, 15.25s/batch, batch_loss=11.7, batch

Validation:  80%|▊| 591/743 [2:24:18<38:38, 15.25s/batch, batch_loss=14.5, batch

Validation:  80%|▊| 592/743 [2:24:18<38:11, 15.17s/batch, batch_loss=14.5, batch

Validation:  80%|▊| 592/743 [2:24:33<38:11, 15.17s/batch, batch_loss=2.38e+4, ba

Validation:  80%|▊| 593/743 [2:24:33<37:29, 15.00s/batch, batch_loss=2.38e+4, ba

Validation:  80%|▊| 593/743 [2:24:47<37:29, 15.00s/batch, batch_loss=3.28, batch

Validation:  80%|▊| 594/743 [2:24:47<36:26, 14.68s/batch, batch_loss=3.28, batch

Validation:  80%|▊| 594/743 [2:25:02<36:26, 14.68s/batch, batch_loss=7.42, batch

Validation:  80%|▊| 595/743 [2:25:02<36:21, 14.74s/batch, batch_loss=7.42, batch

Validation:  80%|▊| 595/743 [2:25:16<36:21, 14.74s/batch, batch_loss=8.46, batch

Validation:  80%|▊| 596/743 [2:25:16<35:53, 14.65s/batch, batch_loss=8.46, batch

Validation:  80%|▊| 596/743 [2:25:32<35:53, 14.65s/batch, batch_loss=1.78e+3, ba

Validation:  80%|▊| 597/743 [2:25:32<36:39, 15.07s/batch, batch_loss=1.78e+3, ba

Validation:  80%|▊| 597/743 [2:25:50<36:39, 15.07s/batch, batch_loss=16, batch_i

Validation:  80%|▊| 598/743 [2:25:50<38:25, 15.90s/batch, batch_loss=16, batch_i

Validation:  80%|▊| 598/743 [2:26:05<38:25, 15.90s/batch, batch_loss=25.4, batch

Validation:  81%|▊| 599/743 [2:26:05<37:12, 15.50s/batch, batch_loss=25.4, batch

Validation:  81%|▊| 599/743 [2:26:20<37:12, 15.50s/batch, batch_loss=32.1, batch

Validation:  81%|▊| 600/743 [2:26:20<37:05, 15.56s/batch, batch_loss=32.1, batch

Validation:  81%|▊| 600/743 [2:26:35<37:05, 15.56s/batch, batch_loss=14, batch_i

Validation:  81%|▊| 601/743 [2:26:35<36:17, 15.34s/batch, batch_loss=14, batch_i

Validation:  81%|▊| 601/743 [2:26:50<36:17, 15.34s/batch, batch_loss=22.9, batch

Validation:  81%|▊| 602/743 [2:26:50<35:28, 15.09s/batch, batch_loss=22.9, batch

Validation:  81%|▊| 602/743 [2:27:03<35:28, 15.09s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:27:03<34:20, 14.72s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:27:17<34:20, 14.72s/batch, batch_loss=32.1, batch

Validation:  81%|▊| 604/743 [2:27:17<33:35, 14.50s/batch, batch_loss=32.1, batch

Validation:  81%|▊| 604/743 [2:27:32<33:35, 14.50s/batch, batch_loss=40.5, batch

Validation:  81%|▊| 605/743 [2:27:32<33:11, 14.43s/batch, batch_loss=40.5, batch

Validation:  81%|▊| 605/743 [2:27:47<33:11, 14.43s/batch, batch_loss=267, batch_

Validation:  82%|▊| 606/743 [2:27:47<33:23, 14.63s/batch, batch_loss=267, batch_

Validation:  82%|▊| 606/743 [2:28:02<33:23, 14.63s/batch, batch_loss=40.6, batch

Validation:  82%|▊| 607/743 [2:28:02<33:20, 14.71s/batch, batch_loss=40.6, batch

Validation:  82%|▊| 607/743 [2:28:16<33:20, 14.71s/batch, batch_loss=34.5, batch

Validation:  82%|▊| 608/743 [2:28:16<32:44, 14.55s/batch, batch_loss=34.5, batch

Validation:  82%|▊| 608/743 [2:28:31<32:44, 14.55s/batch, batch_loss=27.7, batch

Validation:  82%|▊| 609/743 [2:28:31<32:47, 14.68s/batch, batch_loss=27.7, batch

Validation:  82%|▊| 609/743 [2:28:46<32:47, 14.68s/batch, batch_loss=23.6, batch

Validation:  82%|▊| 610/743 [2:28:46<32:39, 14.73s/batch, batch_loss=23.6, batch

Validation:  82%|▊| 610/743 [2:29:01<32:39, 14.73s/batch, batch_loss=21.9, batch

Validation:  82%|▊| 611/743 [2:29:01<32:33, 14.80s/batch, batch_loss=21.9, batch

Validation:  82%|▊| 611/743 [2:29:15<32:33, 14.80s/batch, batch_loss=11.1, batch

Validation:  82%|▊| 612/743 [2:29:15<31:56, 14.63s/batch, batch_loss=11.1, batch

Validation:  82%|▊| 612/743 [2:29:30<31:56, 14.63s/batch, batch_loss=19.8, batch

Validation:  83%|▊| 613/743 [2:29:30<31:41, 14.63s/batch, batch_loss=19.8, batch

Validation:  83%|▊| 613/743 [2:29:46<31:41, 14.63s/batch, batch_loss=5.61e+3, ba

Validation:  83%|▊| 614/743 [2:29:46<32:49, 15.27s/batch, batch_loss=5.61e+3, ba

Validation:  83%|▊| 614/743 [2:30:01<32:49, 15.27s/batch, batch_loss=14.4, batch

Validation:  83%|▊| 615/743 [2:30:01<32:19, 15.15s/batch, batch_loss=14.4, batch

Validation:  83%|▊| 615/743 [2:30:15<32:19, 15.15s/batch, batch_loss=21.3, batch

Validation:  83%|▊| 616/743 [2:30:15<31:26, 14.85s/batch, batch_loss=21.3, batch

Validation:  83%|▊| 616/743 [2:30:29<31:26, 14.85s/batch, batch_loss=7.43, batch

Validation:  83%|▊| 617/743 [2:30:29<30:39, 14.60s/batch, batch_loss=7.43, batch

Validation:  83%|▊| 617/743 [2:30:43<30:39, 14.60s/batch, batch_loss=17.5, batch

Validation:  83%|▊| 618/743 [2:30:43<29:41, 14.25s/batch, batch_loss=17.5, batch

Validation:  83%|▊| 618/743 [2:30:57<29:41, 14.25s/batch, batch_loss=362, batch_

Validation:  83%|▊| 619/743 [2:30:57<29:19, 14.19s/batch, batch_loss=362, batch_

Validation:  83%|▊| 619/743 [2:31:11<29:19, 14.19s/batch, batch_loss=31.3, batch

Validation:  83%|▊| 620/743 [2:31:11<28:56, 14.12s/batch, batch_loss=31.3, batch

Validation:  83%|▊| 620/743 [2:31:26<28:56, 14.12s/batch, batch_loss=9.06, batch

Validation:  84%|▊| 621/743 [2:31:26<29:06, 14.31s/batch, batch_loss=9.06, batch

Validation:  84%|▊| 621/743 [2:31:40<29:06, 14.31s/batch, batch_loss=14.3, batch

Validation:  84%|▊| 622/743 [2:31:40<29:08, 14.45s/batch, batch_loss=14.3, batch

Validation:  84%|▊| 622/743 [2:31:55<29:08, 14.45s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:31:55<29:06, 14.56s/batch, batch_loss=191, batch_

Validation:  84%|▊| 623/743 [2:32:10<29:06, 14.56s/batch, batch_loss=21.1, batch

Validation:  84%|▊| 624/743 [2:32:10<28:56, 14.59s/batch, batch_loss=21.1, batch

Validation:  84%|▊| 624/743 [2:32:25<28:56, 14.59s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:32:25<28:54, 14.70s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:32:39<28:54, 14.70s/batch, batch_loss=24.7, batch

Validation:  84%|▊| 626/743 [2:32:39<28:40, 14.71s/batch, batch_loss=24.7, batch

Validation:  84%|▊| 626/743 [2:32:55<28:40, 14.71s/batch, batch_loss=25.5, batch

Validation:  84%|▊| 627/743 [2:32:55<28:51, 14.92s/batch, batch_loss=25.5, batch

Validation:  84%|▊| 627/743 [2:33:10<28:51, 14.92s/batch, batch_loss=25.2, batch

Validation:  85%|▊| 628/743 [2:33:10<28:44, 15.00s/batch, batch_loss=25.2, batch

Validation:  85%|▊| 628/743 [2:33:24<28:44, 15.00s/batch, batch_loss=17.8, batch

Validation:  85%|▊| 629/743 [2:33:24<27:41, 14.58s/batch, batch_loss=17.8, batch

Validation:  85%|▊| 629/743 [2:33:38<27:41, 14.58s/batch, batch_loss=26, batch_i

Validation:  85%|▊| 630/743 [2:33:38<27:07, 14.41s/batch, batch_loss=26, batch_i

Validation:  85%|▊| 630/743 [2:33:54<27:07, 14.41s/batch, batch_loss=258, batch_

Validation:  85%|▊| 631/743 [2:33:54<27:42, 14.85s/batch, batch_loss=258, batch_

Validation:  85%|▊| 631/743 [2:34:08<27:42, 14.85s/batch, batch_loss=33, batch_i

Validation:  85%|▊| 632/743 [2:34:08<27:26, 14.83s/batch, batch_loss=33, batch_i

Validation:  85%|▊| 632/743 [2:34:23<27:26, 14.83s/batch, batch_loss=23.5, batch

Validation:  85%|▊| 633/743 [2:34:23<27:09, 14.82s/batch, batch_loss=23.5, batch

Validation:  85%|▊| 633/743 [2:34:37<27:09, 14.82s/batch, batch_loss=11.7, batch

Validation:  85%|▊| 634/743 [2:34:37<26:33, 14.62s/batch, batch_loss=11.7, batch

Validation:  85%|▊| 634/743 [2:34:52<26:33, 14.62s/batch, batch_loss=9.68, batch

Validation:  85%|▊| 635/743 [2:34:52<26:11, 14.55s/batch, batch_loss=9.68, batch

Validation:  85%|▊| 635/743 [2:35:06<26:11, 14.55s/batch, batch_loss=797, batch_

Validation:  86%|▊| 636/743 [2:35:06<25:41, 14.41s/batch, batch_loss=797, batch_

Validation:  86%|▊| 636/743 [2:35:23<25:41, 14.41s/batch, batch_loss=719, batch_

Validation:  86%|▊| 637/743 [2:35:23<26:45, 15.14s/batch, batch_loss=719, batch_

Validation:  86%|▊| 637/743 [2:35:38<26:45, 15.14s/batch, batch_loss=37.4, batch

Validation:  86%|▊| 638/743 [2:35:38<26:26, 15.11s/batch, batch_loss=37.4, batch

Validation:  86%|▊| 638/743 [2:35:52<26:26, 15.11s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:35:52<25:54, 14.94s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [2:36:07<25:54, 14.94s/batch, batch_loss=35.6, batch

Validation:  86%|▊| 640/743 [2:36:07<25:41, 14.96s/batch, batch_loss=35.6, batch

Validation:  86%|▊| 640/743 [2:36:22<25:41, 14.96s/batch, batch_loss=46.5, batch

Validation:  86%|▊| 641/743 [2:36:22<25:22, 14.93s/batch, batch_loss=46.5, batch

Validation:  86%|▊| 641/743 [2:36:36<25:22, 14.93s/batch, batch_loss=54.5, batch

Validation:  86%|▊| 642/743 [2:36:36<24:53, 14.79s/batch, batch_loss=54.5, batch

Validation:  86%|▊| 642/743 [2:36:51<24:53, 14.79s/batch, batch_loss=1.04e+3, ba

Validation:  87%|▊| 643/743 [2:36:51<24:17, 14.58s/batch, batch_loss=1.04e+3, ba

Validation:  87%|▊| 643/743 [2:37:04<24:17, 14.58s/batch, batch_loss=24.7, batch

Validation:  87%|▊| 644/743 [2:37:04<23:32, 14.26s/batch, batch_loss=24.7, batch

Validation:  87%|▊| 644/743 [2:37:18<23:32, 14.26s/batch, batch_loss=27.5, batch

Validation:  87%|▊| 645/743 [2:37:18<23:18, 14.27s/batch, batch_loss=27.5, batch

Validation:  87%|▊| 645/743 [2:37:33<23:18, 14.27s/batch, batch_loss=6.22e+3, ba

Validation:  87%|▊| 646/743 [2:37:33<23:19, 14.43s/batch, batch_loss=6.22e+3, ba

Validation:  87%|▊| 646/743 [2:37:48<23:19, 14.43s/batch, batch_loss=24.5, batch

Validation:  87%|▊| 647/743 [2:37:48<23:12, 14.50s/batch, batch_loss=24.5, batch

Validation:  87%|▊| 647/743 [2:38:03<23:12, 14.50s/batch, batch_loss=5.32, batch

Validation:  87%|▊| 648/743 [2:38:03<23:10, 14.64s/batch, batch_loss=5.32, batch

Validation:  87%|▊| 648/743 [2:38:17<23:10, 14.64s/batch, batch_loss=12.8, batch

Validation:  87%|▊| 649/743 [2:38:17<22:55, 14.63s/batch, batch_loss=12.8, batch

Validation:  87%|▊| 649/743 [2:38:32<22:55, 14.63s/batch, batch_loss=27.5, batch

Validation:  87%|▊| 650/743 [2:38:32<22:46, 14.69s/batch, batch_loss=27.5, batch

Validation:  87%|▊| 650/743 [2:38:47<22:46, 14.69s/batch, batch_loss=33.4, batch

Validation:  88%|▉| 651/743 [2:38:47<22:19, 14.56s/batch, batch_loss=33.4, batch

Validation:  88%|▉| 651/743 [2:39:01<22:19, 14.56s/batch, batch_loss=37.8, batch

Validation:  88%|▉| 652/743 [2:39:01<21:49, 14.39s/batch, batch_loss=37.8, batch

Validation:  88%|▉| 652/743 [2:39:15<21:49, 14.39s/batch, batch_loss=16.9, batch

Validation:  88%|▉| 653/743 [2:39:15<21:30, 14.34s/batch, batch_loss=16.9, batch

Validation:  88%|▉| 653/743 [2:39:30<21:30, 14.34s/batch, batch_loss=44.6, batch

Validation:  88%|▉| 654/743 [2:39:30<21:30, 14.50s/batch, batch_loss=44.6, batch

Validation:  88%|▉| 654/743 [2:39:44<21:30, 14.50s/batch, batch_loss=33.9, batch

Validation:  88%|▉| 655/743 [2:39:44<21:07, 14.41s/batch, batch_loss=33.9, batch

Validation:  88%|▉| 655/743 [2:39:59<21:07, 14.41s/batch, batch_loss=17, batch_i

Validation:  88%|▉| 656/743 [2:39:59<21:09, 14.59s/batch, batch_loss=17, batch_i

Validation:  88%|▉| 656/743 [2:40:15<21:09, 14.59s/batch, batch_loss=13, batch_i

Validation:  88%|▉| 657/743 [2:40:15<21:26, 14.96s/batch, batch_loss=13, batch_i

Validation:  88%|▉| 657/743 [2:40:29<21:26, 14.96s/batch, batch_loss=17, batch_i

Validation:  89%|▉| 658/743 [2:40:29<20:52, 14.74s/batch, batch_loss=17, batch_i

Validation:  89%|▉| 658/743 [2:40:43<20:52, 14.74s/batch, batch_loss=51.1, batch

Validation:  89%|▉| 659/743 [2:40:43<20:22, 14.55s/batch, batch_loss=51.1, batch

Validation:  89%|▉| 659/743 [2:40:56<20:22, 14.55s/batch, batch_loss=38.9, batch

Validation:  89%|▉| 660/743 [2:40:56<19:41, 14.24s/batch, batch_loss=38.9, batch

Validation:  89%|▉| 660/743 [2:41:11<19:41, 14.24s/batch, batch_loss=22.5, batch

Validation:  89%|▉| 661/743 [2:41:11<19:30, 14.27s/batch, batch_loss=22.5, batch

Validation:  89%|▉| 661/743 [2:41:25<19:30, 14.27s/batch, batch_loss=6.93, batch

Validation:  89%|▉| 662/743 [2:41:25<19:16, 14.28s/batch, batch_loss=6.93, batch

Validation:  89%|▉| 662/743 [2:41:40<19:16, 14.28s/batch, batch_loss=3.62e+3, ba

Validation:  89%|▉| 663/743 [2:41:40<19:25, 14.57s/batch, batch_loss=3.62e+3, ba

Validation:  89%|▉| 663/743 [2:41:55<19:25, 14.57s/batch, batch_loss=25.9, batch

Validation:  89%|▉| 664/743 [2:41:55<19:15, 14.63s/batch, batch_loss=25.9, batch

Validation:  89%|▉| 664/743 [2:42:10<19:15, 14.63s/batch, batch_loss=32.6, batch

Validation:  90%|▉| 665/743 [2:42:10<18:57, 14.59s/batch, batch_loss=32.6, batch

Validation:  90%|▉| 665/743 [2:42:23<18:57, 14.59s/batch, batch_loss=13.5, batch

Validation:  90%|▉| 666/743 [2:42:23<18:25, 14.36s/batch, batch_loss=13.5, batch

Validation:  90%|▉| 666/743 [2:42:38<18:25, 14.36s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:42:38<18:23, 14.52s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [2:42:53<18:23, 14.52s/batch, batch_loss=20.6, batch

Validation:  90%|▉| 668/743 [2:42:53<18:11, 14.55s/batch, batch_loss=20.6, batch

Validation:  90%|▉| 668/743 [2:43:07<18:11, 14.55s/batch, batch_loss=35.6, batch

Validation:  90%|▉| 669/743 [2:43:07<17:46, 14.41s/batch, batch_loss=35.6, batch

Validation:  90%|▉| 669/743 [2:43:22<17:46, 14.41s/batch, batch_loss=37.3, batch

Validation:  90%|▉| 670/743 [2:43:22<17:38, 14.50s/batch, batch_loss=37.3, batch

Validation:  90%|▉| 670/743 [2:43:36<17:38, 14.50s/batch, batch_loss=3.1e+3, bat

Validation:  90%|▉| 671/743 [2:43:36<17:18, 14.43s/batch, batch_loss=3.1e+3, bat

Validation:  90%|▉| 671/743 [2:43:51<17:18, 14.43s/batch, batch_loss=28.3, batch

Validation:  90%|▉| 672/743 [2:43:51<17:13, 14.56s/batch, batch_loss=28.3, batch

Validation:  90%|▉| 672/743 [2:44:06<17:13, 14.56s/batch, batch_loss=23.9, batch

Validation:  91%|▉| 673/743 [2:44:06<17:08, 14.70s/batch, batch_loss=23.9, batch

Validation:  91%|▉| 673/743 [2:44:21<17:08, 14.70s/batch, batch_loss=11.5, batch

Validation:  91%|▉| 674/743 [2:44:21<17:00, 14.79s/batch, batch_loss=11.5, batch

Validation:  91%|▉| 674/743 [2:44:37<17:00, 14.79s/batch, batch_loss=31.1, batch

Validation:  91%|▉| 675/743 [2:44:37<17:02, 15.03s/batch, batch_loss=31.1, batch

Validation:  91%|▉| 675/743 [2:44:51<17:02, 15.03s/batch, batch_loss=29.7, batch

Validation:  91%|▉| 676/743 [2:44:51<16:42, 14.96s/batch, batch_loss=29.7, batch

Validation:  91%|▉| 676/743 [2:45:06<16:42, 14.96s/batch, batch_loss=31.1, batch

Validation:  91%|▉| 677/743 [2:45:06<16:25, 14.94s/batch, batch_loss=31.1, batch

Validation:  91%|▉| 677/743 [2:45:20<16:25, 14.94s/batch, batch_loss=22.8, batch

Validation:  91%|▉| 678/743 [2:45:20<15:57, 14.74s/batch, batch_loss=22.8, batch

Validation:  91%|▉| 678/743 [2:45:35<15:57, 14.74s/batch, batch_loss=19, batch_i

Validation:  91%|▉| 679/743 [2:45:35<15:45, 14.77s/batch, batch_loss=19, batch_i

Validation:  91%|▉| 679/743 [2:45:50<15:45, 14.77s/batch, batch_loss=35.6, batch

Validation:  92%|▉| 680/743 [2:45:50<15:22, 14.64s/batch, batch_loss=35.6, batch

Validation:  92%|▉| 680/743 [2:46:03<15:22, 14.64s/batch, batch_loss=39.7, batch

Validation:  92%|▉| 681/743 [2:46:03<14:51, 14.38s/batch, batch_loss=39.7, batch

Validation:  92%|▉| 681/743 [2:46:18<14:51, 14.38s/batch, batch_loss=55.9, batch

Validation:  92%|▉| 682/743 [2:46:18<14:40, 14.43s/batch, batch_loss=55.9, batch

Validation:  92%|▉| 682/743 [2:46:32<14:40, 14.43s/batch, batch_loss=31.1, batch

Validation:  92%|▉| 683/743 [2:46:32<14:25, 14.43s/batch, batch_loss=31.1, batch

Validation:  92%|▉| 683/743 [2:46:48<14:25, 14.43s/batch, batch_loss=13.7, batch

Validation:  92%|▉| 684/743 [2:46:48<14:26, 14.69s/batch, batch_loss=13.7, batch

Validation:  92%|▉| 684/743 [2:47:03<14:26, 14.69s/batch, batch_loss=22.7, batch

Validation:  92%|▉| 685/743 [2:47:03<14:18, 14.80s/batch, batch_loss=22.7, batch

Validation:  92%|▉| 685/743 [2:47:18<14:18, 14.80s/batch, batch_loss=1.67e+3, ba

Validation:  92%|▉| 686/743 [2:47:18<14:15, 15.01s/batch, batch_loss=1.67e+3, ba

Validation:  92%|▉| 686/743 [2:47:33<14:15, 15.01s/batch, batch_loss=30.1, batch

Validation:  92%|▉| 687/743 [2:47:33<13:53, 14.89s/batch, batch_loss=30.1, batch

Validation:  92%|▉| 687/743 [2:47:51<13:53, 14.89s/batch, batch_loss=19.9, batch

Validation:  93%|▉| 688/743 [2:47:51<14:25, 15.73s/batch, batch_loss=19.9, batch

Validation:  93%|▉| 688/743 [2:48:04<14:25, 15.73s/batch, batch_loss=20.8, batch

Validation:  93%|▉| 689/743 [2:48:04<13:40, 15.19s/batch, batch_loss=20.8, batch

Validation:  93%|▉| 689/743 [2:48:19<13:40, 15.19s/batch, batch_loss=32.9, batch

Validation:  93%|▉| 690/743 [2:48:19<13:13, 14.97s/batch, batch_loss=32.9, batch

Validation:  93%|▉| 690/743 [2:48:32<13:13, 14.97s/batch, batch_loss=18.6, batch

Validation:  93%|▉| 691/743 [2:48:32<12:33, 14.49s/batch, batch_loss=18.6, batch

Validation:  93%|▉| 691/743 [2:48:45<12:33, 14.49s/batch, batch_loss=30.7, batch

Validation:  93%|▉| 692/743 [2:48:45<11:53, 14.00s/batch, batch_loss=30.7, batch

Validation:  93%|▉| 692/743 [2:49:00<11:53, 14.00s/batch, batch_loss=35.3, batch

Validation:  93%|▉| 693/743 [2:49:00<11:54, 14.30s/batch, batch_loss=35.3, batch

Validation:  93%|▉| 693/743 [2:49:18<11:54, 14.30s/batch, batch_loss=39, batch_i

Validation:  93%|▉| 694/743 [2:49:18<12:25, 15.22s/batch, batch_loss=39, batch_i

Validation:  93%|▉| 694/743 [2:49:32<12:25, 15.22s/batch, batch_loss=3.11e+3, ba

Validation:  94%|▉| 695/743 [2:49:32<11:57, 14.96s/batch, batch_loss=3.11e+3, ba

Validation:  94%|▉| 695/743 [2:49:46<11:57, 14.96s/batch, batch_loss=9.01, batch

Validation:  94%|▉| 696/743 [2:49:46<11:36, 14.83s/batch, batch_loss=9.01, batch

Validation:  94%|▉| 696/743 [2:50:01<11:36, 14.83s/batch, batch_loss=42.1, batch

Validation:  94%|▉| 697/743 [2:50:01<11:23, 14.86s/batch, batch_loss=42.1, batch

Validation:  94%|▉| 697/743 [2:50:16<11:23, 14.86s/batch, batch_loss=751, batch_

Validation:  94%|▉| 698/743 [2:50:16<11:00, 14.68s/batch, batch_loss=751, batch_

Validation:  94%|▉| 698/743 [2:50:30<11:00, 14.68s/batch, batch_loss=8.06, batch

Validation:  94%|▉| 699/743 [2:50:30<10:47, 14.72s/batch, batch_loss=8.06, batch

Validation:  94%|▉| 699/743 [2:50:46<10:47, 14.72s/batch, batch_loss=958, batch_

Validation:  94%|▉| 700/743 [2:50:46<10:38, 14.86s/batch, batch_loss=958, batch_

Validation:  94%|▉| 700/743 [2:51:01<10:38, 14.86s/batch, batch_loss=10.4, batch

Validation:  94%|▉| 701/743 [2:51:01<10:27, 14.95s/batch, batch_loss=10.4, batch

Validation:  94%|▉| 701/743 [2:51:17<10:27, 14.95s/batch, batch_loss=7.16, batch

Validation:  94%|▉| 702/743 [2:51:17<10:34, 15.48s/batch, batch_loss=7.16, batch

Validation:  94%|▉| 702/743 [2:51:32<10:34, 15.48s/batch, batch_loss=184, batch_

Validation:  95%|▉| 703/743 [2:51:32<10:11, 15.30s/batch, batch_loss=184, batch_

Validation:  95%|▉| 703/743 [2:51:47<10:11, 15.30s/batch, batch_loss=472, batch_

Validation:  95%|▉| 704/743 [2:51:47<09:53, 15.21s/batch, batch_loss=472, batch_

Validation:  95%|▉| 704/743 [2:52:02<09:53, 15.21s/batch, batch_loss=7.71, batch

Validation:  95%|▉| 705/743 [2:52:02<09:26, 14.89s/batch, batch_loss=7.71, batch

Validation:  95%|▉| 705/743 [2:52:16<09:26, 14.89s/batch, batch_loss=25.7, batch

Validation:  95%|▉| 706/743 [2:52:16<09:06, 14.77s/batch, batch_loss=25.7, batch

Validation:  95%|▉| 706/743 [2:52:32<09:06, 14.77s/batch, batch_loss=427, batch_

Validation:  95%|▉| 707/743 [2:52:32<09:02, 15.06s/batch, batch_loss=427, batch_

Validation:  95%|▉| 707/743 [2:52:47<09:02, 15.06s/batch, batch_loss=24.1, batch

Validation:  95%|▉| 708/743 [2:52:47<08:54, 15.27s/batch, batch_loss=24.1, batch

Validation:  95%|▉| 708/743 [2:53:02<08:54, 15.27s/batch, batch_loss=42.2, batch

Validation:  95%|▉| 709/743 [2:53:02<08:34, 15.12s/batch, batch_loss=42.2, batch

Validation:  95%|▉| 709/743 [2:53:17<08:34, 15.12s/batch, batch_loss=18.6, batch

Validation:  96%|▉| 710/743 [2:53:17<08:14, 15.00s/batch, batch_loss=18.6, batch

Validation:  96%|▉| 710/743 [2:53:33<08:14, 15.00s/batch, batch_loss=23.3, batch

Validation:  96%|▉| 711/743 [2:53:33<08:09, 15.31s/batch, batch_loss=23.3, batch

Validation:  96%|▉| 711/743 [2:53:48<08:09, 15.31s/batch, batch_loss=32.9, batch

Validation:  96%|▉| 712/743 [2:53:48<07:48, 15.10s/batch, batch_loss=32.9, batch

Validation:  96%|▉| 712/743 [2:54:02<07:48, 15.10s/batch, batch_loss=21.3, batch

Validation:  96%|▉| 713/743 [2:54:02<07:27, 14.92s/batch, batch_loss=21.3, batch

Validation:  96%|▉| 713/743 [2:54:16<07:27, 14.92s/batch, batch_loss=8.02, batch

Validation:  96%|▉| 714/743 [2:54:16<07:07, 14.73s/batch, batch_loss=8.02, batch

Validation:  96%|▉| 714/743 [2:54:31<07:07, 14.73s/batch, batch_loss=12.2, batch

Validation:  96%|▉| 715/743 [2:54:31<06:54, 14.80s/batch, batch_loss=12.2, batch

Validation:  96%|▉| 715/743 [2:54:45<06:54, 14.80s/batch, batch_loss=28, batch_i

Validation:  96%|▉| 716/743 [2:54:45<06:34, 14.59s/batch, batch_loss=28, batch_i

Validation:  96%|▉| 716/743 [2:55:00<06:34, 14.59s/batch, batch_loss=400, batch_

Validation:  97%|▉| 717/743 [2:55:00<06:17, 14.51s/batch, batch_loss=400, batch_

Validation:  97%|▉| 717/743 [2:55:14<06:17, 14.51s/batch, batch_loss=42.5, batch

Validation:  97%|▉| 718/743 [2:55:14<05:58, 14.33s/batch, batch_loss=42.5, batch

Validation:  97%|▉| 718/743 [2:55:30<05:58, 14.33s/batch, batch_loss=35.8, batch

Validation:  97%|▉| 719/743 [2:55:30<06:00, 15.01s/batch, batch_loss=35.8, batch

Validation:  97%|▉| 719/743 [2:55:45<06:00, 15.01s/batch, batch_loss=34.8, batch

Validation:  97%|▉| 720/743 [2:55:45<05:44, 14.96s/batch, batch_loss=34.8, batch

Validation:  97%|▉| 720/743 [2:55:59<05:44, 14.96s/batch, batch_loss=18.9, batch

Validation:  97%|▉| 721/743 [2:55:59<05:23, 14.72s/batch, batch_loss=18.9, batch

Validation:  97%|▉| 721/743 [2:56:14<05:23, 14.72s/batch, batch_loss=42.7, batch

Validation:  97%|▉| 722/743 [2:56:14<05:10, 14.80s/batch, batch_loss=42.7, batch

Validation:  97%|▉| 722/743 [2:56:29<05:10, 14.80s/batch, batch_loss=5.32e+3, ba

Validation:  97%|▉| 723/743 [2:56:29<04:54, 14.72s/batch, batch_loss=5.32e+3, ba

Validation:  97%|▉| 723/743 [2:56:43<04:54, 14.72s/batch, batch_loss=30.6, batch

Validation:  97%|▉| 724/743 [2:56:43<04:39, 14.70s/batch, batch_loss=30.6, batch

Validation:  97%|▉| 724/743 [2:56:57<04:39, 14.70s/batch, batch_loss=19.9, batch

Validation:  98%|▉| 725/743 [2:56:57<04:20, 14.49s/batch, batch_loss=19.9, batch

Validation:  98%|▉| 725/743 [2:57:11<04:20, 14.49s/batch, batch_loss=26.6, batch

Validation:  98%|▉| 726/743 [2:57:11<04:03, 14.33s/batch, batch_loss=26.6, batch

Validation:  98%|▉| 726/743 [2:57:25<04:03, 14.33s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:57:25<03:46, 14.15s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [2:57:40<03:46, 14.15s/batch, batch_loss=44.1, batch

Validation:  98%|▉| 728/743 [2:57:40<03:34, 14.31s/batch, batch_loss=44.1, batch

Validation:  98%|▉| 728/743 [2:57:55<03:34, 14.31s/batch, batch_loss=43, batch_i

Validation:  98%|▉| 729/743 [2:57:55<03:22, 14.43s/batch, batch_loss=43, batch_i

Validation:  98%|▉| 729/743 [2:58:09<03:22, 14.43s/batch, batch_loss=32, batch_i

Validation:  98%|▉| 730/743 [2:58:09<03:06, 14.33s/batch, batch_loss=32, batch_i

Validation:  98%|▉| 730/743 [2:58:26<03:06, 14.33s/batch, batch_loss=16.1, batch

Validation:  98%|▉| 731/743 [2:58:26<03:04, 15.34s/batch, batch_loss=16.1, batch

Validation:  98%|▉| 731/743 [2:58:40<03:04, 15.34s/batch, batch_loss=13.6, batch

Validation:  99%|▉| 732/743 [2:58:40<02:44, 14.93s/batch, batch_loss=13.6, batch

Validation:  99%|▉| 732/743 [2:58:55<02:44, 14.93s/batch, batch_loss=35.7, batch

Validation:  99%|▉| 733/743 [2:58:55<02:27, 14.79s/batch, batch_loss=35.7, batch

Validation:  99%|▉| 733/743 [2:59:09<02:27, 14.79s/batch, batch_loss=3.33, batch

Validation:  99%|▉| 734/743 [2:59:09<02:10, 14.50s/batch, batch_loss=3.33, batch

Validation:  99%|▉| 734/743 [2:59:23<02:10, 14.50s/batch, batch_loss=7.51, batch

Validation:  99%|▉| 735/743 [2:59:23<01:55, 14.50s/batch, batch_loss=7.51, batch

Validation:  99%|▉| 735/743 [2:59:37<01:55, 14.50s/batch, batch_loss=1.14, batch

Validation:  99%|▉| 736/743 [2:59:37<01:40, 14.35s/batch, batch_loss=1.14, batch

Validation:  99%|▉| 736/743 [2:59:50<01:40, 14.35s/batch, batch_loss=0.205, batc

Validation:  99%|▉| 737/743 [2:59:50<01:23, 13.92s/batch, batch_loss=0.205, batc

Validation:  99%|▉| 737/743 [3:00:03<01:23, 13.92s/batch, batch_loss=0.205, batc

Validation:  99%|▉| 738/743 [3:00:03<01:07, 13.59s/batch, batch_loss=0.205, batc

Validation:  99%|▉| 738/743 [3:00:16<01:07, 13.59s/batch, batch_loss=0.205, batc

Validation:  99%|▉| 739/743 [3:00:16<00:53, 13.40s/batch, batch_loss=0.205, batc

Validation:  99%|▉| 739/743 [3:00:28<00:53, 13.40s/batch, batch_loss=0.205, batc

Validation: 100%|▉| 740/743 [3:00:28<00:38, 12.90s/batch, batch_loss=0.205, batc

Validation: 100%|▉| 740/743 [3:00:40<00:38, 12.90s/batch, batch_loss=0.205, batc

Validation: 100%|▉| 741/743 [3:00:40<00:25, 12.75s/batch, batch_loss=0.205, batc

Validation: 100%|▉| 741/743 [3:00:52<00:25, 12.75s/batch, batch_loss=0.205, batc

Validation: 100%|▉| 742/743 [3:00:52<00:12, 12.53s/batch, batch_loss=0.205, batc

Validation: 100%|▉| 742/743 [3:01:03<00:12, 12.53s/batch, batch_loss=0.205, batc

Validation: 100%|█| 743/743 [3:01:03<00:00, 12.22s/batch, batch_loss=0.205, batc

Validation: 100%|█| 743/743 [3:01:03<00:00, 14.62s/batch, batch_loss=0.205, batc




Val Loss: 1303.2812


Epoch 10/10:   0%|                                   | 0/991 [00:00<?, ?batch/s]

Epoch 10/10:   0%| | 0/991 [00:15<?, ?batch/s, batch_loss=24, batch_index=1, bat

Epoch 10/10:   0%| | 1/991 [00:15<4:11:50, 15.26s/batch, batch_loss=24, batch_in

Epoch 10/10:   0%| | 1/991 [00:31<4:11:50, 15.26s/batch, batch_loss=24.3, batch_

Epoch 10/10:   0%| | 2/991 [00:31<4:21:40, 15.87s/batch, batch_loss=24.3, batch_

Epoch 10/10:   0%| | 2/991 [00:45<4:21:40, 15.87s/batch, batch_loss=22.4, batch_

Epoch 10/10:   0%| | 3/991 [00:45<4:06:32, 14.97s/batch, batch_loss=22.4, batch_

Epoch 10/10:   0%| | 3/991 [00:59<4:06:32, 14.97s/batch, batch_loss=11, batch_in

Epoch 10/10:   0%| | 4/991 [00:59<4:02:09, 14.72s/batch, batch_loss=11, batch_in

Epoch 10/10:   0%| | 4/991 [01:14<4:02:09, 14.72s/batch, batch_loss=34.3, batch_

Epoch 10/10:   1%| | 5/991 [01:14<4:04:34, 14.88s/batch, batch_loss=34.3, batch_

Epoch 10/10:   1%| | 5/991 [01:29<4:04:34, 14.88s/batch, batch_loss=30.5, batch_

Epoch 10/10:   1%| | 6/991 [01:29<4:04:48, 14.91s/batch, batch_loss=30.5, batch_

Epoch 10/10:   1%| | 6/991 [01:44<4:04:48, 14.91s/batch, batch_loss=25.3, batch_

Epoch 10/10:   1%| | 7/991 [01:44<4:04:37, 14.92s/batch, batch_loss=25.3, batch_

Epoch 10/10:   1%| | 7/991 [01:59<4:04:37, 14.92s/batch, batch_loss=611, batch_i

Epoch 10/10:   1%| | 8/991 [01:59<4:03:38, 14.87s/batch, batch_loss=611, batch_i

Epoch 10/10:   1%| | 8/991 [02:14<4:03:38, 14.87s/batch, batch_loss=19, batch_in

Epoch 10/10:   1%| | 9/991 [02:14<4:02:03, 14.79s/batch, batch_loss=19, batch_in

Epoch 10/10:   1%| | 9/991 [02:29<4:02:03, 14.79s/batch, batch_loss=16.9, batch_

Epoch 10/10:   1%| | 10/991 [02:29<4:06:16, 15.06s/batch, batch_loss=16.9, batch

Epoch 10/10:   1%| | 10/991 [02:47<4:06:16, 15.06s/batch, batch_loss=15.4, batch

Epoch 10/10:   1%| | 11/991 [02:47<4:20:41, 15.96s/batch, batch_loss=15.4, batch

Epoch 10/10:   1%| | 11/991 [03:03<4:20:41, 15.96s/batch, batch_loss=2.01e+3, ba

Epoch 10/10:   1%| | 12/991 [03:03<4:16:31, 15.72s/batch, batch_loss=2.01e+3, ba

Epoch 10/10:   1%| | 12/991 [03:18<4:16:31, 15.72s/batch, batch_loss=26.3, batch

Epoch 10/10:   1%| | 13/991 [03:18<4:13:36, 15.56s/batch, batch_loss=26.3, batch

Epoch 10/10:   1%| | 13/991 [03:34<4:13:36, 15.56s/batch, batch_loss=14.2, batch

Epoch 10/10:   1%| | 14/991 [03:34<4:16:05, 15.73s/batch, batch_loss=14.2, batch

Epoch 10/10:   1%| | 14/991 [03:49<4:16:05, 15.73s/batch, batch_loss=9.74, batch

Epoch 10/10:   2%| | 15/991 [03:49<4:12:14, 15.51s/batch, batch_loss=9.74, batch

Epoch 10/10:   2%| | 15/991 [04:04<4:12:14, 15.51s/batch, batch_loss=13.2, batch

Epoch 10/10:   2%| | 16/991 [04:04<4:12:02, 15.51s/batch, batch_loss=13.2, batch

Epoch 10/10:   2%| | 16/991 [04:20<4:12:02, 15.51s/batch, batch_loss=19, batch_i

Epoch 10/10:   2%| | 17/991 [04:20<4:10:34, 15.44s/batch, batch_loss=19, batch_i

Epoch 10/10:   2%| | 17/991 [04:35<4:10:34, 15.44s/batch, batch_loss=11.3, batch

Epoch 10/10:   2%| | 18/991 [04:35<4:08:37, 15.33s/batch, batch_loss=11.3, batch

Epoch 10/10:   2%| | 18/991 [04:50<4:08:37, 15.33s/batch, batch_loss=8.95e+3, ba

Epoch 10/10:   2%| | 19/991 [04:50<4:06:26, 15.21s/batch, batch_loss=8.95e+3, ba

Epoch 10/10:   2%| | 19/991 [05:04<4:06:26, 15.21s/batch, batch_loss=11.8, batch

Epoch 10/10:   2%| | 20/991 [05:04<4:01:32, 14.93s/batch, batch_loss=11.8, batch

Epoch 10/10:   2%| | 20/991 [05:19<4:01:32, 14.93s/batch, batch_loss=19.3, batch

Epoch 10/10:   2%| | 21/991 [05:19<4:03:31, 15.06s/batch, batch_loss=19.3, batch

Epoch 10/10:   2%| | 21/991 [05:35<4:03:31, 15.06s/batch, batch_loss=1.07e+3, ba

Epoch 10/10:   2%| | 22/991 [05:35<4:04:16, 15.13s/batch, batch_loss=1.07e+3, ba

Epoch 10/10:   2%| | 22/991 [05:50<4:04:16, 15.13s/batch, batch_loss=8.07, batch

Epoch 10/10:   2%| | 23/991 [05:50<4:04:35, 15.16s/batch, batch_loss=8.07, batch

Epoch 10/10:   2%| | 23/991 [06:05<4:04:35, 15.16s/batch, batch_loss=12.7, batch

Epoch 10/10:   2%| | 24/991 [06:05<4:04:04, 15.14s/batch, batch_loss=12.7, batch

Epoch 10/10:   2%| | 24/991 [06:20<4:04:04, 15.14s/batch, batch_loss=13.2, batch

Epoch 10/10:   3%| | 25/991 [06:20<4:04:12, 15.17s/batch, batch_loss=13.2, batch

Epoch 10/10:   3%| | 25/991 [06:35<4:04:12, 15.17s/batch, batch_loss=15.3, batch

Epoch 10/10:   3%| | 26/991 [06:35<4:03:34, 15.14s/batch, batch_loss=15.3, batch

Epoch 10/10:   3%| | 26/991 [06:50<4:03:34, 15.14s/batch, batch_loss=17.2, batch

Epoch 10/10:   3%| | 27/991 [06:50<4:02:20, 15.08s/batch, batch_loss=17.2, batch

Epoch 10/10:   3%| | 27/991 [07:06<4:02:20, 15.08s/batch, batch_loss=1.13e+3, ba

Epoch 10/10:   3%| | 28/991 [07:06<4:03:25, 15.17s/batch, batch_loss=1.13e+3, ba

Epoch 10/10:   3%| | 28/991 [07:20<4:03:25, 15.17s/batch, batch_loss=11.7, batch

Epoch 10/10:   3%| | 29/991 [07:20<4:01:35, 15.07s/batch, batch_loss=11.7, batch

Epoch 10/10:   3%| | 29/991 [07:36<4:01:35, 15.07s/batch, batch_loss=11.7, batch

Epoch 10/10:   3%| | 30/991 [07:36<4:03:11, 15.18s/batch, batch_loss=11.7, batch

Epoch 10/10:   3%| | 30/991 [07:51<4:03:11, 15.18s/batch, batch_loss=9.66, batch

Epoch 10/10:   3%| | 31/991 [07:51<4:00:22, 15.02s/batch, batch_loss=9.66, batch

Epoch 10/10:   3%| | 31/991 [08:05<4:00:22, 15.02s/batch, batch_loss=1.3e+4, bat

Epoch 10/10:   3%| | 32/991 [08:05<3:58:24, 14.92s/batch, batch_loss=1.3e+4, bat

Epoch 10/10:   3%| | 32/991 [08:21<3:58:24, 14.92s/batch, batch_loss=15.4, batch

Epoch 10/10:   3%| | 33/991 [08:21<4:01:38, 15.13s/batch, batch_loss=15.4, batch

Epoch 10/10:   3%| | 33/991 [08:38<4:01:38, 15.13s/batch, batch_loss=8.45, batch

Epoch 10/10:   3%| | 34/991 [08:38<4:10:22, 15.70s/batch, batch_loss=8.45, batch

Epoch 10/10:   3%| | 34/991 [08:54<4:10:22, 15.70s/batch, batch_loss=12.9, batch

Epoch 10/10:   4%| | 35/991 [08:54<4:10:37, 15.73s/batch, batch_loss=12.9, batch

Epoch 10/10:   4%| | 35/991 [09:10<4:10:37, 15.73s/batch, batch_loss=11.1, batch

Epoch 10/10:   4%| | 36/991 [09:10<4:11:20, 15.79s/batch, batch_loss=11.1, batch

Epoch 10/10:   4%| | 36/991 [09:25<4:11:20, 15.79s/batch, batch_loss=10.7, batch

Epoch 10/10:   4%| | 37/991 [09:25<4:11:10, 15.80s/batch, batch_loss=10.7, batch

Epoch 10/10:   4%| | 37/991 [09:40<4:11:10, 15.80s/batch, batch_loss=7.17, batch

Epoch 10/10:   4%| | 38/991 [09:40<4:07:29, 15.58s/batch, batch_loss=7.17, batch

Epoch 10/10:   4%| | 38/991 [09:55<4:07:29, 15.58s/batch, batch_loss=1.72e+3, ba

Epoch 10/10:   4%| | 39/991 [09:55<4:02:39, 15.29s/batch, batch_loss=1.72e+3, ba

Epoch 10/10:   4%| | 39/991 [10:10<4:02:39, 15.29s/batch, batch_loss=14.7, batch

Epoch 10/10:   4%| | 40/991 [10:10<4:01:57, 15.26s/batch, batch_loss=14.7, batch

Epoch 10/10:   4%| | 40/991 [10:24<4:01:57, 15.26s/batch, batch_loss=6.22e+3, ba

Epoch 10/10:   4%| | 41/991 [10:24<3:56:19, 14.93s/batch, batch_loss=6.22e+3, ba

Epoch 10/10:   4%| | 41/991 [10:42<3:56:19, 14.93s/batch, batch_loss=15.1, batch

Epoch 10/10:   4%| | 42/991 [10:42<4:08:32, 15.71s/batch, batch_loss=15.1, batch

Epoch 10/10:   4%| | 42/991 [10:57<4:08:32, 15.71s/batch, batch_loss=9.63, batch

Epoch 10/10:   4%| | 43/991 [10:57<4:05:06, 15.51s/batch, batch_loss=9.63, batch

Epoch 10/10:   4%| | 43/991 [11:12<4:05:06, 15.51s/batch, batch_loss=15.8, batch

Epoch 10/10:   4%| | 44/991 [11:12<4:02:54, 15.39s/batch, batch_loss=15.8, batch

Epoch 10/10:   4%| | 44/991 [11:27<4:02:54, 15.39s/batch, batch_loss=14.6, batch

Epoch 10/10:   5%| | 45/991 [11:27<4:00:18, 15.24s/batch, batch_loss=14.6, batch

Epoch 10/10:   5%| | 45/991 [11:43<4:00:18, 15.24s/batch, batch_loss=13.3, batch

Epoch 10/10:   5%| | 46/991 [11:43<4:01:17, 15.32s/batch, batch_loss=13.3, batch

Epoch 10/10:   5%| | 46/991 [11:57<4:01:17, 15.32s/batch, batch_loss=6.67, batch

Epoch 10/10:   5%| | 47/991 [11:57<3:59:06, 15.20s/batch, batch_loss=6.67, batch

Epoch 10/10:   5%| | 47/991 [12:12<3:59:06, 15.20s/batch, batch_loss=13.7, batch

Epoch 10/10:   5%| | 48/991 [12:12<3:57:24, 15.11s/batch, batch_loss=13.7, batch

Epoch 10/10:   5%| | 48/991 [12:27<3:57:24, 15.11s/batch, batch_loss=12.4, batch

Epoch 10/10:   5%| | 49/991 [12:27<3:53:42, 14.89s/batch, batch_loss=12.4, batch

Epoch 10/10:   5%| | 49/991 [12:43<3:53:42, 14.89s/batch, batch_loss=14.3, batch

Epoch 10/10:   5%| | 50/991 [12:43<3:59:34, 15.28s/batch, batch_loss=14.3, batch

Epoch 10/10:   5%| | 50/991 [12:57<3:59:34, 15.28s/batch, batch_loss=10.2, batch

Epoch 10/10:   5%| | 51/991 [12:57<3:55:12, 15.01s/batch, batch_loss=10.2, batch

Epoch 10/10:   5%| | 51/991 [13:11<3:55:12, 15.01s/batch, batch_loss=13.5, batch

Epoch 10/10:   5%| | 52/991 [13:11<3:50:10, 14.71s/batch, batch_loss=13.5, batch

Epoch 10/10:   5%| | 52/991 [13:26<3:50:10, 14.71s/batch, batch_loss=14.5, batch

Epoch 10/10:   5%| | 53/991 [13:26<3:50:19, 14.73s/batch, batch_loss=14.5, batch

Epoch 10/10:   5%| | 53/991 [13:41<3:50:19, 14.73s/batch, batch_loss=8.91, batch

Epoch 10/10:   5%| | 54/991 [13:41<3:51:58, 14.85s/batch, batch_loss=8.91, batch

Epoch 10/10:   5%| | 54/991 [13:57<3:51:58, 14.85s/batch, batch_loss=9.91, batch

Epoch 10/10:   6%| | 55/991 [13:57<3:56:06, 15.14s/batch, batch_loss=9.91, batch

Epoch 10/10:   6%| | 55/991 [14:13<3:56:06, 15.14s/batch, batch_loss=12.1, batch

Epoch 10/10:   6%| | 56/991 [14:13<3:58:27, 15.30s/batch, batch_loss=12.1, batch

Epoch 10/10:   6%| | 56/991 [14:28<3:58:27, 15.30s/batch, batch_loss=7.2, batch_

Epoch 10/10:   6%| | 57/991 [14:28<3:57:42, 15.27s/batch, batch_loss=7.2, batch_

Epoch 10/10:   6%| | 57/991 [14:43<3:57:42, 15.27s/batch, batch_loss=14.5, batch

Epoch 10/10:   6%| | 58/991 [14:43<3:54:25, 15.08s/batch, batch_loss=14.5, batch

Epoch 10/10:   6%| | 58/991 [14:58<3:54:25, 15.08s/batch, batch_loss=9.87, batch

Epoch 10/10:   6%| | 59/991 [14:58<3:55:35, 15.17s/batch, batch_loss=9.87, batch

Epoch 10/10:   6%| | 59/991 [15:14<3:55:35, 15.17s/batch, batch_loss=18.5, batch

Epoch 10/10:   6%| | 60/991 [15:14<3:59:07, 15.41s/batch, batch_loss=18.5, batch

Epoch 10/10:   6%| | 60/991 [15:29<3:59:07, 15.41s/batch, batch_loss=11.5, batch

Epoch 10/10:   6%| | 61/991 [15:29<3:57:31, 15.32s/batch, batch_loss=11.5, batch

Epoch 10/10:   6%| | 61/991 [15:45<3:57:31, 15.32s/batch, batch_loss=10.7, batch

Epoch 10/10:   6%| | 62/991 [15:45<3:59:24, 15.46s/batch, batch_loss=10.7, batch

Epoch 10/10:   6%| | 62/991 [16:00<3:59:24, 15.46s/batch, batch_loss=418, batch_

Epoch 10/10:   6%| | 63/991 [16:00<3:57:18, 15.34s/batch, batch_loss=418, batch_

Epoch 10/10:   6%| | 63/991 [16:15<3:57:18, 15.34s/batch, batch_loss=802, batch_

Epoch 10/10:   6%| | 64/991 [16:15<3:54:24, 15.17s/batch, batch_loss=802, batch_

Epoch 10/10:   6%| | 64/991 [16:28<3:54:24, 15.17s/batch, batch_loss=2.55e+3, ba

Epoch 10/10:   7%| | 65/991 [16:28<3:47:38, 14.75s/batch, batch_loss=2.55e+3, ba

Epoch 10/10:   7%| | 65/991 [16:43<3:47:38, 14.75s/batch, batch_loss=4.32, batch

Epoch 10/10:   7%| | 66/991 [16:43<3:44:46, 14.58s/batch, batch_loss=4.32, batch

Epoch 10/10:   7%| | 66/991 [16:57<3:44:46, 14.58s/batch, batch_loss=11.7, batch

Epoch 10/10:   7%| | 67/991 [16:57<3:43:27, 14.51s/batch, batch_loss=11.7, batch

Epoch 10/10:   7%| | 67/991 [17:10<3:43:27, 14.51s/batch, batch_loss=8.15, batch

Epoch 10/10:   7%| | 68/991 [17:10<3:37:48, 14.16s/batch, batch_loss=8.15, batch

Epoch 10/10:   7%| | 68/991 [17:24<3:37:48, 14.16s/batch, batch_loss=19.8, batch

Epoch 10/10:   7%| | 69/991 [17:24<3:33:39, 13.90s/batch, batch_loss=19.8, batch

Epoch 10/10:   7%| | 69/991 [17:39<3:33:39, 13.90s/batch, batch_loss=7.94, batch

Epoch 10/10:   7%| | 70/991 [17:39<3:41:48, 14.45s/batch, batch_loss=7.94, batch

Epoch 10/10:   7%| | 70/991 [17:53<3:41:48, 14.45s/batch, batch_loss=9.74, batch

Epoch 10/10:   7%| | 71/991 [17:53<3:39:09, 14.29s/batch, batch_loss=9.74, batch

Epoch 10/10:   7%| | 71/991 [18:09<3:39:09, 14.29s/batch, batch_loss=14, batch_i

Epoch 10/10:   7%| | 72/991 [18:09<3:44:36, 14.66s/batch, batch_loss=14, batch_i

Epoch 10/10:   7%| | 72/991 [18:24<3:44:36, 14.66s/batch, batch_loss=24.3, batch

Epoch 10/10:   7%| | 73/991 [18:24<3:47:03, 14.84s/batch, batch_loss=24.3, batch

Epoch 10/10:   7%| | 73/991 [18:39<3:47:03, 14.84s/batch, batch_loss=1.73e+3, ba

Epoch 10/10:   7%| | 74/991 [18:39<3:47:25, 14.88s/batch, batch_loss=1.73e+3, ba

Epoch 10/10:   7%| | 74/991 [18:54<3:47:25, 14.88s/batch, batch_loss=15.3, batch

Epoch 10/10:   8%| | 75/991 [18:54<3:47:49, 14.92s/batch, batch_loss=15.3, batch

Epoch 10/10:   8%| | 75/991 [19:09<3:47:49, 14.92s/batch, batch_loss=12, batch_i

Epoch 10/10:   8%| | 76/991 [19:09<3:46:27, 14.85s/batch, batch_loss=12, batch_i

Epoch 10/10:   8%| | 76/991 [19:25<3:46:27, 14.85s/batch, batch_loss=11.8, batch

Epoch 10/10:   8%| | 77/991 [19:25<3:50:54, 15.16s/batch, batch_loss=11.8, batch

Epoch 10/10:   8%| | 77/991 [19:40<3:50:54, 15.16s/batch, batch_loss=13.4, batch

Epoch 10/10:   8%| | 78/991 [19:40<3:49:54, 15.11s/batch, batch_loss=13.4, batch

Epoch 10/10:   8%| | 78/991 [19:54<3:49:54, 15.11s/batch, batch_loss=9.75, batch

Epoch 10/10:   8%| | 79/991 [19:54<3:48:23, 15.03s/batch, batch_loss=9.75, batch

Epoch 10/10:   8%| | 79/991 [20:10<3:48:23, 15.03s/batch, batch_loss=9.07, batch

Epoch 10/10:   8%| | 80/991 [20:10<3:51:40, 15.26s/batch, batch_loss=9.07, batch

Epoch 10/10:   8%| | 80/991 [20:28<3:51:40, 15.26s/batch, batch_loss=13.6, batch

Epoch 10/10:   8%| | 81/991 [20:28<4:03:17, 16.04s/batch, batch_loss=13.6, batch

Epoch 10/10:   8%| | 81/991 [20:43<4:03:17, 16.04s/batch, batch_loss=11.9, batch

Epoch 10/10:   8%| | 82/991 [20:43<3:59:29, 15.81s/batch, batch_loss=11.9, batch

Epoch 10/10:   8%| | 82/991 [20:58<3:59:29, 15.81s/batch, batch_loss=8.98, batch

Epoch 10/10:   8%| | 83/991 [20:58<3:55:02, 15.53s/batch, batch_loss=8.98, batch

Epoch 10/10:   8%| | 83/991 [21:13<3:55:02, 15.53s/batch, batch_loss=10.4, batch

Epoch 10/10:   8%| | 84/991 [21:13<3:51:20, 15.30s/batch, batch_loss=10.4, batch

Epoch 10/10:   8%| | 84/991 [21:28<3:51:20, 15.30s/batch, batch_loss=9.23, batch

Epoch 10/10:   9%| | 85/991 [21:28<3:51:21, 15.32s/batch, batch_loss=9.23, batch

Epoch 10/10:   9%| | 85/991 [21:43<3:51:21, 15.32s/batch, batch_loss=11.2, batch

Epoch 10/10:   9%| | 86/991 [21:43<3:50:16, 15.27s/batch, batch_loss=11.2, batch

Epoch 10/10:   9%| | 86/991 [21:59<3:50:16, 15.27s/batch, batch_loss=10.5, batch

Epoch 10/10:   9%| | 87/991 [21:59<3:53:13, 15.48s/batch, batch_loss=10.5, batch

Epoch 10/10:   9%| | 87/991 [22:15<3:53:13, 15.48s/batch, batch_loss=8.61, batch

Epoch 10/10:   9%| | 88/991 [22:15<3:54:25, 15.58s/batch, batch_loss=8.61, batch

Epoch 10/10:   9%| | 88/991 [22:33<3:54:25, 15.58s/batch, batch_loss=6.47, batch

Epoch 10/10:   9%| | 89/991 [22:33<4:05:49, 16.35s/batch, batch_loss=6.47, batch

Epoch 10/10:   9%| | 89/991 [22:49<4:05:49, 16.35s/batch, batch_loss=249, batch_

Epoch 10/10:   9%| | 90/991 [22:49<4:02:35, 16.15s/batch, batch_loss=249, batch_

Epoch 10/10:   9%| | 90/991 [23:04<4:02:35, 16.15s/batch, batch_loss=1.54e+3, ba

Epoch 10/10:   9%| | 91/991 [23:04<3:58:51, 15.92s/batch, batch_loss=1.54e+3, ba

Epoch 10/10:   9%| | 91/991 [23:19<3:58:51, 15.92s/batch, batch_loss=15.4, batch

Epoch 10/10:   9%| | 92/991 [23:19<3:53:40, 15.60s/batch, batch_loss=15.4, batch

Epoch 10/10:   9%| | 92/991 [23:35<3:53:40, 15.60s/batch, batch_loss=22.1, batch

Epoch 10/10:   9%| | 93/991 [23:35<3:52:31, 15.54s/batch, batch_loss=22.1, batch

Epoch 10/10:   9%| | 93/991 [23:50<3:52:31, 15.54s/batch, batch_loss=18.5, batch

Epoch 10/10:   9%| | 94/991 [23:50<3:51:02, 15.45s/batch, batch_loss=18.5, batch

Epoch 10/10:   9%| | 94/991 [24:05<3:51:02, 15.45s/batch, batch_loss=19.2, batch

Epoch 10/10:  10%| | 95/991 [24:05<3:49:15, 15.35s/batch, batch_loss=19.2, batch

Epoch 10/10:  10%| | 95/991 [24:20<3:49:15, 15.35s/batch, batch_loss=18.6, batch

Epoch 10/10:  10%| | 96/991 [24:20<3:45:52, 15.14s/batch, batch_loss=18.6, batch

Epoch 10/10:  10%| | 96/991 [24:37<3:45:52, 15.14s/batch, batch_loss=17.4, batch

Epoch 10/10:  10%| | 97/991 [24:37<3:56:27, 15.87s/batch, batch_loss=17.4, batch

Epoch 10/10:  10%| | 97/991 [24:52<3:56:27, 15.87s/batch, batch_loss=19.4, batch

Epoch 10/10:  10%| | 98/991 [24:52<3:50:36, 15.49s/batch, batch_loss=19.4, batch

Epoch 10/10:  10%| | 98/991 [25:07<3:50:36, 15.49s/batch, batch_loss=18, batch_i

Epoch 10/10:  10%| | 99/991 [25:07<3:47:05, 15.28s/batch, batch_loss=18, batch_i

Epoch 10/10:  10%| | 99/991 [25:23<3:47:05, 15.28s/batch, batch_loss=18.8, batch

Epoch 10/10:  10%| | 100/991 [25:23<3:49:42, 15.47s/batch, batch_loss=18.8, batc

Epoch 10/10:  10%| | 100/991 [25:38<3:49:42, 15.47s/batch, batch_loss=14.5, batc

Epoch 10/10:  10%| | 101/991 [25:38<3:49:20, 15.46s/batch, batch_loss=14.5, batc

Epoch 10/10:  10%| | 101/991 [25:57<3:49:20, 15.46s/batch, batch_loss=18, batch_

Epoch 10/10:  10%| | 102/991 [25:57<4:04:13, 16.48s/batch, batch_loss=18, batch_

Epoch 10/10:  10%| | 102/991 [26:16<4:04:13, 16.48s/batch, batch_loss=914, batch

Epoch 10/10:  10%| | 103/991 [26:16<4:15:46, 17.28s/batch, batch_loss=914, batch

Epoch 10/10:  10%| | 103/991 [26:30<4:15:46, 17.28s/batch, batch_loss=13.7, batc

Epoch 10/10:  10%| | 104/991 [26:30<4:02:11, 16.38s/batch, batch_loss=13.7, batc

Epoch 10/10:  10%| | 104/991 [26:45<4:02:11, 16.38s/batch, batch_loss=8.83, batc

Epoch 10/10:  11%| | 105/991 [26:45<3:54:26, 15.88s/batch, batch_loss=8.83, batc

Epoch 10/10:  11%| | 105/991 [27:00<3:54:26, 15.88s/batch, batch_loss=9.56, batc

Epoch 10/10:  11%| | 106/991 [27:00<3:51:32, 15.70s/batch, batch_loss=9.56, batc

Epoch 10/10:  11%| | 106/991 [27:16<3:51:32, 15.70s/batch, batch_loss=17.5, batc

Epoch 10/10:  11%| | 107/991 [27:16<3:48:59, 15.54s/batch, batch_loss=17.5, batc

Epoch 10/10:  11%| | 107/991 [27:30<3:48:59, 15.54s/batch, batch_loss=26.6, batc

Epoch 10/10:  11%| | 108/991 [27:30<3:42:28, 15.12s/batch, batch_loss=26.6, batc

Epoch 10/10:  11%| | 108/991 [27:45<3:42:28, 15.12s/batch, batch_loss=16.5, batc

Epoch 10/10:  11%| | 109/991 [27:45<3:45:04, 15.31s/batch, batch_loss=16.5, batc

Epoch 10/10:  11%| | 109/991 [28:00<3:45:04, 15.31s/batch, batch_loss=15.5, batc

Epoch 10/10:  11%| | 110/991 [28:00<3:42:34, 15.16s/batch, batch_loss=15.5, batc

Epoch 10/10:  11%| | 110/991 [28:16<3:42:34, 15.16s/batch, batch_loss=15.8, batc

Epoch 10/10:  11%| | 111/991 [28:16<3:43:45, 15.26s/batch, batch_loss=15.8, batc

Epoch 10/10:  11%| | 111/991 [28:34<3:43:45, 15.26s/batch, batch_loss=20.4, batc

Epoch 10/10:  11%| | 112/991 [28:34<3:57:17, 16.20s/batch, batch_loss=20.4, batc

Epoch 10/10:  11%| | 112/991 [28:50<3:57:17, 16.20s/batch, batch_loss=11.7, batc

Epoch 10/10:  11%| | 113/991 [28:50<3:54:48, 16.05s/batch, batch_loss=11.7, batc

Epoch 10/10:  11%| | 113/991 [29:05<3:54:48, 16.05s/batch, batch_loss=15.1, batc

Epoch 10/10:  12%| | 114/991 [29:05<3:53:01, 15.94s/batch, batch_loss=15.1, batc

Epoch 10/10:  12%| | 114/991 [29:21<3:53:01, 15.94s/batch, batch_loss=16.6, batc

Epoch 10/10:  12%| | 115/991 [29:21<3:52:25, 15.92s/batch, batch_loss=16.6, batc

Epoch 10/10:  12%| | 115/991 [29:38<3:52:25, 15.92s/batch, batch_loss=11.1, batc

Epoch 10/10:  12%| | 116/991 [29:38<3:53:55, 16.04s/batch, batch_loss=11.1, batc

Epoch 10/10:  12%| | 116/991 [29:53<3:53:55, 16.04s/batch, batch_loss=17.8, batc

Epoch 10/10:  12%| | 117/991 [29:53<3:51:59, 15.93s/batch, batch_loss=17.8, batc

Epoch 10/10:  12%| | 117/991 [30:09<3:51:59, 15.93s/batch, batch_loss=17.4, batc

Epoch 10/10:  12%| | 118/991 [30:09<3:49:22, 15.77s/batch, batch_loss=17.4, batc

Epoch 10/10:  12%| | 118/991 [30:25<3:49:22, 15.77s/batch, batch_loss=23.7, batc

Epoch 10/10:  12%| | 119/991 [30:25<3:51:04, 15.90s/batch, batch_loss=23.7, batc

Epoch 10/10:  12%| | 119/991 [30:41<3:51:04, 15.90s/batch, batch_loss=16.7, batc

Epoch 10/10:  12%| | 120/991 [30:41<3:53:03, 16.05s/batch, batch_loss=16.7, batc

Epoch 10/10:  12%| | 120/991 [30:57<3:53:03, 16.05s/batch, batch_loss=25.8, batc

Epoch 10/10:  12%| | 121/991 [30:57<3:49:57, 15.86s/batch, batch_loss=25.8, batc

Epoch 10/10:  12%| | 121/991 [31:13<3:49:57, 15.86s/batch, batch_loss=8.75, batc

Epoch 10/10:  12%| | 122/991 [31:13<3:49:41, 15.86s/batch, batch_loss=8.75, batc

Epoch 10/10:  12%| | 122/991 [31:29<3:49:41, 15.86s/batch, batch_loss=14.7, batc

Epoch 10/10:  12%| | 123/991 [31:29<3:49:49, 15.89s/batch, batch_loss=14.7, batc

Epoch 10/10:  12%| | 123/991 [31:43<3:49:49, 15.89s/batch, batch_loss=3.47e+3, b

Epoch 10/10:  13%|▏| 124/991 [31:43<3:45:03, 15.57s/batch, batch_loss=3.47e+3, b

Epoch 10/10:  13%|▏| 124/991 [31:59<3:45:03, 15.57s/batch, batch_loss=7.26, batc

Epoch 10/10:  13%|▏| 125/991 [31:59<3:43:16, 15.47s/batch, batch_loss=7.26, batc

Epoch 10/10:  13%|▏| 125/991 [32:15<3:43:16, 15.47s/batch, batch_loss=13.1, batc

Epoch 10/10:  13%|▏| 126/991 [32:15<3:44:55, 15.60s/batch, batch_loss=13.1, batc

Epoch 10/10:  13%|▏| 126/991 [32:33<3:44:55, 15.60s/batch, batch_loss=1.89e+3, b

Epoch 10/10:  13%|▏| 127/991 [32:33<3:56:21, 16.41s/batch, batch_loss=1.89e+3, b

Epoch 10/10:  13%|▏| 127/991 [32:48<3:56:21, 16.41s/batch, batch_loss=1.57e+3, b

Epoch 10/10:  13%|▏| 128/991 [32:48<3:50:05, 16.00s/batch, batch_loss=1.57e+3, b

Epoch 10/10:  13%|▏| 128/991 [33:03<3:50:05, 16.00s/batch, batch_loss=227, batch

Epoch 10/10:  13%|▏| 129/991 [33:03<3:45:28, 15.69s/batch, batch_loss=227, batch

Epoch 10/10:  13%|▏| 129/991 [33:18<3:45:28, 15.69s/batch, batch_loss=992, batch

Epoch 10/10:  13%|▏| 130/991 [33:18<3:42:28, 15.50s/batch, batch_loss=992, batch

Epoch 10/10:  13%|▏| 130/991 [33:33<3:42:28, 15.50s/batch, batch_loss=8.34e+3, b

Epoch 10/10:  13%|▏| 131/991 [33:33<3:40:55, 15.41s/batch, batch_loss=8.34e+3, b

Epoch 10/10:  13%|▏| 131/991 [33:48<3:40:55, 15.41s/batch, batch_loss=17.4, batc

Epoch 10/10:  13%|▏| 132/991 [33:48<3:37:12, 15.17s/batch, batch_loss=17.4, batc

Epoch 10/10:  13%|▏| 132/991 [34:02<3:37:12, 15.17s/batch, batch_loss=8.13, batc

Epoch 10/10:  13%|▏| 133/991 [34:02<3:35:02, 15.04s/batch, batch_loss=8.13, batc

Epoch 10/10:  13%|▏| 133/991 [34:19<3:35:02, 15.04s/batch, batch_loss=10.7, batc

Epoch 10/10:  14%|▏| 134/991 [34:19<3:42:03, 15.55s/batch, batch_loss=10.7, batc

Epoch 10/10:  14%|▏| 134/991 [34:34<3:42:03, 15.55s/batch, batch_loss=16.4, batc

Epoch 10/10:  14%|▏| 135/991 [34:34<3:40:41, 15.47s/batch, batch_loss=16.4, batc

Epoch 10/10:  14%|▏| 135/991 [34:49<3:40:41, 15.47s/batch, batch_loss=7.23, batc

Epoch 10/10:  14%|▏| 136/991 [34:49<3:37:45, 15.28s/batch, batch_loss=7.23, batc

Epoch 10/10:  14%|▏| 136/991 [35:04<3:37:45, 15.28s/batch, batch_loss=13.2, batc

Epoch 10/10:  14%|▏| 137/991 [35:04<3:36:52, 15.24s/batch, batch_loss=13.2, batc

Epoch 10/10:  14%|▏| 137/991 [35:19<3:36:52, 15.24s/batch, batch_loss=14.6, batc

Epoch 10/10:  14%|▏| 138/991 [35:19<3:35:01, 15.13s/batch, batch_loss=14.6, batc

Epoch 10/10:  14%|▏| 138/991 [35:34<3:35:01, 15.13s/batch, batch_loss=6.89, batc

Epoch 10/10:  14%|▏| 139/991 [35:34<3:33:48, 15.06s/batch, batch_loss=6.89, batc

Epoch 10/10:  14%|▏| 139/991 [35:50<3:33:48, 15.06s/batch, batch_loss=10.7, batc

Epoch 10/10:  14%|▏| 140/991 [35:50<3:35:51, 15.22s/batch, batch_loss=10.7, batc

Epoch 10/10:  14%|▏| 140/991 [36:05<3:35:51, 15.22s/batch, batch_loss=5.94, batc

Epoch 10/10:  14%|▏| 141/991 [36:05<3:37:02, 15.32s/batch, batch_loss=5.94, batc

Epoch 10/10:  14%|▏| 141/991 [36:21<3:37:02, 15.32s/batch, batch_loss=6.41, batc

Epoch 10/10:  14%|▏| 142/991 [36:21<3:36:42, 15.31s/batch, batch_loss=6.41, batc

Epoch 10/10:  14%|▏| 142/991 [36:35<3:36:42, 15.31s/batch, batch_loss=13.9, batc

Epoch 10/10:  14%|▏| 143/991 [36:35<3:32:51, 15.06s/batch, batch_loss=13.9, batc

Epoch 10/10:  14%|▏| 143/991 [36:51<3:32:51, 15.06s/batch, batch_loss=13.1, batc

Epoch 10/10:  15%|▏| 144/991 [36:51<3:35:31, 15.27s/batch, batch_loss=13.1, batc

Epoch 10/10:  15%|▏| 144/991 [37:06<3:35:31, 15.27s/batch, batch_loss=16.3, batc

Epoch 10/10:  15%|▏| 145/991 [37:06<3:34:18, 15.20s/batch, batch_loss=16.3, batc

Epoch 10/10:  15%|▏| 145/991 [37:20<3:34:18, 15.20s/batch, batch_loss=14.9, batc

Epoch 10/10:  15%|▏| 146/991 [37:20<3:31:24, 15.01s/batch, batch_loss=14.9, batc

Epoch 10/10:  15%|▏| 146/991 [37:36<3:31:24, 15.01s/batch, batch_loss=7.98, batc

Epoch 10/10:  15%|▏| 147/991 [37:36<3:33:15, 15.16s/batch, batch_loss=7.98, batc

Epoch 10/10:  15%|▏| 147/991 [37:52<3:33:15, 15.16s/batch, batch_loss=19.3, batc

Epoch 10/10:  15%|▏| 148/991 [37:52<3:35:34, 15.34s/batch, batch_loss=19.3, batc

Epoch 10/10:  15%|▏| 148/991 [38:08<3:35:34, 15.34s/batch, batch_loss=9.45, batc

Epoch 10/10:  15%|▏| 149/991 [38:08<3:38:39, 15.58s/batch, batch_loss=9.45, batc

Epoch 10/10:  15%|▏| 149/991 [38:23<3:38:39, 15.58s/batch, batch_loss=11.1, batc

Epoch 10/10:  15%|▏| 150/991 [38:23<3:37:51, 15.54s/batch, batch_loss=11.1, batc

Epoch 10/10:  15%|▏| 150/991 [38:39<3:37:51, 15.54s/batch, batch_loss=17.9, batc

Epoch 10/10:  15%|▏| 151/991 [38:39<3:36:23, 15.46s/batch, batch_loss=17.9, batc

Epoch 10/10:  15%|▏| 151/991 [38:55<3:36:23, 15.46s/batch, batch_loss=14.7, batc

Epoch 10/10:  15%|▏| 152/991 [38:55<3:38:27, 15.62s/batch, batch_loss=14.7, batc

Epoch 10/10:  15%|▏| 152/991 [39:11<3:38:27, 15.62s/batch, batch_loss=18, batch_

Epoch 10/10:  15%|▏| 153/991 [39:11<3:39:41, 15.73s/batch, batch_loss=18, batch_

Epoch 10/10:  15%|▏| 153/991 [39:27<3:39:41, 15.73s/batch, batch_loss=20.6, batc

Epoch 10/10:  16%|▏| 154/991 [39:27<3:43:01, 15.99s/batch, batch_loss=20.6, batc

Epoch 10/10:  16%|▏| 154/991 [39:43<3:43:01, 15.99s/batch, batch_loss=20.4, batc

Epoch 10/10:  16%|▏| 155/991 [39:43<3:43:20, 16.03s/batch, batch_loss=20.4, batc

Epoch 10/10:  16%|▏| 155/991 [39:58<3:43:20, 16.03s/batch, batch_loss=8.85, batc

Epoch 10/10:  16%|▏| 156/991 [39:58<3:38:48, 15.72s/batch, batch_loss=8.85, batc

Epoch 10/10:  16%|▏| 156/991 [40:13<3:38:48, 15.72s/batch, batch_loss=23.5, batc

Epoch 10/10:  16%|▏| 157/991 [40:13<3:32:43, 15.30s/batch, batch_loss=23.5, batc

Epoch 10/10:  16%|▏| 157/991 [40:27<3:32:43, 15.30s/batch, batch_loss=10.5, batc

Epoch 10/10:  16%|▏| 158/991 [40:27<3:29:12, 15.07s/batch, batch_loss=10.5, batc

Epoch 10/10:  16%|▏| 158/991 [40:42<3:29:12, 15.07s/batch, batch_loss=7.08, batc

Epoch 10/10:  16%|▏| 159/991 [40:42<3:26:27, 14.89s/batch, batch_loss=7.08, batc

Epoch 10/10:  16%|▏| 159/991 [40:57<3:26:27, 14.89s/batch, batch_loss=11.8, batc

Epoch 10/10:  16%|▏| 160/991 [40:57<3:26:58, 14.94s/batch, batch_loss=11.8, batc

Epoch 10/10:  16%|▏| 160/991 [41:11<3:26:58, 14.94s/batch, batch_loss=458, batch

Epoch 10/10:  16%|▏| 161/991 [41:11<3:22:40, 14.65s/batch, batch_loss=458, batch

Epoch 10/10:  16%|▏| 161/991 [41:25<3:22:40, 14.65s/batch, batch_loss=15.2, batc

Epoch 10/10:  16%|▏| 162/991 [41:25<3:22:57, 14.69s/batch, batch_loss=15.2, batc

Epoch 10/10:  16%|▏| 162/991 [41:40<3:22:57, 14.69s/batch, batch_loss=9.53, batc

Epoch 10/10:  16%|▏| 163/991 [41:40<3:23:43, 14.76s/batch, batch_loss=9.53, batc

Epoch 10/10:  16%|▏| 163/991 [41:54<3:23:43, 14.76s/batch, batch_loss=11.2, batc

Epoch 10/10:  17%|▏| 164/991 [41:54<3:20:16, 14.53s/batch, batch_loss=11.2, batc

Epoch 10/10:  17%|▏| 164/991 [42:08<3:20:16, 14.53s/batch, batch_loss=10.6, batc

Epoch 10/10:  17%|▏| 165/991 [42:08<3:17:58, 14.38s/batch, batch_loss=10.6, batc

Epoch 10/10:  17%|▏| 165/991 [42:24<3:17:58, 14.38s/batch, batch_loss=10.8, batc

Epoch 10/10:  17%|▏| 166/991 [42:24<3:20:57, 14.61s/batch, batch_loss=10.8, batc

Epoch 10/10:  17%|▏| 166/991 [42:39<3:20:57, 14.61s/batch, batch_loss=16.8, batc

Epoch 10/10:  17%|▏| 167/991 [42:39<3:24:13, 14.87s/batch, batch_loss=16.8, batc

Epoch 10/10:  17%|▏| 167/991 [42:55<3:24:13, 14.87s/batch, batch_loss=12.2, batc

Epoch 10/10:  17%|▏| 168/991 [42:55<3:26:33, 15.06s/batch, batch_loss=12.2, batc

Epoch 10/10:  17%|▏| 168/991 [43:10<3:26:33, 15.06s/batch, batch_loss=12.3, batc

Epoch 10/10:  17%|▏| 169/991 [43:10<3:26:31, 15.07s/batch, batch_loss=12.3, batc

Epoch 10/10:  17%|▏| 169/991 [43:25<3:26:31, 15.07s/batch, batch_loss=9.07, batc

Epoch 10/10:  17%|▏| 170/991 [43:25<3:26:50, 15.12s/batch, batch_loss=9.07, batc

Epoch 10/10:  17%|▏| 170/991 [43:40<3:26:50, 15.12s/batch, batch_loss=6.09, batc

Epoch 10/10:  17%|▏| 171/991 [43:40<3:28:06, 15.23s/batch, batch_loss=6.09, batc

Epoch 10/10:  17%|▏| 171/991 [43:56<3:28:06, 15.23s/batch, batch_loss=8.3, batch

Epoch 10/10:  17%|▏| 172/991 [43:56<3:30:31, 15.42s/batch, batch_loss=8.3, batch

Epoch 10/10:  17%|▏| 172/991 [44:12<3:30:31, 15.42s/batch, batch_loss=6.8, batch

Epoch 10/10:  17%|▏| 173/991 [44:12<3:31:59, 15.55s/batch, batch_loss=6.8, batch

Epoch 10/10:  17%|▏| 173/991 [44:28<3:31:59, 15.55s/batch, batch_loss=3.02e+4, b

Epoch 10/10:  18%|▏| 174/991 [44:28<3:31:15, 15.52s/batch, batch_loss=3.02e+4, b

Epoch 10/10:  18%|▏| 174/991 [44:43<3:31:15, 15.52s/batch, batch_loss=20.6, batc

Epoch 10/10:  18%|▏| 175/991 [44:43<3:31:49, 15.58s/batch, batch_loss=20.6, batc

Epoch 10/10:  18%|▏| 175/991 [44:58<3:31:49, 15.58s/batch, batch_loss=20, batch_

Epoch 10/10:  18%|▏| 176/991 [44:58<3:28:23, 15.34s/batch, batch_loss=20, batch_

Epoch 10/10:  18%|▏| 176/991 [45:13<3:28:23, 15.34s/batch, batch_loss=22.4, batc

Epoch 10/10:  18%|▏| 177/991 [45:13<3:27:22, 15.29s/batch, batch_loss=22.4, batc

Epoch 10/10:  18%|▏| 177/991 [45:29<3:27:22, 15.29s/batch, batch_loss=22, batch_

Epoch 10/10:  18%|▏| 178/991 [45:29<3:28:36, 15.40s/batch, batch_loss=22, batch_

Epoch 10/10:  18%|▏| 178/991 [45:43<3:28:36, 15.40s/batch, batch_loss=12.5, batc

Epoch 10/10:  18%|▏| 179/991 [45:43<3:24:38, 15.12s/batch, batch_loss=12.5, batc

Epoch 10/10:  18%|▏| 179/991 [45:59<3:24:38, 15.12s/batch, batch_loss=7.54, batc

Epoch 10/10:  18%|▏| 180/991 [45:59<3:25:13, 15.18s/batch, batch_loss=7.54, batc

Epoch 10/10:  18%|▏| 180/991 [46:16<3:25:13, 15.18s/batch, batch_loss=2.51e+4, b

Epoch 10/10:  18%|▏| 181/991 [46:16<3:35:19, 15.95s/batch, batch_loss=2.51e+4, b

Epoch 10/10:  18%|▏| 181/991 [46:32<3:35:19, 15.95s/batch, batch_loss=13.7, batc

Epoch 10/10:  18%|▏| 182/991 [46:32<3:33:40, 15.85s/batch, batch_loss=13.7, batc

Epoch 10/10:  18%|▏| 182/991 [46:47<3:33:40, 15.85s/batch, batch_loss=17.9, batc

Epoch 10/10:  18%|▏| 183/991 [46:47<3:31:17, 15.69s/batch, batch_loss=17.9, batc

Epoch 10/10:  18%|▏| 183/991 [47:03<3:31:17, 15.69s/batch, batch_loss=18.9, batc

Epoch 10/10:  19%|▏| 184/991 [47:03<3:30:47, 15.67s/batch, batch_loss=18.9, batc

Epoch 10/10:  19%|▏| 184/991 [47:17<3:30:47, 15.67s/batch, batch_loss=13.5, batc

Epoch 10/10:  19%|▏| 185/991 [47:17<3:25:55, 15.33s/batch, batch_loss=13.5, batc

Epoch 10/10:  19%|▏| 185/991 [47:31<3:25:55, 15.33s/batch, batch_loss=18.5, batc

Epoch 10/10:  19%|▏| 186/991 [47:31<3:17:01, 14.69s/batch, batch_loss=18.5, batc

Epoch 10/10:  19%|▏| 186/991 [47:44<3:17:01, 14.69s/batch, batch_loss=13.8, batc

Epoch 10/10:  19%|▏| 187/991 [47:44<3:11:21, 14.28s/batch, batch_loss=13.8, batc

Epoch 10/10:  19%|▏| 187/991 [47:59<3:11:21, 14.28s/batch, batch_loss=17.1, batc

Epoch 10/10:  19%|▏| 188/991 [47:59<3:14:16, 14.52s/batch, batch_loss=17.1, batc

Epoch 10/10:  19%|▏| 188/991 [48:17<3:14:16, 14.52s/batch, batch_loss=17.9, batc

Epoch 10/10:  19%|▏| 189/991 [48:17<3:29:18, 15.66s/batch, batch_loss=17.9, batc

Epoch 10/10:  19%|▏| 189/991 [48:32<3:29:18, 15.66s/batch, batch_loss=20.9, batc

Epoch 10/10:  19%|▏| 190/991 [48:32<3:26:50, 15.49s/batch, batch_loss=20.9, batc

Epoch 10/10:  19%|▏| 190/991 [48:48<3:26:50, 15.49s/batch, batch_loss=19.7, batc

Epoch 10/10:  19%|▏| 191/991 [48:48<3:25:11, 15.39s/batch, batch_loss=19.7, batc

Epoch 10/10:  19%|▏| 191/991 [49:03<3:25:11, 15.39s/batch, batch_loss=11.4, batc

Epoch 10/10:  19%|▏| 192/991 [49:03<3:24:33, 15.36s/batch, batch_loss=11.4, batc

Epoch 10/10:  19%|▏| 192/991 [49:19<3:24:33, 15.36s/batch, batch_loss=20.7, batc

Epoch 10/10:  19%|▏| 193/991 [49:19<3:25:30, 15.45s/batch, batch_loss=20.7, batc

Epoch 10/10:  19%|▏| 193/991 [49:35<3:25:30, 15.45s/batch, batch_loss=8.78, batc

Epoch 10/10:  20%|▏| 194/991 [49:35<3:29:37, 15.78s/batch, batch_loss=8.78, batc

Epoch 10/10:  20%|▏| 194/991 [49:50<3:29:37, 15.78s/batch, batch_loss=3.44, batc

Epoch 10/10:  20%|▏| 195/991 [49:50<3:26:44, 15.58s/batch, batch_loss=3.44, batc

Epoch 10/10:  20%|▏| 195/991 [50:05<3:26:44, 15.58s/batch, batch_loss=5.84, batc

Epoch 10/10:  20%|▏| 196/991 [50:05<3:24:32, 15.44s/batch, batch_loss=5.84, batc

Epoch 10/10:  20%|▏| 196/991 [50:24<3:24:32, 15.44s/batch, batch_loss=12.2, batc

Epoch 10/10:  20%|▏| 197/991 [50:24<3:35:22, 16.27s/batch, batch_loss=12.2, batc

Epoch 10/10:  20%|▏| 197/991 [50:39<3:35:22, 16.27s/batch, batch_loss=8.54, batc

Epoch 10/10:  20%|▏| 198/991 [50:39<3:30:41, 15.94s/batch, batch_loss=8.54, batc

Epoch 10/10:  20%|▏| 198/991 [50:55<3:30:41, 15.94s/batch, batch_loss=16.4, batc

Epoch 10/10:  20%|▏| 199/991 [50:55<3:31:29, 16.02s/batch, batch_loss=16.4, batc

Epoch 10/10:  20%|▏| 199/991 [51:10<3:31:29, 16.02s/batch, batch_loss=8.31, batc

Epoch 10/10:  20%|▏| 200/991 [51:10<3:26:29, 15.66s/batch, batch_loss=8.31, batc

Epoch 10/10:  20%|▏| 200/991 [51:25<3:26:29, 15.66s/batch, batch_loss=12.1, batc

Epoch 10/10:  20%|▏| 201/991 [51:25<3:26:00, 15.65s/batch, batch_loss=12.1, batc

Epoch 10/10:  20%|▏| 201/991 [51:41<3:26:00, 15.65s/batch, batch_loss=13.1, batc

Epoch 10/10:  20%|▏| 202/991 [51:41<3:24:04, 15.52s/batch, batch_loss=13.1, batc

Epoch 10/10:  20%|▏| 202/991 [51:56<3:24:04, 15.52s/batch, batch_loss=15.7, batc

Epoch 10/10:  20%|▏| 203/991 [51:56<3:23:36, 15.50s/batch, batch_loss=15.7, batc

Epoch 10/10:  20%|▏| 203/991 [52:13<3:23:36, 15.50s/batch, batch_loss=17.8, batc

Epoch 10/10:  21%|▏| 204/991 [52:13<3:29:35, 15.98s/batch, batch_loss=17.8, batc

Epoch 10/10:  21%|▏| 204/991 [52:28<3:29:35, 15.98s/batch, batch_loss=16.7, batc

Epoch 10/10:  21%|▏| 205/991 [52:28<3:26:14, 15.74s/batch, batch_loss=16.7, batc

Epoch 10/10:  21%|▏| 205/991 [52:44<3:26:14, 15.74s/batch, batch_loss=7.94, batc

Epoch 10/10:  21%|▏| 206/991 [52:44<3:25:06, 15.68s/batch, batch_loss=7.94, batc

Epoch 10/10:  21%|▏| 206/991 [52:59<3:25:06, 15.68s/batch, batch_loss=10.2, batc

Epoch 10/10:  21%|▏| 207/991 [52:59<3:21:49, 15.45s/batch, batch_loss=10.2, batc

Epoch 10/10:  21%|▏| 207/991 [53:14<3:21:49, 15.45s/batch, batch_loss=11.9, batc

Epoch 10/10:  21%|▏| 208/991 [53:14<3:21:27, 15.44s/batch, batch_loss=11.9, batc

Epoch 10/10:  21%|▏| 208/991 [53:30<3:21:27, 15.44s/batch, batch_loss=9.38, batc

Epoch 10/10:  21%|▏| 209/991 [53:30<3:21:25, 15.45s/batch, batch_loss=9.38, batc

Epoch 10/10:  21%|▏| 209/991 [53:45<3:21:25, 15.45s/batch, batch_loss=19.3, batc

Epoch 10/10:  21%|▏| 210/991 [53:45<3:20:34, 15.41s/batch, batch_loss=19.3, batc

Epoch 10/10:  21%|▏| 210/991 [54:00<3:20:34, 15.41s/batch, batch_loss=12.2, batc

Epoch 10/10:  21%|▏| 211/991 [54:00<3:19:03, 15.31s/batch, batch_loss=12.2, batc

Epoch 10/10:  21%|▏| 211/991 [54:18<3:19:03, 15.31s/batch, batch_loss=15.6, batc

Epoch 10/10:  21%|▏| 212/991 [54:18<3:26:59, 15.94s/batch, batch_loss=15.6, batc

Epoch 10/10:  21%|▏| 212/991 [54:33<3:26:59, 15.94s/batch, batch_loss=3.19, batc

Epoch 10/10:  21%|▏| 213/991 [54:33<3:23:50, 15.72s/batch, batch_loss=3.19, batc

Epoch 10/10:  21%|▏| 213/991 [54:48<3:23:50, 15.72s/batch, batch_loss=13.9, batc

Epoch 10/10:  22%|▏| 214/991 [54:48<3:21:15, 15.54s/batch, batch_loss=13.9, batc

Epoch 10/10:  22%|▏| 214/991 [55:03<3:21:15, 15.54s/batch, batch_loss=15.5, batc

Epoch 10/10:  22%|▏| 215/991 [55:03<3:19:28, 15.42s/batch, batch_loss=15.5, batc

Epoch 10/10:  22%|▏| 215/991 [55:18<3:19:28, 15.42s/batch, batch_loss=9.31, batc

Epoch 10/10:  22%|▏| 216/991 [55:18<3:17:50, 15.32s/batch, batch_loss=9.31, batc

Epoch 10/10:  22%|▏| 216/991 [55:33<3:17:50, 15.32s/batch, batch_loss=10.6, batc

Epoch 10/10:  22%|▏| 217/991 [55:33<3:14:59, 15.12s/batch, batch_loss=10.6, batc

Epoch 10/10:  22%|▏| 217/991 [55:48<3:14:59, 15.12s/batch, batch_loss=18.6, batc

Epoch 10/10:  22%|▏| 218/991 [55:48<3:15:47, 15.20s/batch, batch_loss=18.6, batc

Epoch 10/10:  22%|▏| 218/991 [56:03<3:15:47, 15.20s/batch, batch_loss=17.3, batc

Epoch 10/10:  22%|▏| 219/991 [56:03<3:14:48, 15.14s/batch, batch_loss=17.3, batc

Epoch 10/10:  22%|▏| 219/991 [56:17<3:14:48, 15.14s/batch, batch_loss=21.1, batc

Epoch 10/10:  22%|▏| 220/991 [56:17<3:11:39, 14.92s/batch, batch_loss=21.1, batc

Epoch 10/10:  22%|▏| 220/991 [56:36<3:11:39, 14.92s/batch, batch_loss=20.9, batc

Epoch 10/10:  22%|▏| 221/991 [56:36<3:24:31, 15.94s/batch, batch_loss=20.9, batc

Epoch 10/10:  22%|▏| 221/991 [56:51<3:24:31, 15.94s/batch, batch_loss=15.6, batc

Epoch 10/10:  22%|▏| 222/991 [56:51<3:21:49, 15.75s/batch, batch_loss=15.6, batc

Epoch 10/10:  22%|▏| 222/991 [57:05<3:21:49, 15.75s/batch, batch_loss=18.7, batc

Epoch 10/10:  23%|▏| 223/991 [57:05<3:14:57, 15.23s/batch, batch_loss=18.7, batc

Epoch 10/10:  23%|▏| 223/991 [57:20<3:14:57, 15.23s/batch, batch_loss=14, batch_

Epoch 10/10:  23%|▏| 224/991 [57:20<3:12:50, 15.09s/batch, batch_loss=14, batch_

Epoch 10/10:  23%|▏| 224/991 [57:35<3:12:50, 15.09s/batch, batch_loss=11.4, batc

Epoch 10/10:  23%|▏| 225/991 [57:35<3:13:54, 15.19s/batch, batch_loss=11.4, batc

Epoch 10/10:  23%|▏| 225/991 [57:51<3:13:54, 15.19s/batch, batch_loss=21.9, batc

Epoch 10/10:  23%|▏| 226/991 [57:51<3:14:04, 15.22s/batch, batch_loss=21.9, batc

Epoch 10/10:  23%|▏| 226/991 [58:06<3:14:04, 15.22s/batch, batch_loss=2.41e+3, b

Epoch 10/10:  23%|▏| 227/991 [58:06<3:15:02, 15.32s/batch, batch_loss=2.41e+3, b

Epoch 10/10:  23%|▏| 227/991 [58:22<3:15:02, 15.32s/batch, batch_loss=3.61e+3, b

Epoch 10/10:  23%|▏| 228/991 [58:22<3:16:22, 15.44s/batch, batch_loss=3.61e+3, b

Epoch 10/10:  23%|▏| 228/991 [58:37<3:16:22, 15.44s/batch, batch_loss=12.3, batc

Epoch 10/10:  23%|▏| 229/991 [58:37<3:15:07, 15.36s/batch, batch_loss=12.3, batc

Epoch 10/10:  23%|▏| 229/991 [58:52<3:15:07, 15.36s/batch, batch_loss=8.97, batc

Epoch 10/10:  23%|▏| 230/991 [58:52<3:12:23, 15.17s/batch, batch_loss=8.97, batc

Epoch 10/10:  23%|▏| 230/991 [59:07<3:12:23, 15.17s/batch, batch_loss=12.8, batc

Epoch 10/10:  23%|▏| 231/991 [59:07<3:12:34, 15.20s/batch, batch_loss=12.8, batc

Epoch 10/10:  23%|▏| 231/991 [59:22<3:12:34, 15.20s/batch, batch_loss=8.05, batc

Epoch 10/10:  23%|▏| 232/991 [59:22<3:11:55, 15.17s/batch, batch_loss=8.05, batc

Epoch 10/10:  23%|▏| 232/991 [59:37<3:11:55, 15.17s/batch, batch_loss=8.01, batc

Epoch 10/10:  24%|▏| 233/991 [59:37<3:11:32, 15.16s/batch, batch_loss=8.01, batc

Epoch 10/10:  24%|▏| 233/991 [59:53<3:11:32, 15.16s/batch, batch_loss=14.2, batc

Epoch 10/10:  24%|▏| 234/991 [59:53<3:14:16, 15.40s/batch, batch_loss=14.2, batc

Epoch 10/10:  24%|▏| 234/991 [1:00:09<3:14:16, 15.40s/batch, batch_loss=13.6, ba

Epoch 10/10:  24%|▏| 235/991 [1:00:09<3:16:48, 15.62s/batch, batch_loss=13.6, ba

Epoch 10/10:  24%|▏| 235/991 [1:00:27<3:16:48, 15.62s/batch, batch_loss=23.6, ba

Epoch 10/10:  24%|▏| 236/991 [1:00:27<3:24:19, 16.24s/batch, batch_loss=23.6, ba

Epoch 10/10:  24%|▏| 236/991 [1:00:43<3:24:19, 16.24s/batch, batch_loss=23.8, ba

Epoch 10/10:  24%|▏| 237/991 [1:00:43<3:21:34, 16.04s/batch, batch_loss=23.8, ba

Epoch 10/10:  24%|▏| 237/991 [1:00:57<3:21:34, 16.04s/batch, batch_loss=17, batc

Epoch 10/10:  24%|▏| 238/991 [1:00:57<3:16:23, 15.65s/batch, batch_loss=17, batc

Epoch 10/10:  24%|▏| 238/991 [1:01:13<3:16:23, 15.65s/batch, batch_loss=4.84, ba

Epoch 10/10:  24%|▏| 239/991 [1:01:13<3:14:30, 15.52s/batch, batch_loss=4.84, ba

Epoch 10/10:  24%|▏| 239/991 [1:01:27<3:14:30, 15.52s/batch, batch_loss=6.65, ba

Epoch 10/10:  24%|▏| 240/991 [1:01:27<3:11:38, 15.31s/batch, batch_loss=6.65, ba

Epoch 10/10:  24%|▏| 240/991 [1:01:42<3:11:38, 15.31s/batch, batch_loss=10.2, ba

Epoch 10/10:  24%|▏| 241/991 [1:01:42<3:09:32, 15.16s/batch, batch_loss=10.2, ba

Epoch 10/10:  24%|▏| 241/991 [1:01:57<3:09:32, 15.16s/batch, batch_loss=20.9, ba

Epoch 10/10:  24%|▏| 242/991 [1:01:57<3:07:25, 15.01s/batch, batch_loss=20.9, ba

Epoch 10/10:  24%|▏| 242/991 [1:02:12<3:07:25, 15.01s/batch, batch_loss=267, bat

Epoch 10/10:  25%|▏| 243/991 [1:02:12<3:08:32, 15.12s/batch, batch_loss=267, bat

Epoch 10/10:  25%|▏| 243/991 [1:02:29<3:08:32, 15.12s/batch, batch_loss=17, batc

Epoch 10/10:  25%|▏| 244/991 [1:02:29<3:13:19, 15.53s/batch, batch_loss=17, batc

Epoch 10/10:  25%|▏| 244/991 [1:02:46<3:13:19, 15.53s/batch, batch_loss=6.69, ba

Epoch 10/10:  25%|▏| 245/991 [1:02:46<3:18:33, 15.97s/batch, batch_loss=6.69, ba

Epoch 10/10:  25%|▏| 245/991 [1:03:01<3:18:33, 15.97s/batch, batch_loss=5.99, ba

Epoch 10/10:  25%|▏| 246/991 [1:03:01<3:15:57, 15.78s/batch, batch_loss=5.99, ba

Epoch 10/10:  25%|▏| 246/991 [1:03:16<3:15:57, 15.78s/batch, batch_loss=14.3, ba

Epoch 10/10:  25%|▏| 247/991 [1:03:16<3:14:10, 15.66s/batch, batch_loss=14.3, ba

Epoch 10/10:  25%|▏| 247/991 [1:03:32<3:14:10, 15.66s/batch, batch_loss=4.57, ba

Epoch 10/10:  25%|▎| 248/991 [1:03:32<3:15:01, 15.75s/batch, batch_loss=4.57, ba

Epoch 10/10:  25%|▎| 248/991 [1:03:48<3:15:01, 15.75s/batch, batch_loss=12.8, ba

Epoch 10/10:  25%|▎| 249/991 [1:03:48<3:12:56, 15.60s/batch, batch_loss=12.8, ba

Epoch 10/10:  25%|▎| 249/991 [1:04:03<3:12:56, 15.60s/batch, batch_loss=9.41, ba

Epoch 10/10:  25%|▎| 250/991 [1:04:03<3:10:33, 15.43s/batch, batch_loss=9.41, ba

Epoch 10/10:  25%|▎| 250/991 [1:04:18<3:10:33, 15.43s/batch, batch_loss=7.17, ba

Epoch 10/10:  25%|▎| 251/991 [1:04:18<3:09:44, 15.38s/batch, batch_loss=7.17, ba

Epoch 10/10:  25%|▎| 251/991 [1:04:34<3:09:44, 15.38s/batch, batch_loss=13.6, ba

Epoch 10/10:  25%|▎| 252/991 [1:04:34<3:11:15, 15.53s/batch, batch_loss=13.6, ba

Epoch 10/10:  25%|▎| 252/991 [1:04:48<3:11:15, 15.53s/batch, batch_loss=7.09, ba

Epoch 10/10:  26%|▎| 253/991 [1:04:48<3:07:06, 15.21s/batch, batch_loss=7.09, ba

Epoch 10/10:  26%|▎| 253/991 [1:05:04<3:07:06, 15.21s/batch, batch_loss=18.5, ba

Epoch 10/10:  26%|▎| 254/991 [1:05:04<3:07:30, 15.27s/batch, batch_loss=18.5, ba

Epoch 10/10:  26%|▎| 254/991 [1:05:19<3:07:30, 15.27s/batch, batch_loss=15.8, ba

Epoch 10/10:  26%|▎| 255/991 [1:05:19<3:06:42, 15.22s/batch, batch_loss=15.8, ba

Epoch 10/10:  26%|▎| 255/991 [1:05:33<3:06:42, 15.22s/batch, batch_loss=696, bat

Epoch 10/10:  26%|▎| 256/991 [1:05:33<3:04:11, 15.04s/batch, batch_loss=696, bat

Epoch 10/10:  26%|▎| 256/991 [1:05:48<3:04:11, 15.04s/batch, batch_loss=18.6, ba

Epoch 10/10:  26%|▎| 257/991 [1:05:48<3:02:04, 14.88s/batch, batch_loss=18.6, ba

Epoch 10/10:  26%|▎| 257/991 [1:06:02<3:02:04, 14.88s/batch, batch_loss=205, bat

Epoch 10/10:  26%|▎| 258/991 [1:06:02<2:58:12, 14.59s/batch, batch_loss=205, bat

Epoch 10/10:  26%|▎| 258/991 [1:06:17<2:58:12, 14.59s/batch, batch_loss=15.5, ba

Epoch 10/10:  26%|▎| 259/991 [1:06:17<3:01:22, 14.87s/batch, batch_loss=15.5, ba

Epoch 10/10:  26%|▎| 259/991 [1:06:33<3:01:22, 14.87s/batch, batch_loss=13.4, ba

Epoch 10/10:  26%|▎| 260/991 [1:06:33<3:02:49, 15.01s/batch, batch_loss=13.4, ba

Epoch 10/10:  26%|▎| 260/991 [1:06:48<3:02:49, 15.01s/batch, batch_loss=15.3, ba

Epoch 10/10:  26%|▎| 261/991 [1:06:48<3:03:34, 15.09s/batch, batch_loss=15.3, ba

Epoch 10/10:  26%|▎| 261/991 [1:07:03<3:03:34, 15.09s/batch, batch_loss=11.1, ba

Epoch 10/10:  26%|▎| 262/991 [1:07:03<3:04:31, 15.19s/batch, batch_loss=11.1, ba

Epoch 10/10:  26%|▎| 262/991 [1:07:19<3:04:31, 15.19s/batch, batch_loss=11.8, ba

Epoch 10/10:  27%|▎| 263/991 [1:07:19<3:05:15, 15.27s/batch, batch_loss=11.8, ba

Epoch 10/10:  27%|▎| 263/991 [1:07:33<3:05:15, 15.27s/batch, batch_loss=15.6, ba

Epoch 10/10:  27%|▎| 264/991 [1:07:33<3:01:34, 14.99s/batch, batch_loss=15.6, ba

Epoch 10/10:  27%|▎| 264/991 [1:07:48<3:01:34, 14.99s/batch, batch_loss=17.2, ba

Epoch 10/10:  27%|▎| 265/991 [1:07:48<3:01:47, 15.02s/batch, batch_loss=17.2, ba

Epoch 10/10:  27%|▎| 265/991 [1:08:04<3:01:47, 15.02s/batch, batch_loss=15.1, ba

Epoch 10/10:  27%|▎| 266/991 [1:08:04<3:02:18, 15.09s/batch, batch_loss=15.1, ba

Epoch 10/10:  27%|▎| 266/991 [1:08:19<3:02:18, 15.09s/batch, batch_loss=10.9, ba

Epoch 10/10:  27%|▎| 267/991 [1:08:19<3:02:53, 15.16s/batch, batch_loss=10.9, ba

Epoch 10/10:  27%|▎| 267/991 [1:08:37<3:02:53, 15.16s/batch, batch_loss=7.03, ba

Epoch 10/10:  27%|▎| 268/991 [1:08:37<3:14:11, 16.12s/batch, batch_loss=7.03, ba

Epoch 10/10:  27%|▎| 268/991 [1:08:53<3:14:11, 16.12s/batch, batch_loss=11.3, ba

Epoch 10/10:  27%|▎| 269/991 [1:08:53<3:11:25, 15.91s/batch, batch_loss=11.3, ba

Epoch 10/10:  27%|▎| 269/991 [1:09:08<3:11:25, 15.91s/batch, batch_loss=1.39, ba

Epoch 10/10:  27%|▎| 270/991 [1:09:08<3:09:53, 15.80s/batch, batch_loss=1.39, ba

Epoch 10/10:  27%|▎| 270/991 [1:09:23<3:09:53, 15.80s/batch, batch_loss=10.6, ba

Epoch 10/10:  27%|▎| 271/991 [1:09:23<3:06:06, 15.51s/batch, batch_loss=10.6, ba

Epoch 10/10:  27%|▎| 271/991 [1:09:39<3:06:06, 15.51s/batch, batch_loss=9, batch

Epoch 10/10:  27%|▎| 272/991 [1:09:39<3:07:08, 15.62s/batch, batch_loss=9, batch

Epoch 10/10:  27%|▎| 272/991 [1:09:55<3:07:08, 15.62s/batch, batch_loss=16.2, ba

Epoch 10/10:  28%|▎| 273/991 [1:09:55<3:07:09, 15.64s/batch, batch_loss=16.2, ba

Epoch 10/10:  28%|▎| 273/991 [1:10:10<3:07:09, 15.64s/batch, batch_loss=11.9, ba

Epoch 10/10:  28%|▎| 274/991 [1:10:10<3:04:47, 15.46s/batch, batch_loss=11.9, ba

Epoch 10/10:  28%|▎| 274/991 [1:10:27<3:04:47, 15.46s/batch, batch_loss=3.31e+3,

Epoch 10/10:  28%|▎| 275/991 [1:10:27<3:13:05, 16.18s/batch, batch_loss=3.31e+3,

Epoch 10/10:  28%|▎| 275/991 [1:10:43<3:13:05, 16.18s/batch, batch_loss=12.8, ba

Epoch 10/10:  28%|▎| 276/991 [1:10:43<3:10:04, 15.95s/batch, batch_loss=12.8, ba

Epoch 10/10:  28%|▎| 276/991 [1:10:58<3:10:04, 15.95s/batch, batch_loss=4.79e+3,

Epoch 10/10:  28%|▎| 277/991 [1:10:58<3:07:45, 15.78s/batch, batch_loss=4.79e+3,

Epoch 10/10:  28%|▎| 277/991 [1:11:13<3:07:45, 15.78s/batch, batch_loss=11.5, ba

Epoch 10/10:  28%|▎| 278/991 [1:11:13<3:02:48, 15.38s/batch, batch_loss=11.5, ba

Epoch 10/10:  28%|▎| 278/991 [1:11:28<3:02:48, 15.38s/batch, batch_loss=16.8, ba

Epoch 10/10:  28%|▎| 279/991 [1:11:28<3:00:33, 15.22s/batch, batch_loss=16.8, ba

Epoch 10/10:  28%|▎| 279/991 [1:11:43<3:00:33, 15.22s/batch, batch_loss=11.2, ba

Epoch 10/10:  28%|▎| 280/991 [1:11:43<3:00:31, 15.23s/batch, batch_loss=11.2, ba

Epoch 10/10:  28%|▎| 280/991 [1:11:59<3:00:31, 15.23s/batch, batch_loss=11.9, ba

Epoch 10/10:  28%|▎| 281/991 [1:11:59<3:02:07, 15.39s/batch, batch_loss=11.9, ba

Epoch 10/10:  28%|▎| 281/991 [1:12:14<3:02:07, 15.39s/batch, batch_loss=6.46, ba

Epoch 10/10:  28%|▎| 282/991 [1:12:14<3:01:39, 15.37s/batch, batch_loss=6.46, ba

Epoch 10/10:  28%|▎| 282/991 [1:12:32<3:01:39, 15.37s/batch, batch_loss=14.8, ba

Epoch 10/10:  29%|▎| 283/991 [1:12:32<3:11:43, 16.25s/batch, batch_loss=14.8, ba

Epoch 10/10:  29%|▎| 283/991 [1:12:47<3:11:43, 16.25s/batch, batch_loss=10.4, ba

Epoch 10/10:  29%|▎| 284/991 [1:12:47<3:06:05, 15.79s/batch, batch_loss=10.4, ba

Epoch 10/10:  29%|▎| 284/991 [1:13:03<3:06:05, 15.79s/batch, batch_loss=12.1, ba

Epoch 10/10:  29%|▎| 285/991 [1:13:03<3:06:10, 15.82s/batch, batch_loss=12.1, ba

Epoch 10/10:  29%|▎| 285/991 [1:13:18<3:06:10, 15.82s/batch, batch_loss=8.2, bat

Epoch 10/10:  29%|▎| 286/991 [1:13:18<3:02:01, 15.49s/batch, batch_loss=8.2, bat

Epoch 10/10:  29%|▎| 286/991 [1:13:33<3:02:01, 15.49s/batch, batch_loss=7.1, bat

Epoch 10/10:  29%|▎| 287/991 [1:13:33<3:02:41, 15.57s/batch, batch_loss=7.1, bat

Epoch 10/10:  29%|▎| 287/991 [1:13:48<3:02:41, 15.57s/batch, batch_loss=2.57e+3,

Epoch 10/10:  29%|▎| 288/991 [1:13:48<3:00:14, 15.38s/batch, batch_loss=2.57e+3,

Epoch 10/10:  29%|▎| 288/991 [1:14:03<3:00:14, 15.38s/batch, batch_loss=1.25e+3,

Epoch 10/10:  29%|▎| 289/991 [1:14:03<2:59:05, 15.31s/batch, batch_loss=1.25e+3,

Epoch 10/10:  29%|▎| 289/991 [1:14:18<2:59:05, 15.31s/batch, batch_loss=12.1, ba

Epoch 10/10:  29%|▎| 290/991 [1:14:18<2:57:32, 15.20s/batch, batch_loss=12.1, ba

Epoch 10/10:  29%|▎| 290/991 [1:14:34<2:57:32, 15.20s/batch, batch_loss=5.59, ba

Epoch 10/10:  29%|▎| 291/991 [1:14:34<2:58:09, 15.27s/batch, batch_loss=5.59, ba

Epoch 10/10:  29%|▎| 291/991 [1:14:49<2:58:09, 15.27s/batch, batch_loss=10.4, ba

Epoch 10/10:  29%|▎| 292/991 [1:14:49<2:58:49, 15.35s/batch, batch_loss=10.4, ba

Epoch 10/10:  29%|▎| 292/991 [1:15:05<2:58:49, 15.35s/batch, batch_loss=16.5, ba

Epoch 10/10:  30%|▎| 293/991 [1:15:05<2:58:38, 15.36s/batch, batch_loss=16.5, ba

Epoch 10/10:  30%|▎| 293/991 [1:15:20<2:58:38, 15.36s/batch, batch_loss=13.3, ba

Epoch 10/10:  30%|▎| 294/991 [1:15:20<2:57:21, 15.27s/batch, batch_loss=13.3, ba

Epoch 10/10:  30%|▎| 294/991 [1:15:36<2:57:21, 15.27s/batch, batch_loss=9.8, bat

Epoch 10/10:  30%|▎| 295/991 [1:15:36<2:58:48, 15.41s/batch, batch_loss=9.8, bat

Epoch 10/10:  30%|▎| 295/991 [1:15:51<2:58:48, 15.41s/batch, batch_loss=16.9, ba

Epoch 10/10:  30%|▎| 296/991 [1:15:51<2:58:04, 15.37s/batch, batch_loss=16.9, ba

Epoch 10/10:  30%|▎| 296/991 [1:16:06<2:58:04, 15.37s/batch, batch_loss=12.5, ba

Epoch 10/10:  30%|▎| 297/991 [1:16:06<2:56:14, 15.24s/batch, batch_loss=12.5, ba

Epoch 10/10:  30%|▎| 297/991 [1:16:23<2:56:14, 15.24s/batch, batch_loss=3.24e+4,

Epoch 10/10:  30%|▎| 298/991 [1:16:23<3:03:26, 15.88s/batch, batch_loss=3.24e+4,

Epoch 10/10:  30%|▎| 298/991 [1:16:39<3:03:26, 15.88s/batch, batch_loss=12.3, ba

Epoch 10/10:  30%|▎| 299/991 [1:16:39<3:04:02, 15.96s/batch, batch_loss=12.3, ba

Epoch 10/10:  30%|▎| 299/991 [1:16:54<3:04:02, 15.96s/batch, batch_loss=6.89, ba

Epoch 10/10:  30%|▎| 300/991 [1:16:54<3:00:53, 15.71s/batch, batch_loss=6.89, ba

Epoch 10/10:  30%|▎| 300/991 [1:17:08<3:00:53, 15.71s/batch, batch_loss=8.73, ba

Epoch 10/10:  30%|▎| 301/991 [1:17:08<2:54:23, 15.17s/batch, batch_loss=8.73, ba

Epoch 10/10:  30%|▎| 301/991 [1:17:22<2:54:23, 15.17s/batch, batch_loss=11.9, ba

Epoch 10/10:  30%|▎| 302/991 [1:17:22<2:48:44, 14.69s/batch, batch_loss=11.9, ba

Epoch 10/10:  30%|▎| 302/991 [1:17:36<2:48:44, 14.69s/batch, batch_loss=8.77, ba

Epoch 10/10:  31%|▎| 303/991 [1:17:36<2:45:36, 14.44s/batch, batch_loss=8.77, ba

Epoch 10/10:  31%|▎| 303/991 [1:17:50<2:45:36, 14.44s/batch, batch_loss=3.3, bat

Epoch 10/10:  31%|▎| 304/991 [1:17:50<2:43:59, 14.32s/batch, batch_loss=3.3, bat

Epoch 10/10:  31%|▎| 304/991 [1:18:07<2:43:59, 14.32s/batch, batch_loss=14.8, ba

Epoch 10/10:  31%|▎| 305/991 [1:18:07<2:53:20, 15.16s/batch, batch_loss=14.8, ba

Epoch 10/10:  31%|▎| 305/991 [1:18:21<2:53:20, 15.16s/batch, batch_loss=8.58, ba

Epoch 10/10:  31%|▎| 306/991 [1:18:21<2:49:22, 14.84s/batch, batch_loss=8.58, ba

Epoch 10/10:  31%|▎| 306/991 [1:18:35<2:49:22, 14.84s/batch, batch_loss=6.28e+3,

Epoch 10/10:  31%|▎| 307/991 [1:18:35<2:47:58, 14.73s/batch, batch_loss=6.28e+3,

Epoch 10/10:  31%|▎| 307/991 [1:18:50<2:47:58, 14.73s/batch, batch_loss=12.2, ba

Epoch 10/10:  31%|▎| 308/991 [1:18:50<2:45:50, 14.57s/batch, batch_loss=12.2, ba

Epoch 10/10:  31%|▎| 308/991 [1:19:05<2:45:50, 14.57s/batch, batch_loss=21.2, ba

Epoch 10/10:  31%|▎| 309/991 [1:19:05<2:47:26, 14.73s/batch, batch_loss=21.2, ba

Epoch 10/10:  31%|▎| 309/991 [1:19:20<2:47:26, 14.73s/batch, batch_loss=12.1, ba

Epoch 10/10:  31%|▎| 310/991 [1:19:20<2:48:03, 14.81s/batch, batch_loss=12.1, ba

Epoch 10/10:  31%|▎| 310/991 [1:19:35<2:48:03, 14.81s/batch, batch_loss=14.2, ba

Epoch 10/10:  31%|▎| 311/991 [1:19:35<2:50:02, 15.00s/batch, batch_loss=14.2, ba

Epoch 10/10:  31%|▎| 311/991 [1:19:51<2:50:02, 15.00s/batch, batch_loss=11, batc

Epoch 10/10:  31%|▎| 312/991 [1:19:51<2:52:12, 15.22s/batch, batch_loss=11, batc

Epoch 10/10:  31%|▎| 312/991 [1:20:06<2:52:12, 15.22s/batch, batch_loss=1.06e+4,

Epoch 10/10:  32%|▎| 313/991 [1:20:06<2:50:07, 15.05s/batch, batch_loss=1.06e+4,

Epoch 10/10:  32%|▎| 313/991 [1:20:19<2:50:07, 15.05s/batch, batch_loss=10.8, ba

Epoch 10/10:  32%|▎| 314/991 [1:20:19<2:45:34, 14.67s/batch, batch_loss=10.8, ba

Epoch 10/10:  32%|▎| 314/991 [1:20:37<2:45:34, 14.67s/batch, batch_loss=15.5, ba

Epoch 10/10:  32%|▎| 315/991 [1:20:37<2:55:08, 15.54s/batch, batch_loss=15.5, ba

Epoch 10/10:  32%|▎| 315/991 [1:20:52<2:55:08, 15.54s/batch, batch_loss=21.6, ba

Epoch 10/10:  32%|▎| 316/991 [1:20:52<2:54:12, 15.49s/batch, batch_loss=21.6, ba

Epoch 10/10:  32%|▎| 316/991 [1:21:07<2:54:12, 15.49s/batch, batch_loss=19.5, ba

Epoch 10/10:  32%|▎| 317/991 [1:21:07<2:52:50, 15.39s/batch, batch_loss=19.5, ba

Epoch 10/10:  32%|▎| 317/991 [1:21:22<2:52:50, 15.39s/batch, batch_loss=21.4, ba

Epoch 10/10:  32%|▎| 318/991 [1:21:22<2:49:22, 15.10s/batch, batch_loss=21.4, ba

Epoch 10/10:  32%|▎| 318/991 [1:21:36<2:49:22, 15.10s/batch, batch_loss=15.4, ba

Epoch 10/10:  32%|▎| 319/991 [1:21:36<2:47:29, 14.95s/batch, batch_loss=15.4, ba

Epoch 10/10:  32%|▎| 319/991 [1:21:52<2:47:29, 14.95s/batch, batch_loss=14.3, ba

Epoch 10/10:  32%|▎| 320/991 [1:21:52<2:48:01, 15.02s/batch, batch_loss=14.3, ba

Epoch 10/10:  32%|▎| 320/991 [1:22:06<2:48:01, 15.02s/batch, batch_loss=20.4, ba

Epoch 10/10:  32%|▎| 321/991 [1:22:06<2:47:08, 14.97s/batch, batch_loss=20.4, ba

Epoch 10/10:  32%|▎| 321/991 [1:22:22<2:47:08, 14.97s/batch, batch_loss=7.08, ba

Epoch 10/10:  32%|▎| 322/991 [1:22:22<2:47:37, 15.03s/batch, batch_loss=7.08, ba

Epoch 10/10:  32%|▎| 322/991 [1:22:38<2:47:37, 15.03s/batch, batch_loss=9.06, ba

Epoch 10/10:  33%|▎| 323/991 [1:22:38<2:53:09, 15.55s/batch, batch_loss=9.06, ba

Epoch 10/10:  33%|▎| 323/991 [1:22:53<2:53:09, 15.55s/batch, batch_loss=21.8, ba

Epoch 10/10:  33%|▎| 324/991 [1:22:53<2:48:14, 15.13s/batch, batch_loss=21.8, ba

Epoch 10/10:  33%|▎| 324/991 [1:23:07<2:48:14, 15.13s/batch, batch_loss=9.39, ba

Epoch 10/10:  33%|▎| 325/991 [1:23:07<2:46:48, 15.03s/batch, batch_loss=9.39, ba

Epoch 10/10:  33%|▎| 325/991 [1:23:22<2:46:48, 15.03s/batch, batch_loss=20, batc

Epoch 10/10:  33%|▎| 326/991 [1:23:22<2:45:53, 14.97s/batch, batch_loss=20, batc

Epoch 10/10:  33%|▎| 326/991 [1:23:36<2:45:53, 14.97s/batch, batch_loss=3.04e+3,

Epoch 10/10:  33%|▎| 327/991 [1:23:36<2:43:21, 14.76s/batch, batch_loss=3.04e+3,

Epoch 10/10:  33%|▎| 327/991 [1:23:51<2:43:21, 14.76s/batch, batch_loss=9.44, ba

Epoch 10/10:  33%|▎| 328/991 [1:23:51<2:41:42, 14.63s/batch, batch_loss=9.44, ba

Epoch 10/10:  33%|▎| 328/991 [1:24:06<2:41:42, 14.63s/batch, batch_loss=17.2, ba

Epoch 10/10:  33%|▎| 329/991 [1:24:06<2:43:37, 14.83s/batch, batch_loss=17.2, ba

Epoch 10/10:  33%|▎| 329/991 [1:24:21<2:43:37, 14.83s/batch, batch_loss=12.7, ba

Epoch 10/10:  33%|▎| 330/991 [1:24:21<2:43:59, 14.89s/batch, batch_loss=12.7, ba

Epoch 10/10:  33%|▎| 330/991 [1:24:38<2:43:59, 14.89s/batch, batch_loss=12.9, ba

Epoch 10/10:  33%|▎| 331/991 [1:24:38<2:50:59, 15.55s/batch, batch_loss=12.9, ba

Epoch 10/10:  33%|▎| 331/991 [1:24:53<2:50:59, 15.55s/batch, batch_loss=14.3, ba

Epoch 10/10:  34%|▎| 332/991 [1:24:53<2:47:49, 15.28s/batch, batch_loss=14.3, ba

Epoch 10/10:  34%|▎| 332/991 [1:25:07<2:47:49, 15.28s/batch, batch_loss=12, batc

Epoch 10/10:  34%|▎| 333/991 [1:25:07<2:44:04, 14.96s/batch, batch_loss=12, batc

Epoch 10/10:  34%|▎| 333/991 [1:25:23<2:44:04, 14.96s/batch, batch_loss=14.3, ba

Epoch 10/10:  34%|▎| 334/991 [1:25:23<2:45:22, 15.10s/batch, batch_loss=14.3, ba

Epoch 10/10:  34%|▎| 334/991 [1:25:38<2:45:22, 15.10s/batch, batch_loss=4.39, ba

Epoch 10/10:  34%|▎| 335/991 [1:25:38<2:46:31, 15.23s/batch, batch_loss=4.39, ba

Epoch 10/10:  34%|▎| 335/991 [1:25:53<2:46:31, 15.23s/batch, batch_loss=8.46e+3,

Epoch 10/10:  34%|▎| 336/991 [1:25:53<2:46:02, 15.21s/batch, batch_loss=8.46e+3,

Epoch 10/10:  34%|▎| 336/991 [1:26:08<2:46:02, 15.21s/batch, batch_loss=2.36e+3,

Epoch 10/10:  34%|▎| 337/991 [1:26:08<2:45:06, 15.15s/batch, batch_loss=2.36e+3,

Epoch 10/10:  34%|▎| 337/991 [1:26:23<2:45:06, 15.15s/batch, batch_loss=8.1, bat

Epoch 10/10:  34%|▎| 338/991 [1:26:23<2:42:36, 14.94s/batch, batch_loss=8.1, bat

Epoch 10/10:  34%|▎| 338/991 [1:26:37<2:42:36, 14.94s/batch, batch_loss=21.5, ba

Epoch 10/10:  34%|▎| 339/991 [1:26:37<2:41:49, 14.89s/batch, batch_loss=21.5, ba

Epoch 10/10:  34%|▎| 339/991 [1:26:53<2:41:49, 14.89s/batch, batch_loss=10.2, ba

Epoch 10/10:  34%|▎| 340/991 [1:26:53<2:43:19, 15.05s/batch, batch_loss=10.2, ba

Epoch 10/10:  34%|▎| 340/991 [1:27:08<2:43:19, 15.05s/batch, batch_loss=10.1, ba

Epoch 10/10:  34%|▎| 341/991 [1:27:08<2:44:11, 15.16s/batch, batch_loss=10.1, ba

Epoch 10/10:  34%|▎| 341/991 [1:27:23<2:44:11, 15.16s/batch, batch_loss=1.17, ba

Epoch 10/10:  35%|▎| 342/991 [1:27:23<2:43:06, 15.08s/batch, batch_loss=1.17, ba

Epoch 10/10:  35%|▎| 342/991 [1:27:38<2:43:06, 15.08s/batch, batch_loss=7.36, ba

Epoch 10/10:  35%|▎| 343/991 [1:27:38<2:43:29, 15.14s/batch, batch_loss=7.36, ba

Epoch 10/10:  35%|▎| 343/991 [1:27:53<2:43:29, 15.14s/batch, batch_loss=15.3, ba

Epoch 10/10:  35%|▎| 344/991 [1:27:53<2:41:54, 15.01s/batch, batch_loss=15.3, ba

Epoch 10/10:  35%|▎| 344/991 [1:28:08<2:41:54, 15.01s/batch, batch_loss=113, bat

Epoch 10/10:  35%|▎| 345/991 [1:28:08<2:42:01, 15.05s/batch, batch_loss=113, bat

Epoch 10/10:  35%|▎| 345/991 [1:28:24<2:42:01, 15.05s/batch, batch_loss=14.2, ba

Epoch 10/10:  35%|▎| 346/991 [1:28:24<2:42:36, 15.13s/batch, batch_loss=14.2, ba

Epoch 10/10:  35%|▎| 346/991 [1:28:41<2:42:36, 15.13s/batch, batch_loss=12.1, ba

Epoch 10/10:  35%|▎| 347/991 [1:28:41<2:48:47, 15.73s/batch, batch_loss=12.1, ba

Epoch 10/10:  35%|▎| 347/991 [1:28:56<2:48:47, 15.73s/batch, batch_loss=12.2, ba

Epoch 10/10:  35%|▎| 348/991 [1:28:56<2:46:23, 15.53s/batch, batch_loss=12.2, ba

Epoch 10/10:  35%|▎| 348/991 [1:29:12<2:46:23, 15.53s/batch, batch_loss=8.44, ba

Epoch 10/10:  35%|▎| 349/991 [1:29:12<2:47:14, 15.63s/batch, batch_loss=8.44, ba

Epoch 10/10:  35%|▎| 349/991 [1:29:27<2:47:14, 15.63s/batch, batch_loss=13.2, ba

Epoch 10/10:  35%|▎| 350/991 [1:29:27<2:46:19, 15.57s/batch, batch_loss=13.2, ba

Epoch 10/10:  35%|▎| 350/991 [1:29:43<2:46:19, 15.57s/batch, batch_loss=7.69, ba

Epoch 10/10:  35%|▎| 351/991 [1:29:43<2:46:19, 15.59s/batch, batch_loss=7.69, ba

Epoch 10/10:  35%|▎| 351/991 [1:29:57<2:46:19, 15.59s/batch, batch_loss=14, batc

Epoch 10/10:  36%|▎| 352/991 [1:29:57<2:43:05, 15.31s/batch, batch_loss=14, batc

Epoch 10/10:  36%|▎| 352/991 [1:30:13<2:43:05, 15.31s/batch, batch_loss=14, batc

Epoch 10/10:  36%|▎| 353/991 [1:30:13<2:43:42, 15.40s/batch, batch_loss=14, batc

Epoch 10/10:  36%|▎| 353/991 [1:30:29<2:43:42, 15.40s/batch, batch_loss=21.1, ba

Epoch 10/10:  36%|▎| 354/991 [1:30:29<2:44:29, 15.49s/batch, batch_loss=21.1, ba

Epoch 10/10:  36%|▎| 354/991 [1:30:45<2:44:29, 15.49s/batch, batch_loss=9.06, ba

Epoch 10/10:  36%|▎| 355/991 [1:30:45<2:46:11, 15.68s/batch, batch_loss=9.06, ba

Epoch 10/10:  36%|▎| 355/991 [1:31:00<2:46:11, 15.68s/batch, batch_loss=16.3, ba

Epoch 10/10:  36%|▎| 356/991 [1:31:00<2:45:32, 15.64s/batch, batch_loss=16.3, ba

Epoch 10/10:  36%|▎| 356/991 [1:31:16<2:45:32, 15.64s/batch, batch_loss=11.9, ba

Epoch 10/10:  36%|▎| 357/991 [1:31:16<2:45:51, 15.70s/batch, batch_loss=11.9, ba

Epoch 10/10:  36%|▎| 357/991 [1:31:32<2:45:51, 15.70s/batch, batch_loss=12.6, ba

Epoch 10/10:  36%|▎| 358/991 [1:31:32<2:46:23, 15.77s/batch, batch_loss=12.6, ba

Epoch 10/10:  36%|▎| 358/991 [1:31:47<2:46:23, 15.77s/batch, batch_loss=5.16, ba

Epoch 10/10:  36%|▎| 359/991 [1:31:47<2:43:45, 15.55s/batch, batch_loss=5.16, ba

Epoch 10/10:  36%|▎| 359/991 [1:32:02<2:43:45, 15.55s/batch, batch_loss=8.9, bat

Epoch 10/10:  36%|▎| 360/991 [1:32:02<2:42:40, 15.47s/batch, batch_loss=8.9, bat

Epoch 10/10:  36%|▎| 360/991 [1:32:18<2:42:40, 15.47s/batch, batch_loss=26.6, ba

Epoch 10/10:  36%|▎| 361/991 [1:32:18<2:43:05, 15.53s/batch, batch_loss=26.6, ba

Epoch 10/10:  36%|▎| 361/991 [1:32:36<2:43:05, 15.53s/batch, batch_loss=19, batc

Epoch 10/10:  37%|▎| 362/991 [1:32:36<2:51:11, 16.33s/batch, batch_loss=19, batc

Epoch 10/10:  37%|▎| 362/991 [1:32:52<2:51:11, 16.33s/batch, batch_loss=12.9, ba

Epoch 10/10:  37%|▎| 363/991 [1:32:52<2:49:09, 16.16s/batch, batch_loss=12.9, ba

Epoch 10/10:  37%|▎| 363/991 [1:33:08<2:49:09, 16.16s/batch, batch_loss=13.9, ba

Epoch 10/10:  37%|▎| 364/991 [1:33:08<2:46:55, 15.97s/batch, batch_loss=13.9, ba

Epoch 10/10:  37%|▎| 364/991 [1:33:22<2:46:55, 15.97s/batch, batch_loss=10.7, ba

Epoch 10/10:  37%|▎| 365/991 [1:33:22<2:43:06, 15.63s/batch, batch_loss=10.7, ba

Epoch 10/10:  37%|▎| 365/991 [1:33:38<2:43:06, 15.63s/batch, batch_loss=13.7, ba

Epoch 10/10:  37%|▎| 366/991 [1:33:38<2:42:45, 15.63s/batch, batch_loss=13.7, ba

Epoch 10/10:  37%|▎| 366/991 [1:33:54<2:42:45, 15.63s/batch, batch_loss=11.1, ba

Epoch 10/10:  37%|▎| 367/991 [1:33:54<2:42:25, 15.62s/batch, batch_loss=11.1, ba

Epoch 10/10:  37%|▎| 367/991 [1:34:12<2:42:25, 15.62s/batch, batch_loss=11, batc

Epoch 10/10:  37%|▎| 368/991 [1:34:12<2:50:19, 16.40s/batch, batch_loss=11, batc

Epoch 10/10:  37%|▎| 368/991 [1:34:27<2:50:19, 16.40s/batch, batch_loss=13.1, ba

Epoch 10/10:  37%|▎| 369/991 [1:34:27<2:44:23, 15.86s/batch, batch_loss=13.1, ba

Epoch 10/10:  37%|▎| 369/991 [1:34:42<2:44:23, 15.86s/batch, batch_loss=1.21e+4,

Epoch 10/10:  37%|▎| 370/991 [1:34:42<2:41:32, 15.61s/batch, batch_loss=1.21e+4,

Epoch 10/10:  37%|▎| 370/991 [1:34:56<2:41:32, 15.61s/batch, batch_loss=21.8, ba

Epoch 10/10:  37%|▎| 371/991 [1:34:56<2:39:00, 15.39s/batch, batch_loss=21.8, ba

Epoch 10/10:  37%|▎| 371/991 [1:35:12<2:39:00, 15.39s/batch, batch_loss=15.1, ba

Epoch 10/10:  38%|▍| 372/991 [1:35:12<2:39:02, 15.42s/batch, batch_loss=15.1, ba

Epoch 10/10:  38%|▍| 372/991 [1:35:27<2:39:02, 15.42s/batch, batch_loss=23.4, ba

Epoch 10/10:  38%|▍| 373/991 [1:35:27<2:38:07, 15.35s/batch, batch_loss=23.4, ba

Epoch 10/10:  38%|▍| 373/991 [1:35:43<2:38:07, 15.35s/batch, batch_loss=468, bat

Epoch 10/10:  38%|▍| 374/991 [1:35:43<2:38:14, 15.39s/batch, batch_loss=468, bat

Epoch 10/10:  38%|▍| 374/991 [1:35:57<2:38:14, 15.39s/batch, batch_loss=1.43e+3,

Epoch 10/10:  38%|▍| 375/991 [1:35:57<2:36:32, 15.25s/batch, batch_loss=1.43e+3,

Epoch 10/10:  38%|▍| 375/991 [1:36:12<2:36:32, 15.25s/batch, batch_loss=1.23e+3,

Epoch 10/10:  38%|▍| 376/991 [1:36:12<2:35:21, 15.16s/batch, batch_loss=1.23e+3,

Epoch 10/10:  38%|▍| 376/991 [1:36:27<2:35:21, 15.16s/batch, batch_loss=18.5, ba

Epoch 10/10:  38%|▍| 377/991 [1:36:27<2:31:51, 14.84s/batch, batch_loss=18.5, ba

Epoch 10/10:  38%|▍| 377/991 [1:36:41<2:31:51, 14.84s/batch, batch_loss=1.19e+3,

Epoch 10/10:  38%|▍| 378/991 [1:36:41<2:31:39, 14.84s/batch, batch_loss=1.19e+3,

Epoch 10/10:  38%|▍| 378/991 [1:36:57<2:31:39, 14.84s/batch, batch_loss=14.3, ba

Epoch 10/10:  38%|▍| 379/991 [1:36:57<2:32:33, 14.96s/batch, batch_loss=14.3, ba

Epoch 10/10:  38%|▍| 379/991 [1:37:12<2:32:33, 14.96s/batch, batch_loss=14, batc

Epoch 10/10:  38%|▍| 380/991 [1:37:12<2:32:12, 14.95s/batch, batch_loss=14, batc

Epoch 10/10:  38%|▍| 380/991 [1:37:26<2:32:12, 14.95s/batch, batch_loss=22, batc

Epoch 10/10:  38%|▍| 381/991 [1:37:26<2:29:57, 14.75s/batch, batch_loss=22, batc

Epoch 10/10:  38%|▍| 381/991 [1:37:40<2:29:57, 14.75s/batch, batch_loss=14.3, ba

Epoch 10/10:  39%|▍| 382/991 [1:37:40<2:28:38, 14.64s/batch, batch_loss=14.3, ba

Epoch 10/10:  39%|▍| 382/991 [1:37:54<2:28:38, 14.64s/batch, batch_loss=10.7, ba

Epoch 10/10:  39%|▍| 383/991 [1:37:54<2:27:11, 14.52s/batch, batch_loss=10.7, ba

Epoch 10/10:  39%|▍| 383/991 [1:38:13<2:27:11, 14.52s/batch, batch_loss=27.5, ba

Epoch 10/10:  39%|▍| 384/991 [1:38:13<2:37:50, 15.60s/batch, batch_loss=27.5, ba

Epoch 10/10:  39%|▍| 384/991 [1:38:27<2:37:50, 15.60s/batch, batch_loss=18.6, ba

Epoch 10/10:  39%|▍| 385/991 [1:38:27<2:35:09, 15.36s/batch, batch_loss=18.6, ba

Epoch 10/10:  39%|▍| 385/991 [1:38:42<2:35:09, 15.36s/batch, batch_loss=19.8, ba

Epoch 10/10:  39%|▍| 386/991 [1:38:42<2:33:46, 15.25s/batch, batch_loss=19.8, ba

Epoch 10/10:  39%|▍| 386/991 [1:38:57<2:33:46, 15.25s/batch, batch_loss=25.8, ba

Epoch 10/10:  39%|▍| 387/991 [1:38:57<2:31:40, 15.07s/batch, batch_loss=25.8, ba

Epoch 10/10:  39%|▍| 387/991 [1:39:11<2:31:40, 15.07s/batch, batch_loss=789, bat

Epoch 10/10:  39%|▍| 388/991 [1:39:11<2:29:26, 14.87s/batch, batch_loss=789, bat

Epoch 10/10:  39%|▍| 388/991 [1:39:26<2:29:26, 14.87s/batch, batch_loss=22.4, ba

Epoch 10/10:  39%|▍| 389/991 [1:39:26<2:28:59, 14.85s/batch, batch_loss=22.4, ba

Epoch 10/10:  39%|▍| 389/991 [1:39:42<2:28:59, 14.85s/batch, batch_loss=861, bat

Epoch 10/10:  39%|▍| 390/991 [1:39:42<2:30:06, 14.99s/batch, batch_loss=861, bat

Epoch 10/10:  39%|▍| 390/991 [1:39:56<2:30:06, 14.99s/batch, batch_loss=20.2, ba

Epoch 10/10:  39%|▍| 391/991 [1:39:56<2:29:13, 14.92s/batch, batch_loss=20.2, ba

Epoch 10/10:  39%|▍| 391/991 [1:40:12<2:29:13, 14.92s/batch, batch_loss=16.5, ba

Epoch 10/10:  40%|▍| 392/991 [1:40:12<2:29:59, 15.02s/batch, batch_loss=16.5, ba

Epoch 10/10:  40%|▍| 392/991 [1:40:26<2:29:59, 15.02s/batch, batch_loss=19.6, ba

Epoch 10/10:  40%|▍| 393/991 [1:40:26<2:29:08, 14.96s/batch, batch_loss=19.6, ba

Epoch 10/10:  40%|▍| 393/991 [1:40:42<2:29:08, 14.96s/batch, batch_loss=613, bat

Epoch 10/10:  40%|▍| 394/991 [1:40:42<2:31:18, 15.21s/batch, batch_loss=613, bat

Epoch 10/10:  40%|▍| 394/991 [1:40:57<2:31:18, 15.21s/batch, batch_loss=23.1, ba

Epoch 10/10:  40%|▍| 395/991 [1:40:57<2:30:06, 15.11s/batch, batch_loss=23.1, ba

Epoch 10/10:  40%|▍| 395/991 [1:41:12<2:30:06, 15.11s/batch, batch_loss=15.6, ba

Epoch 10/10:  40%|▍| 396/991 [1:41:12<2:28:05, 14.93s/batch, batch_loss=15.6, ba

Epoch 10/10:  40%|▍| 396/991 [1:41:26<2:28:05, 14.93s/batch, batch_loss=16.5, ba

Epoch 10/10:  40%|▍| 397/991 [1:41:26<2:26:14, 14.77s/batch, batch_loss=16.5, ba

Epoch 10/10:  40%|▍| 397/991 [1:41:41<2:26:14, 14.77s/batch, batch_loss=15.8, ba

Epoch 10/10:  40%|▍| 398/991 [1:41:41<2:26:22, 14.81s/batch, batch_loss=15.8, ba

Epoch 10/10:  40%|▍| 398/991 [1:41:56<2:26:22, 14.81s/batch, batch_loss=23.3, ba

Epoch 10/10:  40%|▍| 399/991 [1:41:56<2:25:53, 14.79s/batch, batch_loss=23.3, ba

Epoch 10/10:  40%|▍| 399/991 [1:42:11<2:25:53, 14.79s/batch, batch_loss=13.4, ba

Epoch 10/10:  40%|▍| 400/991 [1:42:11<2:27:49, 15.01s/batch, batch_loss=13.4, ba

Epoch 10/10:  40%|▍| 400/991 [1:42:25<2:27:49, 15.01s/batch, batch_loss=12.1, ba

Epoch 10/10:  40%|▍| 401/991 [1:42:25<2:25:39, 14.81s/batch, batch_loss=12.1, ba

Epoch 10/10:  40%|▍| 401/991 [1:42:41<2:25:39, 14.81s/batch, batch_loss=18.6, ba

Epoch 10/10:  41%|▍| 402/991 [1:42:41<2:26:13, 14.90s/batch, batch_loss=18.6, ba

Epoch 10/10:  41%|▍| 402/991 [1:42:55<2:26:13, 14.90s/batch, batch_loss=21.3, ba

Epoch 10/10:  41%|▍| 403/991 [1:42:55<2:25:49, 14.88s/batch, batch_loss=21.3, ba

Epoch 10/10:  41%|▍| 403/991 [1:43:11<2:25:49, 14.88s/batch, batch_loss=14.7, ba

Epoch 10/10:  41%|▍| 404/991 [1:43:11<2:27:07, 15.04s/batch, batch_loss=14.7, ba

Epoch 10/10:  41%|▍| 404/991 [1:43:26<2:27:07, 15.04s/batch, batch_loss=14.3, ba

Epoch 10/10:  41%|▍| 405/991 [1:43:26<2:27:57, 15.15s/batch, batch_loss=14.3, ba

Epoch 10/10:  41%|▍| 405/991 [1:43:41<2:27:57, 15.15s/batch, batch_loss=6.47, ba

Epoch 10/10:  41%|▍| 406/991 [1:43:41<2:27:02, 15.08s/batch, batch_loss=6.47, ba

Epoch 10/10:  41%|▍| 406/991 [1:43:57<2:27:02, 15.08s/batch, batch_loss=25.4, ba

Epoch 10/10:  41%|▍| 407/991 [1:43:57<2:27:48, 15.19s/batch, batch_loss=25.4, ba

Epoch 10/10:  41%|▍| 407/991 [1:44:12<2:27:48, 15.19s/batch, batch_loss=8.73, ba

Epoch 10/10:  41%|▍| 408/991 [1:44:12<2:27:11, 15.15s/batch, batch_loss=8.73, ba

Epoch 10/10:  41%|▍| 408/991 [1:44:26<2:27:11, 15.15s/batch, batch_loss=26.1, ba

Epoch 10/10:  41%|▍| 409/991 [1:44:26<2:24:43, 14.92s/batch, batch_loss=26.1, ba

Epoch 10/10:  41%|▍| 409/991 [1:44:41<2:24:43, 14.92s/batch, batch_loss=20.1, ba

Epoch 10/10:  41%|▍| 410/991 [1:44:41<2:23:56, 14.86s/batch, batch_loss=20.1, ba

Epoch 10/10:  41%|▍| 410/991 [1:44:56<2:23:56, 14.86s/batch, batch_loss=19.4, ba

Epoch 10/10:  41%|▍| 411/991 [1:44:56<2:24:36, 14.96s/batch, batch_loss=19.4, ba

Epoch 10/10:  41%|▍| 411/991 [1:45:10<2:24:36, 14.96s/batch, batch_loss=13.3, ba

Epoch 10/10:  42%|▍| 412/991 [1:45:10<2:23:02, 14.82s/batch, batch_loss=13.3, ba

Epoch 10/10:  42%|▍| 412/991 [1:45:24<2:23:02, 14.82s/batch, batch_loss=20.7, ba

Epoch 10/10:  42%|▍| 413/991 [1:45:24<2:20:32, 14.59s/batch, batch_loss=20.7, ba

Epoch 10/10:  42%|▍| 413/991 [1:45:39<2:20:32, 14.59s/batch, batch_loss=18, batc

Epoch 10/10:  42%|▍| 414/991 [1:45:39<2:19:42, 14.53s/batch, batch_loss=18, batc

Epoch 10/10:  42%|▍| 414/991 [1:45:54<2:19:42, 14.53s/batch, batch_loss=11.4, ba

Epoch 10/10:  42%|▍| 415/991 [1:45:54<2:21:10, 14.71s/batch, batch_loss=11.4, ba

Epoch 10/10:  42%|▍| 415/991 [1:46:08<2:21:10, 14.71s/batch, batch_loss=13.7, ba

Epoch 10/10:  42%|▍| 416/991 [1:46:08<2:20:03, 14.61s/batch, batch_loss=13.7, ba

Epoch 10/10:  42%|▍| 416/991 [1:46:24<2:20:03, 14.61s/batch, batch_loss=11.8, ba

Epoch 10/10:  42%|▍| 417/991 [1:46:24<2:21:24, 14.78s/batch, batch_loss=11.8, ba

Epoch 10/10:  42%|▍| 417/991 [1:46:39<2:21:24, 14.78s/batch, batch_loss=16.5, ba

Epoch 10/10:  42%|▍| 418/991 [1:46:39<2:23:32, 15.03s/batch, batch_loss=16.5, ba

Epoch 10/10:  42%|▍| 418/991 [1:46:57<2:23:32, 15.03s/batch, batch_loss=1.3e+3, 

Epoch 10/10:  42%|▍| 419/991 [1:46:57<2:31:13, 15.86s/batch, batch_loss=1.3e+3, 

Epoch 10/10:  42%|▍| 419/991 [1:47:12<2:31:13, 15.86s/batch, batch_loss=17.5, ba

Epoch 10/10:  42%|▍| 420/991 [1:47:12<2:27:33, 15.51s/batch, batch_loss=17.5, ba

Epoch 10/10:  42%|▍| 420/991 [1:47:25<2:27:33, 15.51s/batch, batch_loss=13.4, ba

Epoch 10/10:  42%|▍| 421/991 [1:47:25<2:21:42, 14.92s/batch, batch_loss=13.4, ba

Epoch 10/10:  42%|▍| 421/991 [1:47:39<2:21:42, 14.92s/batch, batch_loss=13.6, ba

Epoch 10/10:  43%|▍| 422/991 [1:47:39<2:17:14, 14.47s/batch, batch_loss=13.6, ba

Epoch 10/10:  43%|▍| 422/991 [1:47:53<2:17:14, 14.47s/batch, batch_loss=13.4, ba

Epoch 10/10:  43%|▍| 423/991 [1:47:53<2:15:36, 14.32s/batch, batch_loss=13.4, ba

Epoch 10/10:  43%|▍| 423/991 [1:48:10<2:15:36, 14.32s/batch, batch_loss=13.8, ba

Epoch 10/10:  43%|▍| 424/991 [1:48:10<2:24:45, 15.32s/batch, batch_loss=13.8, ba

Epoch 10/10:  43%|▍| 424/991 [1:48:26<2:24:45, 15.32s/batch, batch_loss=8.84, ba

Epoch 10/10:  43%|▍| 425/991 [1:48:26<2:24:51, 15.36s/batch, batch_loss=8.84, ba

Epoch 10/10:  43%|▍| 425/991 [1:48:41<2:24:51, 15.36s/batch, batch_loss=3.72, ba

Epoch 10/10:  43%|▍| 426/991 [1:48:41<2:24:19, 15.33s/batch, batch_loss=3.72, ba

Epoch 10/10:  43%|▍| 426/991 [1:48:56<2:24:19, 15.33s/batch, batch_loss=12.9, ba

Epoch 10/10:  43%|▍| 427/991 [1:48:56<2:23:11, 15.23s/batch, batch_loss=12.9, ba

Epoch 10/10:  43%|▍| 427/991 [1:49:12<2:23:11, 15.23s/batch, batch_loss=18.2, ba

Epoch 10/10:  43%|▍| 428/991 [1:49:12<2:25:41, 15.53s/batch, batch_loss=18.2, ba

Epoch 10/10:  43%|▍| 428/991 [1:49:28<2:25:41, 15.53s/batch, batch_loss=22.6, ba

Epoch 10/10:  43%|▍| 429/991 [1:49:28<2:27:29, 15.75s/batch, batch_loss=22.6, ba

Epoch 10/10:  43%|▍| 429/991 [1:49:44<2:27:29, 15.75s/batch, batch_loss=9.32e+3,

Epoch 10/10:  43%|▍| 430/991 [1:49:44<2:25:34, 15.57s/batch, batch_loss=9.32e+3,

Epoch 10/10:  43%|▍| 430/991 [1:49:58<2:25:34, 15.57s/batch, batch_loss=26.4, ba

Epoch 10/10:  43%|▍| 431/991 [1:49:58<2:22:11, 15.23s/batch, batch_loss=26.4, ba

Epoch 10/10:  43%|▍| 431/991 [1:50:16<2:22:11, 15.23s/batch, batch_loss=22.8, ba

Epoch 10/10:  44%|▍| 432/991 [1:50:16<2:30:08, 16.11s/batch, batch_loss=22.8, ba

Epoch 10/10:  44%|▍| 432/991 [1:50:32<2:30:08, 16.11s/batch, batch_loss=11.2, ba

Epoch 10/10:  44%|▍| 433/991 [1:50:32<2:28:22, 15.96s/batch, batch_loss=11.2, ba

Epoch 10/10:  44%|▍| 433/991 [1:50:47<2:28:22, 15.96s/batch, batch_loss=16.3, ba

Epoch 10/10:  44%|▍| 434/991 [1:50:47<2:24:41, 15.59s/batch, batch_loss=16.3, ba

Epoch 10/10:  44%|▍| 434/991 [1:51:02<2:24:41, 15.59s/batch, batch_loss=13.2, ba

Epoch 10/10:  44%|▍| 435/991 [1:51:02<2:24:54, 15.64s/batch, batch_loss=13.2, ba

Epoch 10/10:  44%|▍| 435/991 [1:51:16<2:24:54, 15.64s/batch, batch_loss=13.1, ba

Epoch 10/10:  44%|▍| 436/991 [1:51:16<2:19:43, 15.11s/batch, batch_loss=13.1, ba

Epoch 10/10:  44%|▍| 436/991 [1:51:31<2:19:43, 15.11s/batch, batch_loss=18.3, ba

Epoch 10/10:  44%|▍| 437/991 [1:51:31<2:17:50, 14.93s/batch, batch_loss=18.3, ba

Epoch 10/10:  44%|▍| 437/991 [1:51:46<2:17:50, 14.93s/batch, batch_loss=23.4, ba

Epoch 10/10:  44%|▍| 438/991 [1:51:46<2:17:31, 14.92s/batch, batch_loss=23.4, ba

Epoch 10/10:  44%|▍| 438/991 [1:52:00<2:17:31, 14.92s/batch, batch_loss=14.7, ba

Epoch 10/10:  44%|▍| 439/991 [1:52:00<2:16:31, 14.84s/batch, batch_loss=14.7, ba

Epoch 10/10:  44%|▍| 439/991 [1:52:19<2:16:31, 14.84s/batch, batch_loss=24.8, ba

Epoch 10/10:  44%|▍| 440/991 [1:52:19<2:26:10, 15.92s/batch, batch_loss=24.8, ba

Epoch 10/10:  44%|▍| 440/991 [1:52:34<2:26:10, 15.92s/batch, batch_loss=23, batc

Epoch 10/10:  45%|▍| 441/991 [1:52:34<2:23:44, 15.68s/batch, batch_loss=23, batc

Epoch 10/10:  45%|▍| 441/991 [1:52:48<2:23:44, 15.68s/batch, batch_loss=17, batc

Epoch 10/10:  45%|▍| 442/991 [1:52:48<2:20:46, 15.38s/batch, batch_loss=17, batc

Epoch 10/10:  45%|▍| 442/991 [1:53:04<2:20:46, 15.38s/batch, batch_loss=20.7, ba

Epoch 10/10:  45%|▍| 443/991 [1:53:04<2:19:55, 15.32s/batch, batch_loss=20.7, ba

Epoch 10/10:  45%|▍| 443/991 [1:53:18<2:19:55, 15.32s/batch, batch_loss=14.4, ba

Epoch 10/10:  45%|▍| 444/991 [1:53:18<2:18:21, 15.18s/batch, batch_loss=14.4, ba

Epoch 10/10:  45%|▍| 444/991 [1:53:34<2:18:21, 15.18s/batch, batch_loss=17.8, ba

Epoch 10/10:  45%|▍| 445/991 [1:53:34<2:19:09, 15.29s/batch, batch_loss=17.8, ba

Epoch 10/10:  45%|▍| 445/991 [1:53:49<2:19:09, 15.29s/batch, batch_loss=27.8, ba

Epoch 10/10:  45%|▍| 446/991 [1:53:49<2:16:54, 15.07s/batch, batch_loss=27.8, ba

Epoch 10/10:  45%|▍| 446/991 [1:54:04<2:16:54, 15.07s/batch, batch_loss=15.7, ba

Epoch 10/10:  45%|▍| 447/991 [1:54:04<2:17:22, 15.15s/batch, batch_loss=15.7, ba

Epoch 10/10:  45%|▍| 447/991 [1:54:23<2:17:22, 15.15s/batch, batch_loss=18.8, ba

Epoch 10/10:  45%|▍| 448/991 [1:54:23<2:27:09, 16.26s/batch, batch_loss=18.8, ba

Epoch 10/10:  45%|▍| 448/991 [1:54:38<2:27:09, 16.26s/batch, batch_loss=17.5, ba

Epoch 10/10:  45%|▍| 449/991 [1:54:38<2:22:57, 15.83s/batch, batch_loss=17.5, ba

Epoch 10/10:  45%|▍| 449/991 [1:54:53<2:22:57, 15.83s/batch, batch_loss=26.1, ba

Epoch 10/10:  45%|▍| 450/991 [1:54:53<2:20:20, 15.57s/batch, batch_loss=26.1, ba

Epoch 10/10:  45%|▍| 450/991 [1:55:07<2:20:20, 15.57s/batch, batch_loss=21.1, ba

Epoch 10/10:  46%|▍| 451/991 [1:55:07<2:18:08, 15.35s/batch, batch_loss=21.1, ba

Epoch 10/10:  46%|▍| 451/991 [1:55:23<2:18:08, 15.35s/batch, batch_loss=17.3, ba

Epoch 10/10:  46%|▍| 452/991 [1:55:23<2:17:16, 15.28s/batch, batch_loss=17.3, ba

Epoch 10/10:  46%|▍| 452/991 [1:55:37<2:17:16, 15.28s/batch, batch_loss=20.4, ba

Epoch 10/10:  46%|▍| 453/991 [1:55:37<2:15:11, 15.08s/batch, batch_loss=20.4, ba

Epoch 10/10:  46%|▍| 453/991 [1:55:52<2:15:11, 15.08s/batch, batch_loss=7.23e+3,

Epoch 10/10:  46%|▍| 454/991 [1:55:52<2:15:35, 15.15s/batch, batch_loss=7.23e+3,

Epoch 10/10:  46%|▍| 454/991 [1:56:08<2:15:35, 15.15s/batch, batch_loss=18.8, ba

Epoch 10/10:  46%|▍| 455/991 [1:56:08<2:16:00, 15.22s/batch, batch_loss=18.8, ba

Epoch 10/10:  46%|▍| 455/991 [1:56:23<2:16:00, 15.22s/batch, batch_loss=25.7, ba

Epoch 10/10:  46%|▍| 456/991 [1:56:23<2:14:52, 15.13s/batch, batch_loss=25.7, ba

Epoch 10/10:  46%|▍| 456/991 [1:56:41<2:14:52, 15.13s/batch, batch_loss=13.3, ba

Epoch 10/10:  46%|▍| 457/991 [1:56:41<2:22:34, 16.02s/batch, batch_loss=13.3, ba

Epoch 10/10:  46%|▍| 457/991 [1:56:56<2:22:34, 16.02s/batch, batch_loss=16.8, ba

Epoch 10/10:  46%|▍| 458/991 [1:56:56<2:19:29, 15.70s/batch, batch_loss=16.8, ba

Epoch 10/10:  46%|▍| 458/991 [1:57:10<2:19:29, 15.70s/batch, batch_loss=26.4, ba

Epoch 10/10:  46%|▍| 459/991 [1:57:10<2:15:24, 15.27s/batch, batch_loss=26.4, ba

Epoch 10/10:  46%|▍| 459/991 [1:57:25<2:15:24, 15.27s/batch, batch_loss=23.6, ba

Epoch 10/10:  46%|▍| 460/991 [1:57:25<2:13:28, 15.08s/batch, batch_loss=23.6, ba

Epoch 10/10:  46%|▍| 460/991 [1:57:40<2:13:28, 15.08s/batch, batch_loss=47.1, ba

Epoch 10/10:  47%|▍| 461/991 [1:57:40<2:12:55, 15.05s/batch, batch_loss=47.1, ba

Epoch 10/10:  47%|▍| 461/991 [1:57:55<2:12:55, 15.05s/batch, batch_loss=16.5, ba

Epoch 10/10:  47%|▍| 462/991 [1:57:55<2:14:26, 15.25s/batch, batch_loss=16.5, ba

Epoch 10/10:  47%|▍| 462/991 [1:58:10<2:14:26, 15.25s/batch, batch_loss=6.22e+4,

Epoch 10/10:  47%|▍| 463/991 [1:58:10<2:12:30, 15.06s/batch, batch_loss=6.22e+4,

Epoch 10/10:  47%|▍| 463/991 [1:58:25<2:12:30, 15.06s/batch, batch_loss=15.4, ba

Epoch 10/10:  47%|▍| 464/991 [1:58:25<2:12:44, 15.11s/batch, batch_loss=15.4, ba

Epoch 10/10:  47%|▍| 464/991 [1:58:41<2:12:44, 15.11s/batch, batch_loss=16.2, ba

Epoch 10/10:  47%|▍| 465/991 [1:58:41<2:13:01, 15.17s/batch, batch_loss=16.2, ba

Epoch 10/10:  47%|▍| 465/991 [1:58:56<2:13:01, 15.17s/batch, batch_loss=17.1, ba

Epoch 10/10:  47%|▍| 466/991 [1:58:56<2:13:04, 15.21s/batch, batch_loss=17.1, ba

Epoch 10/10:  47%|▍| 466/991 [1:59:12<2:13:04, 15.21s/batch, batch_loss=15.6, ba

Epoch 10/10:  47%|▍| 467/991 [1:59:12<2:15:34, 15.52s/batch, batch_loss=15.6, ba

Epoch 10/10:  47%|▍| 467/991 [1:59:28<2:15:34, 15.52s/batch, batch_loss=17.8, ba

Epoch 10/10:  47%|▍| 468/991 [1:59:28<2:15:14, 15.51s/batch, batch_loss=17.8, ba

Epoch 10/10:  47%|▍| 468/991 [1:59:44<2:15:14, 15.51s/batch, batch_loss=12.5, ba

Epoch 10/10:  47%|▍| 469/991 [1:59:44<2:16:12, 15.66s/batch, batch_loss=12.5, ba

Epoch 10/10:  47%|▍| 469/991 [1:59:58<2:16:12, 15.66s/batch, batch_loss=15.5, ba

Epoch 10/10:  47%|▍| 470/991 [1:59:58<2:12:59, 15.32s/batch, batch_loss=15.5, ba

Epoch 10/10:  47%|▍| 470/991 [2:00:13<2:12:59, 15.32s/batch, batch_loss=25.7, ba

Epoch 10/10:  48%|▍| 471/991 [2:00:13<2:10:46, 15.09s/batch, batch_loss=25.7, ba

Epoch 10/10:  48%|▍| 471/991 [2:00:30<2:10:46, 15.09s/batch, batch_loss=20.7, ba

Epoch 10/10:  48%|▍| 472/991 [2:00:30<2:17:16, 15.87s/batch, batch_loss=20.7, ba

Epoch 10/10:  48%|▍| 472/991 [2:00:47<2:17:16, 15.87s/batch, batch_loss=21.3, ba

Epoch 10/10:  48%|▍| 473/991 [2:00:47<2:17:47, 15.96s/batch, batch_loss=21.3, ba

Epoch 10/10:  48%|▍| 473/991 [2:01:01<2:17:47, 15.96s/batch, batch_loss=19.1, ba

Epoch 10/10:  48%|▍| 474/991 [2:01:01<2:14:29, 15.61s/batch, batch_loss=19.1, ba

Epoch 10/10:  48%|▍| 474/991 [2:01:17<2:14:29, 15.61s/batch, batch_loss=2.4e+3, 

Epoch 10/10:  48%|▍| 475/991 [2:01:17<2:13:40, 15.54s/batch, batch_loss=2.4e+3, 

Epoch 10/10:  48%|▍| 475/991 [2:01:31<2:13:40, 15.54s/batch, batch_loss=18.3, ba

Epoch 10/10:  48%|▍| 476/991 [2:01:31<2:11:12, 15.29s/batch, batch_loss=18.3, ba

Epoch 10/10:  48%|▍| 476/991 [2:01:46<2:11:12, 15.29s/batch, batch_loss=19.9, ba

Epoch 10/10:  48%|▍| 477/991 [2:01:46<2:09:44, 15.15s/batch, batch_loss=19.9, ba

Epoch 10/10:  48%|▍| 477/991 [2:02:02<2:09:44, 15.15s/batch, batch_loss=17.9, ba

Epoch 10/10:  48%|▍| 478/991 [2:02:02<2:10:13, 15.23s/batch, batch_loss=17.9, ba

Epoch 10/10:  48%|▍| 478/991 [2:02:16<2:10:13, 15.23s/batch, batch_loss=22.1, ba

Epoch 10/10:  48%|▍| 479/991 [2:02:16<2:08:21, 15.04s/batch, batch_loss=22.1, ba

Epoch 10/10:  48%|▍| 479/991 [2:02:32<2:08:21, 15.04s/batch, batch_loss=21.1, ba

Epoch 10/10:  48%|▍| 480/991 [2:02:32<2:10:01, 15.27s/batch, batch_loss=21.1, ba

Epoch 10/10:  48%|▍| 480/991 [2:02:50<2:10:01, 15.27s/batch, batch_loss=28.2, ba

Epoch 10/10:  49%|▍| 481/991 [2:02:50<2:15:40, 15.96s/batch, batch_loss=28.2, ba

Epoch 10/10:  49%|▍| 481/991 [2:03:05<2:15:40, 15.96s/batch, batch_loss=20.7, ba

Epoch 10/10:  49%|▍| 482/991 [2:03:05<2:13:05, 15.69s/batch, batch_loss=20.7, ba

Epoch 10/10:  49%|▍| 482/991 [2:03:20<2:13:05, 15.69s/batch, batch_loss=17.9, ba

Epoch 10/10:  49%|▍| 483/991 [2:03:20<2:11:00, 15.47s/batch, batch_loss=17.9, ba

Epoch 10/10:  49%|▍| 483/991 [2:03:35<2:11:00, 15.47s/batch, batch_loss=22.9, ba

Epoch 10/10:  49%|▍| 484/991 [2:03:35<2:10:06, 15.40s/batch, batch_loss=22.9, ba

Epoch 10/10:  49%|▍| 484/991 [2:03:50<2:10:06, 15.40s/batch, batch_loss=10.5, ba

Epoch 10/10:  49%|▍| 485/991 [2:03:50<2:10:14, 15.44s/batch, batch_loss=10.5, ba

Epoch 10/10:  49%|▍| 485/991 [2:04:06<2:10:14, 15.44s/batch, batch_loss=28.2, ba

Epoch 10/10:  49%|▍| 486/991 [2:04:06<2:09:58, 15.44s/batch, batch_loss=28.2, ba

Epoch 10/10:  49%|▍| 486/991 [2:04:21<2:09:58, 15.44s/batch, batch_loss=15.9, ba

Epoch 10/10:  49%|▍| 487/991 [2:04:21<2:07:55, 15.23s/batch, batch_loss=15.9, ba

Epoch 10/10:  49%|▍| 487/991 [2:04:36<2:07:55, 15.23s/batch, batch_loss=11.7, ba

Epoch 10/10:  49%|▍| 488/991 [2:04:36<2:09:16, 15.42s/batch, batch_loss=11.7, ba

Epoch 10/10:  49%|▍| 488/991 [2:04:52<2:09:16, 15.42s/batch, batch_loss=12.9, ba

Epoch 10/10:  49%|▍| 489/991 [2:04:52<2:08:57, 15.41s/batch, batch_loss=12.9, ba

Epoch 10/10:  49%|▍| 489/991 [2:05:07<2:08:57, 15.41s/batch, batch_loss=10.3, ba

Epoch 10/10:  49%|▍| 490/991 [2:05:07<2:07:33, 15.28s/batch, batch_loss=10.3, ba

Epoch 10/10:  49%|▍| 490/991 [2:05:21<2:07:33, 15.28s/batch, batch_loss=21.5, ba

Epoch 10/10:  50%|▍| 491/991 [2:05:21<2:05:02, 15.01s/batch, batch_loss=21.5, ba

Epoch 10/10:  50%|▍| 491/991 [2:05:35<2:05:02, 15.01s/batch, batch_loss=19.5, ba

Epoch 10/10:  50%|▍| 492/991 [2:05:35<2:02:57, 14.78s/batch, batch_loss=19.5, ba

Epoch 10/10:  50%|▍| 492/991 [2:05:50<2:02:57, 14.78s/batch, batch_loss=22.3, ba

Epoch 10/10:  50%|▍| 493/991 [2:05:50<2:01:29, 14.64s/batch, batch_loss=22.3, ba

Epoch 10/10:  50%|▍| 493/991 [2:06:04<2:01:29, 14.64s/batch, batch_loss=9.83, ba

Epoch 10/10:  50%|▍| 494/991 [2:06:04<2:00:36, 14.56s/batch, batch_loss=9.83, ba

Epoch 10/10:  50%|▍| 494/991 [2:06:18<2:00:36, 14.56s/batch, batch_loss=8.54e+4,

Epoch 10/10:  50%|▍| 495/991 [2:06:18<1:59:14, 14.42s/batch, batch_loss=8.54e+4,

Epoch 10/10:  50%|▍| 495/991 [2:06:33<1:59:14, 14.42s/batch, batch_loss=15.6, ba

Epoch 10/10:  50%|▌| 496/991 [2:06:33<2:00:29, 14.61s/batch, batch_loss=15.6, ba

Epoch 10/10:  50%|▌| 496/991 [2:06:48<2:00:29, 14.61s/batch, batch_loss=177, bat

Epoch 10/10:  50%|▌| 497/991 [2:06:48<2:00:53, 14.68s/batch, batch_loss=177, bat

Epoch 10/10:  50%|▌| 497/991 [2:07:03<2:00:53, 14.68s/batch, batch_loss=19.1, ba

Epoch 10/10:  50%|▌| 498/991 [2:07:03<2:00:47, 14.70s/batch, batch_loss=19.1, ba

Epoch 10/10:  50%|▌| 498/991 [2:07:18<2:00:47, 14.70s/batch, batch_loss=405, bat

Epoch 10/10:  50%|▌| 499/991 [2:07:18<2:02:10, 14.90s/batch, batch_loss=405, bat

Epoch 10/10:  50%|▌| 499/991 [2:07:34<2:02:10, 14.90s/batch, batch_loss=18.7, ba

Epoch 10/10:  50%|▌| 500/991 [2:07:34<2:03:23, 15.08s/batch, batch_loss=18.7, ba

Epoch 10/10:  50%|▌| 500/991 [2:07:48<2:03:23, 15.08s/batch, batch_loss=10.3, ba

Epoch 10/10:  51%|▌| 501/991 [2:07:49<2:02:22, 14.98s/batch, batch_loss=10.3, ba

Epoch 10/10:  51%|▌| 501/991 [2:08:04<2:02:22, 14.98s/batch, batch_loss=12.6, ba

Epoch 10/10:  51%|▌| 502/991 [2:08:04<2:03:08, 15.11s/batch, batch_loss=12.6, ba

Epoch 10/10:  51%|▌| 502/991 [2:08:20<2:03:08, 15.11s/batch, batch_loss=21.7, ba

Epoch 10/10:  51%|▌| 503/991 [2:08:20<2:05:13, 15.40s/batch, batch_loss=21.7, ba

Epoch 10/10:  51%|▌| 503/991 [2:08:36<2:05:13, 15.40s/batch, batch_loss=13.2, ba

Epoch 10/10:  51%|▌| 504/991 [2:08:36<2:07:20, 15.69s/batch, batch_loss=13.2, ba

Epoch 10/10:  51%|▌| 504/991 [2:08:52<2:07:20, 15.69s/batch, batch_loss=8.73, ba

Epoch 10/10:  51%|▌| 505/991 [2:08:52<2:05:56, 15.55s/batch, batch_loss=8.73, ba

Epoch 10/10:  51%|▌| 505/991 [2:09:07<2:05:56, 15.55s/batch, batch_loss=15.5, ba

Epoch 10/10:  51%|▌| 506/991 [2:09:07<2:05:37, 15.54s/batch, batch_loss=15.5, ba

Epoch 10/10:  51%|▌| 506/991 [2:09:23<2:05:37, 15.54s/batch, batch_loss=12.7, ba

Epoch 10/10:  51%|▌| 507/991 [2:09:23<2:06:39, 15.70s/batch, batch_loss=12.7, ba

Epoch 10/10:  51%|▌| 507/991 [2:09:39<2:06:39, 15.70s/batch, batch_loss=15.7, ba

Epoch 10/10:  51%|▌| 508/991 [2:09:39<2:06:33, 15.72s/batch, batch_loss=15.7, ba

Epoch 10/10:  51%|▌| 508/991 [2:09:55<2:06:33, 15.72s/batch, batch_loss=18.9, ba

Epoch 10/10:  51%|▌| 509/991 [2:09:55<2:06:44, 15.78s/batch, batch_loss=18.9, ba

Epoch 10/10:  51%|▌| 509/991 [2:10:11<2:06:44, 15.78s/batch, batch_loss=15.9, ba

Epoch 10/10:  51%|▌| 510/991 [2:10:11<2:07:59, 15.97s/batch, batch_loss=15.9, ba

Epoch 10/10:  51%|▌| 510/991 [2:10:30<2:07:59, 15.97s/batch, batch_loss=16.3, ba

Epoch 10/10:  52%|▌| 511/991 [2:10:30<2:13:25, 16.68s/batch, batch_loss=16.3, ba

Epoch 10/10:  52%|▌| 511/991 [2:10:46<2:13:25, 16.68s/batch, batch_loss=12.7, ba

Epoch 10/10:  52%|▌| 512/991 [2:10:46<2:12:00, 16.54s/batch, batch_loss=12.7, ba

Epoch 10/10:  52%|▌| 512/991 [2:11:01<2:12:00, 16.54s/batch, batch_loss=10.9, ba

Epoch 10/10:  52%|▌| 513/991 [2:11:01<2:08:59, 16.19s/batch, batch_loss=10.9, ba

Epoch 10/10:  52%|▌| 513/991 [2:11:18<2:08:59, 16.19s/batch, batch_loss=18.6, ba

Epoch 10/10:  52%|▌| 514/991 [2:11:18<2:09:34, 16.30s/batch, batch_loss=18.6, ba

Epoch 10/10:  52%|▌| 514/991 [2:11:33<2:09:34, 16.30s/batch, batch_loss=19.2, ba

Epoch 10/10:  52%|▌| 515/991 [2:11:33<2:06:52, 15.99s/batch, batch_loss=19.2, ba

Epoch 10/10:  52%|▌| 515/991 [2:11:49<2:06:52, 15.99s/batch, batch_loss=19.2, ba

Epoch 10/10:  52%|▌| 516/991 [2:11:49<2:07:24, 16.09s/batch, batch_loss=19.2, ba

Epoch 10/10:  52%|▌| 516/991 [2:12:06<2:07:24, 16.09s/batch, batch_loss=13, batc

Epoch 10/10:  52%|▌| 517/991 [2:12:06<2:07:29, 16.14s/batch, batch_loss=13, batc

Epoch 10/10:  52%|▌| 517/991 [2:12:22<2:07:29, 16.14s/batch, batch_loss=24.1, ba

Epoch 10/10:  52%|▌| 518/991 [2:12:22<2:06:55, 16.10s/batch, batch_loss=24.1, ba

Epoch 10/10:  52%|▌| 518/991 [2:12:40<2:06:55, 16.10s/batch, batch_loss=15.5, ba

Epoch 10/10:  52%|▌| 519/991 [2:12:40<2:11:05, 16.66s/batch, batch_loss=15.5, ba

Epoch 10/10:  52%|▌| 519/991 [2:12:55<2:11:05, 16.66s/batch, batch_loss=14.7, ba

Epoch 10/10:  52%|▌| 520/991 [2:12:55<2:08:07, 16.32s/batch, batch_loss=14.7, ba

Epoch 10/10:  52%|▌| 520/991 [2:13:11<2:08:07, 16.32s/batch, batch_loss=8.54, ba

Epoch 10/10:  53%|▌| 521/991 [2:13:11<2:08:00, 16.34s/batch, batch_loss=8.54, ba

Epoch 10/10:  53%|▌| 521/991 [2:13:27<2:08:00, 16.34s/batch, batch_loss=12.1, ba

Epoch 10/10:  53%|▌| 522/991 [2:13:27<2:06:22, 16.17s/batch, batch_loss=12.1, ba

Epoch 10/10:  53%|▌| 522/991 [2:13:42<2:06:22, 16.17s/batch, batch_loss=4.14, ba

Epoch 10/10:  53%|▌| 523/991 [2:13:42<2:03:58, 15.90s/batch, batch_loss=4.14, ba

Epoch 10/10:  53%|▌| 523/991 [2:13:58<2:03:58, 15.90s/batch, batch_loss=10.1, ba

Epoch 10/10:  53%|▌| 524/991 [2:13:58<2:02:39, 15.76s/batch, batch_loss=10.1, ba

Epoch 10/10:  53%|▌| 524/991 [2:14:14<2:02:39, 15.76s/batch, batch_loss=8.21, ba

Epoch 10/10:  53%|▌| 525/991 [2:14:14<2:02:10, 15.73s/batch, batch_loss=8.21, ba

Epoch 10/10:  53%|▌| 525/991 [2:14:28<2:02:10, 15.73s/batch, batch_loss=8.37, ba

Epoch 10/10:  53%|▌| 526/991 [2:14:28<1:59:32, 15.42s/batch, batch_loss=8.37, ba

Epoch 10/10:  53%|▌| 526/991 [2:14:44<1:59:32, 15.42s/batch, batch_loss=15.6, ba

Epoch 10/10:  53%|▌| 527/991 [2:14:44<1:59:11, 15.41s/batch, batch_loss=15.6, ba

Epoch 10/10:  53%|▌| 527/991 [2:14:59<1:59:11, 15.41s/batch, batch_loss=13.4, ba

Epoch 10/10:  53%|▌| 528/991 [2:14:59<1:58:05, 15.30s/batch, batch_loss=13.4, ba

Epoch 10/10:  53%|▌| 528/991 [2:15:13<1:58:05, 15.30s/batch, batch_loss=10.7, ba

Epoch 10/10:  53%|▌| 529/991 [2:15:13<1:56:10, 15.09s/batch, batch_loss=10.7, ba

Epoch 10/10:  53%|▌| 529/991 [2:15:28<1:56:10, 15.09s/batch, batch_loss=16.6, ba

Epoch 10/10:  53%|▌| 530/991 [2:15:28<1:56:00, 15.10s/batch, batch_loss=16.6, ba

Epoch 10/10:  53%|▌| 530/991 [2:15:44<1:56:00, 15.10s/batch, batch_loss=13.8, ba

Epoch 10/10:  54%|▌| 531/991 [2:15:44<1:56:03, 15.14s/batch, batch_loss=13.8, ba

Epoch 10/10:  54%|▌| 531/991 [2:15:59<1:56:03, 15.14s/batch, batch_loss=12.1, ba

Epoch 10/10:  54%|▌| 532/991 [2:15:59<1:55:20, 15.08s/batch, batch_loss=12.1, ba

Epoch 10/10:  54%|▌| 532/991 [2:16:15<1:55:20, 15.08s/batch, batch_loss=12.8, ba

Epoch 10/10:  54%|▌| 533/991 [2:16:15<1:58:09, 15.48s/batch, batch_loss=12.8, ba

Epoch 10/10:  54%|▌| 533/991 [2:16:31<1:58:09, 15.48s/batch, batch_loss=13.3, ba

Epoch 10/10:  54%|▌| 534/991 [2:16:31<1:59:19, 15.67s/batch, batch_loss=13.3, ba

Epoch 10/10:  54%|▌| 534/991 [2:16:46<1:59:19, 15.67s/batch, batch_loss=19, batc

Epoch 10/10:  54%|▌| 535/991 [2:16:46<1:57:48, 15.50s/batch, batch_loss=19, batc

Epoch 10/10:  54%|▌| 535/991 [2:17:00<1:57:48, 15.50s/batch, batch_loss=14.9, ba

Epoch 10/10:  54%|▌| 536/991 [2:17:00<1:54:05, 15.05s/batch, batch_loss=14.9, ba

Epoch 10/10:  54%|▌| 536/991 [2:17:13<1:54:05, 15.05s/batch, batch_loss=9.21, ba

Epoch 10/10:  54%|▌| 537/991 [2:17:13<1:49:20, 14.45s/batch, batch_loss=9.21, ba

Epoch 10/10:  54%|▌| 537/991 [2:17:28<1:49:20, 14.45s/batch, batch_loss=1.79e+3,

Epoch 10/10:  54%|▌| 538/991 [2:17:28<1:50:48, 14.68s/batch, batch_loss=1.79e+3,

Epoch 10/10:  54%|▌| 538/991 [2:17:42<1:50:48, 14.68s/batch, batch_loss=31.6, ba

Epoch 10/10:  54%|▌| 539/991 [2:17:42<1:46:54, 14.19s/batch, batch_loss=31.6, ba

Epoch 10/10:  54%|▌| 539/991 [2:17:56<1:46:54, 14.19s/batch, batch_loss=25.6, ba

Epoch 10/10:  54%|▌| 540/991 [2:17:56<1:48:10, 14.39s/batch, batch_loss=25.6, ba

Epoch 10/10:  54%|▌| 540/991 [2:18:11<1:48:10, 14.39s/batch, batch_loss=1.3e+4, 

Epoch 10/10:  55%|▌| 541/991 [2:18:11<1:48:28, 14.46s/batch, batch_loss=1.3e+4, 

Epoch 10/10:  55%|▌| 541/991 [2:18:26<1:48:28, 14.46s/batch, batch_loss=2.84e+3,

Epoch 10/10:  55%|▌| 542/991 [2:18:26<1:49:18, 14.61s/batch, batch_loss=2.84e+3,

Epoch 10/10:  55%|▌| 542/991 [2:18:40<1:49:18, 14.61s/batch, batch_loss=24.6, ba

Epoch 10/10:  55%|▌| 543/991 [2:18:40<1:48:28, 14.53s/batch, batch_loss=24.6, ba

Epoch 10/10:  55%|▌| 543/991 [2:18:55<1:48:28, 14.53s/batch, batch_loss=21.1, ba

Epoch 10/10:  55%|▌| 544/991 [2:18:55<1:48:32, 14.57s/batch, batch_loss=21.1, ba

Epoch 10/10:  55%|▌| 544/991 [2:19:10<1:48:32, 14.57s/batch, batch_loss=17.1, ba

Epoch 10/10:  55%|▌| 545/991 [2:19:10<1:49:16, 14.70s/batch, batch_loss=17.1, ba

Epoch 10/10:  55%|▌| 545/991 [2:19:25<1:49:16, 14.70s/batch, batch_loss=299, bat

Epoch 10/10:  55%|▌| 546/991 [2:19:25<1:48:46, 14.67s/batch, batch_loss=299, bat

Epoch 10/10:  55%|▌| 546/991 [2:19:40<1:48:46, 14.67s/batch, batch_loss=17.5, ba

Epoch 10/10:  55%|▌| 547/991 [2:19:40<1:50:07, 14.88s/batch, batch_loss=17.5, ba

Epoch 10/10:  55%|▌| 547/991 [2:19:55<1:50:07, 14.88s/batch, batch_loss=17.9, ba

Epoch 10/10:  55%|▌| 548/991 [2:19:55<1:49:44, 14.86s/batch, batch_loss=17.9, ba

Epoch 10/10:  55%|▌| 548/991 [2:20:10<1:49:44, 14.86s/batch, batch_loss=9.34, ba

Epoch 10/10:  55%|▌| 549/991 [2:20:10<1:49:50, 14.91s/batch, batch_loss=9.34, ba

Epoch 10/10:  55%|▌| 549/991 [2:20:28<1:49:50, 14.91s/batch, batch_loss=19.5, ba

Epoch 10/10:  55%|▌| 550/991 [2:20:28<1:56:22, 15.83s/batch, batch_loss=19.5, ba

Epoch 10/10:  55%|▌| 550/991 [2:20:43<1:56:22, 15.83s/batch, batch_loss=18.8, ba

Epoch 10/10:  56%|▌| 551/991 [2:20:43<1:55:10, 15.71s/batch, batch_loss=18.8, ba

Epoch 10/10:  56%|▌| 551/991 [2:20:58<1:55:10, 15.71s/batch, batch_loss=14.2, ba

Epoch 10/10:  56%|▌| 552/991 [2:20:58<1:53:14, 15.48s/batch, batch_loss=14.2, ba

Epoch 10/10:  56%|▌| 552/991 [2:21:13<1:53:14, 15.48s/batch, batch_loss=17.3, ba

Epoch 10/10:  56%|▌| 553/991 [2:21:13<1:51:50, 15.32s/batch, batch_loss=17.3, ba

Epoch 10/10:  56%|▌| 553/991 [2:21:28<1:51:50, 15.32s/batch, batch_loss=5.73e+3,

Epoch 10/10:  56%|▌| 554/991 [2:21:28<1:50:17, 15.14s/batch, batch_loss=5.73e+3,

Epoch 10/10:  56%|▌| 554/991 [2:21:43<1:50:17, 15.14s/batch, batch_loss=2.58e+3,

Epoch 10/10:  56%|▌| 555/991 [2:21:43<1:50:14, 15.17s/batch, batch_loss=2.58e+3,

Epoch 10/10:  56%|▌| 555/991 [2:21:58<1:50:14, 15.17s/batch, batch_loss=17.7, ba

Epoch 10/10:  56%|▌| 556/991 [2:21:58<1:49:58, 15.17s/batch, batch_loss=17.7, ba

Epoch 10/10:  56%|▌| 556/991 [2:22:14<1:49:58, 15.17s/batch, batch_loss=1.27e+4,

Epoch 10/10:  56%|▌| 557/991 [2:22:14<1:51:30, 15.42s/batch, batch_loss=1.27e+4,

Epoch 10/10:  56%|▌| 557/991 [2:22:29<1:51:30, 15.42s/batch, batch_loss=10.1, ba

Epoch 10/10:  56%|▌| 558/991 [2:22:29<1:49:57, 15.24s/batch, batch_loss=10.1, ba

Epoch 10/10:  56%|▌| 558/991 [2:22:47<1:49:57, 15.24s/batch, batch_loss=17.1, ba

Epoch 10/10:  56%|▌| 559/991 [2:22:47<1:56:02, 16.12s/batch, batch_loss=17.1, ba

Epoch 10/10:  56%|▌| 559/991 [2:23:02<1:56:02, 16.12s/batch, batch_loss=7.22, ba

Epoch 10/10:  57%|▌| 560/991 [2:23:02<1:52:15, 15.63s/batch, batch_loss=7.22, ba

Epoch 10/10:  57%|▌| 560/991 [2:23:16<1:52:15, 15.63s/batch, batch_loss=8.82, ba

Epoch 10/10:  57%|▌| 561/991 [2:23:16<1:49:19, 15.25s/batch, batch_loss=8.82, ba

Epoch 10/10:  57%|▌| 561/991 [2:23:31<1:49:19, 15.25s/batch, batch_loss=16.4, ba

Epoch 10/10:  57%|▌| 562/991 [2:23:31<1:47:16, 15.00s/batch, batch_loss=16.4, ba

Epoch 10/10:  57%|▌| 562/991 [2:23:46<1:47:16, 15.00s/batch, batch_loss=7.65, ba

Epoch 10/10:  57%|▌| 563/991 [2:23:46<1:48:04, 15.15s/batch, batch_loss=7.65, ba

Epoch 10/10:  57%|▌| 563/991 [2:24:01<1:48:04, 15.15s/batch, batch_loss=12.8, ba

Epoch 10/10:  57%|▌| 564/991 [2:24:01<1:47:58, 15.17s/batch, batch_loss=12.8, ba

Epoch 10/10:  57%|▌| 564/991 [2:24:16<1:47:58, 15.17s/batch, batch_loss=497, bat

Epoch 10/10:  57%|▌| 565/991 [2:24:16<1:47:04, 15.08s/batch, batch_loss=497, bat

Epoch 10/10:  57%|▌| 565/991 [2:24:33<1:47:04, 15.08s/batch, batch_loss=12, batc

Epoch 10/10:  57%|▌| 566/991 [2:24:33<1:51:45, 15.78s/batch, batch_loss=12, batc

Epoch 10/10:  57%|▌| 566/991 [2:24:48<1:51:45, 15.78s/batch, batch_loss=20.6, ba

Epoch 10/10:  57%|▌| 567/991 [2:24:48<1:49:39, 15.52s/batch, batch_loss=20.6, ba

Epoch 10/10:  57%|▌| 567/991 [2:25:03<1:49:39, 15.52s/batch, batch_loss=299, bat

Epoch 10/10:  57%|▌| 568/991 [2:25:03<1:47:52, 15.30s/batch, batch_loss=299, bat

Epoch 10/10:  57%|▌| 568/991 [2:25:18<1:47:52, 15.30s/batch, batch_loss=29.3, ba

Epoch 10/10:  57%|▌| 569/991 [2:25:18<1:46:10, 15.10s/batch, batch_loss=29.3, ba

Epoch 10/10:  57%|▌| 569/991 [2:25:33<1:46:10, 15.10s/batch, batch_loss=8.49e+3,

Epoch 10/10:  58%|▌| 570/991 [2:25:33<1:46:14, 15.14s/batch, batch_loss=8.49e+3,

Epoch 10/10:  58%|▌| 570/991 [2:25:48<1:46:14, 15.14s/batch, batch_loss=10.1, ba

Epoch 10/10:  58%|▌| 571/991 [2:25:48<1:45:33, 15.08s/batch, batch_loss=10.1, ba

Epoch 10/10:  58%|▌| 571/991 [2:26:03<1:45:33, 15.08s/batch, batch_loss=11, batc

Epoch 10/10:  58%|▌| 572/991 [2:26:03<1:45:09, 15.06s/batch, batch_loss=11, batc

Epoch 10/10:  58%|▌| 572/991 [2:26:17<1:45:09, 15.06s/batch, batch_loss=7.12, ba

Epoch 10/10:  58%|▌| 573/991 [2:26:17<1:43:30, 14.86s/batch, batch_loss=7.12, ba

Epoch 10/10:  58%|▌| 573/991 [2:26:32<1:43:30, 14.86s/batch, batch_loss=10.4, ba

Epoch 10/10:  58%|▌| 574/991 [2:26:32<1:43:38, 14.91s/batch, batch_loss=10.4, ba

Epoch 10/10:  58%|▌| 574/991 [2:26:47<1:43:38, 14.91s/batch, batch_loss=17.3, ba

Epoch 10/10:  58%|▌| 575/991 [2:26:47<1:43:39, 14.95s/batch, batch_loss=17.3, ba

Epoch 10/10:  58%|▌| 575/991 [2:27:03<1:43:39, 14.95s/batch, batch_loss=24.8, ba

Epoch 10/10:  58%|▌| 576/991 [2:27:03<1:44:33, 15.12s/batch, batch_loss=24.8, ba

Epoch 10/10:  58%|▌| 576/991 [2:27:17<1:44:33, 15.12s/batch, batch_loss=11.4, ba

Epoch 10/10:  58%|▌| 577/991 [2:27:17<1:42:13, 14.82s/batch, batch_loss=11.4, ba

Epoch 10/10:  58%|▌| 577/991 [2:27:32<1:42:13, 14.82s/batch, batch_loss=8.69, ba

Epoch 10/10:  58%|▌| 578/991 [2:27:32<1:42:25, 14.88s/batch, batch_loss=8.69, ba

Epoch 10/10:  58%|▌| 578/991 [2:27:47<1:42:25, 14.88s/batch, batch_loss=10, batc

Epoch 10/10:  58%|▌| 579/991 [2:27:47<1:42:36, 14.94s/batch, batch_loss=10, batc

Epoch 10/10:  58%|▌| 579/991 [2:28:02<1:42:36, 14.94s/batch, batch_loss=16.7, ba

Epoch 10/10:  59%|▌| 580/991 [2:28:02<1:41:00, 14.75s/batch, batch_loss=16.7, ba

Epoch 10/10:  59%|▌| 580/991 [2:28:16<1:41:00, 14.75s/batch, batch_loss=5.99, ba

Epoch 10/10:  59%|▌| 581/991 [2:28:16<1:40:32, 14.71s/batch, batch_loss=5.99, ba

Epoch 10/10:  59%|▌| 581/991 [2:28:33<1:40:32, 14.71s/batch, batch_loss=0.158, b

Epoch 10/10:  59%|▌| 582/991 [2:28:33<1:45:28, 15.47s/batch, batch_loss=0.158, b

Epoch 10/10:  59%|▌| 582/991 [2:28:48<1:45:28, 15.47s/batch, batch_loss=6.6e+3, 

Epoch 10/10:  59%|▌| 583/991 [2:28:48<1:44:12, 15.33s/batch, batch_loss=6.6e+3, 

Epoch 10/10:  59%|▌| 583/991 [2:29:02<1:44:12, 15.33s/batch, batch_loss=11.5, ba

Epoch 10/10:  59%|▌| 584/991 [2:29:02<1:40:19, 14.79s/batch, batch_loss=11.5, ba

Epoch 10/10:  59%|▌| 584/991 [2:29:17<1:40:19, 14.79s/batch, batch_loss=8.09, ba

Epoch 10/10:  59%|▌| 585/991 [2:29:17<1:41:22, 14.98s/batch, batch_loss=8.09, ba

Epoch 10/10:  59%|▌| 585/991 [2:29:33<1:41:22, 14.98s/batch, batch_loss=22.9, ba

Epoch 10/10:  59%|▌| 586/991 [2:29:33<1:41:50, 15.09s/batch, batch_loss=22.9, ba

Epoch 10/10:  59%|▌| 586/991 [2:29:47<1:41:50, 15.09s/batch, batch_loss=24.1, ba

Epoch 10/10:  59%|▌| 587/991 [2:29:47<1:40:52, 14.98s/batch, batch_loss=24.1, ba

Epoch 10/10:  59%|▌| 587/991 [2:30:02<1:40:52, 14.98s/batch, batch_loss=15.4, ba

Epoch 10/10:  59%|▌| 588/991 [2:30:02<1:40:03, 14.90s/batch, batch_loss=15.4, ba

Epoch 10/10:  59%|▌| 588/991 [2:30:16<1:40:03, 14.90s/batch, batch_loss=8.14, ba

Epoch 10/10:  59%|▌| 589/991 [2:30:16<1:38:42, 14.73s/batch, batch_loss=8.14, ba

Epoch 10/10:  59%|▌| 589/991 [2:30:32<1:38:42, 14.73s/batch, batch_loss=16, batc

Epoch 10/10:  60%|▌| 590/991 [2:30:32<1:39:14, 14.85s/batch, batch_loss=16, batc

Epoch 10/10:  60%|▌| 590/991 [2:30:47<1:39:14, 14.85s/batch, batch_loss=16.4, ba

Epoch 10/10:  60%|▌| 591/991 [2:30:47<1:39:13, 14.88s/batch, batch_loss=16.4, ba

Epoch 10/10:  60%|▌| 591/991 [2:31:02<1:39:13, 14.88s/batch, batch_loss=7.86, ba

Epoch 10/10:  60%|▌| 592/991 [2:31:02<1:39:34, 14.97s/batch, batch_loss=7.86, ba

Epoch 10/10:  60%|▌| 592/991 [2:31:17<1:39:34, 14.97s/batch, batch_loss=11.3, ba

Epoch 10/10:  60%|▌| 593/991 [2:31:17<1:39:12, 14.96s/batch, batch_loss=11.3, ba

Epoch 10/10:  60%|▌| 593/991 [2:31:32<1:39:12, 14.96s/batch, batch_loss=9.7, bat

Epoch 10/10:  60%|▌| 594/991 [2:31:32<1:39:12, 14.99s/batch, batch_loss=9.7, bat

Epoch 10/10:  60%|▌| 594/991 [2:31:46<1:39:12, 14.99s/batch, batch_loss=7.06, ba

Epoch 10/10:  60%|▌| 595/991 [2:31:46<1:38:03, 14.86s/batch, batch_loss=7.06, ba

Epoch 10/10:  60%|▌| 595/991 [2:32:01<1:38:03, 14.86s/batch, batch_loss=7.03, ba

Epoch 10/10:  60%|▌| 596/991 [2:32:01<1:37:59, 14.89s/batch, batch_loss=7.03, ba

Epoch 10/10:  60%|▌| 596/991 [2:32:16<1:37:59, 14.89s/batch, batch_loss=24.1, ba

Epoch 10/10:  60%|▌| 597/991 [2:32:16<1:38:05, 14.94s/batch, batch_loss=24.1, ba

Epoch 10/10:  60%|▌| 597/991 [2:32:33<1:38:05, 14.94s/batch, batch_loss=9.07, ba

Epoch 10/10:  60%|▌| 598/991 [2:32:33<1:41:48, 15.54s/batch, batch_loss=9.07, ba

Epoch 10/10:  60%|▌| 598/991 [2:32:48<1:41:48, 15.54s/batch, batch_loss=17.3, ba

Epoch 10/10:  60%|▌| 599/991 [2:32:48<1:39:24, 15.21s/batch, batch_loss=17.3, ba

Epoch 10/10:  60%|▌| 599/991 [2:33:02<1:39:24, 15.21s/batch, batch_loss=10.6, ba

Epoch 10/10:  61%|▌| 600/991 [2:33:02<1:38:10, 15.07s/batch, batch_loss=10.6, ba

Epoch 10/10:  61%|▌| 600/991 [2:33:17<1:38:10, 15.07s/batch, batch_loss=15.9, ba

Epoch 10/10:  61%|▌| 601/991 [2:33:17<1:37:47, 15.05s/batch, batch_loss=15.9, ba

Epoch 10/10:  61%|▌| 601/991 [2:33:31<1:37:47, 15.05s/batch, batch_loss=10.1, ba

Epoch 10/10:  61%|▌| 602/991 [2:33:31<1:35:16, 14.70s/batch, batch_loss=10.1, ba

Epoch 10/10:  61%|▌| 602/991 [2:33:46<1:35:16, 14.70s/batch, batch_loss=6.09, ba

Epoch 10/10:  61%|▌| 603/991 [2:33:46<1:35:06, 14.71s/batch, batch_loss=6.09, ba

Epoch 10/10:  61%|▌| 603/991 [2:34:00<1:35:06, 14.71s/batch, batch_loss=1.01e+4,

Epoch 10/10:  61%|▌| 604/991 [2:34:00<1:34:07, 14.59s/batch, batch_loss=1.01e+4,

Epoch 10/10:  61%|▌| 604/991 [2:34:15<1:34:07, 14.59s/batch, batch_loss=9.9, bat

Epoch 10/10:  61%|▌| 605/991 [2:34:15<1:34:37, 14.71s/batch, batch_loss=9.9, bat

Epoch 10/10:  61%|▌| 605/991 [2:34:30<1:34:37, 14.71s/batch, batch_loss=8.94, ba

Epoch 10/10:  61%|▌| 606/991 [2:34:30<1:35:09, 14.83s/batch, batch_loss=8.94, ba

Epoch 10/10:  61%|▌| 606/991 [2:34:45<1:35:09, 14.83s/batch, batch_loss=10.7, ba

Epoch 10/10:  61%|▌| 607/991 [2:34:45<1:34:48, 14.81s/batch, batch_loss=10.7, ba

Epoch 10/10:  61%|▌| 607/991 [2:34:59<1:34:48, 14.81s/batch, batch_loss=13.6, ba

Epoch 10/10:  61%|▌| 608/991 [2:34:59<1:33:10, 14.60s/batch, batch_loss=13.6, ba

Epoch 10/10:  61%|▌| 608/991 [2:35:17<1:33:10, 14.60s/batch, batch_loss=15.8, ba

Epoch 10/10:  61%|▌| 609/991 [2:35:17<1:38:45, 15.51s/batch, batch_loss=15.8, ba

Epoch 10/10:  61%|▌| 609/991 [2:35:32<1:38:45, 15.51s/batch, batch_loss=15.2, ba

Epoch 10/10:  62%|▌| 610/991 [2:35:32<1:38:13, 15.47s/batch, batch_loss=15.2, ba

Epoch 10/10:  62%|▌| 610/991 [2:35:47<1:38:13, 15.47s/batch, batch_loss=23.8, ba

Epoch 10/10:  62%|▌| 611/991 [2:35:47<1:36:29, 15.24s/batch, batch_loss=23.8, ba

Epoch 10/10:  62%|▌| 611/991 [2:36:02<1:36:29, 15.24s/batch, batch_loss=6.45, ba

Epoch 10/10:  62%|▌| 612/991 [2:36:02<1:35:15, 15.08s/batch, batch_loss=6.45, ba

Epoch 10/10:  62%|▌| 612/991 [2:36:17<1:35:15, 15.08s/batch, batch_loss=12, batc

Epoch 10/10:  62%|▌| 613/991 [2:36:17<1:35:02, 15.09s/batch, batch_loss=12, batc

Epoch 10/10:  62%|▌| 613/991 [2:36:32<1:35:02, 15.09s/batch, batch_loss=1.73e+4,

Epoch 10/10:  62%|▌| 614/991 [2:36:32<1:35:46, 15.24s/batch, batch_loss=1.73e+4,

Epoch 10/10:  62%|▌| 614/991 [2:36:47<1:35:46, 15.24s/batch, batch_loss=990, bat

Epoch 10/10:  62%|▌| 615/991 [2:36:47<1:34:13, 15.04s/batch, batch_loss=990, bat

Epoch 10/10:  62%|▌| 615/991 [2:37:02<1:34:13, 15.04s/batch, batch_loss=7.47, ba

Epoch 10/10:  62%|▌| 616/991 [2:37:02<1:33:40, 14.99s/batch, batch_loss=7.47, ba

Epoch 10/10:  62%|▌| 616/991 [2:37:17<1:33:40, 14.99s/batch, batch_loss=18.7, ba

Epoch 10/10:  62%|▌| 617/991 [2:37:17<1:34:18, 15.13s/batch, batch_loss=18.7, ba

Epoch 10/10:  62%|▌| 617/991 [2:37:32<1:34:18, 15.13s/batch, batch_loss=12.4, ba

Epoch 10/10:  62%|▌| 618/991 [2:37:32<1:33:30, 15.04s/batch, batch_loss=12.4, ba

Epoch 10/10:  62%|▌| 618/991 [2:37:47<1:33:30, 15.04s/batch, batch_loss=18.3, ba

Epoch 10/10:  62%|▌| 619/991 [2:37:47<1:32:48, 14.97s/batch, batch_loss=18.3, ba

Epoch 10/10:  62%|▌| 619/991 [2:38:01<1:32:48, 14.97s/batch, batch_loss=12.3, ba

Epoch 10/10:  63%|▋| 620/991 [2:38:01<1:31:17, 14.76s/batch, batch_loss=12.3, ba

Epoch 10/10:  63%|▋| 620/991 [2:38:16<1:31:17, 14.76s/batch, batch_loss=9.81, ba

Epoch 10/10:  63%|▋| 621/991 [2:38:16<1:30:25, 14.66s/batch, batch_loss=9.81, ba

Epoch 10/10:  63%|▋| 621/991 [2:38:30<1:30:25, 14.66s/batch, batch_loss=5.49e+3,

Epoch 10/10:  63%|▋| 622/991 [2:38:30<1:30:29, 14.71s/batch, batch_loss=5.49e+3,

Epoch 10/10:  63%|▋| 622/991 [2:38:46<1:30:29, 14.71s/batch, batch_loss=17.6, ba

Epoch 10/10:  63%|▋| 623/991 [2:38:46<1:31:16, 14.88s/batch, batch_loss=17.6, ba

Epoch 10/10:  63%|▋| 623/991 [2:39:01<1:31:16, 14.88s/batch, batch_loss=1.6e+4, 

Epoch 10/10:  63%|▋| 624/991 [2:39:01<1:31:24, 14.94s/batch, batch_loss=1.6e+4, 

Epoch 10/10:  63%|▋| 624/991 [2:39:16<1:31:24, 14.94s/batch, batch_loss=9.06, ba

Epoch 10/10:  63%|▋| 625/991 [2:39:16<1:31:40, 15.03s/batch, batch_loss=9.06, ba

Epoch 10/10:  63%|▋| 625/991 [2:39:32<1:31:40, 15.03s/batch, batch_loss=5.89, ba

Epoch 10/10:  63%|▋| 626/991 [2:39:32<1:32:23, 15.19s/batch, batch_loss=5.89, ba

Epoch 10/10:  63%|▋| 626/991 [2:39:46<1:32:23, 15.19s/batch, batch_loss=4.31e+3,

Epoch 10/10:  63%|▋| 627/991 [2:39:46<1:30:50, 14.97s/batch, batch_loss=4.31e+3,

Epoch 10/10:  63%|▋| 627/991 [2:40:00<1:30:50, 14.97s/batch, batch_loss=1.05e+3,

Epoch 10/10:  63%|▋| 628/991 [2:40:00<1:28:52, 14.69s/batch, batch_loss=1.05e+3,

Epoch 10/10:  63%|▋| 628/991 [2:40:15<1:28:52, 14.69s/batch, batch_loss=13.4, ba

Epoch 10/10:  63%|▋| 629/991 [2:40:15<1:28:16, 14.63s/batch, batch_loss=13.4, ba

Epoch 10/10:  63%|▋| 629/991 [2:40:30<1:28:16, 14.63s/batch, batch_loss=19, batc

Epoch 10/10:  64%|▋| 630/991 [2:40:30<1:28:34, 14.72s/batch, batch_loss=19, batc

Epoch 10/10:  64%|▋| 630/991 [2:40:44<1:28:34, 14.72s/batch, batch_loss=17.5, ba

Epoch 10/10:  64%|▋| 631/991 [2:40:44<1:27:28, 14.58s/batch, batch_loss=17.5, ba

Epoch 10/10:  64%|▋| 631/991 [2:40:58<1:27:28, 14.58s/batch, batch_loss=3.6, bat

Epoch 10/10:  64%|▋| 632/991 [2:40:58<1:26:19, 14.43s/batch, batch_loss=3.6, bat

Epoch 10/10:  64%|▋| 632/991 [2:41:13<1:26:19, 14.43s/batch, batch_loss=19.7, ba

Epoch 10/10:  64%|▋| 633/991 [2:41:13<1:26:25, 14.48s/batch, batch_loss=19.7, ba

Epoch 10/10:  64%|▋| 633/991 [2:41:27<1:26:25, 14.48s/batch, batch_loss=27.3, ba

Epoch 10/10:  64%|▋| 634/991 [2:41:27<1:26:06, 14.47s/batch, batch_loss=27.3, ba

Epoch 10/10:  64%|▋| 634/991 [2:41:42<1:26:06, 14.47s/batch, batch_loss=24.3, ba

Epoch 10/10:  64%|▋| 635/991 [2:41:42<1:26:31, 14.58s/batch, batch_loss=24.3, ba

Epoch 10/10:  64%|▋| 635/991 [2:41:57<1:26:31, 14.58s/batch, batch_loss=17.7, ba

Epoch 10/10:  64%|▋| 636/991 [2:41:57<1:27:29, 14.79s/batch, batch_loss=17.7, ba

Epoch 10/10:  64%|▋| 636/991 [2:42:12<1:27:29, 14.79s/batch, batch_loss=17.2, ba

Epoch 10/10:  64%|▋| 637/991 [2:42:12<1:28:13, 14.95s/batch, batch_loss=17.2, ba

Epoch 10/10:  64%|▋| 637/991 [2:42:28<1:28:13, 14.95s/batch, batch_loss=17.4, ba

Epoch 10/10:  64%|▋| 638/991 [2:42:28<1:28:54, 15.11s/batch, batch_loss=17.4, ba

Epoch 10/10:  64%|▋| 638/991 [2:42:43<1:28:54, 15.11s/batch, batch_loss=12.6, ba

Epoch 10/10:  64%|▋| 639/991 [2:42:43<1:28:48, 15.14s/batch, batch_loss=12.6, ba

Epoch 10/10:  64%|▋| 639/991 [2:42:58<1:28:48, 15.14s/batch, batch_loss=676, bat

Epoch 10/10:  65%|▋| 640/991 [2:42:58<1:28:45, 15.17s/batch, batch_loss=676, bat

Epoch 10/10:  65%|▋| 640/991 [2:43:13<1:28:45, 15.17s/batch, batch_loss=14.5, ba

Epoch 10/10:  65%|▋| 641/991 [2:43:13<1:27:28, 15.00s/batch, batch_loss=14.5, ba

Epoch 10/10:  65%|▋| 641/991 [2:43:28<1:27:28, 15.00s/batch, batch_loss=8.39, ba

Epoch 10/10:  65%|▋| 642/991 [2:43:28<1:27:20, 15.01s/batch, batch_loss=8.39, ba

Epoch 10/10:  65%|▋| 642/991 [2:43:43<1:27:20, 15.01s/batch, batch_loss=2.12e+4,

Epoch 10/10:  65%|▋| 643/991 [2:43:43<1:27:12, 15.03s/batch, batch_loss=2.12e+4,

Epoch 10/10:  65%|▋| 643/991 [2:43:58<1:27:12, 15.03s/batch, batch_loss=1.76e+4,

Epoch 10/10:  65%|▋| 644/991 [2:43:58<1:27:30, 15.13s/batch, batch_loss=1.76e+4,

Epoch 10/10:  65%|▋| 644/991 [2:44:13<1:27:30, 15.13s/batch, batch_loss=2.2e+3, 

Epoch 10/10:  65%|▋| 645/991 [2:44:13<1:26:54, 15.07s/batch, batch_loss=2.2e+3, 

Epoch 10/10:  65%|▋| 645/991 [2:44:28<1:26:54, 15.07s/batch, batch_loss=11.6, ba

Epoch 10/10:  65%|▋| 646/991 [2:44:28<1:26:41, 15.08s/batch, batch_loss=11.6, ba

Epoch 10/10:  65%|▋| 646/991 [2:44:43<1:26:41, 15.08s/batch, batch_loss=13.1, ba

Epoch 10/10:  65%|▋| 647/991 [2:44:43<1:26:03, 15.01s/batch, batch_loss=13.1, ba

Epoch 10/10:  65%|▋| 647/991 [2:44:58<1:26:03, 15.01s/batch, batch_loss=15.9, ba

Epoch 10/10:  65%|▋| 648/991 [2:44:58<1:24:56, 14.86s/batch, batch_loss=15.9, ba

Epoch 10/10:  65%|▋| 648/991 [2:45:13<1:24:56, 14.86s/batch, batch_loss=15.8, ba

Epoch 10/10:  65%|▋| 649/991 [2:45:13<1:24:39, 14.85s/batch, batch_loss=15.8, ba

Epoch 10/10:  65%|▋| 649/991 [2:45:27<1:24:39, 14.85s/batch, batch_loss=1.34e+4,

Epoch 10/10:  66%|▋| 650/991 [2:45:27<1:24:20, 14.84s/batch, batch_loss=1.34e+4,

Epoch 10/10:  66%|▋| 650/991 [2:45:42<1:24:20, 14.84s/batch, batch_loss=8.66, ba

Epoch 10/10:  66%|▋| 651/991 [2:45:42<1:22:55, 14.63s/batch, batch_loss=8.66, ba

Epoch 10/10:  66%|▋| 651/991 [2:45:57<1:22:55, 14.63s/batch, batch_loss=13.6, ba

Epoch 10/10:  66%|▋| 652/991 [2:45:57<1:23:18, 14.75s/batch, batch_loss=13.6, ba

Epoch 10/10:  66%|▋| 652/991 [2:46:11<1:23:18, 14.75s/batch, batch_loss=19.7, ba

Epoch 10/10:  66%|▋| 653/991 [2:46:11<1:23:03, 14.74s/batch, batch_loss=19.7, ba

Epoch 10/10:  66%|▋| 653/991 [2:46:26<1:23:03, 14.74s/batch, batch_loss=19.5, ba

Epoch 10/10:  66%|▋| 654/991 [2:46:26<1:22:35, 14.71s/batch, batch_loss=19.5, ba

Epoch 10/10:  66%|▋| 654/991 [2:46:43<1:22:35, 14.71s/batch, batch_loss=3.84e+3,

Epoch 10/10:  66%|▋| 655/991 [2:46:43<1:27:00, 15.54s/batch, batch_loss=3.84e+3,

Epoch 10/10:  66%|▋| 655/991 [2:46:58<1:27:00, 15.54s/batch, batch_loss=5.19e+3,

Epoch 10/10:  66%|▋| 656/991 [2:46:58<1:24:59, 15.22s/batch, batch_loss=5.19e+3,

Epoch 10/10:  66%|▋| 656/991 [2:47:12<1:24:59, 15.22s/batch, batch_loss=4.22e+3,

Epoch 10/10:  66%|▋| 657/991 [2:47:12<1:23:25, 14.99s/batch, batch_loss=4.22e+3,

Epoch 10/10:  66%|▋| 657/991 [2:47:26<1:23:25, 14.99s/batch, batch_loss=2.2e+4, 

Epoch 10/10:  66%|▋| 658/991 [2:47:26<1:21:13, 14.63s/batch, batch_loss=2.2e+4, 

Epoch 10/10:  66%|▋| 658/991 [2:47:39<1:21:13, 14.63s/batch, batch_loss=4.49, ba

Epoch 10/10:  66%|▋| 659/991 [2:47:39<1:18:47, 14.24s/batch, batch_loss=4.49, ba

Epoch 10/10:  66%|▋| 659/991 [2:47:54<1:18:47, 14.24s/batch, batch_loss=5.29, ba

Epoch 10/10:  67%|▋| 660/991 [2:47:54<1:19:44, 14.45s/batch, batch_loss=5.29, ba

Epoch 10/10:  67%|▋| 660/991 [2:48:13<1:19:44, 14.45s/batch, batch_loss=13.5, ba

Epoch 10/10:  67%|▋| 661/991 [2:48:13<1:25:59, 15.64s/batch, batch_loss=13.5, ba

Epoch 10/10:  67%|▋| 661/991 [2:48:29<1:25:59, 15.64s/batch, batch_loss=16.1, ba

Epoch 10/10:  67%|▋| 662/991 [2:48:29<1:26:06, 15.70s/batch, batch_loss=16.1, ba

Epoch 10/10:  67%|▋| 662/991 [2:48:43<1:26:06, 15.70s/batch, batch_loss=16.6, ba

Epoch 10/10:  67%|▋| 663/991 [2:48:43<1:24:07, 15.39s/batch, batch_loss=16.6, ba

Epoch 10/10:  67%|▋| 663/991 [2:48:59<1:24:07, 15.39s/batch, batch_loss=3.05e+3,

Epoch 10/10:  67%|▋| 664/991 [2:48:59<1:23:45, 15.37s/batch, batch_loss=3.05e+3,

Epoch 10/10:  67%|▋| 664/991 [2:49:14<1:23:45, 15.37s/batch, batch_loss=14.5, ba

Epoch 10/10:  67%|▋| 665/991 [2:49:14<1:22:39, 15.21s/batch, batch_loss=14.5, ba

Epoch 10/10:  67%|▋| 665/991 [2:49:29<1:22:39, 15.21s/batch, batch_loss=3.06e+3,

Epoch 10/10:  67%|▋| 666/991 [2:49:29<1:22:55, 15.31s/batch, batch_loss=3.06e+3,

Epoch 10/10:  67%|▋| 666/991 [2:49:44<1:22:55, 15.31s/batch, batch_loss=19.9, ba

Epoch 10/10:  67%|▋| 667/991 [2:49:44<1:22:09, 15.21s/batch, batch_loss=19.9, ba

Epoch 10/10:  67%|▋| 667/991 [2:49:59<1:22:09, 15.21s/batch, batch_loss=376, bat

Epoch 10/10:  67%|▋| 668/991 [2:49:59<1:21:45, 15.19s/batch, batch_loss=376, bat

Epoch 10/10:  67%|▋| 668/991 [2:50:17<1:21:45, 15.19s/batch, batch_loss=2.96e+3,

Epoch 10/10:  68%|▋| 669/991 [2:50:17<1:26:31, 16.12s/batch, batch_loss=2.96e+3,

Epoch 10/10:  68%|▋| 669/991 [2:50:32<1:26:31, 16.12s/batch, batch_loss=1.01e+3,

Epoch 10/10:  68%|▋| 670/991 [2:50:32<1:24:02, 15.71s/batch, batch_loss=1.01e+3,

Epoch 10/10:  68%|▋| 670/991 [2:50:48<1:24:02, 15.71s/batch, batch_loss=10.4, ba

Epoch 10/10:  68%|▋| 671/991 [2:50:48<1:23:25, 15.64s/batch, batch_loss=10.4, ba

Epoch 10/10:  68%|▋| 671/991 [2:51:03<1:23:25, 15.64s/batch, batch_loss=15.2, ba

Epoch 10/10:  68%|▋| 672/991 [2:51:03<1:22:43, 15.56s/batch, batch_loss=15.2, ba

Epoch 10/10:  68%|▋| 672/991 [2:51:19<1:22:43, 15.56s/batch, batch_loss=17.3, ba

Epoch 10/10:  68%|▋| 673/991 [2:51:19<1:23:16, 15.71s/batch, batch_loss=17.3, ba

Epoch 10/10:  68%|▋| 673/991 [2:51:35<1:23:16, 15.71s/batch, batch_loss=16.3, ba

Epoch 10/10:  68%|▋| 674/991 [2:51:35<1:22:57, 15.70s/batch, batch_loss=16.3, ba

Epoch 10/10:  68%|▋| 674/991 [2:51:50<1:22:57, 15.70s/batch, batch_loss=4.92, ba

Epoch 10/10:  68%|▋| 675/991 [2:51:50<1:21:58, 15.57s/batch, batch_loss=4.92, ba

Epoch 10/10:  68%|▋| 675/991 [2:52:05<1:21:58, 15.57s/batch, batch_loss=8.57, ba

Epoch 10/10:  68%|▋| 676/991 [2:52:05<1:20:53, 15.41s/batch, batch_loss=8.57, ba

Epoch 10/10:  68%|▋| 676/991 [2:52:23<1:20:53, 15.41s/batch, batch_loss=15.8, ba

Epoch 10/10:  68%|▋| 677/991 [2:52:23<1:24:09, 16.08s/batch, batch_loss=15.8, ba

Epoch 10/10:  68%|▋| 677/991 [2:52:37<1:24:09, 16.08s/batch, batch_loss=7.03, ba

Epoch 10/10:  68%|▋| 678/991 [2:52:37<1:21:45, 15.67s/batch, batch_loss=7.03, ba

Epoch 10/10:  68%|▋| 678/991 [2:52:53<1:21:45, 15.67s/batch, batch_loss=3.82e+3,

Epoch 10/10:  69%|▋| 679/991 [2:52:53<1:21:09, 15.61s/batch, batch_loss=3.82e+3,

Epoch 10/10:  69%|▋| 679/991 [2:53:08<1:21:09, 15.61s/batch, batch_loss=6.1e+3, 

Epoch 10/10:  69%|▋| 680/991 [2:53:08<1:19:45, 15.39s/batch, batch_loss=6.1e+3, 

Epoch 10/10:  69%|▋| 680/991 [2:53:23<1:19:45, 15.39s/batch, batch_loss=7.24e+4,

Epoch 10/10:  69%|▋| 681/991 [2:53:23<1:18:33, 15.20s/batch, batch_loss=7.24e+4,

Epoch 10/10:  69%|▋| 681/991 [2:53:38<1:18:33, 15.20s/batch, batch_loss=14.7, ba

Epoch 10/10:  69%|▋| 682/991 [2:53:38<1:18:42, 15.28s/batch, batch_loss=14.7, ba

Epoch 10/10:  69%|▋| 682/991 [2:53:52<1:18:42, 15.28s/batch, batch_loss=377, bat

Epoch 10/10:  69%|▋| 683/991 [2:53:52<1:17:06, 15.02s/batch, batch_loss=377, bat

Epoch 10/10:  69%|▋| 683/991 [2:54:08<1:17:06, 15.02s/batch, batch_loss=5.58, ba

Epoch 10/10:  69%|▋| 684/991 [2:54:08<1:16:55, 15.03s/batch, batch_loss=5.58, ba

Epoch 10/10:  69%|▋| 684/991 [2:54:26<1:16:55, 15.03s/batch, batch_loss=13.3, ba

Epoch 10/10:  69%|▋| 685/991 [2:54:26<1:21:45, 16.03s/batch, batch_loss=13.3, ba

Epoch 10/10:  69%|▋| 685/991 [2:54:40<1:21:45, 16.03s/batch, batch_loss=12.2, ba

Epoch 10/10:  69%|▋| 686/991 [2:54:40<1:19:04, 15.55s/batch, batch_loss=12.2, ba

Epoch 10/10:  69%|▋| 686/991 [2:54:56<1:19:04, 15.55s/batch, batch_loss=539, bat

Epoch 10/10:  69%|▋| 687/991 [2:54:56<1:18:22, 15.47s/batch, batch_loss=539, bat

Epoch 10/10:  69%|▋| 687/991 [2:55:11<1:18:22, 15.47s/batch, batch_loss=5.76, ba

Epoch 10/10:  69%|▋| 688/991 [2:55:11<1:17:40, 15.38s/batch, batch_loss=5.76, ba

Epoch 10/10:  69%|▋| 688/991 [2:55:26<1:17:40, 15.38s/batch, batch_loss=8.05, ba

Epoch 10/10:  70%|▋| 689/991 [2:55:26<1:16:48, 15.26s/batch, batch_loss=8.05, ba

Epoch 10/10:  70%|▋| 689/991 [2:55:41<1:16:48, 15.26s/batch, batch_loss=13, batc

Epoch 10/10:  70%|▋| 690/991 [2:55:41<1:15:59, 15.15s/batch, batch_loss=13, batc

Epoch 10/10:  70%|▋| 690/991 [2:55:55<1:15:59, 15.15s/batch, batch_loss=17.3, ba

Epoch 10/10:  70%|▋| 691/991 [2:55:55<1:14:35, 14.92s/batch, batch_loss=17.3, ba

Epoch 10/10:  70%|▋| 691/991 [2:56:11<1:14:35, 14.92s/batch, batch_loss=5.96, ba

Epoch 10/10:  70%|▋| 692/991 [2:56:11<1:15:20, 15.12s/batch, batch_loss=5.96, ba

Epoch 10/10:  70%|▋| 692/991 [2:56:29<1:15:20, 15.12s/batch, batch_loss=4.65e+3,

Epoch 10/10:  70%|▋| 693/991 [2:56:29<1:19:15, 15.96s/batch, batch_loss=4.65e+3,

Epoch 10/10:  70%|▋| 693/991 [2:56:43<1:19:15, 15.96s/batch, batch_loss=459, bat

Epoch 10/10:  70%|▋| 694/991 [2:56:43<1:17:31, 15.66s/batch, batch_loss=459, bat

Epoch 10/10:  70%|▋| 694/991 [2:56:58<1:17:31, 15.66s/batch, batch_loss=775, bat

Epoch 10/10:  70%|▋| 695/991 [2:56:58<1:16:07, 15.43s/batch, batch_loss=775, bat

Epoch 10/10:  70%|▋| 695/991 [2:57:13<1:16:07, 15.43s/batch, batch_loss=9.26, ba

Epoch 10/10:  70%|▋| 696/991 [2:57:13<1:15:07, 15.28s/batch, batch_loss=9.26, ba

Epoch 10/10:  70%|▋| 696/991 [2:57:28<1:15:07, 15.28s/batch, batch_loss=6.8e+3, 

Epoch 10/10:  70%|▋| 697/991 [2:57:28<1:14:31, 15.21s/batch, batch_loss=6.8e+3, 

Epoch 10/10:  70%|▋| 697/991 [2:57:44<1:14:31, 15.21s/batch, batch_loss=11.4, ba

Epoch 10/10:  70%|▋| 698/991 [2:57:44<1:14:47, 15.31s/batch, batch_loss=11.4, ba

Epoch 10/10:  70%|▋| 698/991 [2:57:59<1:14:47, 15.31s/batch, batch_loss=8.67, ba

Epoch 10/10:  71%|▋| 699/991 [2:57:59<1:13:32, 15.11s/batch, batch_loss=8.67, ba

Epoch 10/10:  71%|▋| 699/991 [2:58:13<1:13:32, 15.11s/batch, batch_loss=9.7, bat

Epoch 10/10:  71%|▋| 700/991 [2:58:13<1:12:43, 15.00s/batch, batch_loss=9.7, bat

Epoch 10/10:  71%|▋| 700/991 [2:58:28<1:12:43, 15.00s/batch, batch_loss=216, bat

Epoch 10/10:  71%|▋| 701/991 [2:58:28<1:12:03, 14.91s/batch, batch_loss=216, bat

Epoch 10/10:  71%|▋| 701/991 [2:58:43<1:12:03, 14.91s/batch, batch_loss=18.1, ba

Epoch 10/10:  71%|▋| 702/991 [2:58:43<1:12:07, 14.97s/batch, batch_loss=18.1, ba

Epoch 10/10:  71%|▋| 702/991 [2:58:58<1:12:07, 14.97s/batch, batch_loss=274, bat

Epoch 10/10:  71%|▋| 703/991 [2:58:58<1:11:43, 14.94s/batch, batch_loss=274, bat

Epoch 10/10:  71%|▋| 703/991 [2:59:13<1:11:43, 14.94s/batch, batch_loss=8.97, ba

Epoch 10/10:  71%|▋| 704/991 [2:59:13<1:11:32, 14.96s/batch, batch_loss=8.97, ba

Epoch 10/10:  71%|▋| 704/991 [2:59:28<1:11:32, 14.96s/batch, batch_loss=10.8, ba

Epoch 10/10:  71%|▋| 705/991 [2:59:28<1:11:03, 14.91s/batch, batch_loss=10.8, ba

Epoch 10/10:  71%|▋| 705/991 [2:59:43<1:11:03, 14.91s/batch, batch_loss=17, batc

Epoch 10/10:  71%|▋| 706/991 [2:59:43<1:10:44, 14.89s/batch, batch_loss=17, batc

Epoch 10/10:  71%|▋| 706/991 [2:59:57<1:10:44, 14.89s/batch, batch_loss=15.6, ba

Epoch 10/10:  71%|▋| 707/991 [2:59:57<1:09:35, 14.70s/batch, batch_loss=15.6, ba

Epoch 10/10:  71%|▋| 707/991 [3:00:12<1:09:35, 14.70s/batch, batch_loss=9.07, ba

Epoch 10/10:  71%|▋| 708/991 [3:00:12<1:09:14, 14.68s/batch, batch_loss=9.07, ba

Epoch 10/10:  71%|▋| 708/991 [3:00:29<1:09:14, 14.68s/batch, batch_loss=8.19, ba

Epoch 10/10:  72%|▋| 709/991 [3:00:29<1:12:41, 15.47s/batch, batch_loss=8.19, ba

Epoch 10/10:  72%|▋| 709/991 [3:00:45<1:12:41, 15.47s/batch, batch_loss=34.4, ba

Epoch 10/10:  72%|▋| 710/991 [3:00:45<1:12:53, 15.57s/batch, batch_loss=34.4, ba

Epoch 10/10:  72%|▋| 710/991 [3:01:00<1:12:53, 15.57s/batch, batch_loss=97.7, ba

Epoch 10/10:  72%|▋| 711/991 [3:01:00<1:12:22, 15.51s/batch, batch_loss=97.7, ba

Epoch 10/10:  72%|▋| 711/991 [3:01:15<1:12:22, 15.51s/batch, batch_loss=14.3, ba

Epoch 10/10:  72%|▋| 712/991 [3:01:15<1:12:02, 15.49s/batch, batch_loss=14.3, ba

Epoch 10/10:  72%|▋| 712/991 [3:01:31<1:12:02, 15.49s/batch, batch_loss=76.5, ba

Epoch 10/10:  72%|▋| 713/991 [3:01:31<1:12:18, 15.61s/batch, batch_loss=76.5, ba

Epoch 10/10:  72%|▋| 713/991 [3:01:47<1:12:18, 15.61s/batch, batch_loss=20.3, ba

Epoch 10/10:  72%|▋| 714/991 [3:01:47<1:11:32, 15.49s/batch, batch_loss=20.3, ba

Epoch 10/10:  72%|▋| 714/991 [3:02:01<1:11:32, 15.49s/batch, batch_loss=17.3, ba

Epoch 10/10:  72%|▋| 715/991 [3:02:01<1:10:22, 15.30s/batch, batch_loss=17.3, ba

Epoch 10/10:  72%|▋| 715/991 [3:02:17<1:10:22, 15.30s/batch, batch_loss=15.4, ba

Epoch 10/10:  72%|▋| 716/991 [3:02:17<1:10:15, 15.33s/batch, batch_loss=15.4, ba

Epoch 10/10:  72%|▋| 716/991 [3:02:33<1:10:15, 15.33s/batch, batch_loss=15.5, ba

Epoch 10/10:  72%|▋| 717/991 [3:02:33<1:10:52, 15.52s/batch, batch_loss=15.5, ba

Epoch 10/10:  72%|▋| 717/991 [3:02:50<1:10:52, 15.52s/batch, batch_loss=20.6, ba

Epoch 10/10:  72%|▋| 718/991 [3:02:50<1:12:54, 16.02s/batch, batch_loss=20.6, ba

Epoch 10/10:  72%|▋| 718/991 [3:03:06<1:12:54, 16.02s/batch, batch_loss=12, batc

Epoch 10/10:  73%|▋| 719/991 [3:03:06<1:12:18, 15.95s/batch, batch_loss=12, batc

Epoch 10/10:  73%|▋| 719/991 [3:03:21<1:12:18, 15.95s/batch, batch_loss=12.5, ba

Epoch 10/10:  73%|▋| 720/991 [3:03:21<1:10:29, 15.61s/batch, batch_loss=12.5, ba

Epoch 10/10:  73%|▋| 720/991 [3:03:35<1:10:29, 15.61s/batch, batch_loss=18.7, ba

Epoch 10/10:  73%|▋| 721/991 [3:03:35<1:09:20, 15.41s/batch, batch_loss=18.7, ba

Epoch 10/10:  73%|▋| 721/991 [3:03:51<1:09:20, 15.41s/batch, batch_loss=19.8, ba

Epoch 10/10:  73%|▋| 722/991 [3:03:51<1:08:42, 15.32s/batch, batch_loss=19.8, ba

Epoch 10/10:  73%|▋| 722/991 [3:04:06<1:08:42, 15.32s/batch, batch_loss=7.22e+3,

Epoch 10/10:  73%|▋| 723/991 [3:04:06<1:07:59, 15.22s/batch, batch_loss=7.22e+3,

Epoch 10/10:  73%|▋| 723/991 [3:04:19<1:07:59, 15.22s/batch, batch_loss=4.63, ba

Epoch 10/10:  73%|▋| 724/991 [3:04:19<1:05:48, 14.79s/batch, batch_loss=4.63, ba

Epoch 10/10:  73%|▋| 724/991 [3:04:34<1:05:48, 14.79s/batch, batch_loss=15.7, ba

Epoch 10/10:  73%|▋| 725/991 [3:04:34<1:05:45, 14.83s/batch, batch_loss=15.7, ba

Epoch 10/10:  73%|▋| 725/991 [3:04:49<1:05:45, 14.83s/batch, batch_loss=12.3, ba

Epoch 10/10:  73%|▋| 726/991 [3:04:49<1:05:43, 14.88s/batch, batch_loss=12.3, ba

Epoch 10/10:  73%|▋| 726/991 [3:05:05<1:05:43, 14.88s/batch, batch_loss=1.3e+4, 

Epoch 10/10:  73%|▋| 727/991 [3:05:05<1:06:08, 15.03s/batch, batch_loss=1.3e+4, 

Epoch 10/10:  73%|▋| 727/991 [3:05:20<1:06:08, 15.03s/batch, batch_loss=12.7, ba

Epoch 10/10:  73%|▋| 728/991 [3:05:20<1:06:12, 15.10s/batch, batch_loss=12.7, ba

Epoch 10/10:  73%|▋| 728/991 [3:05:35<1:06:12, 15.10s/batch, batch_loss=130, bat

Epoch 10/10:  74%|▋| 729/991 [3:05:35<1:06:01, 15.12s/batch, batch_loss=130, bat

Epoch 10/10:  74%|▋| 729/991 [3:05:50<1:06:01, 15.12s/batch, batch_loss=11.3, ba

Epoch 10/10:  74%|▋| 730/991 [3:05:50<1:05:50, 15.14s/batch, batch_loss=11.3, ba

Epoch 10/10:  74%|▋| 730/991 [3:06:05<1:05:50, 15.14s/batch, batch_loss=108, bat

Epoch 10/10:  74%|▋| 731/991 [3:06:05<1:04:34, 14.90s/batch, batch_loss=108, bat

Epoch 10/10:  74%|▋| 731/991 [3:06:19<1:04:34, 14.90s/batch, batch_loss=1.39e+4,

Epoch 10/10:  74%|▋| 732/991 [3:06:19<1:04:13, 14.88s/batch, batch_loss=1.39e+4,

Epoch 10/10:  74%|▋| 732/991 [3:06:34<1:04:13, 14.88s/batch, batch_loss=18, batc

Epoch 10/10:  74%|▋| 733/991 [3:06:34<1:03:17, 14.72s/batch, batch_loss=18, batc

Epoch 10/10:  74%|▋| 733/991 [3:06:49<1:03:17, 14.72s/batch, batch_loss=6.81e+3,

Epoch 10/10:  74%|▋| 734/991 [3:06:49<1:03:10, 14.75s/batch, batch_loss=6.81e+3,

Epoch 10/10:  74%|▋| 734/991 [3:07:04<1:03:10, 14.75s/batch, batch_loss=18.4, ba

Epoch 10/10:  74%|▋| 735/991 [3:07:04<1:03:42, 14.93s/batch, batch_loss=18.4, ba

Epoch 10/10:  74%|▋| 735/991 [3:07:19<1:03:42, 14.93s/batch, batch_loss=14.8, ba

Epoch 10/10:  74%|▋| 736/991 [3:07:19<1:03:13, 14.88s/batch, batch_loss=14.8, ba

Epoch 10/10:  74%|▋| 736/991 [3:07:35<1:03:13, 14.88s/batch, batch_loss=9.25, ba

Epoch 10/10:  74%|▋| 737/991 [3:07:35<1:04:30, 15.24s/batch, batch_loss=9.25, ba

Epoch 10/10:  74%|▋| 737/991 [3:07:49<1:04:30, 15.24s/batch, batch_loss=1.48e+3,

Epoch 10/10:  74%|▋| 738/991 [3:07:49<1:03:16, 15.00s/batch, batch_loss=1.48e+3,

Epoch 10/10:  74%|▋| 738/991 [3:08:05<1:03:16, 15.00s/batch, batch_loss=28.7, ba

Epoch 10/10:  75%|▋| 739/991 [3:08:05<1:03:21, 15.09s/batch, batch_loss=28.7, ba

Epoch 10/10:  75%|▋| 739/991 [3:08:20<1:03:21, 15.09s/batch, batch_loss=9.22, ba

Epoch 10/10:  75%|▋| 740/991 [3:08:20<1:03:56, 15.29s/batch, batch_loss=9.22, ba

Epoch 10/10:  75%|▋| 740/991 [3:08:37<1:03:56, 15.29s/batch, batch_loss=1.81e+4,

Epoch 10/10:  75%|▋| 741/991 [3:08:37<1:05:06, 15.63s/batch, batch_loss=1.81e+4,

Epoch 10/10:  75%|▋| 741/991 [3:08:52<1:05:06, 15.63s/batch, batch_loss=2.27e+3,

Epoch 10/10:  75%|▋| 742/991 [3:08:52<1:04:46, 15.61s/batch, batch_loss=2.27e+3,

Epoch 10/10:  75%|▋| 742/991 [3:09:07<1:04:46, 15.61s/batch, batch_loss=9.58, ba

Epoch 10/10:  75%|▋| 743/991 [3:09:07<1:03:45, 15.43s/batch, batch_loss=9.58, ba

Epoch 10/10:  75%|▋| 743/991 [3:09:22<1:03:45, 15.43s/batch, batch_loss=13.9, ba

Epoch 10/10:  75%|▊| 744/991 [3:09:22<1:03:04, 15.32s/batch, batch_loss=13.9, ba

Epoch 10/10:  75%|▊| 744/991 [3:09:37<1:03:04, 15.32s/batch, batch_loss=15.2, ba

Epoch 10/10:  75%|▊| 745/991 [3:09:37<1:02:00, 15.12s/batch, batch_loss=15.2, ba

Epoch 10/10:  75%|▊| 745/991 [3:09:52<1:02:00, 15.12s/batch, batch_loss=1.15e+3,

Epoch 10/10:  75%|▊| 746/991 [3:09:52<1:01:50, 15.15s/batch, batch_loss=1.15e+3,

Epoch 10/10:  75%|▊| 746/991 [3:10:07<1:01:50, 15.15s/batch, batch_loss=3.87e+3,

Epoch 10/10:  75%|▊| 747/991 [3:10:07<1:00:59, 15.00s/batch, batch_loss=3.87e+3,

Epoch 10/10:  75%|▊| 747/991 [3:10:24<1:00:59, 15.00s/batch, batch_loss=13, batc

Epoch 10/10:  75%|▊| 748/991 [3:10:24<1:03:30, 15.68s/batch, batch_loss=13, batc

Epoch 10/10:  75%|▊| 748/991 [3:10:40<1:03:30, 15.68s/batch, batch_loss=11.8, ba

Epoch 10/10:  76%|▊| 749/991 [3:10:40<1:03:40, 15.79s/batch, batch_loss=11.8, ba

Epoch 10/10:  76%|▊| 749/991 [3:10:54<1:03:40, 15.79s/batch, batch_loss=11.6, ba

Epoch 10/10:  76%|▊| 750/991 [3:10:54<1:01:30, 15.31s/batch, batch_loss=11.6, ba

Epoch 10/10:  76%|▊| 750/991 [3:11:10<1:01:30, 15.31s/batch, batch_loss=10.8, ba

Epoch 10/10:  76%|▊| 751/991 [3:11:10<1:01:15, 15.31s/batch, batch_loss=10.8, ba

Epoch 10/10:  76%|▊| 751/991 [3:11:25<1:01:15, 15.31s/batch, batch_loss=6.16, ba

Epoch 10/10:  76%|▊| 752/991 [3:11:25<1:00:27, 15.18s/batch, batch_loss=6.16, ba

Epoch 10/10:  76%|▊| 752/991 [3:11:40<1:00:27, 15.18s/batch, batch_loss=7.66, ba

Epoch 10/10:  76%|▊| 753/991 [3:11:40<1:00:16, 15.20s/batch, batch_loss=7.66, ba

Epoch 10/10:  76%|▊| 753/991 [3:11:55<1:00:16, 15.20s/batch, batch_loss=5.14, ba

Epoch 10/10:  76%|▊| 754/991 [3:11:55<1:00:05, 15.21s/batch, batch_loss=5.14, ba

Epoch 10/10:  76%|▊| 754/991 [3:12:10<1:00:05, 15.21s/batch, batch_loss=13.4, ba

Epoch 10/10:  76%|▊| 755/991 [3:12:10<59:27, 15.12s/batch, batch_loss=13.4, batc

Epoch 10/10:  76%|▊| 755/991 [3:12:28<59:27, 15.12s/batch, batch_loss=14.6, batc

Epoch 10/10:  76%|▊| 756/991 [3:12:28<1:02:12, 15.88s/batch, batch_loss=14.6, ba

Epoch 10/10:  76%|▊| 756/991 [3:12:43<1:02:12, 15.88s/batch, batch_loss=5.44, ba

Epoch 10/10:  76%|▊| 757/991 [3:12:43<1:00:56, 15.63s/batch, batch_loss=5.44, ba

Epoch 10/10:  76%|▊| 757/991 [3:12:58<1:00:56, 15.63s/batch, batch_loss=15.6, ba

Epoch 10/10:  76%|▊| 758/991 [3:12:58<1:00:40, 15.62s/batch, batch_loss=15.6, ba

Epoch 10/10:  76%|▊| 758/991 [3:13:13<1:00:40, 15.62s/batch, batch_loss=14.3, ba

Epoch 10/10:  77%|▊| 759/991 [3:13:13<59:49, 15.47s/batch, batch_loss=14.3, batc

Epoch 10/10:  77%|▊| 759/991 [3:13:28<59:49, 15.47s/batch, batch_loss=17.1, batc

Epoch 10/10:  77%|▊| 760/991 [3:13:28<58:16, 15.14s/batch, batch_loss=17.1, batc

Epoch 10/10:  77%|▊| 760/991 [3:13:42<58:16, 15.14s/batch, batch_loss=18.7, batc

Epoch 10/10:  77%|▊| 761/991 [3:13:42<57:21, 14.96s/batch, batch_loss=18.7, batc

Epoch 10/10:  77%|▊| 761/991 [3:13:57<57:21, 14.96s/batch, batch_loss=24.3, batc

Epoch 10/10:  77%|▊| 762/991 [3:13:57<56:41, 14.86s/batch, batch_loss=24.3, batc

Epoch 10/10:  77%|▊| 762/991 [3:14:12<56:41, 14.86s/batch, batch_loss=516, batch

Epoch 10/10:  77%|▊| 763/991 [3:14:12<56:48, 14.95s/batch, batch_loss=516, batch

Epoch 10/10:  77%|▊| 763/991 [3:14:28<56:48, 14.95s/batch, batch_loss=10.1, batc

Epoch 10/10:  77%|▊| 764/991 [3:14:28<57:20, 15.16s/batch, batch_loss=10.1, batc

Epoch 10/10:  77%|▊| 764/991 [3:14:43<57:20, 15.16s/batch, batch_loss=3.4, batch

Epoch 10/10:  77%|▊| 765/991 [3:14:43<57:17, 15.21s/batch, batch_loss=3.4, batch

Epoch 10/10:  77%|▊| 765/991 [3:14:58<57:17, 15.21s/batch, batch_loss=12.8, batc

Epoch 10/10:  77%|▊| 766/991 [3:14:58<56:52, 15.16s/batch, batch_loss=12.8, batc

Epoch 10/10:  77%|▊| 766/991 [3:15:14<56:52, 15.16s/batch, batch_loss=13.5, batc

Epoch 10/10:  77%|▊| 767/991 [3:15:14<57:24, 15.38s/batch, batch_loss=13.5, batc

Epoch 10/10:  77%|▊| 767/991 [3:15:29<57:24, 15.38s/batch, batch_loss=4.54, batc

Epoch 10/10:  77%|▊| 768/991 [3:15:29<56:25, 15.18s/batch, batch_loss=4.54, batc

Epoch 10/10:  77%|▊| 768/991 [3:15:43<56:25, 15.18s/batch, batch_loss=2.28, batc

Epoch 10/10:  78%|▊| 769/991 [3:15:43<54:53, 14.83s/batch, batch_loss=2.28, batc

Epoch 10/10:  78%|▊| 769/991 [3:15:58<54:53, 14.83s/batch, batch_loss=11.5, batc

Epoch 10/10:  78%|▊| 770/991 [3:15:58<54:35, 14.82s/batch, batch_loss=11.5, batc

Epoch 10/10:  78%|▊| 770/991 [3:16:13<54:35, 14.82s/batch, batch_loss=2.74e+3, b

Epoch 10/10:  78%|▊| 771/991 [3:16:13<54:47, 14.94s/batch, batch_loss=2.74e+3, b

Epoch 10/10:  78%|▊| 771/991 [3:16:27<54:47, 14.94s/batch, batch_loss=4.56, batc

Epoch 10/10:  78%|▊| 772/991 [3:16:27<53:28, 14.65s/batch, batch_loss=4.56, batc

Epoch 10/10:  78%|▊| 772/991 [3:16:41<53:28, 14.65s/batch, batch_loss=1.23, batc

Epoch 10/10:  78%|▊| 773/991 [3:16:41<52:33, 14.47s/batch, batch_loss=1.23, batc

Epoch 10/10:  78%|▊| 773/991 [3:16:58<52:33, 14.47s/batch, batch_loss=7.67, batc

Epoch 10/10:  78%|▊| 774/991 [3:16:58<55:25, 15.33s/batch, batch_loss=7.67, batc

Epoch 10/10:  78%|▊| 774/991 [3:17:12<55:25, 15.33s/batch, batch_loss=7.98, batc

Epoch 10/10:  78%|▊| 775/991 [3:17:12<54:00, 15.00s/batch, batch_loss=7.98, batc

Epoch 10/10:  78%|▊| 775/991 [3:17:25<54:00, 15.00s/batch, batch_loss=256, batch

Epoch 10/10:  78%|▊| 776/991 [3:17:25<51:12, 14.29s/batch, batch_loss=256, batch

Epoch 10/10:  78%|▊| 776/991 [3:17:38<51:12, 14.29s/batch, batch_loss=0.616, bat

Epoch 10/10:  78%|▊| 777/991 [3:17:38<49:46, 13.95s/batch, batch_loss=0.616, bat

Epoch 10/10:  78%|▊| 777/991 [3:17:52<49:46, 13.95s/batch, batch_loss=0.854, bat

Epoch 10/10:  79%|▊| 778/991 [3:17:52<49:40, 14.00s/batch, batch_loss=0.854, bat

Epoch 10/10:  79%|▊| 778/991 [3:18:07<49:40, 14.00s/batch, batch_loss=5.63, batc

Epoch 10/10:  79%|▊| 779/991 [3:18:07<49:45, 14.08s/batch, batch_loss=5.63, batc

Epoch 10/10:  79%|▊| 779/991 [3:18:22<49:45, 14.08s/batch, batch_loss=3.28, batc

Epoch 10/10:  79%|▊| 780/991 [3:18:22<51:10, 14.55s/batch, batch_loss=3.28, batc

Epoch 10/10:  79%|▊| 780/991 [3:18:37<51:10, 14.55s/batch, batch_loss=3.16, batc

Epoch 10/10:  79%|▊| 781/991 [3:18:37<51:10, 14.62s/batch, batch_loss=3.16, batc

Epoch 10/10:  79%|▊| 781/991 [3:18:51<51:10, 14.62s/batch, batch_loss=2.51e+4, b

Epoch 10/10:  79%|▊| 782/991 [3:18:51<50:35, 14.52s/batch, batch_loss=2.51e+4, b

Epoch 10/10:  79%|▊| 782/991 [3:19:06<50:35, 14.52s/batch, batch_loss=17.7, batc

Epoch 10/10:  79%|▊| 783/991 [3:19:06<50:29, 14.57s/batch, batch_loss=17.7, batc

Epoch 10/10:  79%|▊| 783/991 [3:19:21<50:29, 14.57s/batch, batch_loss=14.2, batc

Epoch 10/10:  79%|▊| 784/991 [3:19:21<50:40, 14.69s/batch, batch_loss=14.2, batc

Epoch 10/10:  79%|▊| 784/991 [3:19:36<50:40, 14.69s/batch, batch_loss=13.2, batc

Epoch 10/10:  79%|▊| 785/991 [3:19:36<50:51, 14.81s/batch, batch_loss=13.2, batc

Epoch 10/10:  79%|▊| 785/991 [3:19:52<50:51, 14.81s/batch, batch_loss=8.73, batc

Epoch 10/10:  79%|▊| 786/991 [3:19:52<52:08, 15.26s/batch, batch_loss=8.73, batc

Epoch 10/10:  79%|▊| 786/991 [3:20:08<52:08, 15.26s/batch, batch_loss=2.48e+4, b

Epoch 10/10:  79%|▊| 787/991 [3:20:08<52:08, 15.34s/batch, batch_loss=2.48e+4, b

Epoch 10/10:  79%|▊| 787/991 [3:20:26<52:08, 15.34s/batch, batch_loss=686, batch

Epoch 10/10:  80%|▊| 788/991 [3:20:26<54:52, 16.22s/batch, batch_loss=686, batch

Epoch 10/10:  80%|▊| 788/991 [3:20:41<54:52, 16.22s/batch, batch_loss=18.5, batc

Epoch 10/10:  80%|▊| 789/991 [3:20:41<53:30, 15.90s/batch, batch_loss=18.5, batc

Epoch 10/10:  80%|▊| 789/991 [3:20:56<53:30, 15.90s/batch, batch_loss=13.7, batc

Epoch 10/10:  80%|▊| 790/991 [3:20:56<52:22, 15.63s/batch, batch_loss=13.7, batc

Epoch 10/10:  80%|▊| 790/991 [3:21:12<52:22, 15.63s/batch, batch_loss=13.6, batc

Epoch 10/10:  80%|▊| 791/991 [3:21:12<52:21, 15.71s/batch, batch_loss=13.6, batc

Epoch 10/10:  80%|▊| 791/991 [3:21:27<52:21, 15.71s/batch, batch_loss=1.04e+4, b

Epoch 10/10:  80%|▊| 792/991 [3:21:27<51:28, 15.52s/batch, batch_loss=1.04e+4, b

Epoch 10/10:  80%|▊| 792/991 [3:21:42<51:28, 15.52s/batch, batch_loss=8.21, batc

Epoch 10/10:  80%|▊| 793/991 [3:21:42<50:38, 15.35s/batch, batch_loss=8.21, batc

Epoch 10/10:  80%|▊| 793/991 [3:21:57<50:38, 15.35s/batch, batch_loss=1.78, batc

Epoch 10/10:  80%|▊| 794/991 [3:21:57<50:16, 15.31s/batch, batch_loss=1.78, batc

Epoch 10/10:  80%|▊| 794/991 [3:22:13<50:16, 15.31s/batch, batch_loss=7.45, batc

Epoch 10/10:  80%|▊| 795/991 [3:22:13<50:00, 15.31s/batch, batch_loss=7.45, batc

Epoch 10/10:  80%|▊| 795/991 [3:22:30<50:00, 15.31s/batch, batch_loss=11.5, batc

Epoch 10/10:  80%|▊| 796/991 [3:22:30<51:38, 15.89s/batch, batch_loss=11.5, batc

Epoch 10/10:  80%|▊| 796/991 [3:22:44<51:38, 15.89s/batch, batch_loss=19.6, batc

Epoch 10/10:  80%|▊| 797/991 [3:22:44<50:06, 15.50s/batch, batch_loss=19.6, batc

Epoch 10/10:  80%|▊| 797/991 [3:22:58<50:06, 15.50s/batch, batch_loss=336, batch

Epoch 10/10:  81%|▊| 798/991 [3:22:58<48:15, 15.00s/batch, batch_loss=336, batch

Epoch 10/10:  81%|▊| 798/991 [3:23:13<48:15, 15.00s/batch, batch_loss=10.6, batc

Epoch 10/10:  81%|▊| 799/991 [3:23:13<48:03, 15.02s/batch, batch_loss=10.6, batc

Epoch 10/10:  81%|▊| 799/991 [3:23:28<48:03, 15.02s/batch, batch_loss=15.2, batc

Epoch 10/10:  81%|▊| 800/991 [3:23:28<47:49, 15.02s/batch, batch_loss=15.2, batc

Epoch 10/10:  81%|▊| 800/991 [3:23:44<47:49, 15.02s/batch, batch_loss=11, batch_

Epoch 10/10:  81%|▊| 801/991 [3:23:44<47:52, 15.12s/batch, batch_loss=11, batch_

Epoch 10/10:  81%|▊| 801/991 [3:23:59<47:52, 15.12s/batch, batch_loss=14.7, batc

Epoch 10/10:  81%|▊| 802/991 [3:23:59<47:15, 15.00s/batch, batch_loss=14.7, batc

Epoch 10/10:  81%|▊| 802/991 [3:24:14<47:15, 15.00s/batch, batch_loss=6.24, batc

Epoch 10/10:  81%|▊| 803/991 [3:24:14<47:08, 15.04s/batch, batch_loss=6.24, batc

Epoch 10/10:  81%|▊| 803/991 [3:24:31<47:08, 15.04s/batch, batch_loss=12, batch_

Epoch 10/10:  81%|▊| 804/991 [3:24:31<49:00, 15.72s/batch, batch_loss=12, batch_

Epoch 10/10:  81%|▊| 804/991 [3:24:47<49:00, 15.72s/batch, batch_loss=6.87, batc

Epoch 10/10:  81%|▊| 805/991 [3:24:47<48:41, 15.71s/batch, batch_loss=6.87, batc

Epoch 10/10:  81%|▊| 805/991 [3:25:02<48:41, 15.71s/batch, batch_loss=10.2, batc

Epoch 10/10:  81%|▊| 806/991 [3:25:02<48:13, 15.64s/batch, batch_loss=10.2, batc

Epoch 10/10:  81%|▊| 806/991 [3:25:17<48:13, 15.64s/batch, batch_loss=8.89, batc

Epoch 10/10:  81%|▊| 807/991 [3:25:17<46:58, 15.32s/batch, batch_loss=8.89, batc

Epoch 10/10:  81%|▊| 807/991 [3:25:32<46:58, 15.32s/batch, batch_loss=17, batch_

Epoch 10/10:  82%|▊| 808/991 [3:25:32<46:23, 15.21s/batch, batch_loss=17, batch_

Epoch 10/10:  82%|▊| 808/991 [3:25:47<46:23, 15.21s/batch, batch_loss=1.21e+4, b

Epoch 10/10:  82%|▊| 809/991 [3:25:47<46:15, 15.25s/batch, batch_loss=1.21e+4, b

Epoch 10/10:  82%|▊| 809/991 [3:26:02<46:15, 15.25s/batch, batch_loss=14.2, batc

Epoch 10/10:  82%|▊| 810/991 [3:26:02<45:35, 15.12s/batch, batch_loss=14.2, batc

Epoch 10/10:  82%|▊| 810/991 [3:26:17<45:35, 15.12s/batch, batch_loss=8.12, batc

Epoch 10/10:  82%|▊| 811/991 [3:26:17<45:04, 15.03s/batch, batch_loss=8.12, batc

Epoch 10/10:  82%|▊| 811/991 [3:26:32<45:04, 15.03s/batch, batch_loss=7.23, batc

Epoch 10/10:  82%|▊| 812/991 [3:26:32<44:50, 15.03s/batch, batch_loss=7.23, batc

Epoch 10/10:  82%|▊| 812/991 [3:26:46<44:50, 15.03s/batch, batch_loss=7.34, batc

Epoch 10/10:  82%|▊| 813/991 [3:26:46<44:04, 14.86s/batch, batch_loss=7.34, batc

Epoch 10/10:  82%|▊| 813/991 [3:27:00<44:04, 14.86s/batch, batch_loss=10.9, batc

Epoch 10/10:  82%|▊| 814/991 [3:27:00<43:06, 14.61s/batch, batch_loss=10.9, batc

Epoch 10/10:  82%|▊| 814/991 [3:27:16<43:06, 14.61s/batch, batch_loss=7.57, batc

Epoch 10/10:  82%|▊| 815/991 [3:27:16<43:35, 14.86s/batch, batch_loss=7.57, batc

Epoch 10/10:  82%|▊| 815/991 [3:27:31<43:35, 14.86s/batch, batch_loss=90.6, batc

Epoch 10/10:  82%|▊| 816/991 [3:27:31<43:27, 14.90s/batch, batch_loss=90.6, batc

Epoch 10/10:  82%|▊| 816/991 [3:27:45<43:27, 14.90s/batch, batch_loss=358, batch

Epoch 10/10:  82%|▊| 817/991 [3:27:45<43:10, 14.89s/batch, batch_loss=358, batch

Epoch 10/10:  82%|▊| 817/991 [3:28:01<43:10, 14.89s/batch, batch_loss=361, batch

Epoch 10/10:  83%|▊| 818/991 [3:28:01<43:08, 14.96s/batch, batch_loss=361, batch

Epoch 10/10:  83%|▊| 818/991 [3:28:15<43:08, 14.96s/batch, batch_loss=13.1, batc

Epoch 10/10:  83%|▊| 819/991 [3:28:15<42:52, 14.96s/batch, batch_loss=13.1, batc

Epoch 10/10:  83%|▊| 819/991 [3:28:33<42:52, 14.96s/batch, batch_loss=6.88, batc

Epoch 10/10:  83%|▊| 820/991 [3:28:33<45:06, 15.83s/batch, batch_loss=6.88, batc

Epoch 10/10:  83%|▊| 820/991 [3:28:49<45:06, 15.83s/batch, batch_loss=7.59, batc

Epoch 10/10:  83%|▊| 821/991 [3:28:49<44:38, 15.75s/batch, batch_loss=7.59, batc

Epoch 10/10:  83%|▊| 821/991 [3:29:04<44:38, 15.75s/batch, batch_loss=8.76, batc

Epoch 10/10:  83%|▊| 822/991 [3:29:04<43:59, 15.62s/batch, batch_loss=8.76, batc

Epoch 10/10:  83%|▊| 822/991 [3:29:20<43:59, 15.62s/batch, batch_loss=155, batch

Epoch 10/10:  83%|▊| 823/991 [3:29:20<43:39, 15.59s/batch, batch_loss=155, batch

Epoch 10/10:  83%|▊| 823/991 [3:29:35<43:39, 15.59s/batch, batch_loss=7.43, batc

Epoch 10/10:  83%|▊| 824/991 [3:29:35<42:56, 15.43s/batch, batch_loss=7.43, batc

Epoch 10/10:  83%|▊| 824/991 [3:29:50<42:56, 15.43s/batch, batch_loss=13, batch_

Epoch 10/10:  83%|▊| 825/991 [3:29:50<42:09, 15.24s/batch, batch_loss=13, batch_

Epoch 10/10:  83%|▊| 825/991 [3:30:04<42:09, 15.24s/batch, batch_loss=2.6e+3, ba

Epoch 10/10:  83%|▊| 826/991 [3:30:04<40:55, 14.88s/batch, batch_loss=2.6e+3, ba

Epoch 10/10:  83%|▊| 826/991 [3:30:19<40:55, 14.88s/batch, batch_loss=21.4, batc

Epoch 10/10:  83%|▊| 827/991 [3:30:19<41:09, 15.06s/batch, batch_loss=21.4, batc

Epoch 10/10:  83%|▊| 827/991 [3:30:33<41:09, 15.06s/batch, batch_loss=21.4, batc

Epoch 10/10:  84%|▊| 828/991 [3:30:33<40:12, 14.80s/batch, batch_loss=21.4, batc

Epoch 10/10:  84%|▊| 828/991 [3:30:49<40:12, 14.80s/batch, batch_loss=7.03, batc

Epoch 10/10:  84%|▊| 829/991 [3:30:49<40:27, 14.99s/batch, batch_loss=7.03, batc

Epoch 10/10:  84%|▊| 829/991 [3:31:04<40:27, 14.99s/batch, batch_loss=12.5, batc

Epoch 10/10:  84%|▊| 830/991 [3:31:04<40:02, 14.92s/batch, batch_loss=12.5, batc

Epoch 10/10:  84%|▊| 830/991 [3:31:18<40:02, 14.92s/batch, batch_loss=9.81, batc

Epoch 10/10:  84%|▊| 831/991 [3:31:18<39:46, 14.92s/batch, batch_loss=9.81, batc

Epoch 10/10:  84%|▊| 831/991 [3:31:33<39:46, 14.92s/batch, batch_loss=13.5, batc

Epoch 10/10:  84%|▊| 832/991 [3:31:33<39:22, 14.86s/batch, batch_loss=13.5, batc

Epoch 10/10:  84%|▊| 832/991 [3:31:48<39:22, 14.86s/batch, batch_loss=216, batch

Epoch 10/10:  84%|▊| 833/991 [3:31:48<38:52, 14.76s/batch, batch_loss=216, batch

Epoch 10/10:  84%|▊| 833/991 [3:32:03<38:52, 14.76s/batch, batch_loss=16.8, batc

Epoch 10/10:  84%|▊| 834/991 [3:32:03<38:56, 14.88s/batch, batch_loss=16.8, batc

Epoch 10/10:  84%|▊| 834/991 [3:32:18<38:56, 14.88s/batch, batch_loss=12.1, batc

Epoch 10/10:  84%|▊| 835/991 [3:32:18<38:46, 14.91s/batch, batch_loss=12.1, batc

Epoch 10/10:  84%|▊| 835/991 [3:32:35<38:46, 14.91s/batch, batch_loss=3.27e+3, b

Epoch 10/10:  84%|▊| 836/991 [3:32:35<40:25, 15.65s/batch, batch_loss=3.27e+3, b

Epoch 10/10:  84%|▊| 836/991 [3:32:51<40:25, 15.65s/batch, batch_loss=4.91e+3, b

Epoch 10/10:  84%|▊| 837/991 [3:32:51<40:00, 15.59s/batch, batch_loss=4.91e+3, b

Epoch 10/10:  84%|▊| 837/991 [3:33:06<40:00, 15.59s/batch, batch_loss=16, batch_

Epoch 10/10:  85%|▊| 838/991 [3:33:06<39:24, 15.46s/batch, batch_loss=16, batch_

Epoch 10/10:  85%|▊| 838/991 [3:33:21<39:24, 15.46s/batch, batch_loss=4.36, batc

Epoch 10/10:  85%|▊| 839/991 [3:33:21<39:15, 15.50s/batch, batch_loss=4.36, batc

Epoch 10/10:  85%|▊| 839/991 [3:33:37<39:15, 15.50s/batch, batch_loss=4.86, batc

Epoch 10/10:  85%|▊| 840/991 [3:33:37<38:48, 15.42s/batch, batch_loss=4.86, batc

Epoch 10/10:  85%|▊| 840/991 [3:33:52<38:48, 15.42s/batch, batch_loss=12.8, batc

Epoch 10/10:  85%|▊| 841/991 [3:33:52<38:36, 15.45s/batch, batch_loss=12.8, batc

Epoch 10/10:  85%|▊| 841/991 [3:34:07<38:36, 15.45s/batch, batch_loss=14, batch_

Epoch 10/10:  85%|▊| 842/991 [3:34:07<38:13, 15.39s/batch, batch_loss=14, batch_

Epoch 10/10:  85%|▊| 842/991 [3:34:22<38:13, 15.39s/batch, batch_loss=7.26, batc

Epoch 10/10:  85%|▊| 843/991 [3:34:22<37:32, 15.22s/batch, batch_loss=7.26, batc

Epoch 10/10:  85%|▊| 843/991 [3:34:36<37:32, 15.22s/batch, batch_loss=1.68e+3, b

Epoch 10/10:  85%|▊| 844/991 [3:34:36<36:28, 14.89s/batch, batch_loss=1.68e+3, b

Epoch 10/10:  85%|▊| 844/991 [3:34:51<36:28, 14.89s/batch, batch_loss=18.1, batc

Epoch 10/10:  85%|▊| 845/991 [3:34:51<36:03, 14.82s/batch, batch_loss=18.1, batc

Epoch 10/10:  85%|▊| 845/991 [3:35:08<36:03, 14.82s/batch, batch_loss=1.18e+4, b

Epoch 10/10:  85%|▊| 846/991 [3:35:08<37:07, 15.36s/batch, batch_loss=1.18e+4, b

Epoch 10/10:  85%|▊| 846/991 [3:35:23<37:07, 15.36s/batch, batch_loss=23.9, batc

Epoch 10/10:  85%|▊| 847/991 [3:35:23<36:47, 15.33s/batch, batch_loss=23.9, batc

Epoch 10/10:  85%|▊| 847/991 [3:35:39<36:47, 15.33s/batch, batch_loss=22.5, batc

Epoch 10/10:  86%|▊| 848/991 [3:35:39<36:55, 15.49s/batch, batch_loss=22.5, batc

Epoch 10/10:  86%|▊| 848/991 [3:35:54<36:55, 15.49s/batch, batch_loss=1e+3, batc

Epoch 10/10:  86%|▊| 849/991 [3:35:54<36:23, 15.38s/batch, batch_loss=1e+3, batc

Epoch 10/10:  86%|▊| 849/991 [3:36:08<36:23, 15.38s/batch, batch_loss=8.12, batc

Epoch 10/10:  86%|▊| 850/991 [3:36:08<35:18, 15.03s/batch, batch_loss=8.12, batc

Epoch 10/10:  86%|▊| 850/991 [3:36:22<35:18, 15.03s/batch, batch_loss=18.9, batc

Epoch 10/10:  86%|▊| 851/991 [3:36:22<34:30, 14.79s/batch, batch_loss=18.9, batc

Epoch 10/10:  86%|▊| 851/991 [3:36:37<34:30, 14.79s/batch, batch_loss=14.1, batc

Epoch 10/10:  86%|▊| 852/991 [3:36:37<34:02, 14.69s/batch, batch_loss=14.1, batc

Epoch 10/10:  86%|▊| 852/991 [3:36:52<34:02, 14.69s/batch, batch_loss=7.64e+3, b

Epoch 10/10:  86%|▊| 853/991 [3:36:52<33:53, 14.74s/batch, batch_loss=7.64e+3, b

Epoch 10/10:  86%|▊| 853/991 [3:37:07<33:53, 14.74s/batch, batch_loss=17.6, batc

Epoch 10/10:  86%|▊| 854/991 [3:37:07<34:01, 14.90s/batch, batch_loss=17.6, batc

Epoch 10/10:  86%|▊| 854/991 [3:37:22<34:01, 14.90s/batch, batch_loss=7.75, batc

Epoch 10/10:  86%|▊| 855/991 [3:37:22<34:08, 15.07s/batch, batch_loss=7.75, batc

Epoch 10/10:  86%|▊| 855/991 [3:37:37<34:08, 15.07s/batch, batch_loss=8.34, batc

Epoch 10/10:  86%|▊| 856/991 [3:37:37<33:37, 14.95s/batch, batch_loss=8.34, batc

Epoch 10/10:  86%|▊| 856/991 [3:37:51<33:37, 14.95s/batch, batch_loss=8.34, batc

Epoch 10/10:  86%|▊| 857/991 [3:37:51<33:00, 14.78s/batch, batch_loss=8.34, batc

Epoch 10/10:  86%|▊| 857/991 [3:38:06<33:00, 14.78s/batch, batch_loss=22, batch_

Epoch 10/10:  87%|▊| 858/991 [3:38:06<32:22, 14.61s/batch, batch_loss=22, batch_

Epoch 10/10:  87%|▊| 858/991 [3:38:21<32:22, 14.61s/batch, batch_loss=13, batch_

Epoch 10/10:  87%|▊| 859/991 [3:38:21<32:23, 14.72s/batch, batch_loss=13, batch_

Epoch 10/10:  87%|▊| 859/991 [3:38:35<32:23, 14.72s/batch, batch_loss=20.2, batc

Epoch 10/10:  87%|▊| 860/991 [3:38:35<31:43, 14.53s/batch, batch_loss=20.2, batc

Epoch 10/10:  87%|▊| 860/991 [3:38:49<31:43, 14.53s/batch, batch_loss=7.76, batc

Epoch 10/10:  87%|▊| 861/991 [3:38:49<31:26, 14.51s/batch, batch_loss=7.76, batc

Epoch 10/10:  87%|▊| 861/991 [3:39:04<31:26, 14.51s/batch, batch_loss=15, batch_

Epoch 10/10:  87%|▊| 862/991 [3:39:04<31:33, 14.68s/batch, batch_loss=15, batch_

Epoch 10/10:  87%|▊| 862/991 [3:39:19<31:33, 14.68s/batch, batch_loss=25.2, batc

Epoch 10/10:  87%|▊| 863/991 [3:39:19<31:30, 14.77s/batch, batch_loss=25.2, batc

Epoch 10/10:  87%|▊| 863/991 [3:39:34<31:30, 14.77s/batch, batch_loss=9.42, batc

Epoch 10/10:  87%|▊| 864/991 [3:39:34<31:31, 14.89s/batch, batch_loss=9.42, batc

Epoch 10/10:  87%|▊| 864/991 [3:39:49<31:31, 14.89s/batch, batch_loss=15.5, batc

Epoch 10/10:  87%|▊| 865/991 [3:39:49<31:10, 14.84s/batch, batch_loss=15.5, batc

Epoch 10/10:  87%|▊| 865/991 [3:40:03<31:10, 14.84s/batch, batch_loss=20, batch_

Epoch 10/10:  87%|▊| 866/991 [3:40:03<30:33, 14.67s/batch, batch_loss=20, batch_

Epoch 10/10:  87%|▊| 866/991 [3:40:18<30:33, 14.67s/batch, batch_loss=21.1, batc

Epoch 10/10:  87%|▊| 867/991 [3:40:18<30:27, 14.74s/batch, batch_loss=21.1, batc

Epoch 10/10:  87%|▊| 867/991 [3:40:34<30:27, 14.74s/batch, batch_loss=18.4, batc

Epoch 10/10:  88%|▉| 868/991 [3:40:34<30:56, 15.09s/batch, batch_loss=18.4, batc

Epoch 10/10:  88%|▉| 868/991 [3:40:50<30:56, 15.09s/batch, batch_loss=11.3, batc

Epoch 10/10:  88%|▉| 869/991 [3:40:50<30:49, 15.16s/batch, batch_loss=11.3, batc

Epoch 10/10:  88%|▉| 869/991 [3:41:04<30:49, 15.16s/batch, batch_loss=12.3, batc

Epoch 10/10:  88%|▉| 870/991 [3:41:04<29:59, 14.88s/batch, batch_loss=12.3, batc

Epoch 10/10:  88%|▉| 870/991 [3:41:18<29:59, 14.88s/batch, batch_loss=7.21, batc

Epoch 10/10:  88%|▉| 871/991 [3:41:18<29:28, 14.73s/batch, batch_loss=7.21, batc

Epoch 10/10:  88%|▉| 871/991 [3:41:32<29:28, 14.73s/batch, batch_loss=17.6, batc

Epoch 10/10:  88%|▉| 872/991 [3:41:32<28:51, 14.55s/batch, batch_loss=17.6, batc

Epoch 10/10:  88%|▉| 872/991 [3:41:47<28:51, 14.55s/batch, batch_loss=14, batch_

Epoch 10/10:  88%|▉| 873/991 [3:41:47<28:48, 14.65s/batch, batch_loss=14, batch_

Epoch 10/10:  88%|▉| 873/991 [3:42:02<28:48, 14.65s/batch, batch_loss=6.96, batc

Epoch 10/10:  88%|▉| 874/991 [3:42:02<28:57, 14.85s/batch, batch_loss=6.96, batc

Epoch 10/10:  88%|▉| 874/991 [3:42:18<28:57, 14.85s/batch, batch_loss=12.5, batc

Epoch 10/10:  88%|▉| 875/991 [3:42:18<29:08, 15.07s/batch, batch_loss=12.5, batc

Epoch 10/10:  88%|▉| 875/991 [3:42:33<29:08, 15.07s/batch, batch_loss=24.1, batc

Epoch 10/10:  88%|▉| 876/991 [3:42:33<28:57, 15.11s/batch, batch_loss=24.1, batc

Epoch 10/10:  88%|▉| 876/991 [3:42:48<28:57, 15.11s/batch, batch_loss=18.9, batc

Epoch 10/10:  88%|▉| 877/991 [3:42:48<28:23, 14.95s/batch, batch_loss=18.9, batc

Epoch 10/10:  88%|▉| 877/991 [3:43:02<28:23, 14.95s/batch, batch_loss=27.4, batc

Epoch 10/10:  89%|▉| 878/991 [3:43:02<27:55, 14.83s/batch, batch_loss=27.4, batc

Epoch 10/10:  89%|▉| 878/991 [3:43:17<27:55, 14.83s/batch, batch_loss=16.8, batc

Epoch 10/10:  89%|▉| 879/991 [3:43:17<27:41, 14.83s/batch, batch_loss=16.8, batc

Epoch 10/10:  89%|▉| 879/991 [3:43:32<27:41, 14.83s/batch, batch_loss=11.1, batc

Epoch 10/10:  89%|▉| 880/991 [3:43:32<27:39, 14.95s/batch, batch_loss=11.1, batc

Epoch 10/10:  89%|▉| 880/991 [3:43:47<27:39, 14.95s/batch, batch_loss=5.12e+3, b

Epoch 10/10:  89%|▉| 881/991 [3:43:47<27:28, 14.99s/batch, batch_loss=5.12e+3, b

Epoch 10/10:  89%|▉| 881/991 [3:44:02<27:28, 14.99s/batch, batch_loss=15.5, batc

Epoch 10/10:  89%|▉| 882/991 [3:44:02<27:08, 14.94s/batch, batch_loss=15.5, batc

Epoch 10/10:  89%|▉| 882/991 [3:44:17<27:08, 14.94s/batch, batch_loss=15.1, batc

Epoch 10/10:  89%|▉| 883/991 [3:44:17<26:56, 14.97s/batch, batch_loss=15.1, batc

Epoch 10/10:  89%|▉| 883/991 [3:44:33<26:56, 14.97s/batch, batch_loss=8.62, batc

Epoch 10/10:  89%|▉| 884/991 [3:44:33<27:03, 15.17s/batch, batch_loss=8.62, batc

Epoch 10/10:  89%|▉| 884/991 [3:44:48<27:03, 15.17s/batch, batch_loss=13.3, batc

Epoch 10/10:  89%|▉| 885/991 [3:44:48<26:37, 15.07s/batch, batch_loss=13.3, batc

Epoch 10/10:  89%|▉| 885/991 [3:45:03<26:37, 15.07s/batch, batch_loss=16, batch_

Epoch 10/10:  89%|▉| 886/991 [3:45:03<26:34, 15.19s/batch, batch_loss=16, batch_

Epoch 10/10:  89%|▉| 886/991 [3:45:18<26:34, 15.19s/batch, batch_loss=1.93e+4, b

Epoch 10/10:  90%|▉| 887/991 [3:45:18<26:05, 15.05s/batch, batch_loss=1.93e+4, b

Epoch 10/10:  90%|▉| 887/991 [3:45:33<26:05, 15.05s/batch, batch_loss=16.4, batc

Epoch 10/10:  90%|▉| 888/991 [3:45:33<25:34, 14.90s/batch, batch_loss=16.4, batc

Epoch 10/10:  90%|▉| 888/991 [3:45:48<25:34, 14.90s/batch, batch_loss=18.2, batc

Epoch 10/10:  90%|▉| 889/991 [3:45:48<25:28, 14.99s/batch, batch_loss=18.2, batc

Epoch 10/10:  90%|▉| 889/991 [3:46:03<25:28, 14.99s/batch, batch_loss=11.8, batc

Epoch 10/10:  90%|▉| 890/991 [3:46:03<25:19, 15.04s/batch, batch_loss=11.8, batc

Epoch 10/10:  90%|▉| 890/991 [3:46:18<25:19, 15.04s/batch, batch_loss=14.3, batc

Epoch 10/10:  90%|▉| 891/991 [3:46:18<24:56, 14.97s/batch, batch_loss=14.3, batc

Epoch 10/10:  90%|▉| 891/991 [3:46:34<24:56, 14.97s/batch, batch_loss=17.6, batc

Epoch 10/10:  90%|▉| 892/991 [3:46:34<25:33, 15.49s/batch, batch_loss=17.6, batc

Epoch 10/10:  90%|▉| 892/991 [3:46:49<25:33, 15.49s/batch, batch_loss=3.7e+3, ba

Epoch 10/10:  90%|▉| 893/991 [3:46:49<24:53, 15.24s/batch, batch_loss=3.7e+3, ba

Epoch 10/10:  90%|▉| 893/991 [3:47:05<24:53, 15.24s/batch, batch_loss=10.3, batc

Epoch 10/10:  90%|▉| 894/991 [3:47:05<24:50, 15.37s/batch, batch_loss=10.3, batc

Epoch 10/10:  90%|▉| 894/991 [3:47:19<24:50, 15.37s/batch, batch_loss=12.9, batc

Epoch 10/10:  90%|▉| 895/991 [3:47:19<23:51, 14.91s/batch, batch_loss=12.9, batc

Epoch 10/10:  90%|▉| 895/991 [3:47:31<23:51, 14.91s/batch, batch_loss=10.7, batc

Epoch 10/10:  90%|▉| 896/991 [3:47:31<22:31, 14.23s/batch, batch_loss=10.7, batc

Epoch 10/10:  90%|▉| 896/991 [3:47:45<22:31, 14.23s/batch, batch_loss=15.2, batc

Epoch 10/10:  91%|▉| 897/991 [3:47:45<21:55, 13.99s/batch, batch_loss=15.2, batc

Epoch 10/10:  91%|▉| 897/991 [3:47:59<21:55, 13.99s/batch, batch_loss=20.4, batc

Epoch 10/10:  91%|▉| 898/991 [3:47:59<22:00, 14.20s/batch, batch_loss=20.4, batc

Epoch 10/10:  91%|▉| 898/991 [3:48:14<22:00, 14.20s/batch, batch_loss=14.6, batc

Epoch 10/10:  91%|▉| 899/991 [3:48:14<22:05, 14.41s/batch, batch_loss=14.6, batc

Epoch 10/10:  91%|▉| 899/991 [3:48:30<22:05, 14.41s/batch, batch_loss=17.9, batc

Epoch 10/10:  91%|▉| 900/991 [3:48:30<22:35, 14.90s/batch, batch_loss=17.9, batc

Epoch 10/10:  91%|▉| 900/991 [3:48:49<22:35, 14.90s/batch, batch_loss=12.1, batc

Epoch 10/10:  91%|▉| 901/991 [3:48:49<23:56, 15.96s/batch, batch_loss=12.1, batc

Epoch 10/10:  91%|▉| 901/991 [3:49:04<23:56, 15.96s/batch, batch_loss=12, batch_

Epoch 10/10:  91%|▉| 902/991 [3:49:04<23:33, 15.88s/batch, batch_loss=12, batch_

Epoch 10/10:  91%|▉| 902/991 [3:49:20<23:33, 15.88s/batch, batch_loss=6.92, batc

Epoch 10/10:  91%|▉| 903/991 [3:49:20<23:20, 15.92s/batch, batch_loss=6.92, batc

Epoch 10/10:  91%|▉| 903/991 [3:49:37<23:20, 15.92s/batch, batch_loss=7.66, batc

Epoch 10/10:  91%|▉| 904/991 [3:49:37<23:16, 16.06s/batch, batch_loss=7.66, batc

Epoch 10/10:  91%|▉| 904/991 [3:49:53<23:16, 16.06s/batch, batch_loss=26.4, batc

Epoch 10/10:  91%|▉| 905/991 [3:49:53<23:07, 16.13s/batch, batch_loss=26.4, batc

Epoch 10/10:  91%|▉| 905/991 [3:50:12<23:07, 16.13s/batch, batch_loss=19.2, batc

Epoch 10/10:  91%|▉| 906/991 [3:50:12<24:09, 17.06s/batch, batch_loss=19.2, batc

Epoch 10/10:  91%|▉| 906/991 [3:50:29<24:09, 17.06s/batch, batch_loss=18.7, batc

Epoch 10/10:  92%|▉| 907/991 [3:50:29<23:32, 16.82s/batch, batch_loss=18.7, batc

Epoch 10/10:  92%|▉| 907/991 [3:50:44<23:32, 16.82s/batch, batch_loss=12.1, batc

Epoch 10/10:  92%|▉| 908/991 [3:50:44<22:34, 16.32s/batch, batch_loss=12.1, batc

Epoch 10/10:  92%|▉| 908/991 [3:50:59<22:34, 16.32s/batch, batch_loss=5.61, batc

Epoch 10/10:  92%|▉| 909/991 [3:50:59<22:03, 16.14s/batch, batch_loss=5.61, batc

Epoch 10/10:  92%|▉| 909/991 [3:51:15<22:03, 16.14s/batch, batch_loss=684, batch

Epoch 10/10:  92%|▉| 910/991 [3:51:15<21:36, 16.01s/batch, batch_loss=684, batch

Epoch 10/10:  92%|▉| 910/991 [3:51:30<21:36, 16.01s/batch, batch_loss=1.02e+3, b

Epoch 10/10:  92%|▉| 911/991 [3:51:30<20:44, 15.55s/batch, batch_loss=1.02e+3, b

Epoch 10/10:  92%|▉| 911/991 [3:51:45<20:44, 15.55s/batch, batch_loss=23.4, batc

Epoch 10/10:  92%|▉| 912/991 [3:51:45<20:25, 15.51s/batch, batch_loss=23.4, batc

Epoch 10/10:  92%|▉| 912/991 [3:52:00<20:25, 15.51s/batch, batch_loss=23, batch_

Epoch 10/10:  92%|▉| 913/991 [3:52:00<19:55, 15.32s/batch, batch_loss=23, batch_

Epoch 10/10:  92%|▉| 913/991 [3:52:18<19:55, 15.32s/batch, batch_loss=18.9, batc

Epoch 10/10:  92%|▉| 914/991 [3:52:18<20:51, 16.25s/batch, batch_loss=18.9, batc

Epoch 10/10:  92%|▉| 914/991 [3:52:34<20:51, 16.25s/batch, batch_loss=17.5, batc

Epoch 10/10:  92%|▉| 915/991 [3:52:34<20:17, 16.02s/batch, batch_loss=17.5, batc

Epoch 10/10:  92%|▉| 915/991 [3:52:50<20:17, 16.02s/batch, batch_loss=14.6, batc

Epoch 10/10:  92%|▉| 916/991 [3:52:50<19:59, 15.99s/batch, batch_loss=14.6, batc

Epoch 10/10:  92%|▉| 916/991 [3:53:05<19:59, 15.99s/batch, batch_loss=7.07, batc

Epoch 10/10:  93%|▉| 917/991 [3:53:05<19:30, 15.82s/batch, batch_loss=7.07, batc

Epoch 10/10:  93%|▉| 917/991 [3:53:21<19:30, 15.82s/batch, batch_loss=13.6, batc

Epoch 10/10:  93%|▉| 918/991 [3:53:21<19:17, 15.85s/batch, batch_loss=13.6, batc

Epoch 10/10:  93%|▉| 918/991 [3:53:37<19:17, 15.85s/batch, batch_loss=12.7, batc

Epoch 10/10:  93%|▉| 919/991 [3:53:37<18:57, 15.79s/batch, batch_loss=12.7, batc

Epoch 10/10:  93%|▉| 919/991 [3:53:52<18:57, 15.79s/batch, batch_loss=13.1, batc

Epoch 10/10:  93%|▉| 920/991 [3:53:52<18:38, 15.76s/batch, batch_loss=13.1, batc

Epoch 10/10:  93%|▉| 920/991 [3:54:07<18:38, 15.76s/batch, batch_loss=18.2, batc

Epoch 10/10:  93%|▉| 921/991 [3:54:07<17:52, 15.32s/batch, batch_loss=18.2, batc

Epoch 10/10:  93%|▉| 921/991 [3:54:25<17:52, 15.32s/batch, batch_loss=22.4, batc

Epoch 10/10:  93%|▉| 922/991 [3:54:25<18:41, 16.26s/batch, batch_loss=22.4, batc

Epoch 10/10:  93%|▉| 922/991 [3:54:41<18:41, 16.26s/batch, batch_loss=6.04, batc

Epoch 10/10:  93%|▉| 923/991 [3:54:41<18:13, 16.08s/batch, batch_loss=6.04, batc

Epoch 10/10:  93%|▉| 923/991 [3:54:57<18:13, 16.08s/batch, batch_loss=11, batch_

Epoch 10/10:  93%|▉| 924/991 [3:54:57<17:48, 15.95s/batch, batch_loss=11, batch_

Epoch 10/10:  93%|▉| 924/991 [3:55:11<17:48, 15.95s/batch, batch_loss=10.4, batc

Epoch 10/10:  93%|▉| 925/991 [3:55:11<17:11, 15.63s/batch, batch_loss=10.4, batc

Epoch 10/10:  93%|▉| 925/991 [3:55:27<17:11, 15.63s/batch, batch_loss=3e+4, batc

Epoch 10/10:  93%|▉| 926/991 [3:55:27<16:46, 15.49s/batch, batch_loss=3e+4, batc

Epoch 10/10:  93%|▉| 926/991 [3:55:42<16:46, 15.49s/batch, batch_loss=6.4, batch

Epoch 10/10:  94%|▉| 927/991 [3:55:42<16:25, 15.39s/batch, batch_loss=6.4, batch

Epoch 10/10:  94%|▉| 927/991 [3:55:57<16:25, 15.39s/batch, batch_loss=855, batch

Epoch 10/10:  94%|▉| 928/991 [3:55:57<16:12, 15.43s/batch, batch_loss=855, batch

Epoch 10/10:  94%|▉| 928/991 [3:56:13<16:12, 15.43s/batch, batch_loss=10.6, batc

Epoch 10/10:  94%|▉| 929/991 [3:56:13<16:07, 15.60s/batch, batch_loss=10.6, batc

Epoch 10/10:  94%|▉| 929/991 [3:56:29<16:07, 15.60s/batch, batch_loss=8.54, batc

Epoch 10/10:  94%|▉| 930/991 [3:56:29<15:55, 15.66s/batch, batch_loss=8.54, batc

Epoch 10/10:  94%|▉| 930/991 [3:56:48<15:55, 15.66s/batch, batch_loss=12.3, batc

Epoch 10/10:  94%|▉| 931/991 [3:56:48<16:33, 16.56s/batch, batch_loss=12.3, batc

Epoch 10/10:  94%|▉| 931/991 [3:57:03<16:33, 16.56s/batch, batch_loss=12.6, batc

Epoch 10/10:  94%|▉| 932/991 [3:57:03<15:52, 16.14s/batch, batch_loss=12.6, batc

Epoch 10/10:  94%|▉| 932/991 [3:57:18<15:52, 16.14s/batch, batch_loss=11.9, batc

Epoch 10/10:  94%|▉| 933/991 [3:57:18<15:20, 15.87s/batch, batch_loss=11.9, batc

Epoch 10/10:  94%|▉| 933/991 [3:57:34<15:20, 15.87s/batch, batch_loss=2.37, batc

Epoch 10/10:  94%|▉| 934/991 [3:57:34<15:07, 15.92s/batch, batch_loss=2.37, batc

Epoch 10/10:  94%|▉| 934/991 [3:57:50<15:07, 15.92s/batch, batch_loss=1.86, batc

Epoch 10/10:  94%|▉| 935/991 [3:57:50<14:52, 15.94s/batch, batch_loss=1.86, batc

Epoch 10/10:  94%|▉| 935/991 [3:58:04<14:52, 15.94s/batch, batch_loss=164, batch

Epoch 10/10:  94%|▉| 936/991 [3:58:04<14:10, 15.46s/batch, batch_loss=164, batch

Epoch 10/10:  94%|▉| 936/991 [3:58:20<14:10, 15.46s/batch, batch_loss=35.8, batc

Epoch 10/10:  95%|▉| 937/991 [3:58:20<13:52, 15.42s/batch, batch_loss=35.8, batc

Epoch 10/10:  95%|▉| 937/991 [3:58:36<13:52, 15.42s/batch, batch_loss=9.57, batc

Epoch 10/10:  95%|▉| 938/991 [3:58:36<13:44, 15.56s/batch, batch_loss=9.57, batc

Epoch 10/10:  95%|▉| 938/991 [3:58:51<13:44, 15.56s/batch, batch_loss=9.11, batc

Epoch 10/10:  95%|▉| 939/991 [3:58:51<13:24, 15.47s/batch, batch_loss=9.11, batc

Epoch 10/10:  95%|▉| 939/991 [3:59:07<13:24, 15.47s/batch, batch_loss=417, batch

Epoch 10/10:  95%|▉| 940/991 [3:59:07<13:12, 15.55s/batch, batch_loss=417, batch

Epoch 10/10:  95%|▉| 940/991 [3:59:22<13:12, 15.55s/batch, batch_loss=16.7, batc

Epoch 10/10:  95%|▉| 941/991 [3:59:22<12:55, 15.52s/batch, batch_loss=16.7, batc

Epoch 10/10:  95%|▉| 941/991 [3:59:37<12:55, 15.52s/batch, batch_loss=13.5, batc

Epoch 10/10:  95%|▉| 942/991 [3:59:37<12:25, 15.22s/batch, batch_loss=13.5, batc

Epoch 10/10:  95%|▉| 942/991 [3:59:51<12:25, 15.22s/batch, batch_loss=9.61, batc

Epoch 10/10:  95%|▉| 943/991 [3:59:51<12:01, 15.02s/batch, batch_loss=9.61, batc

Epoch 10/10:  95%|▉| 943/991 [4:00:07<12:01, 15.02s/batch, batch_loss=14.8, batc

Epoch 10/10:  95%|▉| 944/991 [4:00:07<11:51, 15.13s/batch, batch_loss=14.8, batc

Epoch 10/10:  95%|▉| 944/991 [4:00:25<11:51, 15.13s/batch, batch_loss=1.62, batc

Epoch 10/10:  95%|▉| 945/991 [4:00:25<12:18, 16.06s/batch, batch_loss=1.62, batc

Epoch 10/10:  95%|▉| 945/991 [4:00:41<12:18, 16.06s/batch, batch_loss=12.2, batc

Epoch 10/10:  95%|▉| 946/991 [4:00:41<12:08, 16.19s/batch, batch_loss=12.2, batc

Epoch 10/10:  95%|▉| 946/991 [4:00:56<12:08, 16.19s/batch, batch_loss=14, batch_

Epoch 10/10:  96%|▉| 947/991 [4:00:56<11:39, 15.89s/batch, batch_loss=14, batch_

Epoch 10/10:  96%|▉| 947/991 [4:01:11<11:39, 15.89s/batch, batch_loss=11, batch_

Epoch 10/10:  96%|▉| 948/991 [4:01:11<11:10, 15.58s/batch, batch_loss=11, batch_

Epoch 10/10:  96%|▉| 948/991 [4:01:26<11:10, 15.58s/batch, batch_loss=5.59, batc

Epoch 10/10:  96%|▉| 949/991 [4:01:26<10:40, 15.25s/batch, batch_loss=5.59, batc

Epoch 10/10:  96%|▉| 949/991 [4:01:40<10:40, 15.25s/batch, batch_loss=7.98, batc

Epoch 10/10:  96%|▉| 950/991 [4:01:40<10:13, 14.97s/batch, batch_loss=7.98, batc

Epoch 10/10:  96%|▉| 950/991 [4:01:54<10:13, 14.97s/batch, batch_loss=15.2, batc

Epoch 10/10:  96%|▉| 951/991 [4:01:54<09:49, 14.73s/batch, batch_loss=15.2, batc

Epoch 10/10:  96%|▉| 951/991 [4:02:10<09:49, 14.73s/batch, batch_loss=15.8, batc

Epoch 10/10:  96%|▉| 952/991 [4:02:10<09:40, 14.88s/batch, batch_loss=15.8, batc

Epoch 10/10:  96%|▉| 952/991 [4:02:25<09:40, 14.88s/batch, batch_loss=7.55, batc

Epoch 10/10:  96%|▉| 953/991 [4:02:25<09:33, 15.09s/batch, batch_loss=7.55, batc

Epoch 10/10:  96%|▉| 953/991 [4:02:42<09:33, 15.09s/batch, batch_loss=331, batch

Epoch 10/10:  96%|▉| 954/991 [4:02:42<09:42, 15.73s/batch, batch_loss=331, batch

Epoch 10/10:  96%|▉| 954/991 [4:02:57<09:42, 15.73s/batch, batch_loss=14, batch_

Epoch 10/10:  96%|▉| 955/991 [4:02:57<09:16, 15.46s/batch, batch_loss=14, batch_

Epoch 10/10:  96%|▉| 955/991 [4:03:12<09:16, 15.46s/batch, batch_loss=15.2, batc

Epoch 10/10:  96%|▉| 956/991 [4:03:12<08:55, 15.31s/batch, batch_loss=15.2, batc

Epoch 10/10:  96%|▉| 956/991 [4:03:27<08:55, 15.31s/batch, batch_loss=16, batch_

Epoch 10/10:  97%|▉| 957/991 [4:03:27<08:35, 15.16s/batch, batch_loss=16, batch_

Epoch 10/10:  97%|▉| 957/991 [4:03:42<08:35, 15.16s/batch, batch_loss=13.6, batc

Epoch 10/10:  97%|▉| 958/991 [4:03:42<08:19, 15.15s/batch, batch_loss=13.6, batc

Epoch 10/10:  97%|▉| 958/991 [4:03:57<08:19, 15.15s/batch, batch_loss=7.27, batc

Epoch 10/10:  97%|▉| 959/991 [4:03:57<08:03, 15.12s/batch, batch_loss=7.27, batc

Epoch 10/10:  97%|▉| 959/991 [4:04:12<08:03, 15.12s/batch, batch_loss=10.7, batc

Epoch 10/10:  97%|▉| 960/991 [4:04:12<07:49, 15.15s/batch, batch_loss=10.7, batc

Epoch 10/10:  97%|▉| 960/991 [4:04:27<07:49, 15.15s/batch, batch_loss=14.3, batc

Epoch 10/10:  97%|▉| 961/991 [4:04:27<07:34, 15.14s/batch, batch_loss=14.3, batc

Epoch 10/10:  97%|▉| 961/991 [4:04:40<07:34, 15.14s/batch, batch_loss=5.31, batc

Epoch 10/10:  97%|▉| 962/991 [4:04:40<06:57, 14.41s/batch, batch_loss=5.31, batc

Epoch 10/10:  97%|▉| 962/991 [4:04:53<06:57, 14.41s/batch, batch_loss=7.22, batc

Epoch 10/10:  97%|▉| 963/991 [4:04:53<06:31, 13.98s/batch, batch_loss=7.22, batc

Epoch 10/10:  97%|▉| 963/991 [4:05:07<06:31, 13.98s/batch, batch_loss=9.44e+3, b

Epoch 10/10:  97%|▉| 964/991 [4:05:07<06:19, 14.05s/batch, batch_loss=9.44e+3, b

Epoch 10/10:  97%|▉| 964/991 [4:05:23<06:19, 14.05s/batch, batch_loss=19.9, batc

Epoch 10/10:  97%|▉| 965/991 [4:05:23<06:14, 14.39s/batch, batch_loss=19.9, batc

Epoch 10/10:  97%|▉| 965/991 [4:05:37<06:14, 14.39s/batch, batch_loss=16, batch_

Epoch 10/10:  97%|▉| 966/991 [4:05:37<06:00, 14.42s/batch, batch_loss=16, batch_

Epoch 10/10:  97%|▉| 966/991 [4:05:52<06:00, 14.42s/batch, batch_loss=2.4e+4, ba

Epoch 10/10:  98%|▉| 967/991 [4:05:52<05:53, 14.72s/batch, batch_loss=2.4e+4, ba

Epoch 10/10:  98%|▉| 967/991 [4:06:07<05:53, 14.72s/batch, batch_loss=412, batch

Epoch 10/10:  98%|▉| 968/991 [4:06:07<05:39, 14.77s/batch, batch_loss=412, batch

Epoch 10/10:  98%|▉| 968/991 [4:06:23<05:39, 14.77s/batch, batch_loss=21.8, batc

Epoch 10/10:  98%|▉| 969/991 [4:06:23<05:27, 14.90s/batch, batch_loss=21.8, batc

Epoch 10/10:  98%|▉| 969/991 [4:06:38<05:27, 14.90s/batch, batch_loss=1.01, batc

Epoch 10/10:  98%|▉| 970/991 [4:06:38<05:14, 14.99s/batch, batch_loss=1.01, batc

Epoch 10/10:  98%|▉| 970/991 [4:06:53<05:14, 14.99s/batch, batch_loss=9.49, batc

Epoch 10/10:  98%|▉| 971/991 [4:06:53<05:03, 15.19s/batch, batch_loss=9.49, batc

Epoch 10/10:  98%|▉| 971/991 [4:07:09<05:03, 15.19s/batch, batch_loss=23.5, batc

Epoch 10/10:  98%|▉| 972/991 [4:07:09<04:49, 15.22s/batch, batch_loss=23.5, batc

Epoch 10/10:  98%|▉| 972/991 [4:07:24<04:49, 15.22s/batch, batch_loss=19.4, batc

Epoch 10/10:  98%|▉| 973/991 [4:07:24<04:32, 15.12s/batch, batch_loss=19.4, batc

Epoch 10/10:  98%|▉| 973/991 [4:07:39<04:32, 15.12s/batch, batch_loss=16.4, batc

Epoch 10/10:  98%|▉| 974/991 [4:07:39<04:17, 15.16s/batch, batch_loss=16.4, batc

Epoch 10/10:  98%|▉| 974/991 [4:07:53<04:17, 15.16s/batch, batch_loss=8.98, batc

Epoch 10/10:  98%|▉| 975/991 [4:07:53<03:58, 14.90s/batch, batch_loss=8.98, batc

Epoch 10/10:  98%|▉| 975/991 [4:08:07<03:58, 14.90s/batch, batch_loss=27.6, batc

Epoch 10/10:  98%|▉| 976/991 [4:08:07<03:41, 14.74s/batch, batch_loss=27.6, batc

Epoch 10/10:  98%|▉| 976/991 [4:08:23<03:41, 14.74s/batch, batch_loss=0.68, batc

Epoch 10/10:  99%|▉| 977/991 [4:08:23<03:28, 14.92s/batch, batch_loss=0.68, batc

Epoch 10/10:  99%|▉| 977/991 [4:08:36<03:28, 14.92s/batch, batch_loss=0.493, bat

Epoch 10/10:  99%|▉| 978/991 [4:08:36<03:07, 14.40s/batch, batch_loss=0.493, bat

Epoch 10/10:  99%|▉| 978/991 [4:08:49<03:07, 14.40s/batch, batch_loss=0.325, bat

Epoch 10/10:  99%|▉| 979/991 [4:08:49<02:49, 14.10s/batch, batch_loss=0.325, bat

Epoch 10/10:  99%|▉| 979/991 [4:09:02<02:49, 14.10s/batch, batch_loss=0.205, bat

Epoch 10/10:  99%|▉| 980/991 [4:09:02<02:30, 13.70s/batch, batch_loss=0.205, bat

Epoch 10/10:  99%|▉| 980/991 [4:09:15<02:30, 13.70s/batch, batch_loss=0.117, bat

Epoch 10/10:  99%|▉| 981/991 [4:09:15<02:15, 13.54s/batch, batch_loss=0.117, bat

Epoch 10/10:  99%|▉| 981/991 [4:09:27<02:15, 13.54s/batch, batch_loss=0.0612, ba

Epoch 10/10:  99%|▉| 982/991 [4:09:27<01:57, 13.11s/batch, batch_loss=0.0612, ba

Epoch 10/10:  99%|▉| 982/991 [4:09:38<01:57, 13.11s/batch, batch_loss=0.0449, ba

Epoch 10/10:  99%|▉| 983/991 [4:09:38<01:39, 12.45s/batch, batch_loss=0.0449, ba

Epoch 10/10:  99%|▉| 983/991 [4:09:51<01:39, 12.45s/batch, batch_loss=0.0531, ba

Epoch 10/10:  99%|▉| 984/991 [4:09:51<01:27, 12.49s/batch, batch_loss=0.0531, ba

Epoch 10/10:  99%|▉| 984/991 [4:10:05<01:27, 12.49s/batch, batch_loss=0.0708, ba

Epoch 10/10:  99%|▉| 985/991 [4:10:05<01:16, 12.83s/batch, batch_loss=0.0708, ba

Epoch 10/10:  99%|▉| 985/991 [4:10:19<01:16, 12.83s/batch, batch_loss=0.0848, ba

Epoch 10/10:  99%|▉| 986/991 [4:10:19<01:06, 13.22s/batch, batch_loss=0.0848, ba

Epoch 10/10:  99%|▉| 986/991 [4:10:36<01:06, 13.22s/batch, batch_loss=0.0906, ba

Epoch 10/10: 100%|▉| 987/991 [4:10:36<00:57, 14.34s/batch, batch_loss=0.0906, ba

Epoch 10/10: 100%|▉| 987/991 [4:10:49<00:57, 14.34s/batch, batch_loss=0.0879, ba

Epoch 10/10: 100%|▉| 988/991 [4:10:49<00:42, 14.03s/batch, batch_loss=0.0879, ba

Epoch 10/10: 100%|▉| 988/991 [4:11:02<00:42, 14.03s/batch, batch_loss=0.0771, ba

Epoch 10/10: 100%|▉| 989/991 [4:11:02<00:27, 13.68s/batch, batch_loss=0.0771, ba

Epoch 10/10: 100%|▉| 989/991 [4:11:14<00:27, 13.68s/batch, batch_loss=0.0615, ba

Epoch 10/10: 100%|▉| 990/991 [4:11:14<00:13, 13.31s/batch, batch_loss=0.0615, ba

Epoch 10/10: 100%|▉| 990/991 [4:11:25<00:13, 13.31s/batch, batch_loss=0.0469, ba

Epoch 10/10: 100%|█| 991/991 [4:11:25<00:00, 12.68s/batch, batch_loss=0.0469, ba

Epoch 10/10: 100%|█| 991/991 [4:11:25<00:00, 15.22s/batch, batch_loss=0.0469, ba




Epoch 10, Loss: 986.8414


Validation:   0%|                                    | 0/743 [00:00<?, ?batch/s]

Validation:   0%| | 0/743 [00:14<?, ?batch/s, batch_loss=16.8, batch_index=1, ba

Validation:   0%| | 1/743 [00:14<3:05:22, 14.99s/batch, batch_loss=16.8, batch_i

Validation:   0%| | 1/743 [00:30<3:05:22, 14.99s/batch, batch_loss=17.5, batch_i

Validation:   0%| | 2/743 [00:30<3:05:29, 15.02s/batch, batch_loss=17.5, batch_i

Validation:   0%| | 2/743 [00:45<3:05:29, 15.02s/batch, batch_loss=15.4, batch_i

Validation:   0%| | 3/743 [00:45<3:06:27, 15.12s/batch, batch_loss=15.4, batch_i

Validation:   0%| | 3/743 [01:01<3:06:27, 15.12s/batch, batch_loss=9.48, batch_i

Validation:   1%| | 4/743 [01:01<3:12:47, 15.65s/batch, batch_loss=9.48, batch_i

Validation:   1%| | 4/743 [01:17<3:12:47, 15.65s/batch, batch_loss=20.5, batch_i

Validation:   1%| | 5/743 [01:17<3:11:23, 15.56s/batch, batch_loss=20.5, batch_i

Validation:   1%| | 5/743 [01:31<3:11:23, 15.56s/batch, batch_loss=19.5, batch_i

Validation:   1%| | 6/743 [01:31<3:05:12, 15.08s/batch, batch_loss=19.5, batch_i

Validation:   1%| | 6/743 [01:46<3:05:12, 15.08s/batch, batch_loss=556, batch_in

Validation:   1%| | 7/743 [01:46<3:04:07, 15.01s/batch, batch_loss=556, batch_in

Validation:   1%| | 7/743 [02:01<3:04:07, 15.01s/batch, batch_loss=17, batch_ind

Validation:   1%| | 8/743 [02:01<3:04:49, 15.09s/batch, batch_loss=17, batch_ind

Validation:   1%| | 8/743 [02:16<3:04:49, 15.09s/batch, batch_loss=13.5, batch_i

Validation:   1%| | 9/743 [02:16<3:04:31, 15.08s/batch, batch_loss=13.5, batch_i

Validation:   1%| | 9/743 [02:31<3:04:31, 15.08s/batch, batch_loss=13.3, batch_i

Validation:   1%| | 10/743 [02:31<3:02:47, 14.96s/batch, batch_loss=13.3, batch_

Validation:   1%| | 10/743 [02:46<3:02:47, 14.96s/batch, batch_loss=10.7, batch_

Validation:   1%| | 11/743 [02:46<3:02:30, 14.96s/batch, batch_loss=10.7, batch_

Validation:   1%| | 11/743 [03:01<3:02:30, 14.96s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:01<3:02:44, 15.00s/batch, batch_loss=2.19e+3, bat

Validation:   2%| | 12/743 [03:15<3:02:44, 15.00s/batch, batch_loss=14.4, batch_

Validation:   2%| | 13/743 [03:15<3:01:14, 14.90s/batch, batch_loss=14.4, batch_

Validation:   2%| | 13/743 [03:30<3:01:14, 14.90s/batch, batch_loss=9.31, batch_

Validation:   2%| | 14/743 [03:30<2:59:54, 14.81s/batch, batch_loss=9.31, batch_

Validation:   2%| | 14/743 [03:46<2:59:54, 14.81s/batch, batch_loss=16.4, batch_

Validation:   2%| | 15/743 [03:46<3:02:55, 15.08s/batch, batch_loss=16.4, batch_

Validation:   2%| | 15/743 [04:01<3:02:55, 15.08s/batch, batch_loss=14.4, batch_

Validation:   2%| | 16/743 [04:01<3:02:44, 15.08s/batch, batch_loss=14.4, batch_

Validation:   2%| | 16/743 [04:16<3:02:44, 15.08s/batch, batch_loss=10.6, batch_

Validation:   2%| | 17/743 [04:16<3:02:48, 15.11s/batch, batch_loss=10.6, batch_

Validation:   2%| | 17/743 [04:31<3:02:48, 15.11s/batch, batch_loss=4.52e+3, bat

Validation:   2%| | 18/743 [04:31<3:02:45, 15.12s/batch, batch_loss=4.52e+3, bat

Validation:   2%| | 18/743 [04:46<3:02:45, 15.12s/batch, batch_loss=9.62, batch_

Validation:   3%| | 19/743 [04:46<3:02:21, 15.11s/batch, batch_loss=9.62, batch_

Validation:   3%| | 19/743 [05:00<3:02:21, 15.11s/batch, batch_loss=14.7, batch_

Validation:   3%| | 20/743 [05:00<2:58:51, 14.84s/batch, batch_loss=14.7, batch_

Validation:   3%| | 20/743 [05:15<2:58:51, 14.84s/batch, batch_loss=963, batch_i

Validation:   3%| | 21/743 [05:15<2:58:11, 14.81s/batch, batch_loss=963, batch_i

Validation:   3%| | 21/743 [05:32<2:58:11, 14.81s/batch, batch_loss=13.3, batch_

Validation:   3%| | 22/743 [05:32<3:06:56, 15.56s/batch, batch_loss=13.3, batch_

Validation:   3%| | 22/743 [05:46<3:06:56, 15.56s/batch, batch_loss=6.31, batch_

Validation:   3%| | 23/743 [05:46<3:00:08, 15.01s/batch, batch_loss=6.31, batch_

Validation:   3%| | 23/743 [05:59<3:00:08, 15.01s/batch, batch_loss=15.2, batch_

Validation:   3%| | 24/743 [05:59<2:53:15, 14.46s/batch, batch_loss=15.2, batch_

Validation:   3%| | 24/743 [06:13<2:53:15, 14.46s/batch, batch_loss=12, batch_in

Validation:   3%| | 25/743 [06:13<2:50:00, 14.21s/batch, batch_loss=12, batch_in

Validation:   3%| | 25/743 [06:27<2:50:00, 14.21s/batch, batch_loss=19.6, batch_

Validation:   3%| | 26/743 [06:27<2:49:15, 14.16s/batch, batch_loss=19.6, batch_

Validation:   3%| | 26/743 [06:41<2:49:15, 14.16s/batch, batch_loss=1.65e+3, bat

Validation:   4%| | 27/743 [06:41<2:49:30, 14.21s/batch, batch_loss=1.65e+3, bat

Validation:   4%| | 27/743 [06:56<2:49:30, 14.21s/batch, batch_loss=14.6, batch_

Validation:   4%| | 28/743 [06:56<2:52:24, 14.47s/batch, batch_loss=14.6, batch_

Validation:   4%| | 28/743 [07:11<2:52:24, 14.47s/batch, batch_loss=15.9, batch_

Validation:   4%| | 29/743 [07:11<2:53:27, 14.58s/batch, batch_loss=15.9, batch_

Validation:   4%| | 29/743 [07:25<2:53:27, 14.58s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:25<2:51:36, 14.44s/batch, batch_loss=1.18e+4, bat

Validation:   4%| | 30/743 [07:40<2:51:36, 14.44s/batch, batch_loss=18.7, batch_

Validation:   4%| | 31/743 [07:40<2:52:44, 14.56s/batch, batch_loss=18.7, batch_

Validation:   4%| | 31/743 [07:56<2:52:44, 14.56s/batch, batch_loss=12.6, batch_

Validation:   4%| | 32/743 [07:56<2:56:59, 14.94s/batch, batch_loss=12.6, batch_

Validation:   4%| | 32/743 [08:11<2:56:59, 14.94s/batch, batch_loss=16.8, batch_

Validation:   4%| | 33/743 [08:11<2:55:48, 14.86s/batch, batch_loss=16.8, batch_

Validation:   4%| | 33/743 [08:25<2:55:48, 14.86s/batch, batch_loss=16, batch_in

Validation:   5%| | 34/743 [08:25<2:53:59, 14.72s/batch, batch_loss=16, batch_in

Validation:   5%| | 34/743 [08:40<2:53:59, 14.72s/batch, batch_loss=2.83e+3, bat

Validation:   5%| | 35/743 [08:40<2:52:46, 14.64s/batch, batch_loss=2.83e+3, bat

Validation:   5%| | 35/743 [08:54<2:52:46, 14.64s/batch, batch_loss=14.2, batch_

Validation:   5%| | 36/743 [08:54<2:51:46, 14.58s/batch, batch_loss=14.2, batch_

Validation:   5%| | 36/743 [09:11<2:51:46, 14.58s/batch, batch_loss=163, batch_i

Validation:   5%| | 37/743 [09:11<2:59:06, 15.22s/batch, batch_loss=163, batch_i

Validation:   5%| | 37/743 [09:25<2:59:06, 15.22s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:25<2:56:51, 15.05s/batch, batch_loss=6.5e+3, batc

Validation:   5%| | 38/743 [09:41<2:56:51, 15.05s/batch, batch_loss=12.9, batch_

Validation:   5%| | 39/743 [09:41<2:58:19, 15.20s/batch, batch_loss=12.9, batch_

Validation:   5%| | 39/743 [09:56<2:58:19, 15.20s/batch, batch_loss=18.9, batch_

Validation:   5%| | 40/743 [09:56<2:57:01, 15.11s/batch, batch_loss=18.9, batch_

Validation:   5%| | 40/743 [10:10<2:57:01, 15.11s/batch, batch_loss=13.3, batch_

Validation:   6%| | 41/743 [10:10<2:55:10, 14.97s/batch, batch_loss=13.3, batch_

Validation:   6%| | 41/743 [10:25<2:55:10, 14.97s/batch, batch_loss=14.8, batch_

Validation:   6%| | 42/743 [10:25<2:54:49, 14.96s/batch, batch_loss=14.8, batch_

Validation:   6%| | 42/743 [10:40<2:54:49, 14.96s/batch, batch_loss=10.8, batch_

Validation:   6%| | 43/743 [10:40<2:53:29, 14.87s/batch, batch_loss=10.8, batch_

Validation:   6%| | 43/743 [10:55<2:53:29, 14.87s/batch, batch_loss=14.6, batch_

Validation:   6%| | 44/743 [10:55<2:53:39, 14.91s/batch, batch_loss=14.6, batch_

Validation:   6%| | 44/743 [11:12<2:53:39, 14.91s/batch, batch_loss=18.9, batch_

Validation:   6%| | 45/743 [11:12<3:01:55, 15.64s/batch, batch_loss=18.9, batch_

Validation:   6%| | 45/743 [11:27<3:01:55, 15.64s/batch, batch_loss=8.43, batch_

Validation:   6%| | 46/743 [11:27<2:57:22, 15.27s/batch, batch_loss=8.43, batch_

Validation:   6%| | 46/743 [11:42<2:57:22, 15.27s/batch, batch_loss=16.7, batch_

Validation:   6%| | 47/743 [11:42<2:57:03, 15.26s/batch, batch_loss=16.7, batch_

Validation:   6%| | 47/743 [11:57<2:57:03, 15.26s/batch, batch_loss=17.7, batch_

Validation:   6%| | 48/743 [11:57<2:55:42, 15.17s/batch, batch_loss=17.7, batch_

Validation:   6%| | 48/743 [12:12<2:55:42, 15.17s/batch, batch_loss=19.3, batch_

Validation:   7%| | 49/743 [12:12<2:55:23, 15.16s/batch, batch_loss=19.3, batch_

Validation:   7%| | 49/743 [12:28<2:55:23, 15.16s/batch, batch_loss=13.1, batch_

Validation:   7%| | 50/743 [12:28<2:57:10, 15.34s/batch, batch_loss=13.1, batch_

Validation:   7%| | 50/743 [12:43<2:57:10, 15.34s/batch, batch_loss=14, batch_in

Validation:   7%| | 51/743 [12:43<2:57:32, 15.39s/batch, batch_loss=14, batch_in

Validation:   7%| | 51/743 [13:01<2:57:32, 15.39s/batch, batch_loss=15.2, batch_

Validation:   7%| | 52/743 [13:01<3:06:30, 16.20s/batch, batch_loss=15.2, batch_

Validation:   7%| | 52/743 [13:16<3:06:30, 16.20s/batch, batch_loss=21.2, batch_

Validation:   7%| | 53/743 [13:16<3:01:54, 15.82s/batch, batch_loss=21.2, batch_

Validation:   7%| | 53/743 [13:31<3:01:54, 15.82s/batch, batch_loss=12.5, batch_

Validation:   7%| | 54/743 [13:31<2:57:02, 15.42s/batch, batch_loss=12.5, batch_

Validation:   7%| | 54/743 [13:45<2:57:02, 15.42s/batch, batch_loss=20.5, batch_

Validation:   7%| | 55/743 [13:45<2:53:40, 15.15s/batch, batch_loss=20.5, batch_

Validation:   7%| | 55/743 [14:01<2:53:40, 15.15s/batch, batch_loss=16.1, batch_

Validation:   8%| | 56/743 [14:01<2:56:07, 15.38s/batch, batch_loss=16.1, batch_

Validation:   8%| | 56/743 [14:16<2:56:07, 15.38s/batch, batch_loss=12.7, batch_

Validation:   8%| | 57/743 [14:16<2:54:08, 15.23s/batch, batch_loss=12.7, batch_

Validation:   8%| | 57/743 [14:31<2:54:08, 15.23s/batch, batch_loss=17.5, batch_

Validation:   8%| | 58/743 [14:31<2:50:46, 14.96s/batch, batch_loss=17.5, batch_

Validation:   8%| | 58/743 [14:45<2:50:46, 14.96s/batch, batch_loss=107, batch_i

Validation:   8%| | 59/743 [14:45<2:49:31, 14.87s/batch, batch_loss=107, batch_i

Validation:   8%| | 59/743 [15:00<2:49:31, 14.87s/batch, batch_loss=6.14e+3, bat

Validation:   8%| | 60/743 [15:00<2:49:22, 14.88s/batch, batch_loss=6.14e+3, bat

Validation:   8%| | 60/743 [15:15<2:49:22, 14.88s/batch, batch_loss=7.64, batch_

Validation:   8%| | 61/743 [15:15<2:48:26, 14.82s/batch, batch_loss=7.64, batch_

Validation:   8%| | 61/743 [15:29<2:48:26, 14.82s/batch, batch_loss=8.52, batch_

Validation:   8%| | 62/743 [15:29<2:47:30, 14.76s/batch, batch_loss=8.52, batch_

Validation:   8%| | 62/743 [15:45<2:47:30, 14.76s/batch, batch_loss=20.8, batch_

Validation:   8%| | 63/743 [15:45<2:48:55, 14.90s/batch, batch_loss=20.8, batch_

Validation:   8%| | 63/743 [15:59<2:48:55, 14.90s/batch, batch_loss=11.2, batch_

Validation:   9%| | 64/743 [15:59<2:46:09, 14.68s/batch, batch_loss=11.2, batch_

Validation:   9%| | 64/743 [16:14<2:46:09, 14.68s/batch, batch_loss=17.6, batch_

Validation:   9%| | 65/743 [16:14<2:47:19, 14.81s/batch, batch_loss=17.6, batch_

Validation:   9%| | 65/743 [16:29<2:47:19, 14.81s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:29<2:48:17, 14.92s/batch, batch_loss=1.27e+3, bat

Validation:   9%| | 66/743 [16:44<2:48:17, 14.92s/batch, batch_loss=14.1, batch_

Validation:   9%| | 67/743 [16:44<2:48:09, 14.93s/batch, batch_loss=14.1, batch_

Validation:   9%| | 67/743 [16:58<2:48:09, 14.93s/batch, batch_loss=13.5, batch_

Validation:   9%| | 68/743 [16:58<2:46:21, 14.79s/batch, batch_loss=13.5, batch_

Validation:   9%| | 68/743 [17:15<2:46:21, 14.79s/batch, batch_loss=9, batch_ind

Validation:   9%| | 69/743 [17:15<2:53:24, 15.44s/batch, batch_loss=9, batch_ind

Validation:   9%| | 69/743 [17:30<2:53:24, 15.44s/batch, batch_loss=13.1, batch_

Validation:   9%| | 70/743 [17:30<2:51:44, 15.31s/batch, batch_loss=13.1, batch_

Validation:   9%| | 70/743 [17:45<2:51:44, 15.31s/batch, batch_loss=8.24, batch_

Validation:  10%| | 71/743 [17:45<2:48:53, 15.08s/batch, batch_loss=8.24, batch_

Validation:  10%| | 71/743 [18:00<2:48:53, 15.08s/batch, batch_loss=13.2, batch_

Validation:  10%| | 72/743 [18:00<2:47:54, 15.01s/batch, batch_loss=13.2, batch_

Validation:  10%| | 72/743 [18:15<2:47:54, 15.01s/batch, batch_loss=13.1, batch_

Validation:  10%| | 73/743 [18:15<2:47:29, 15.00s/batch, batch_loss=13.1, batch_

Validation:  10%| | 73/743 [18:31<2:47:29, 15.00s/batch, batch_loss=15.4, batch_

Validation:  10%| | 74/743 [18:31<2:50:48, 15.32s/batch, batch_loss=15.4, batch_

Validation:  10%| | 74/743 [18:45<2:50:48, 15.32s/batch, batch_loss=11.5, batch_

Validation:  10%| | 75/743 [18:45<2:46:20, 14.94s/batch, batch_loss=11.5, batch_

Validation:  10%| | 75/743 [18:59<2:46:20, 14.94s/batch, batch_loss=14.1, batch_

Validation:  10%| | 76/743 [18:59<2:43:25, 14.70s/batch, batch_loss=14.1, batch_

Validation:  10%| | 76/743 [19:15<2:43:25, 14.70s/batch, batch_loss=11.2, batch_

Validation:  10%| | 77/743 [19:15<2:45:41, 14.93s/batch, batch_loss=11.2, batch_

Validation:  10%| | 77/743 [19:29<2:45:41, 14.93s/batch, batch_loss=15.3, batch_

Validation:  10%| | 78/743 [19:29<2:45:28, 14.93s/batch, batch_loss=15.3, batch_

Validation:  10%| | 78/743 [19:44<2:45:28, 14.93s/batch, batch_loss=8.57, batch_

Validation:  11%| | 79/743 [19:44<2:44:22, 14.85s/batch, batch_loss=8.57, batch_

Validation:  11%| | 79/743 [20:00<2:44:22, 14.85s/batch, batch_loss=7.41, batch_

Validation:  11%| | 80/743 [20:00<2:47:03, 15.12s/batch, batch_loss=7.41, batch_

Validation:  11%| | 80/743 [20:15<2:47:03, 15.12s/batch, batch_loss=149, batch_i

Validation:  11%| | 81/743 [20:15<2:45:37, 15.01s/batch, batch_loss=149, batch_i

Validation:  11%| | 81/743 [20:29<2:45:37, 15.01s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [20:29<2:44:18, 14.91s/batch, batch_loss=1.51e+3, bat

Validation:  11%| | 82/743 [20:44<2:44:18, 14.91s/batch, batch_loss=30.6, batch_

Validation:  11%| | 83/743 [20:44<2:44:19, 14.94s/batch, batch_loss=30.6, batch_

Validation:  11%| | 83/743 [21:02<2:44:19, 14.94s/batch, batch_loss=14.6, batch_

Validation:  11%| | 84/743 [21:02<2:51:43, 15.63s/batch, batch_loss=14.6, batch_

Validation:  11%| | 84/743 [21:17<2:51:43, 15.63s/batch, batch_loss=20.3, batch_

Validation:  11%| | 85/743 [21:17<2:49:37, 15.47s/batch, batch_loss=20.3, batch_

Validation:  11%| | 85/743 [21:32<2:49:37, 15.47s/batch, batch_loss=23.1, batch_

Validation:  12%| | 86/743 [21:32<2:47:40, 15.31s/batch, batch_loss=23.1, batch_

Validation:  12%| | 86/743 [21:47<2:47:40, 15.31s/batch, batch_loss=31.4, batch_

Validation:  12%| | 87/743 [21:47<2:46:55, 15.27s/batch, batch_loss=31.4, batch_

Validation:  12%| | 87/743 [22:02<2:46:55, 15.27s/batch, batch_loss=21.3, batch_

Validation:  12%| | 88/743 [22:02<2:46:22, 15.24s/batch, batch_loss=21.3, batch_

Validation:  12%| | 88/743 [22:16<2:46:22, 15.24s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [22:16<2:43:33, 15.01s/batch, batch_loss=1.46e+4, bat

Validation:  12%| | 89/743 [22:31<2:43:33, 15.01s/batch, batch_loss=4.28, batch_

Validation:  12%| | 90/743 [22:31<2:43:28, 15.02s/batch, batch_loss=4.28, batch_

Validation:  12%| | 90/743 [22:49<2:43:28, 15.02s/batch, batch_loss=30.6, batch_

Validation:  12%| | 91/743 [22:49<2:52:24, 15.87s/batch, batch_loss=30.6, batch_

Validation:  12%| | 91/743 [23:03<2:52:24, 15.87s/batch, batch_loss=29.5, batch_

Validation:  12%| | 92/743 [23:03<2:45:39, 15.27s/batch, batch_loss=29.5, batch_

Validation:  12%| | 92/743 [23:17<2:45:39, 15.27s/batch, batch_loss=23.9, batch_

Validation:  13%|▏| 93/743 [23:17<2:40:39, 14.83s/batch, batch_loss=23.9, batch_

Validation:  13%|▏| 93/743 [23:31<2:40:39, 14.83s/batch, batch_loss=27.4, batch_

Validation:  13%|▏| 94/743 [23:31<2:39:13, 14.72s/batch, batch_loss=27.4, batch_

Validation:  13%|▏| 94/743 [23:46<2:39:13, 14.72s/batch, batch_loss=12.3, batch_

Validation:  13%|▏| 95/743 [23:46<2:39:39, 14.78s/batch, batch_loss=12.3, batch_

Validation:  13%|▏| 95/743 [24:03<2:39:39, 14.78s/batch, batch_loss=17.1, batch_

Validation:  13%|▏| 96/743 [24:03<2:46:35, 15.45s/batch, batch_loss=17.1, batch_

Validation:  13%|▏| 96/743 [24:18<2:46:35, 15.45s/batch, batch_loss=26, batch_in

Validation:  13%|▏| 97/743 [24:18<2:43:37, 15.20s/batch, batch_loss=26, batch_in

Validation:  13%|▏| 97/743 [24:33<2:43:37, 15.20s/batch, batch_loss=17.4, batch_

Validation:  13%|▏| 98/743 [24:33<2:42:57, 15.16s/batch, batch_loss=17.4, batch_

Validation:  13%|▏| 98/743 [24:47<2:42:57, 15.16s/batch, batch_loss=19.9, batch_

Validation:  13%|▏| 99/743 [24:47<2:39:37, 14.87s/batch, batch_loss=19.9, batch_

Validation:  13%|▏| 99/743 [25:02<2:39:37, 14.87s/batch, batch_loss=10.2, batch_

Validation:  13%|▏| 100/743 [25:02<2:39:01, 14.84s/batch, batch_loss=10.2, batch

Validation:  13%|▏| 100/743 [25:17<2:39:01, 14.84s/batch, batch_loss=14, batch_i

Validation:  14%|▏| 101/743 [25:17<2:40:25, 14.99s/batch, batch_loss=14, batch_i

Validation:  14%|▏| 101/743 [25:31<2:40:25, 14.99s/batch, batch_loss=12, batch_i

Validation:  14%|▏| 102/743 [25:31<2:36:44, 14.67s/batch, batch_loss=12, batch_i

Validation:  14%|▏| 102/743 [25:46<2:36:44, 14.67s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [25:46<2:35:40, 14.59s/batch, batch_loss=3.39e+3, ba

Validation:  14%|▏| 103/743 [26:00<2:35:40, 14.59s/batch, batch_loss=14.8, batch

Validation:  14%|▏| 104/743 [26:00<2:34:38, 14.52s/batch, batch_loss=14.8, batch

Validation:  14%|▏| 104/743 [26:15<2:34:38, 14.52s/batch, batch_loss=7.72, batch

Validation:  14%|▏| 105/743 [26:15<2:35:41, 14.64s/batch, batch_loss=7.72, batch

Validation:  14%|▏| 105/743 [26:29<2:35:41, 14.64s/batch, batch_loss=14.1, batch

Validation:  14%|▏| 106/743 [26:29<2:35:00, 14.60s/batch, batch_loss=14.1, batch

Validation:  14%|▏| 106/743 [26:44<2:35:00, 14.60s/batch, batch_loss=715, batch_

Validation:  14%|▏| 107/743 [26:44<2:34:19, 14.56s/batch, batch_loss=715, batch_

Validation:  14%|▏| 107/743 [26:58<2:34:19, 14.56s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [26:58<2:33:44, 14.53s/batch, batch_loss=1.6e+3, bat

Validation:  15%|▏| 108/743 [27:13<2:33:44, 14.53s/batch, batch_loss=200, batch_

Validation:  15%|▏| 109/743 [27:13<2:34:49, 14.65s/batch, batch_loss=200, batch_

Validation:  15%|▏| 109/743 [27:28<2:34:49, 14.65s/batch, batch_loss=25.1, batch

Validation:  15%|▏| 110/743 [27:28<2:34:39, 14.66s/batch, batch_loss=25.1, batch

Validation:  15%|▏| 110/743 [27:43<2:34:39, 14.66s/batch, batch_loss=12.5, batch

Validation:  15%|▏| 111/743 [27:43<2:36:07, 14.82s/batch, batch_loss=12.5, batch

Validation:  15%|▏| 111/743 [27:58<2:36:07, 14.82s/batch, batch_loss=24.4, batch

Validation:  15%|▏| 112/743 [27:58<2:36:30, 14.88s/batch, batch_loss=24.4, batch

Validation:  15%|▏| 112/743 [28:13<2:36:30, 14.88s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [28:13<2:35:17, 14.79s/batch, batch_loss=1.06e+4, ba

Validation:  15%|▏| 113/743 [28:27<2:35:17, 14.79s/batch, batch_loss=17.9, batch

Validation:  15%|▏| 114/743 [28:27<2:34:17, 14.72s/batch, batch_loss=17.9, batch

Validation:  15%|▏| 114/743 [28:42<2:34:17, 14.72s/batch, batch_loss=18.4, batch

Validation:  15%|▏| 115/743 [28:42<2:33:48, 14.69s/batch, batch_loss=18.4, batch

Validation:  15%|▏| 115/743 [28:57<2:33:48, 14.69s/batch, batch_loss=14.8, batch

Validation:  16%|▏| 116/743 [28:57<2:34:45, 14.81s/batch, batch_loss=14.8, batch

Validation:  16%|▏| 116/743 [29:12<2:34:45, 14.81s/batch, batch_loss=20.3, batch

Validation:  16%|▏| 117/743 [29:12<2:34:37, 14.82s/batch, batch_loss=20.3, batch

Validation:  16%|▏| 117/743 [29:27<2:34:37, 14.82s/batch, batch_loss=21.3, batch

Validation:  16%|▏| 118/743 [29:27<2:35:29, 14.93s/batch, batch_loss=21.3, batch

Validation:  16%|▏| 118/743 [29:42<2:35:29, 14.93s/batch, batch_loss=15.1, batch

Validation:  16%|▏| 119/743 [29:42<2:35:18, 14.93s/batch, batch_loss=15.1, batch

Validation:  16%|▏| 119/743 [29:57<2:35:18, 14.93s/batch, batch_loss=19.4, batch

Validation:  16%|▏| 120/743 [29:57<2:35:38, 14.99s/batch, batch_loss=19.4, batch

Validation:  16%|▏| 120/743 [30:12<2:35:38, 14.99s/batch, batch_loss=12.7, batch

Validation:  16%|▏| 121/743 [30:12<2:33:34, 14.81s/batch, batch_loss=12.7, batch

Validation:  16%|▏| 121/743 [30:27<2:33:34, 14.81s/batch, batch_loss=3.67, batch

Validation:  16%|▏| 122/743 [30:27<2:36:01, 15.07s/batch, batch_loss=3.67, batch

Validation:  16%|▏| 122/743 [30:42<2:36:01, 15.07s/batch, batch_loss=7.62, batch

Validation:  17%|▏| 123/743 [30:42<2:33:50, 14.89s/batch, batch_loss=7.62, batch

Validation:  17%|▏| 123/743 [30:57<2:33:50, 14.89s/batch, batch_loss=10.2, batch

Validation:  17%|▏| 124/743 [30:57<2:34:01, 14.93s/batch, batch_loss=10.2, batch

Validation:  17%|▏| 124/743 [31:11<2:34:01, 14.93s/batch, batch_loss=24.7, batch

Validation:  17%|▏| 125/743 [31:11<2:30:15, 14.59s/batch, batch_loss=24.7, batch

Validation:  17%|▏| 125/743 [31:25<2:30:15, 14.59s/batch, batch_loss=11.9, batch

Validation:  17%|▏| 126/743 [31:25<2:30:34, 14.64s/batch, batch_loss=11.9, batch

Validation:  17%|▏| 126/743 [31:39<2:30:34, 14.64s/batch, batch_loss=11.5, batch

Validation:  17%|▏| 127/743 [31:39<2:28:43, 14.49s/batch, batch_loss=11.5, batch

Validation:  17%|▏| 127/743 [31:53<2:28:43, 14.49s/batch, batch_loss=20.5, batch

Validation:  17%|▏| 128/743 [31:53<2:25:12, 14.17s/batch, batch_loss=20.5, batch

Validation:  17%|▏| 128/743 [32:08<2:25:12, 14.17s/batch, batch_loss=12.6, batch

Validation:  17%|▏| 129/743 [32:08<2:26:40, 14.33s/batch, batch_loss=12.6, batch

Validation:  17%|▏| 129/743 [32:22<2:26:40, 14.33s/batch, batch_loss=17.2, batch

Validation:  17%|▏| 130/743 [32:22<2:26:33, 14.35s/batch, batch_loss=17.2, batch

Validation:  17%|▏| 130/743 [32:36<2:26:33, 14.35s/batch, batch_loss=22.8, batch

Validation:  18%|▏| 131/743 [32:36<2:25:09, 14.23s/batch, batch_loss=22.8, batch

Validation:  18%|▏| 131/743 [32:51<2:25:09, 14.23s/batch, batch_loss=24.5, batch

Validation:  18%|▏| 132/743 [32:51<2:26:24, 14.38s/batch, batch_loss=24.5, batch

Validation:  18%|▏| 132/743 [33:06<2:26:24, 14.38s/batch, batch_loss=35.2, batch

Validation:  18%|▏| 133/743 [33:06<2:28:13, 14.58s/batch, batch_loss=35.2, batch

Validation:  18%|▏| 133/743 [33:21<2:28:13, 14.58s/batch, batch_loss=19.3, batch

Validation:  18%|▏| 134/743 [33:21<2:31:13, 14.90s/batch, batch_loss=19.3, batch

Validation:  18%|▏| 134/743 [33:36<2:31:13, 14.90s/batch, batch_loss=33.4, batch

Validation:  18%|▏| 135/743 [33:36<2:30:49, 14.88s/batch, batch_loss=33.4, batch

Validation:  18%|▏| 135/743 [33:51<2:30:49, 14.88s/batch, batch_loss=15.2, batch

Validation:  18%|▏| 136/743 [33:51<2:29:46, 14.81s/batch, batch_loss=15.2, batch

Validation:  18%|▏| 136/743 [34:05<2:29:46, 14.81s/batch, batch_loss=24.6, batch

Validation:  18%|▏| 137/743 [34:05<2:28:34, 14.71s/batch, batch_loss=24.6, batch

Validation:  18%|▏| 137/743 [34:20<2:28:34, 14.71s/batch, batch_loss=6.87, batch

Validation:  19%|▏| 138/743 [34:20<2:29:15, 14.80s/batch, batch_loss=6.87, batch

Validation:  19%|▏| 138/743 [34:34<2:29:15, 14.80s/batch, batch_loss=252, batch_

Validation:  19%|▏| 139/743 [34:34<2:26:07, 14.52s/batch, batch_loss=252, batch_

Validation:  19%|▏| 139/743 [34:49<2:26:07, 14.52s/batch, batch_loss=18.2, batch

Validation:  19%|▏| 140/743 [34:49<2:26:52, 14.61s/batch, batch_loss=18.2, batch

Validation:  19%|▏| 140/743 [35:03<2:26:52, 14.61s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 141/743 [35:03<2:24:43, 14.42s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 141/743 [35:20<2:24:43, 14.42s/batch, batch_loss=13.1, batch

Validation:  19%|▏| 142/743 [35:20<2:31:17, 15.10s/batch, batch_loss=13.1, batch

Validation:  19%|▏| 142/743 [35:35<2:31:17, 15.10s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 143/743 [35:35<2:30:35, 15.06s/batch, batch_loss=12.4, batch

Validation:  19%|▏| 143/743 [35:49<2:30:35, 15.06s/batch, batch_loss=18.3, batch

Validation:  19%|▏| 144/743 [35:49<2:29:19, 14.96s/batch, batch_loss=18.3, batch

Validation:  19%|▏| 144/743 [36:03<2:29:19, 14.96s/batch, batch_loss=14.3, batch

Validation:  20%|▏| 145/743 [36:03<2:26:00, 14.65s/batch, batch_loss=14.3, batch

Validation:  20%|▏| 145/743 [36:17<2:26:00, 14.65s/batch, batch_loss=16, batch_i

Validation:  20%|▏| 146/743 [36:17<2:22:10, 14.29s/batch, batch_loss=16, batch_i

Validation:  20%|▏| 146/743 [36:31<2:22:10, 14.29s/batch, batch_loss=16, batch_i

Validation:  20%|▏| 147/743 [36:31<2:22:41, 14.36s/batch, batch_loss=16, batch_i

Validation:  20%|▏| 147/743 [36:45<2:22:41, 14.36s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [36:45<2:19:54, 14.11s/batch, batch_loss=3.2e+4, bat

Validation:  20%|▏| 148/743 [37:02<2:19:54, 14.11s/batch, batch_loss=21.5, batch

Validation:  20%|▏| 149/743 [37:02<2:28:35, 15.01s/batch, batch_loss=21.5, batch

Validation:  20%|▏| 149/743 [37:17<2:28:35, 15.01s/batch, batch_loss=23.4, batch

Validation:  20%|▏| 150/743 [37:17<2:28:12, 15.00s/batch, batch_loss=23.4, batch

Validation:  20%|▏| 150/743 [37:32<2:28:12, 15.00s/batch, batch_loss=13.2, batch

Validation:  20%|▏| 151/743 [37:32<2:29:36, 15.16s/batch, batch_loss=13.2, batch

Validation:  20%|▏| 151/743 [37:47<2:29:36, 15.16s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [37:47<2:28:33, 15.08s/batch, batch_loss=1.04e+4, ba

Validation:  20%|▏| 152/743 [38:02<2:28:33, 15.08s/batch, batch_loss=15.1, batch

Validation:  21%|▏| 153/743 [38:02<2:26:22, 14.89s/batch, batch_loss=15.1, batch

Validation:  21%|▏| 153/743 [38:16<2:26:22, 14.89s/batch, batch_loss=14.4, batch

Validation:  21%|▏| 154/743 [38:16<2:23:46, 14.65s/batch, batch_loss=14.4, batch

Validation:  21%|▏| 154/743 [38:30<2:23:46, 14.65s/batch, batch_loss=17.9, batch

Validation:  21%|▏| 155/743 [38:30<2:23:06, 14.60s/batch, batch_loss=17.9, batch

Validation:  21%|▏| 155/743 [38:47<2:23:06, 14.60s/batch, batch_loss=16.4, batch

Validation:  21%|▏| 156/743 [38:47<2:30:20, 15.37s/batch, batch_loss=16.4, batch

Validation:  21%|▏| 156/743 [39:02<2:30:20, 15.37s/batch, batch_loss=16.1, batch

Validation:  21%|▏| 157/743 [39:02<2:29:06, 15.27s/batch, batch_loss=16.1, batch

Validation:  21%|▏| 157/743 [39:17<2:29:06, 15.27s/batch, batch_loss=20.3, batch

Validation:  21%|▏| 158/743 [39:17<2:27:06, 15.09s/batch, batch_loss=20.3, batch

Validation:  21%|▏| 158/743 [39:32<2:27:06, 15.09s/batch, batch_loss=22.1, batch

Validation:  21%|▏| 159/743 [39:32<2:25:32, 14.95s/batch, batch_loss=22.1, batch

Validation:  21%|▏| 159/743 [39:47<2:25:32, 14.95s/batch, batch_loss=14, batch_i

Validation:  22%|▏| 160/743 [39:47<2:27:01, 15.13s/batch, batch_loss=14, batch_i

Validation:  22%|▏| 160/743 [40:01<2:27:01, 15.13s/batch, batch_loss=15.3, batch

Validation:  22%|▏| 161/743 [40:01<2:21:32, 14.59s/batch, batch_loss=15.3, batch

Validation:  22%|▏| 161/743 [40:16<2:21:32, 14.59s/batch, batch_loss=20, batch_i

Validation:  22%|▏| 162/743 [40:16<2:22:44, 14.74s/batch, batch_loss=20, batch_i

Validation:  22%|▏| 162/743 [40:30<2:22:44, 14.74s/batch, batch_loss=12.2, batch

Validation:  22%|▏| 163/743 [40:30<2:21:52, 14.68s/batch, batch_loss=12.2, batch

Validation:  22%|▏| 163/743 [40:47<2:21:52, 14.68s/batch, batch_loss=8.72, batch

Validation:  22%|▏| 164/743 [40:47<2:27:11, 15.25s/batch, batch_loss=8.72, batch

Validation:  22%|▏| 164/743 [41:03<2:27:11, 15.25s/batch, batch_loss=12.9, batch

Validation:  22%|▏| 165/743 [41:03<2:28:19, 15.40s/batch, batch_loss=12.9, batch

Validation:  22%|▏| 165/743 [41:18<2:28:19, 15.40s/batch, batch_loss=10.2, batch

Validation:  22%|▏| 166/743 [41:18<2:26:40, 15.25s/batch, batch_loss=10.2, batch

Validation:  22%|▏| 166/743 [41:32<2:26:40, 15.25s/batch, batch_loss=12.6, batch

Validation:  22%|▏| 167/743 [41:32<2:24:18, 15.03s/batch, batch_loss=12.6, batch

Validation:  22%|▏| 167/743 [41:46<2:24:18, 15.03s/batch, batch_loss=20.6, batch

Validation:  23%|▏| 168/743 [41:46<2:21:59, 14.82s/batch, batch_loss=20.6, batch

Validation:  23%|▏| 168/743 [42:01<2:21:59, 14.82s/batch, batch_loss=21, batch_i

Validation:  23%|▏| 169/743 [42:01<2:19:50, 14.62s/batch, batch_loss=21, batch_i

Validation:  23%|▏| 169/743 [42:15<2:19:50, 14.62s/batch, batch_loss=18.3, batch

Validation:  23%|▏| 170/743 [42:15<2:20:03, 14.67s/batch, batch_loss=18.3, batch

Validation:  23%|▏| 170/743 [42:30<2:20:03, 14.67s/batch, batch_loss=18, batch_i

Validation:  23%|▏| 171/743 [42:30<2:20:42, 14.76s/batch, batch_loss=18, batch_i

Validation:  23%|▏| 171/743 [42:45<2:20:42, 14.76s/batch, batch_loss=17.1, batch

Validation:  23%|▏| 172/743 [42:45<2:21:24, 14.86s/batch, batch_loss=17.1, batch

Validation:  23%|▏| 172/743 [43:03<2:21:24, 14.86s/batch, batch_loss=20.1, batch

Validation:  23%|▏| 173/743 [43:03<2:27:41, 15.55s/batch, batch_loss=20.1, batch

Validation:  23%|▏| 173/743 [43:17<2:27:41, 15.55s/batch, batch_loss=14.6, batch

Validation:  23%|▏| 174/743 [43:17<2:24:50, 15.27s/batch, batch_loss=14.6, batch

Validation:  23%|▏| 174/743 [43:33<2:24:50, 15.27s/batch, batch_loss=18.8, batch

Validation:  24%|▏| 175/743 [43:33<2:25:33, 15.38s/batch, batch_loss=18.8, batch

Validation:  24%|▏| 175/743 [43:47<2:25:33, 15.38s/batch, batch_loss=13.5, batch

Validation:  24%|▏| 176/743 [43:47<2:23:02, 15.14s/batch, batch_loss=13.5, batch

Validation:  24%|▏| 176/743 [44:02<2:23:02, 15.14s/batch, batch_loss=16.1, batch

Validation:  24%|▏| 177/743 [44:02<2:20:38, 14.91s/batch, batch_loss=16.1, batch

Validation:  24%|▏| 177/743 [44:17<2:20:38, 14.91s/batch, batch_loss=20.8, batch

Validation:  24%|▏| 178/743 [44:17<2:21:20, 15.01s/batch, batch_loss=20.8, batch

Validation:  24%|▏| 178/743 [44:33<2:21:20, 15.01s/batch, batch_loss=18.7, batch

Validation:  24%|▏| 179/743 [44:33<2:22:36, 15.17s/batch, batch_loss=18.7, batch

Validation:  24%|▏| 179/743 [44:47<2:22:36, 15.17s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [44:47<2:21:08, 15.04s/batch, batch_loss=7.24e+3, ba

Validation:  24%|▏| 180/743 [45:04<2:21:08, 15.04s/batch, batch_loss=15.7, batch

Validation:  24%|▏| 181/743 [45:04<2:24:47, 15.46s/batch, batch_loss=15.7, batch

Validation:  24%|▏| 181/743 [45:18<2:24:47, 15.46s/batch, batch_loss=17.9, batch

Validation:  24%|▏| 182/743 [45:18<2:20:25, 15.02s/batch, batch_loss=17.9, batch

Validation:  24%|▏| 182/743 [45:31<2:20:25, 15.02s/batch, batch_loss=16.2, batch

Validation:  25%|▏| 183/743 [45:31<2:16:45, 14.65s/batch, batch_loss=16.2, batch

Validation:  25%|▏| 183/743 [45:47<2:16:45, 14.65s/batch, batch_loss=9.56, batch

Validation:  25%|▏| 184/743 [45:47<2:17:32, 14.76s/batch, batch_loss=9.56, batch

Validation:  25%|▏| 184/743 [46:01<2:17:32, 14.76s/batch, batch_loss=15.8, batch

Validation:  25%|▏| 185/743 [46:01<2:17:40, 14.80s/batch, batch_loss=15.8, batch

Validation:  25%|▏| 185/743 [46:17<2:17:40, 14.80s/batch, batch_loss=22, batch_i

Validation:  25%|▎| 186/743 [46:17<2:19:39, 15.04s/batch, batch_loss=22, batch_i

Validation:  25%|▎| 186/743 [46:32<2:19:39, 15.04s/batch, batch_loss=26.2, batch

Validation:  25%|▎| 187/743 [46:32<2:18:54, 14.99s/batch, batch_loss=26.2, batch

Validation:  25%|▎| 187/743 [46:46<2:18:54, 14.99s/batch, batch_loss=14.5, batch

Validation:  25%|▎| 188/743 [46:46<2:15:18, 14.63s/batch, batch_loss=14.5, batch

Validation:  25%|▎| 188/743 [47:03<2:15:18, 14.63s/batch, batch_loss=16, batch_i

Validation:  25%|▎| 189/743 [47:03<2:23:06, 15.50s/batch, batch_loss=16, batch_i

Validation:  25%|▎| 189/743 [47:18<2:23:06, 15.50s/batch, batch_loss=970, batch_

Validation:  26%|▎| 190/743 [47:18<2:20:29, 15.24s/batch, batch_loss=970, batch_

Validation:  26%|▎| 190/743 [47:32<2:20:29, 15.24s/batch, batch_loss=21.3, batch

Validation:  26%|▎| 191/743 [47:32<2:16:34, 14.84s/batch, batch_loss=21.3, batch

Validation:  26%|▎| 191/743 [47:46<2:16:34, 14.84s/batch, batch_loss=11.8, batch

Validation:  26%|▎| 192/743 [47:46<2:14:52, 14.69s/batch, batch_loss=11.8, batch

Validation:  26%|▎| 192/743 [48:01<2:14:52, 14.69s/batch, batch_loss=16.5, batch

Validation:  26%|▎| 193/743 [48:01<2:14:36, 14.68s/batch, batch_loss=16.5, batch

Validation:  26%|▎| 193/743 [48:16<2:14:36, 14.68s/batch, batch_loss=16.3, batch

Validation:  26%|▎| 194/743 [48:16<2:16:17, 14.90s/batch, batch_loss=16.3, batch

Validation:  26%|▎| 194/743 [48:31<2:16:17, 14.90s/batch, batch_loss=9.32, batch

Validation:  26%|▎| 195/743 [48:31<2:16:02, 14.89s/batch, batch_loss=9.32, batch

Validation:  26%|▎| 195/743 [48:46<2:16:02, 14.89s/batch, batch_loss=16.4, batch

Validation:  26%|▎| 196/743 [48:46<2:15:52, 14.90s/batch, batch_loss=16.4, batch

Validation:  26%|▎| 196/743 [49:03<2:15:52, 14.90s/batch, batch_loss=8.8, batch_

Validation:  27%|▎| 197/743 [49:03<2:22:23, 15.65s/batch, batch_loss=8.8, batch_

Validation:  27%|▎| 197/743 [49:18<2:22:23, 15.65s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 198/743 [49:18<2:18:46, 15.28s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 198/743 [49:32<2:18:46, 15.28s/batch, batch_loss=18.2, batch

Validation:  27%|▎| 199/743 [49:32<2:15:43, 14.97s/batch, batch_loss=18.2, batch

Validation:  27%|▎| 199/743 [49:47<2:15:43, 14.97s/batch, batch_loss=279, batch_

Validation:  27%|▎| 200/743 [49:47<2:14:19, 14.84s/batch, batch_loss=279, batch_

Validation:  27%|▎| 200/743 [50:00<2:14:19, 14.84s/batch, batch_loss=29.4, batch

Validation:  27%|▎| 201/743 [50:00<2:11:06, 14.51s/batch, batch_loss=29.4, batch

Validation:  27%|▎| 201/743 [50:15<2:11:06, 14.51s/batch, batch_loss=20.4, batch

Validation:  27%|▎| 202/743 [50:15<2:11:01, 14.53s/batch, batch_loss=20.4, batch

Validation:  27%|▎| 202/743 [50:30<2:11:01, 14.53s/batch, batch_loss=14.9, batch

Validation:  27%|▎| 203/743 [50:30<2:11:19, 14.59s/batch, batch_loss=14.9, batch

Validation:  27%|▎| 203/743 [50:44<2:11:19, 14.59s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 204/743 [50:44<2:09:42, 14.44s/batch, batch_loss=18.7, batch

Validation:  27%|▎| 204/743 [50:58<2:09:42, 14.44s/batch, batch_loss=20.1, batch

Validation:  28%|▎| 205/743 [50:58<2:09:49, 14.48s/batch, batch_loss=20.1, batch

Validation:  28%|▎| 205/743 [51:13<2:09:49, 14.48s/batch, batch_loss=12.1, batch

Validation:  28%|▎| 206/743 [51:13<2:09:59, 14.52s/batch, batch_loss=12.1, batch

Validation:  28%|▎| 206/743 [51:31<2:09:59, 14.52s/batch, batch_loss=18.7, batch

Validation:  28%|▎| 207/743 [51:31<2:18:58, 15.56s/batch, batch_loss=18.7, batch

Validation:  28%|▎| 207/743 [51:45<2:18:58, 15.56s/batch, batch_loss=17.1, batch

Validation:  28%|▎| 208/743 [51:45<2:14:17, 15.06s/batch, batch_loss=17.1, batch

Validation:  28%|▎| 208/743 [51:59<2:14:17, 15.06s/batch, batch_loss=8.34, batch

Validation:  28%|▎| 209/743 [51:59<2:12:27, 14.88s/batch, batch_loss=8.34, batch

Validation:  28%|▎| 209/743 [52:14<2:12:27, 14.88s/batch, batch_loss=9.66, batch

Validation:  28%|▎| 210/743 [52:14<2:12:41, 14.94s/batch, batch_loss=9.66, batch

Validation:  28%|▎| 210/743 [52:29<2:12:41, 14.94s/batch, batch_loss=12.2, batch

Validation:  28%|▎| 211/743 [52:29<2:11:40, 14.85s/batch, batch_loss=12.2, batch

Validation:  28%|▎| 211/743 [52:43<2:11:40, 14.85s/batch, batch_loss=12.7, batch

Validation:  29%|▎| 212/743 [52:43<2:10:19, 14.73s/batch, batch_loss=12.7, batch

Validation:  29%|▎| 212/743 [52:58<2:10:19, 14.73s/batch, batch_loss=540, batch_

Validation:  29%|▎| 213/743 [52:58<2:08:49, 14.58s/batch, batch_loss=540, batch_

Validation:  29%|▎| 213/743 [53:13<2:08:49, 14.58s/batch, batch_loss=11.5, batch

Validation:  29%|▎| 214/743 [53:13<2:10:39, 14.82s/batch, batch_loss=11.5, batch

Validation:  29%|▎| 214/743 [53:28<2:10:39, 14.82s/batch, batch_loss=12.3, batch

Validation:  29%|▎| 215/743 [53:28<2:11:12, 14.91s/batch, batch_loss=12.3, batch

Validation:  29%|▎| 215/743 [53:42<2:11:12, 14.91s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [53:42<2:09:28, 14.74s/batch, batch_loss=2.57e+3, ba

Validation:  29%|▎| 216/743 [53:57<2:09:28, 14.74s/batch, batch_loss=19.9, batch

Validation:  29%|▎| 217/743 [53:57<2:08:27, 14.65s/batch, batch_loss=19.9, batch

Validation:  29%|▎| 217/743 [54:11<2:08:27, 14.65s/batch, batch_loss=12.2, batch

Validation:  29%|▎| 218/743 [54:11<2:07:30, 14.57s/batch, batch_loss=12.2, batch

Validation:  29%|▎| 218/743 [54:26<2:07:30, 14.57s/batch, batch_loss=28.1, batch

Validation:  29%|▎| 219/743 [54:26<2:07:59, 14.66s/batch, batch_loss=28.1, batch

Validation:  29%|▎| 219/743 [54:40<2:07:59, 14.66s/batch, batch_loss=26.6, batch

Validation:  30%|▎| 220/743 [54:40<2:06:37, 14.53s/batch, batch_loss=26.6, batch

Validation:  30%|▎| 220/743 [54:53<2:06:37, 14.53s/batch, batch_loss=16.7, batch

Validation:  30%|▎| 221/743 [54:53<2:02:32, 14.09s/batch, batch_loss=16.7, batch

Validation:  30%|▎| 221/743 [55:06<2:02:32, 14.09s/batch, batch_loss=10.9, batch

Validation:  30%|▎| 222/743 [55:06<1:59:01, 13.71s/batch, batch_loss=10.9, batch

Validation:  30%|▎| 222/743 [55:19<1:59:01, 13.71s/batch, batch_loss=10.4, batch

Validation:  30%|▎| 223/743 [55:19<1:56:12, 13.41s/batch, batch_loss=10.4, batch

Validation:  30%|▎| 223/743 [55:32<1:56:12, 13.41s/batch, batch_loss=9.99, batch

Validation:  30%|▎| 224/743 [55:32<1:55:24, 13.34s/batch, batch_loss=9.99, batch

Validation:  30%|▎| 224/743 [55:48<1:55:24, 13.34s/batch, batch_loss=4.94e+3, ba

Validation:  30%|▎| 225/743 [55:48<2:02:04, 14.14s/batch, batch_loss=4.94e+3, ba

Validation:  30%|▎| 225/743 [56:02<2:02:04, 14.14s/batch, batch_loss=15.4, batch

Validation:  30%|▎| 226/743 [56:02<2:00:19, 13.96s/batch, batch_loss=15.4, batch

Validation:  30%|▎| 226/743 [56:15<2:00:19, 13.96s/batch, batch_loss=15.6, batch

Validation:  31%|▎| 227/743 [56:15<1:58:39, 13.80s/batch, batch_loss=15.6, batch

Validation:  31%|▎| 227/743 [56:28<1:58:39, 13.80s/batch, batch_loss=16.5, batch

Validation:  31%|▎| 228/743 [56:28<1:57:10, 13.65s/batch, batch_loss=16.5, batch

Validation:  31%|▎| 228/743 [56:43<1:57:10, 13.65s/batch, batch_loss=17.6, batch

Validation:  31%|▎| 229/743 [56:43<1:58:06, 13.79s/batch, batch_loss=17.6, batch

Validation:  31%|▎| 229/743 [56:57<1:58:06, 13.79s/batch, batch_loss=20.1, batch

Validation:  31%|▎| 230/743 [56:57<2:00:32, 14.10s/batch, batch_loss=20.1, batch

Validation:  31%|▎| 230/743 [57:10<2:00:32, 14.10s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [57:10<1:57:19, 13.75s/batch, batch_loss=3.23e+4, ba

Validation:  31%|▎| 231/743 [57:23<1:57:19, 13.75s/batch, batch_loss=17, batch_i

Validation:  31%|▎| 232/743 [57:23<1:55:00, 13.50s/batch, batch_loss=17, batch_i

Validation:  31%|▎| 232/743 [57:36<1:55:00, 13.50s/batch, batch_loss=9.36, batch

Validation:  31%|▎| 233/743 [57:36<1:53:31, 13.36s/batch, batch_loss=9.36, batch

Validation:  31%|▎| 233/743 [57:49<1:53:31, 13.36s/batch, batch_loss=12.1, batch

Validation:  31%|▎| 234/743 [57:49<1:51:41, 13.17s/batch, batch_loss=12.1, batch

Validation:  31%|▎| 234/743 [58:02<1:51:41, 13.17s/batch, batch_loss=15.9, batch

Validation:  32%|▎| 235/743 [58:02<1:51:07, 13.12s/batch, batch_loss=15.9, batch

Validation:  32%|▎| 235/743 [58:15<1:51:07, 13.12s/batch, batch_loss=2.77, batch

Validation:  32%|▎| 236/743 [58:15<1:51:04, 13.15s/batch, batch_loss=2.77, batch

Validation:  32%|▎| 236/743 [58:28<1:51:04, 13.15s/batch, batch_loss=18.5, batch

Validation:  32%|▎| 237/743 [58:28<1:50:12, 13.07s/batch, batch_loss=18.5, batch

Validation:  32%|▎| 237/743 [58:43<1:50:12, 13.07s/batch, batch_loss=14.3, batch

Validation:  32%|▎| 238/743 [58:43<1:55:53, 13.77s/batch, batch_loss=14.3, batch

Validation:  32%|▎| 238/743 [59:00<1:55:53, 13.77s/batch, batch_loss=4.5e+3, bat

Validation:  32%|▎| 239/743 [59:00<2:02:14, 14.55s/batch, batch_loss=4.5e+3, bat

Validation:  32%|▎| 239/743 [59:15<2:02:14, 14.55s/batch, batch_loss=18.5, batch

Validation:  32%|▎| 240/743 [59:15<2:02:58, 14.67s/batch, batch_loss=18.5, batch

Validation:  32%|▎| 240/743 [59:30<2:02:58, 14.67s/batch, batch_loss=17.4, batch

Validation:  32%|▎| 241/743 [59:30<2:02:54, 14.69s/batch, batch_loss=17.4, batch

Validation:  32%|▎| 241/743 [59:43<2:02:54, 14.69s/batch, batch_loss=232, batch_

Validation:  33%|▎| 242/743 [59:43<1:59:51, 14.35s/batch, batch_loss=232, batch_

Validation:  33%|▎| 242/743 [59:56<1:59:51, 14.35s/batch, batch_loss=8.99, batch

Validation:  33%|▎| 243/743 [59:56<1:55:23, 13.85s/batch, batch_loss=8.99, batch

Validation:  33%|▎| 243/743 [1:00:09<1:55:23, 13.85s/batch, batch_loss=12.9, bat

Validation:  33%|▎| 244/743 [1:00:09<1:53:26, 13.64s/batch, batch_loss=12.9, bat

Validation:  33%|▎| 244/743 [1:00:22<1:53:26, 13.64s/batch, batch_loss=21.7, bat

Validation:  33%|▎| 245/743 [1:00:22<1:51:10, 13.40s/batch, batch_loss=21.7, bat

Validation:  33%|▎| 245/743 [1:00:35<1:51:10, 13.40s/batch, batch_loss=6.09, bat

Validation:  33%|▎| 246/743 [1:00:35<1:49:35, 13.23s/batch, batch_loss=6.09, bat

Validation:  33%|▎| 246/743 [1:00:47<1:49:35, 13.23s/batch, batch_loss=14.1, bat

Validation:  33%|▎| 247/743 [1:00:47<1:48:18, 13.10s/batch, batch_loss=14.1, bat

Validation:  33%|▎| 247/743 [1:01:03<1:48:18, 13.10s/batch, batch_loss=36.9, bat

Validation:  33%|▎| 248/743 [1:01:03<1:53:48, 13.79s/batch, batch_loss=36.9, bat

Validation:  33%|▎| 248/743 [1:01:15<1:53:48, 13.79s/batch, batch_loss=12, batch

Validation:  34%|▎| 249/743 [1:01:15<1:50:38, 13.44s/batch, batch_loss=12, batch

Validation:  34%|▎| 249/743 [1:01:29<1:50:38, 13.44s/batch, batch_loss=18.9, bat

Validation:  34%|▎| 250/743 [1:01:29<1:50:04, 13.40s/batch, batch_loss=18.9, bat

Validation:  34%|▎| 250/743 [1:01:43<1:50:04, 13.40s/batch, batch_loss=18.5, bat

Validation:  34%|▎| 251/743 [1:01:43<1:50:50, 13.52s/batch, batch_loss=18.5, bat

Validation:  34%|▎| 251/743 [1:01:56<1:50:50, 13.52s/batch, batch_loss=21.3, bat

Validation:  34%|▎| 252/743 [1:01:56<1:49:48, 13.42s/batch, batch_loss=21.3, bat

Validation:  34%|▎| 252/743 [1:02:08<1:49:48, 13.42s/batch, batch_loss=17.6, bat

Validation:  34%|▎| 253/743 [1:02:08<1:47:58, 13.22s/batch, batch_loss=17.6, bat

Validation:  34%|▎| 253/743 [1:02:22<1:47:58, 13.22s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:02:22<1:48:54, 13.36s/batch, batch_loss=1.15e+4, 

Validation:  34%|▎| 254/743 [1:02:35<1:48:54, 13.36s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:02:35<1:47:17, 13.19s/batch, batch_loss=2.44e+3, 

Validation:  34%|▎| 255/743 [1:02:48<1:47:17, 13.19s/batch, batch_loss=19.6, bat

Validation:  34%|▎| 256/743 [1:02:48<1:46:08, 13.08s/batch, batch_loss=19.6, bat

Validation:  34%|▎| 256/743 [1:03:02<1:46:08, 13.08s/batch, batch_loss=20.7, bat

Validation:  35%|▎| 257/743 [1:03:02<1:48:03, 13.34s/batch, batch_loss=20.7, bat

Validation:  35%|▎| 257/743 [1:03:15<1:48:03, 13.34s/batch, batch_loss=11.8, bat

Validation:  35%|▎| 258/743 [1:03:15<1:48:47, 13.46s/batch, batch_loss=11.8, bat

Validation:  35%|▎| 258/743 [1:03:28<1:48:47, 13.46s/batch, batch_loss=3.13, bat

Validation:  35%|▎| 259/743 [1:03:28<1:46:45, 13.23s/batch, batch_loss=3.13, bat

Validation:  35%|▎| 259/743 [1:03:42<1:46:45, 13.23s/batch, batch_loss=1.56, bat

Validation:  35%|▎| 260/743 [1:03:42<1:47:00, 13.29s/batch, batch_loss=1.56, bat

Validation:  35%|▎| 260/743 [1:03:54<1:47:00, 13.29s/batch, batch_loss=7.75, bat

Validation:  35%|▎| 261/743 [1:03:54<1:44:48, 13.05s/batch, batch_loss=7.75, bat

Validation:  35%|▎| 261/743 [1:04:07<1:44:48, 13.05s/batch, batch_loss=25.4, bat

Validation:  35%|▎| 262/743 [1:04:07<1:45:13, 13.13s/batch, batch_loss=25.4, bat

Validation:  35%|▎| 262/743 [1:04:21<1:45:13, 13.13s/batch, batch_loss=2.72e+3, 

Validation:  35%|▎| 263/743 [1:04:21<1:45:53, 13.24s/batch, batch_loss=2.72e+3, 

Validation:  35%|▎| 263/743 [1:04:34<1:45:53, 13.24s/batch, batch_loss=10.8, bat

Validation:  36%|▎| 264/743 [1:04:34<1:45:29, 13.21s/batch, batch_loss=10.8, bat

Validation:  36%|▎| 264/743 [1:04:50<1:45:29, 13.21s/batch, batch_loss=20.2, bat

Validation:  36%|▎| 265/743 [1:04:50<1:52:52, 14.17s/batch, batch_loss=20.2, bat

Validation:  36%|▎| 265/743 [1:05:03<1:52:52, 14.17s/batch, batch_loss=23.7, bat

Validation:  36%|▎| 266/743 [1:05:03<1:49:51, 13.82s/batch, batch_loss=23.7, bat

Validation:  36%|▎| 266/743 [1:05:16<1:49:51, 13.82s/batch, batch_loss=18.7, bat

Validation:  36%|▎| 267/743 [1:05:16<1:46:28, 13.42s/batch, batch_loss=18.7, bat

Validation:  36%|▎| 267/743 [1:05:29<1:46:28, 13.42s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:05:29<1:45:35, 13.34s/batch, batch_loss=3.01e+3, 

Validation:  36%|▎| 268/743 [1:05:44<1:45:35, 13.34s/batch, batch_loss=35, batch

Validation:  36%|▎| 269/743 [1:05:44<1:49:14, 13.83s/batch, batch_loss=35, batch

Validation:  36%|▎| 269/743 [1:06:05<1:49:14, 13.83s/batch, batch_loss=31.5, bat

Validation:  36%|▎| 270/743 [1:06:05<2:05:41, 15.94s/batch, batch_loss=31.5, bat

Validation:  36%|▎| 270/743 [1:06:24<2:05:41, 15.94s/batch, batch_loss=24.3, bat

Validation:  36%|▎| 271/743 [1:06:24<2:13:59, 17.03s/batch, batch_loss=24.3, bat

Validation:  36%|▎| 271/743 [1:06:37<2:13:59, 17.03s/batch, batch_loss=1.06e+3, 

Validation:  37%|▎| 272/743 [1:06:37<2:03:13, 15.70s/batch, batch_loss=1.06e+3, 

Validation:  37%|▎| 272/743 [1:06:50<2:03:13, 15.70s/batch, batch_loss=16.6, bat

Validation:  37%|▎| 273/743 [1:06:50<1:56:06, 14.82s/batch, batch_loss=16.6, bat

Validation:  37%|▎| 273/743 [1:07:03<1:56:06, 14.82s/batch, batch_loss=20.6, bat

Validation:  37%|▎| 274/743 [1:07:03<1:50:58, 14.20s/batch, batch_loss=20.6, bat

Validation:  37%|▎| 274/743 [1:07:16<1:50:58, 14.20s/batch, batch_loss=17.7, bat

Validation:  37%|▎| 275/743 [1:07:16<1:50:01, 14.11s/batch, batch_loss=17.7, bat

Validation:  37%|▎| 275/743 [1:07:30<1:50:01, 14.11s/batch, batch_loss=12.9, bat

Validation:  37%|▎| 276/743 [1:07:30<1:48:59, 14.00s/batch, batch_loss=12.9, bat

Validation:  37%|▎| 276/743 [1:07:44<1:48:59, 14.00s/batch, batch_loss=24.8, bat

Validation:  37%|▎| 277/743 [1:07:44<1:49:01, 14.04s/batch, batch_loss=24.8, bat

Validation:  37%|▎| 277/743 [1:07:57<1:49:01, 14.04s/batch, batch_loss=19.3, bat

Validation:  37%|▎| 278/743 [1:07:57<1:46:19, 13.72s/batch, batch_loss=19.3, bat

Validation:  37%|▎| 278/743 [1:08:11<1:46:19, 13.72s/batch, batch_loss=8.96, bat

Validation:  38%|▍| 279/743 [1:08:11<1:45:48, 13.68s/batch, batch_loss=8.96, bat

Validation:  38%|▍| 279/743 [1:08:26<1:45:48, 13.68s/batch, batch_loss=13.2, bat

Validation:  38%|▍| 280/743 [1:08:26<1:47:48, 13.97s/batch, batch_loss=13.2, bat

Validation:  38%|▍| 280/743 [1:08:40<1:47:48, 13.97s/batch, batch_loss=17.3, bat

Validation:  38%|▍| 281/743 [1:08:40<1:47:49, 14.00s/batch, batch_loss=17.3, bat

Validation:  38%|▍| 281/743 [1:08:54<1:47:49, 14.00s/batch, batch_loss=20, batch

Validation:  38%|▍| 282/743 [1:08:54<1:49:29, 14.25s/batch, batch_loss=20, batch

Validation:  38%|▍| 282/743 [1:09:13<1:49:29, 14.25s/batch, batch_loss=15.1, bat

Validation:  38%|▍| 283/743 [1:09:13<1:59:22, 15.57s/batch, batch_loss=15.1, bat

Validation:  38%|▍| 283/743 [1:09:29<1:59:22, 15.57s/batch, batch_loss=15.4, bat

Validation:  38%|▍| 284/743 [1:09:29<1:59:24, 15.61s/batch, batch_loss=15.4, bat

Validation:  38%|▍| 284/743 [1:09:43<1:59:24, 15.61s/batch, batch_loss=13.5, bat

Validation:  38%|▍| 285/743 [1:09:43<1:56:17, 15.23s/batch, batch_loss=13.5, bat

Validation:  38%|▍| 285/743 [1:09:56<1:56:17, 15.23s/batch, batch_loss=14.5, bat

Validation:  38%|▍| 286/743 [1:09:56<1:51:35, 14.65s/batch, batch_loss=14.5, bat

Validation:  38%|▍| 286/743 [1:10:10<1:51:35, 14.65s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:10:10<1:48:43, 14.31s/batch, batch_loss=1.19e+4, 

Validation:  39%|▍| 287/743 [1:10:23<1:48:43, 14.31s/batch, batch_loss=20.8, bat

Validation:  39%|▍| 288/743 [1:10:23<1:46:39, 14.06s/batch, batch_loss=20.8, bat

Validation:  39%|▍| 288/743 [1:10:37<1:46:39, 14.06s/batch, batch_loss=19.6, bat

Validation:  39%|▍| 289/743 [1:10:37<1:46:07, 14.03s/batch, batch_loss=19.6, bat

Validation:  39%|▍| 289/743 [1:10:52<1:46:07, 14.03s/batch, batch_loss=483, batc

Validation:  39%|▍| 290/743 [1:10:52<1:46:17, 14.08s/batch, batch_loss=483, batc

Validation:  39%|▍| 290/743 [1:11:09<1:46:17, 14.08s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:11:09<1:53:15, 15.03s/batch, batch_loss=1.51e+3, 

Validation:  39%|▍| 291/743 [1:11:23<1:53:15, 15.03s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:11:23<1:52:03, 14.91s/batch, batch_loss=1.19e+3, 

Validation:  39%|▍| 292/743 [1:11:38<1:52:03, 14.91s/batch, batch_loss=25.9, bat

Validation:  39%|▍| 293/743 [1:11:38<1:50:48, 14.77s/batch, batch_loss=25.9, bat

Validation:  39%|▍| 293/743 [1:11:53<1:50:48, 14.77s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:11:53<1:50:34, 14.78s/batch, batch_loss=1.09e+3, 

Validation:  40%|▍| 294/743 [1:12:07<1:50:34, 14.78s/batch, batch_loss=17.8, bat

Validation:  40%|▍| 295/743 [1:12:07<1:49:23, 14.65s/batch, batch_loss=17.8, bat

Validation:  40%|▍| 295/743 [1:12:22<1:49:23, 14.65s/batch, batch_loss=16.2, bat

Validation:  40%|▍| 296/743 [1:12:22<1:49:25, 14.69s/batch, batch_loss=16.2, bat

Validation:  40%|▍| 296/743 [1:12:37<1:49:25, 14.69s/batch, batch_loss=11.1, bat

Validation:  40%|▍| 297/743 [1:12:37<1:49:11, 14.69s/batch, batch_loss=11.1, bat

Validation:  40%|▍| 297/743 [1:12:51<1:49:11, 14.69s/batch, batch_loss=21.2, bat

Validation:  40%|▍| 298/743 [1:12:51<1:47:32, 14.50s/batch, batch_loss=21.2, bat

Validation:  40%|▍| 298/743 [1:13:08<1:47:32, 14.50s/batch, batch_loss=30.5, bat

Validation:  40%|▍| 299/743 [1:13:08<1:53:26, 15.33s/batch, batch_loss=30.5, bat

Validation:  40%|▍| 299/743 [1:13:22<1:53:26, 15.33s/batch, batch_loss=33.8, bat

Validation:  40%|▍| 300/743 [1:13:22<1:50:01, 14.90s/batch, batch_loss=33.8, bat

Validation:  40%|▍| 300/743 [1:13:36<1:50:01, 14.90s/batch, batch_loss=833, batc

Validation:  41%|▍| 301/743 [1:13:36<1:48:12, 14.69s/batch, batch_loss=833, batc

Validation:  41%|▍| 301/743 [1:13:50<1:48:12, 14.69s/batch, batch_loss=9.76, bat

Validation:  41%|▍| 302/743 [1:13:50<1:47:35, 14.64s/batch, batch_loss=9.76, bat

Validation:  41%|▍| 302/743 [1:14:05<1:47:35, 14.64s/batch, batch_loss=14.7, bat

Validation:  41%|▍| 303/743 [1:14:05<1:47:45, 14.69s/batch, batch_loss=14.7, bat

Validation:  41%|▍| 303/743 [1:14:20<1:47:45, 14.69s/batch, batch_loss=16.2, bat

Validation:  41%|▍| 304/743 [1:14:20<1:46:27, 14.55s/batch, batch_loss=16.2, bat

Validation:  41%|▍| 304/743 [1:14:33<1:46:27, 14.55s/batch, batch_loss=11.6, bat

Validation:  41%|▍| 305/743 [1:14:33<1:44:20, 14.29s/batch, batch_loss=11.6, bat

Validation:  41%|▍| 305/743 [1:14:48<1:44:20, 14.29s/batch, batch_loss=19.3, bat

Validation:  41%|▍| 306/743 [1:14:48<1:44:24, 14.33s/batch, batch_loss=19.3, bat

Validation:  41%|▍| 306/743 [1:15:01<1:44:24, 14.33s/batch, batch_loss=19.1, bat

Validation:  41%|▍| 307/743 [1:15:01<1:43:03, 14.18s/batch, batch_loss=19.1, bat

Validation:  41%|▍| 307/743 [1:15:15<1:43:03, 14.18s/batch, batch_loss=880, batc

Validation:  41%|▍| 308/743 [1:15:15<1:40:35, 13.88s/batch, batch_loss=880, batc

Validation:  41%|▍| 308/743 [1:15:27<1:40:35, 13.88s/batch, batch_loss=24.5, bat

Validation:  42%|▍| 309/743 [1:15:27<1:37:53, 13.53s/batch, batch_loss=24.5, bat

Validation:  42%|▍| 309/743 [1:15:41<1:37:53, 13.53s/batch, batch_loss=17.1, bat

Validation:  42%|▍| 310/743 [1:15:41<1:37:53, 13.57s/batch, batch_loss=17.1, bat

Validation:  42%|▍| 310/743 [1:15:54<1:37:53, 13.57s/batch, batch_loss=17.3, bat

Validation:  42%|▍| 311/743 [1:15:54<1:35:45, 13.30s/batch, batch_loss=17.3, bat

Validation:  42%|▍| 311/743 [1:16:07<1:35:45, 13.30s/batch, batch_loss=16.1, bat

Validation:  42%|▍| 312/743 [1:16:07<1:35:09, 13.25s/batch, batch_loss=16.1, bat

Validation:  42%|▍| 312/743 [1:16:20<1:35:09, 13.25s/batch, batch_loss=7.22, bat

Validation:  42%|▍| 313/743 [1:16:20<1:34:18, 13.16s/batch, batch_loss=7.22, bat

Validation:  42%|▍| 313/743 [1:16:33<1:34:18, 13.16s/batch, batch_loss=11.5, bat

Validation:  42%|▍| 314/743 [1:16:33<1:33:46, 13.11s/batch, batch_loss=11.5, bat

Validation:  42%|▍| 314/743 [1:16:46<1:33:46, 13.11s/batch, batch_loss=19.3, bat

Validation:  42%|▍| 315/743 [1:16:46<1:34:37, 13.27s/batch, batch_loss=19.3, bat

Validation:  42%|▍| 315/743 [1:17:00<1:34:37, 13.27s/batch, batch_loss=19.2, bat

Validation:  43%|▍| 316/743 [1:17:00<1:34:06, 13.22s/batch, batch_loss=19.2, bat

Validation:  43%|▍| 316/743 [1:17:17<1:34:06, 13.22s/batch, batch_loss=21.1, bat

Validation:  43%|▍| 317/743 [1:17:17<1:42:53, 14.49s/batch, batch_loss=21.1, bat

Validation:  43%|▍| 317/743 [1:17:31<1:42:53, 14.49s/batch, batch_loss=13.6, bat

Validation:  43%|▍| 318/743 [1:17:31<1:41:18, 14.30s/batch, batch_loss=13.6, bat

Validation:  43%|▍| 318/743 [1:17:45<1:41:18, 14.30s/batch, batch_loss=19.6, bat

Validation:  43%|▍| 319/743 [1:17:45<1:39:47, 14.12s/batch, batch_loss=19.6, bat

Validation:  43%|▍| 319/743 [1:18:00<1:39:47, 14.12s/batch, batch_loss=17.8, bat

Validation:  43%|▍| 320/743 [1:18:00<1:41:48, 14.44s/batch, batch_loss=17.8, bat

Validation:  43%|▍| 320/743 [1:18:15<1:41:48, 14.44s/batch, batch_loss=16.3, bat

Validation:  43%|▍| 321/743 [1:18:15<1:42:33, 14.58s/batch, batch_loss=16.3, bat

Validation:  43%|▍| 321/743 [1:18:29<1:42:33, 14.58s/batch, batch_loss=14.3, bat

Validation:  43%|▍| 322/743 [1:18:29<1:41:37, 14.48s/batch, batch_loss=14.3, bat

Validation:  43%|▍| 322/743 [1:18:43<1:41:37, 14.48s/batch, batch_loss=17.7, bat

Validation:  43%|▍| 323/743 [1:18:43<1:41:34, 14.51s/batch, batch_loss=17.7, bat

Validation:  43%|▍| 323/743 [1:18:56<1:41:34, 14.51s/batch, batch_loss=295, batc

Validation:  44%|▍| 324/743 [1:18:56<1:38:07, 14.05s/batch, batch_loss=295, batc

Validation:  44%|▍| 324/743 [1:19:10<1:38:07, 14.05s/batch, batch_loss=19, batch

Validation:  44%|▍| 325/743 [1:19:10<1:37:16, 13.96s/batch, batch_loss=19, batch

Validation:  44%|▍| 325/743 [1:19:23<1:37:16, 13.96s/batch, batch_loss=16.7, bat

Validation:  44%|▍| 326/743 [1:19:23<1:35:22, 13.72s/batch, batch_loss=16.7, bat

Validation:  44%|▍| 326/743 [1:19:36<1:35:22, 13.72s/batch, batch_loss=18.5, bat

Validation:  44%|▍| 327/743 [1:19:36<1:33:09, 13.44s/batch, batch_loss=18.5, bat

Validation:  44%|▍| 327/743 [1:19:49<1:33:09, 13.44s/batch, batch_loss=17.2, bat

Validation:  44%|▍| 328/743 [1:19:49<1:31:48, 13.27s/batch, batch_loss=17.2, bat

Validation:  44%|▍| 328/743 [1:20:02<1:31:48, 13.27s/batch, batch_loss=5.95, bat

Validation:  44%|▍| 329/743 [1:20:02<1:31:37, 13.28s/batch, batch_loss=5.95, bat

Validation:  44%|▍| 329/743 [1:20:15<1:31:37, 13.28s/batch, batch_loss=13.9, bat

Validation:  44%|▍| 330/743 [1:20:15<1:30:32, 13.15s/batch, batch_loss=13.9, bat

Validation:  44%|▍| 330/743 [1:20:31<1:30:32, 13.15s/batch, batch_loss=21.7, bat

Validation:  45%|▍| 331/743 [1:20:31<1:35:40, 13.93s/batch, batch_loss=21.7, bat

Validation:  45%|▍| 331/743 [1:20:50<1:35:40, 13.93s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:20:50<1:45:49, 15.45s/batch, batch_loss=1.15e+4, 

Validation:  45%|▍| 332/743 [1:21:06<1:45:49, 15.45s/batch, batch_loss=30.7, bat

Validation:  45%|▍| 333/743 [1:21:06<1:47:39, 15.76s/batch, batch_loss=30.7, bat

Validation:  45%|▍| 333/743 [1:21:22<1:47:39, 15.76s/batch, batch_loss=20.8, bat

Validation:  45%|▍| 334/743 [1:21:22<1:46:54, 15.68s/batch, batch_loss=20.8, bat

Validation:  45%|▍| 334/743 [1:21:35<1:46:54, 15.68s/batch, batch_loss=34.5, bat

Validation:  45%|▍| 335/743 [1:21:35<1:40:45, 14.82s/batch, batch_loss=34.5, bat

Validation:  45%|▍| 335/743 [1:21:48<1:40:45, 14.82s/batch, batch_loss=11.5, bat

Validation:  45%|▍| 336/743 [1:21:48<1:37:08, 14.32s/batch, batch_loss=11.5, bat

Validation:  45%|▍| 336/743 [1:22:00<1:37:08, 14.32s/batch, batch_loss=22.2, bat

Validation:  45%|▍| 337/743 [1:22:00<1:33:24, 13.80s/batch, batch_loss=22.2, bat

Validation:  45%|▍| 337/743 [1:22:13<1:33:24, 13.80s/batch, batch_loss=31.7, bat

Validation:  45%|▍| 338/743 [1:22:13<1:31:25, 13.54s/batch, batch_loss=31.7, bat

Validation:  45%|▍| 338/743 [1:22:28<1:31:25, 13.54s/batch, batch_loss=30, batch

Validation:  46%|▍| 339/743 [1:22:28<1:32:31, 13.74s/batch, batch_loss=30, batch

Validation:  46%|▍| 339/743 [1:22:47<1:32:31, 13.74s/batch, batch_loss=28.7, bat

Validation:  46%|▍| 340/743 [1:22:47<1:42:55, 15.32s/batch, batch_loss=28.7, bat

Validation:  46%|▍| 340/743 [1:22:59<1:42:55, 15.32s/batch, batch_loss=13.5, bat

Validation:  46%|▍| 341/743 [1:22:59<1:37:19, 14.53s/batch, batch_loss=13.5, bat

Validation:  46%|▍| 341/743 [1:23:12<1:37:19, 14.53s/batch, batch_loss=20.8, bat

Validation:  46%|▍| 342/743 [1:23:12<1:33:09, 13.94s/batch, batch_loss=20.8, bat

Validation:  46%|▍| 342/743 [1:23:25<1:33:09, 13.94s/batch, batch_loss=20.5, bat

Validation:  46%|▍| 343/743 [1:23:25<1:31:13, 13.68s/batch, batch_loss=20.5, bat

Validation:  46%|▍| 343/743 [1:23:51<1:31:13, 13.68s/batch, batch_loss=22.9, bat

Validation:  46%|▍| 344/743 [1:23:51<1:55:52, 17.42s/batch, batch_loss=22.9, bat

Validation:  46%|▍| 344/743 [1:24:15<1:55:52, 17.42s/batch, batch_loss=18.6, bat

Validation:  46%|▍| 345/743 [1:24:15<2:09:20, 19.50s/batch, batch_loss=18.6, bat

Validation:  46%|▍| 345/743 [1:24:35<2:09:20, 19.50s/batch, batch_loss=29.9, bat

Validation:  47%|▍| 346/743 [1:24:35<2:09:07, 19.52s/batch, batch_loss=29.9, bat

Validation:  47%|▍| 346/743 [1:24:56<2:09:07, 19.52s/batch, batch_loss=20, batch

Validation:  47%|▍| 347/743 [1:24:56<2:12:34, 20.09s/batch, batch_loss=20, batch

Validation:  47%|▍| 347/743 [1:25:16<2:12:34, 20.09s/batch, batch_loss=27.4, bat

Validation:  47%|▍| 348/743 [1:25:16<2:11:55, 20.04s/batch, batch_loss=27.4, bat

Validation:  47%|▍| 348/743 [1:25:45<2:11:55, 20.04s/batch, batch_loss=22.1, bat

Validation:  47%|▍| 349/743 [1:25:45<2:28:25, 22.60s/batch, batch_loss=22.1, bat

Validation:  47%|▍| 349/743 [1:26:11<2:28:25, 22.60s/batch, batch_loss=19.8, bat

Validation:  47%|▍| 350/743 [1:26:11<2:35:02, 23.67s/batch, batch_loss=19.8, bat

Validation:  47%|▍| 350/743 [1:26:24<2:35:02, 23.67s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:26:24<2:13:42, 20.47s/batch, batch_loss=1.31e+4, 

Validation:  47%|▍| 351/743 [1:26:38<2:13:42, 20.47s/batch, batch_loss=29.7, bat

Validation:  47%|▍| 352/743 [1:26:38<2:00:00, 18.41s/batch, batch_loss=29.7, bat

Validation:  47%|▍| 352/743 [1:26:57<2:00:00, 18.41s/batch, batch_loss=16.9, bat

Validation:  48%|▍| 353/743 [1:26:57<2:00:45, 18.58s/batch, batch_loss=16.9, bat

Validation:  48%|▍| 353/743 [1:27:16<2:00:45, 18.58s/batch, batch_loss=21.6, bat

Validation:  48%|▍| 354/743 [1:27:16<2:02:53, 18.95s/batch, batch_loss=21.6, bat

Validation:  48%|▍| 354/743 [1:27:50<2:02:53, 18.95s/batch, batch_loss=25.9, bat

Validation:  48%|▍| 355/743 [1:27:50<2:30:58, 23.35s/batch, batch_loss=25.9, bat

Validation:  48%|▍| 355/743 [1:28:20<2:30:58, 23.35s/batch, batch_loss=34.4, bat

Validation:  48%|▍| 356/743 [1:28:20<2:42:48, 25.24s/batch, batch_loss=34.4, bat

Validation:  48%|▍| 356/743 [1:28:41<2:42:48, 25.24s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:28:41<2:35:01, 24.10s/batch, batch_loss=5.99e+4, 

Validation:  48%|▍| 357/743 [1:28:54<2:35:01, 24.10s/batch, batch_loss=14.4, bat

Validation:  48%|▍| 358/743 [1:28:54<2:13:03, 20.74s/batch, batch_loss=14.4, bat

Validation:  48%|▍| 358/743 [1:29:07<2:13:03, 20.74s/batch, batch_loss=10.7, bat

Validation:  48%|▍| 359/743 [1:29:07<1:57:34, 18.37s/batch, batch_loss=10.7, bat

Validation:  48%|▍| 359/743 [1:29:20<1:57:34, 18.37s/batch, batch_loss=21.5, bat

Validation:  48%|▍| 360/743 [1:29:20<1:47:19, 16.81s/batch, batch_loss=21.5, bat

Validation:  48%|▍| 360/743 [1:29:33<1:47:19, 16.81s/batch, batch_loss=14.9, bat

Validation:  49%|▍| 361/743 [1:29:33<1:39:48, 15.68s/batch, batch_loss=14.9, bat

Validation:  49%|▍| 361/743 [1:29:46<1:39:48, 15.68s/batch, batch_loss=24.8, bat

Validation:  49%|▍| 362/743 [1:29:46<1:34:34, 14.89s/batch, batch_loss=24.8, bat

Validation:  49%|▍| 362/743 [1:29:59<1:34:34, 14.89s/batch, batch_loss=22.6, bat

Validation:  49%|▍| 363/743 [1:29:59<1:30:18, 14.26s/batch, batch_loss=22.6, bat

Validation:  49%|▍| 363/743 [1:30:12<1:30:18, 14.26s/batch, batch_loss=22.4, bat

Validation:  49%|▍| 364/743 [1:30:12<1:27:21, 13.83s/batch, batch_loss=22.4, bat

Validation:  49%|▍| 364/743 [1:30:24<1:27:21, 13.83s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 365/743 [1:30:24<1:24:58, 13.49s/batch, batch_loss=16.7, bat

Validation:  49%|▍| 365/743 [1:30:37<1:24:58, 13.49s/batch, batch_loss=15.5, bat

Validation:  49%|▍| 366/743 [1:30:37<1:23:29, 13.29s/batch, batch_loss=15.5, bat

Validation:  49%|▍| 366/743 [1:30:56<1:23:29, 13.29s/batch, batch_loss=16.5, bat

Validation:  49%|▍| 367/743 [1:30:56<1:33:22, 14.90s/batch, batch_loss=16.5, bat

Validation:  49%|▍| 367/743 [1:31:15<1:33:22, 14.90s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:31:15<1:41:04, 16.17s/batch, batch_loss=4.83e+3, 

Validation:  50%|▍| 368/743 [1:31:35<1:41:04, 16.17s/batch, batch_loss=17.2, bat

Validation:  50%|▍| 369/743 [1:31:35<1:46:50, 17.14s/batch, batch_loss=17.2, bat

Validation:  50%|▍| 369/743 [1:32:04<1:46:50, 17.14s/batch, batch_loss=26.4, bat

Validation:  50%|▍| 370/743 [1:32:04<2:09:40, 20.86s/batch, batch_loss=26.4, bat

Validation:  50%|▍| 370/743 [1:32:40<2:09:40, 20.86s/batch, batch_loss=19.9, bat

Validation:  50%|▍| 371/743 [1:32:40<2:36:58, 25.32s/batch, batch_loss=19.9, bat

Validation:  50%|▍| 371/743 [1:33:14<2:36:58, 25.32s/batch, batch_loss=19.5, bat

Validation:  50%|▌| 372/743 [1:33:14<2:52:55, 27.97s/batch, batch_loss=19.5, bat

Validation:  50%|▌| 372/743 [1:33:49<2:52:55, 27.97s/batch, batch_loss=23.6, bat

Validation:  50%|▌| 373/743 [1:33:49<3:05:14, 30.04s/batch, batch_loss=23.6, bat

Validation:  50%|▌| 373/743 [1:34:25<3:05:14, 30.04s/batch, batch_loss=16.4, bat

Validation:  50%|▌| 374/743 [1:34:25<3:15:25, 31.78s/batch, batch_loss=16.4, bat

Validation:  50%|▌| 374/743 [1:35:00<3:15:25, 31.78s/batch, batch_loss=8.77, bat

Validation:  50%|▌| 375/743 [1:35:00<3:20:38, 32.71s/batch, batch_loss=8.77, bat

Validation:  50%|▌| 375/743 [1:35:36<3:20:38, 32.71s/batch, batch_loss=30.6, bat

Validation:  51%|▌| 376/743 [1:35:36<3:26:18, 33.73s/batch, batch_loss=30.6, bat

Validation:  51%|▌| 376/743 [1:36:18<3:26:18, 33.73s/batch, batch_loss=10.3, bat

Validation:  51%|▌| 377/743 [1:36:18<3:42:27, 36.47s/batch, batch_loss=10.3, bat

Validation:  51%|▌| 377/743 [1:36:56<3:42:27, 36.47s/batch, batch_loss=19.7, bat

Validation:  51%|▌| 378/743 [1:36:56<3:43:41, 36.77s/batch, batch_loss=19.7, bat

Validation:  51%|▌| 378/743 [1:37:34<3:43:41, 36.77s/batch, batch_loss=7.92, bat

Validation:  51%|▌| 379/743 [1:37:34<3:44:44, 37.05s/batch, batch_loss=7.92, bat

Validation:  51%|▌| 379/743 [1:38:09<3:44:44, 37.05s/batch, batch_loss=7.27, bat

Validation:  51%|▌| 380/743 [1:38:09<3:40:28, 36.44s/batch, batch_loss=7.27, bat

Validation:  51%|▌| 380/743 [1:38:46<3:40:28, 36.44s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:38:46<3:41:06, 36.65s/batch, batch_loss=7.04e+4, 

Validation:  51%|▌| 381/743 [1:39:20<3:41:06, 36.65s/batch, batch_loss=915, batc

Validation:  51%|▌| 382/743 [1:39:20<3:36:47, 36.03s/batch, batch_loss=915, batc

Validation:  51%|▌| 382/743 [1:39:56<3:36:47, 36.03s/batch, batch_loss=208, batc

Validation:  52%|▌| 383/743 [1:39:56<3:34:33, 35.76s/batch, batch_loss=208, batc

Validation:  52%|▌| 383/743 [1:40:31<3:34:33, 35.76s/batch, batch_loss=281, batc

Validation:  52%|▌| 384/743 [1:40:31<3:32:44, 35.56s/batch, batch_loss=281, batc

Validation:  52%|▌| 384/743 [1:41:07<3:32:44, 35.56s/batch, batch_loss=18.9, bat

Validation:  52%|▌| 385/743 [1:41:07<3:34:03, 35.88s/batch, batch_loss=18.9, bat

Validation:  52%|▌| 385/743 [1:41:43<3:34:03, 35.88s/batch, batch_loss=9.94, bat

Validation:  52%|▌| 386/743 [1:41:43<3:33:22, 35.86s/batch, batch_loss=9.94, bat

Validation:  52%|▌| 386/743 [1:42:18<3:33:22, 35.86s/batch, batch_loss=7.15, bat

Validation:  52%|▌| 387/743 [1:42:18<3:30:27, 35.47s/batch, batch_loss=7.15, bat

Validation:  52%|▌| 387/743 [1:42:52<3:30:27, 35.47s/batch, batch_loss=15.4, bat

Validation:  52%|▌| 388/743 [1:42:52<3:28:02, 35.16s/batch, batch_loss=15.4, bat

Validation:  52%|▌| 388/743 [1:43:30<3:28:02, 35.16s/batch, batch_loss=11.2, bat

Validation:  52%|▌| 389/743 [1:43:30<3:31:43, 35.89s/batch, batch_loss=11.2, bat

Validation:  52%|▌| 389/743 [1:44:04<3:31:43, 35.89s/batch, batch_loss=17.5, bat

Validation:  52%|▌| 390/743 [1:44:04<3:29:00, 35.53s/batch, batch_loss=17.5, bat

Validation:  52%|▌| 390/743 [1:44:39<3:29:00, 35.53s/batch, batch_loss=13.3, bat

Validation:  53%|▌| 391/743 [1:44:39<3:27:44, 35.41s/batch, batch_loss=13.3, bat

Validation:  53%|▌| 391/743 [1:45:16<3:27:44, 35.41s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 392/743 [1:45:16<3:28:37, 35.66s/batch, batch_loss=13.9, bat

Validation:  53%|▌| 392/743 [1:45:51<3:28:37, 35.66s/batch, batch_loss=17.2, bat

Validation:  53%|▌| 393/743 [1:45:51<3:27:57, 35.65s/batch, batch_loss=17.2, bat

Validation:  53%|▌| 393/743 [1:46:26<3:27:57, 35.65s/batch, batch_loss=16.1, bat

Validation:  53%|▌| 394/743 [1:46:26<3:26:12, 35.45s/batch, batch_loss=16.1, bat

Validation:  53%|▌| 394/743 [1:47:04<3:26:12, 35.45s/batch, batch_loss=10.1, bat

Validation:  53%|▌| 395/743 [1:47:04<3:29:21, 36.10s/batch, batch_loss=10.1, bat

Validation:  53%|▌| 395/743 [1:47:39<3:29:21, 36.10s/batch, batch_loss=15.8, bat

Validation:  53%|▌| 396/743 [1:47:39<3:27:01, 35.80s/batch, batch_loss=15.8, bat

Validation:  53%|▌| 396/743 [1:48:14<3:27:01, 35.80s/batch, batch_loss=10.1, bat

Validation:  53%|▌| 397/743 [1:48:14<3:25:32, 35.64s/batch, batch_loss=10.1, bat

Validation:  53%|▌| 397/743 [1:48:50<3:25:32, 35.64s/batch, batch_loss=17.4, bat

Validation:  54%|▌| 398/743 [1:48:50<3:24:53, 35.63s/batch, batch_loss=17.4, bat

Validation:  54%|▌| 398/743 [1:49:28<3:24:53, 35.63s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 399/743 [1:49:28<3:29:09, 36.48s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 399/743 [1:50:03<3:29:09, 36.48s/batch, batch_loss=20.4, bat

Validation:  54%|▌| 400/743 [1:50:03<3:25:58, 36.03s/batch, batch_loss=20.4, bat

Validation:  54%|▌| 400/743 [1:50:38<3:25:58, 36.03s/batch, batch_loss=16.7, bat

Validation:  54%|▌| 401/743 [1:50:38<3:23:46, 35.75s/batch, batch_loss=16.7, bat

Validation:  54%|▌| 401/743 [1:51:13<3:23:46, 35.75s/batch, batch_loss=6.36, bat

Validation:  54%|▌| 402/743 [1:51:13<3:21:27, 35.45s/batch, batch_loss=6.36, bat

Validation:  54%|▌| 402/743 [1:51:51<3:21:27, 35.45s/batch, batch_loss=15.5, bat

Validation:  54%|▌| 403/743 [1:51:51<3:24:22, 36.07s/batch, batch_loss=15.5, bat

Validation:  54%|▌| 403/743 [1:52:25<3:24:22, 36.07s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 404/743 [1:52:25<3:21:24, 35.65s/batch, batch_loss=13.5, bat

Validation:  54%|▌| 404/743 [1:53:00<3:21:24, 35.65s/batch, batch_loss=9.3, batc

Validation:  55%|▌| 405/743 [1:53:00<3:19:14, 35.37s/batch, batch_loss=9.3, batc

Validation:  55%|▌| 405/743 [1:53:36<3:19:14, 35.37s/batch, batch_loss=12.1, bat

Validation:  55%|▌| 406/743 [1:53:36<3:19:18, 35.48s/batch, batch_loss=12.1, bat

Validation:  55%|▌| 406/743 [1:54:11<3:19:18, 35.48s/batch, batch_loss=16.5, bat

Validation:  55%|▌| 407/743 [1:54:11<3:18:24, 35.43s/batch, batch_loss=16.5, bat

Validation:  55%|▌| 407/743 [1:54:46<3:18:24, 35.43s/batch, batch_loss=19.7, bat

Validation:  55%|▌| 408/743 [1:54:46<3:17:27, 35.37s/batch, batch_loss=19.7, bat

Validation:  55%|▌| 408/743 [1:55:21<3:17:27, 35.37s/batch, batch_loss=11.1, bat

Validation:  55%|▌| 409/743 [1:55:21<3:15:38, 35.15s/batch, batch_loss=11.1, bat

Validation:  55%|▌| 409/743 [1:55:54<3:15:38, 35.15s/batch, batch_loss=16.2, bat

Validation:  55%|▌| 410/743 [1:55:54<3:12:03, 34.61s/batch, batch_loss=16.2, bat

Validation:  55%|▌| 410/743 [1:56:30<3:12:03, 34.61s/batch, batch_loss=19.2, bat

Validation:  55%|▌| 411/743 [1:56:30<3:12:39, 34.82s/batch, batch_loss=19.2, bat

Validation:  55%|▌| 411/743 [1:57:05<3:12:39, 34.82s/batch, batch_loss=15.2, bat

Validation:  55%|▌| 412/743 [1:57:05<3:13:32, 35.08s/batch, batch_loss=15.2, bat

Validation:  55%|▌| 412/743 [1:57:42<3:13:32, 35.08s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [1:57:42<3:15:06, 35.47s/batch, batch_loss=1.93e+3, 

Validation:  56%|▌| 413/743 [1:58:16<3:15:06, 35.47s/batch, batch_loss=21.5, bat

Validation:  56%|▌| 414/743 [1:58:16<3:12:02, 35.02s/batch, batch_loss=21.5, bat

Validation:  56%|▌| 414/743 [1:58:50<3:12:02, 35.02s/batch, batch_loss=24.7, bat

Validation:  56%|▌| 415/743 [1:58:50<3:10:54, 34.92s/batch, batch_loss=24.7, bat

Validation:  56%|▌| 415/743 [1:59:27<3:10:54, 34.92s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [1:59:27<3:13:31, 35.51s/batch, batch_loss=6.47e+3, 

Validation:  56%|▌| 416/743 [2:00:03<3:13:31, 35.51s/batch, batch_loss=15.9, bat

Validation:  56%|▌| 417/743 [2:00:03<3:12:43, 35.47s/batch, batch_loss=15.9, bat

Validation:  56%|▌| 417/743 [2:00:37<3:12:43, 35.47s/batch, batch_loss=14.5, bat

Validation:  56%|▌| 418/743 [2:00:37<3:11:05, 35.28s/batch, batch_loss=14.5, bat

Validation:  56%|▌| 418/743 [2:01:14<3:11:05, 35.28s/batch, batch_loss=16.4, bat

Validation:  56%|▌| 419/743 [2:01:14<3:12:07, 35.58s/batch, batch_loss=16.4, bat

Validation:  56%|▌| 419/743 [2:01:49<3:12:07, 35.58s/batch, batch_loss=14.4, bat

Validation:  57%|▌| 420/743 [2:01:49<3:11:40, 35.61s/batch, batch_loss=14.4, bat

Validation:  57%|▌| 420/743 [2:02:11<3:11:40, 35.61s/batch, batch_loss=30.2, bat

Validation:  57%|▌| 421/743 [2:02:11<2:48:23, 31.38s/batch, batch_loss=30.2, bat

Validation:  57%|▌| 421/743 [2:02:25<2:48:23, 31.38s/batch, batch_loss=8.73, bat

Validation:  57%|▌| 422/743 [2:02:25<2:19:27, 26.07s/batch, batch_loss=8.73, bat

Validation:  57%|▌| 422/743 [2:02:39<2:19:27, 26.07s/batch, batch_loss=23.5, bat

Validation:  57%|▌| 423/743 [2:02:39<2:00:36, 22.61s/batch, batch_loss=23.5, bat

Validation:  57%|▌| 423/743 [2:02:54<2:00:36, 22.61s/batch, batch_loss=321, batc

Validation:  57%|▌| 424/743 [2:02:54<1:48:08, 20.34s/batch, batch_loss=321, batc

Validation:  57%|▌| 424/743 [2:03:09<1:48:08, 20.34s/batch, batch_loss=24.3, bat

Validation:  57%|▌| 425/743 [2:03:09<1:38:30, 18.59s/batch, batch_loss=24.3, bat

Validation:  57%|▌| 425/743 [2:03:24<1:38:30, 18.59s/batch, batch_loss=21.7, bat

Validation:  57%|▌| 426/743 [2:03:24<1:32:25, 17.49s/batch, batch_loss=21.7, bat

Validation:  57%|▌| 426/743 [2:03:38<1:32:25, 17.49s/batch, batch_loss=19.6, bat

Validation:  57%|▌| 427/743 [2:03:38<1:26:42, 16.46s/batch, batch_loss=19.6, bat

Validation:  57%|▌| 427/743 [2:03:52<1:26:42, 16.46s/batch, batch_loss=5.28e+3, 

Validation:  58%|▌| 428/743 [2:03:52<1:23:44, 15.95s/batch, batch_loss=5.28e+3, 

Validation:  58%|▌| 428/743 [2:04:07<1:23:44, 15.95s/batch, batch_loss=18.1, bat

Validation:  58%|▌| 429/743 [2:04:07<1:20:29, 15.38s/batch, batch_loss=18.1, bat

Validation:  58%|▌| 429/743 [2:04:20<1:20:29, 15.38s/batch, batch_loss=5.39e+3, 

Validation:  58%|▌| 430/743 [2:04:20<1:17:22, 14.83s/batch, batch_loss=5.39e+3, 

Validation:  58%|▌| 430/743 [2:04:33<1:17:22, 14.83s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [2:04:33<1:14:18, 14.29s/batch, batch_loss=1.4e+4, b

Validation:  58%|▌| 431/743 [2:04:49<1:14:18, 14.29s/batch, batch_loss=956, batc

Validation:  58%|▌| 432/743 [2:04:49<1:17:09, 14.88s/batch, batch_loss=956, batc

Validation:  58%|▌| 432/743 [2:05:03<1:17:09, 14.88s/batch, batch_loss=14.7, bat

Validation:  58%|▌| 433/743 [2:05:03<1:15:02, 14.52s/batch, batch_loss=14.7, bat

Validation:  58%|▌| 433/743 [2:05:17<1:15:02, 14.52s/batch, batch_loss=10.1, bat

Validation:  58%|▌| 434/743 [2:05:17<1:13:53, 14.35s/batch, batch_loss=10.1, bat

Validation:  58%|▌| 434/743 [2:05:30<1:13:53, 14.35s/batch, batch_loss=14.3, bat

Validation:  59%|▌| 435/743 [2:05:30<1:11:06, 13.85s/batch, batch_loss=14.3, bat

Validation:  59%|▌| 435/743 [2:05:42<1:11:06, 13.85s/batch, batch_loss=12, batch

Validation:  59%|▌| 436/743 [2:05:42<1:09:09, 13.52s/batch, batch_loss=12, batch

Validation:  59%|▌| 436/743 [2:05:54<1:09:09, 13.52s/batch, batch_loss=23.1, bat

Validation:  59%|▌| 437/743 [2:05:54<1:06:39, 13.07s/batch, batch_loss=23.1, bat

Validation:  59%|▌| 437/743 [2:06:14<1:06:39, 13.07s/batch, batch_loss=972, batc

Validation:  59%|▌| 438/743 [2:06:14<1:16:36, 15.07s/batch, batch_loss=972, batc

Validation:  59%|▌| 438/743 [2:06:46<1:16:36, 15.07s/batch, batch_loss=899, batc

Validation:  59%|▌| 439/743 [2:06:46<1:42:22, 20.21s/batch, batch_loss=899, batc

Validation:  59%|▌| 439/743 [2:07:18<1:42:22, 20.21s/batch, batch_loss=17.3, bat

Validation:  59%|▌| 440/743 [2:07:18<1:59:06, 23.59s/batch, batch_loss=17.3, bat

Validation:  59%|▌| 440/743 [2:07:49<1:59:06, 23.59s/batch, batch_loss=14.1, bat

Validation:  59%|▌| 441/743 [2:07:49<2:10:10, 25.86s/batch, batch_loss=14.1, bat

Validation:  59%|▌| 441/743 [2:08:21<2:10:10, 25.86s/batch, batch_loss=14.7, bat

Validation:  59%|▌| 442/743 [2:08:21<2:19:02, 27.72s/batch, batch_loss=14.7, bat

Validation:  59%|▌| 442/743 [2:08:52<2:19:02, 27.72s/batch, batch_loss=11.2, bat

Validation:  60%|▌| 443/743 [2:08:52<2:23:29, 28.70s/batch, batch_loss=11.2, bat

Validation:  60%|▌| 443/743 [2:09:25<2:23:29, 28.70s/batch, batch_loss=15.7, bat

Validation:  60%|▌| 444/743 [2:09:25<2:29:08, 29.93s/batch, batch_loss=15.7, bat

Validation:  60%|▌| 444/743 [2:09:57<2:29:08, 29.93s/batch, batch_loss=7.8, batc

Validation:  60%|▌| 445/743 [2:09:57<2:31:30, 30.50s/batch, batch_loss=7.8, batc

Validation:  60%|▌| 445/743 [2:10:18<2:31:30, 30.50s/batch, batch_loss=15.6, bat

Validation:  60%|▌| 446/743 [2:10:18<2:17:47, 27.84s/batch, batch_loss=15.6, bat

Validation:  60%|▌| 446/743 [2:10:31<2:17:47, 27.84s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [2:10:31<1:54:33, 23.22s/batch, batch_loss=6.84e+3, 

Validation:  60%|▌| 447/743 [2:10:43<1:54:33, 23.22s/batch, batch_loss=5.49, bat

Validation:  60%|▌| 448/743 [2:10:43<1:38:12, 19.98s/batch, batch_loss=5.49, bat

Validation:  60%|▌| 448/743 [2:10:56<1:38:12, 19.98s/batch, batch_loss=13.8, bat

Validation:  60%|▌| 449/743 [2:10:56<1:27:20, 17.82s/batch, batch_loss=13.8, bat

Validation:  60%|▌| 449/743 [2:11:11<1:27:20, 17.82s/batch, batch_loss=16.9, bat

Validation:  61%|▌| 450/743 [2:11:11<1:22:55, 16.98s/batch, batch_loss=16.9, bat

Validation:  61%|▌| 450/743 [2:11:23<1:22:55, 16.98s/batch, batch_loss=12.2, bat

Validation:  61%|▌| 451/743 [2:11:23<1:15:53, 15.59s/batch, batch_loss=12.2, bat

Validation:  61%|▌| 451/743 [2:11:36<1:15:53, 15.59s/batch, batch_loss=19.1, bat

Validation:  61%|▌| 452/743 [2:11:36<1:12:02, 14.85s/batch, batch_loss=19.1, bat

Validation:  61%|▌| 452/743 [2:11:49<1:12:02, 14.85s/batch, batch_loss=12.8, bat

Validation:  61%|▌| 453/743 [2:11:49<1:08:40, 14.21s/batch, batch_loss=12.8, bat

Validation:  61%|▌| 453/743 [2:12:02<1:08:40, 14.21s/batch, batch_loss=6.03, bat

Validation:  61%|▌| 454/743 [2:12:02<1:06:28, 13.80s/batch, batch_loss=6.03, bat

Validation:  61%|▌| 454/743 [2:12:15<1:06:28, 13.80s/batch, batch_loss=9.23, bat

Validation:  61%|▌| 455/743 [2:12:15<1:04:25, 13.42s/batch, batch_loss=9.23, bat

Validation:  61%|▌| 455/743 [2:12:27<1:04:25, 13.42s/batch, batch_loss=7.72, bat

Validation:  61%|▌| 456/743 [2:12:27<1:03:22, 13.25s/batch, batch_loss=7.72, bat

Validation:  61%|▌| 456/743 [2:12:40<1:03:22, 13.25s/batch, batch_loss=14.9, bat

Validation:  62%|▌| 457/743 [2:12:40<1:02:03, 13.02s/batch, batch_loss=14.9, bat

Validation:  62%|▌| 457/743 [2:12:53<1:02:03, 13.02s/batch, batch_loss=26.6, bat

Validation:  62%|▌| 458/743 [2:12:53<1:01:33, 12.96s/batch, batch_loss=26.6, bat

Validation:  62%|▌| 458/743 [2:13:08<1:01:33, 12.96s/batch, batch_loss=12.8, bat

Validation:  62%|▌| 459/743 [2:13:08<1:04:57, 13.73s/batch, batch_loss=12.8, bat

Validation:  62%|▌| 459/743 [2:13:21<1:04:57, 13.73s/batch, batch_loss=18.8, bat

Validation:  62%|▌| 460/743 [2:13:21<1:02:48, 13.31s/batch, batch_loss=18.8, bat

Validation:  62%|▌| 460/743 [2:13:34<1:02:48, 13.31s/batch, batch_loss=15, batch

Validation:  62%|▌| 461/743 [2:13:34<1:02:36, 13.32s/batch, batch_loss=15, batch

Validation:  62%|▌| 461/743 [2:14:01<1:02:36, 13.32s/batch, batch_loss=13.4, bat

Validation:  62%|▌| 462/743 [2:14:01<1:21:46, 17.46s/batch, batch_loss=13.4, bat

Validation:  62%|▌| 462/743 [2:14:36<1:21:46, 17.46s/batch, batch_loss=11.3, bat

Validation:  62%|▌| 463/743 [2:14:36<1:45:26, 22.59s/batch, batch_loss=11.3, bat

Validation:  62%|▌| 463/743 [2:15:10<1:45:26, 22.59s/batch, batch_loss=1.34e+4, 

Validation:  62%|▌| 464/743 [2:15:10<2:01:54, 26.22s/batch, batch_loss=1.34e+4, 

Validation:  62%|▌| 464/743 [2:15:45<2:01:54, 26.22s/batch, batch_loss=17.8, bat

Validation:  63%|▋| 465/743 [2:15:45<2:13:23, 28.79s/batch, batch_loss=17.8, bat

Validation:  63%|▋| 465/743 [2:16:20<2:13:23, 28.79s/batch, batch_loss=14.4, bat

Validation:  63%|▋| 466/743 [2:16:20<2:21:16, 30.60s/batch, batch_loss=14.4, bat

Validation:  63%|▋| 466/743 [2:16:55<2:21:16, 30.60s/batch, batch_loss=24.3, bat

Validation:  63%|▋| 467/743 [2:16:55<2:26:57, 31.95s/batch, batch_loss=24.3, bat

Validation:  63%|▋| 467/743 [2:17:32<2:26:57, 31.95s/batch, batch_loss=13.4, bat

Validation:  63%|▋| 468/743 [2:17:32<2:33:27, 33.48s/batch, batch_loss=13.4, bat

Validation:  63%|▋| 468/743 [2:18:07<2:33:27, 33.48s/batch, batch_loss=19.6, bat

Validation:  63%|▋| 469/743 [2:18:07<2:35:24, 34.03s/batch, batch_loss=19.6, bat

Validation:  63%|▋| 469/743 [2:18:21<2:35:24, 34.03s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [2:18:21<2:06:47, 27.86s/batch, batch_loss=6.35e+4, 

Validation:  63%|▋| 470/743 [2:18:34<2:06:47, 27.86s/batch, batch_loss=13.3, bat

Validation:  63%|▋| 471/743 [2:18:34<1:45:56, 23.37s/batch, batch_loss=13.3, bat

Validation:  63%|▋| 471/743 [2:18:47<1:45:56, 23.37s/batch, batch_loss=21.5, bat

Validation:  64%|▋| 472/743 [2:18:47<1:31:15, 20.21s/batch, batch_loss=21.5, bat

Validation:  64%|▋| 472/743 [2:18:59<1:31:15, 20.21s/batch, batch_loss=585, batc

Validation:  64%|▋| 473/743 [2:18:59<1:20:56, 17.99s/batch, batch_loss=585, batc

Validation:  64%|▋| 473/743 [2:19:12<1:20:56, 17.99s/batch, batch_loss=16.7, bat

Validation:  64%|▋| 474/743 [2:19:12<1:13:25, 16.38s/batch, batch_loss=16.7, bat

Validation:  64%|▋| 474/743 [2:19:26<1:13:25, 16.38s/batch, batch_loss=18.5, bat

Validation:  64%|▋| 475/743 [2:19:26<1:10:12, 15.72s/batch, batch_loss=18.5, bat

Validation:  64%|▋| 475/743 [2:19:41<1:10:12, 15.72s/batch, batch_loss=8.91, bat

Validation:  64%|▋| 476/743 [2:19:41<1:08:49, 15.47s/batch, batch_loss=8.91, bat

Validation:  64%|▋| 476/743 [2:19:55<1:08:49, 15.47s/batch, batch_loss=13.3, bat

Validation:  64%|▋| 477/743 [2:19:55<1:06:44, 15.05s/batch, batch_loss=13.3, bat

Validation:  64%|▋| 477/743 [2:20:10<1:06:44, 15.05s/batch, batch_loss=2.46e+3, 

Validation:  64%|▋| 478/743 [2:20:10<1:06:21, 15.03s/batch, batch_loss=2.46e+3, 

Validation:  64%|▋| 478/743 [2:20:25<1:06:21, 15.03s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [2:20:25<1:05:39, 14.92s/batch, batch_loss=2.08e+4, 

Validation:  64%|▋| 479/743 [2:20:40<1:05:39, 14.92s/batch, batch_loss=10.1, bat

Validation:  65%|▋| 480/743 [2:20:40<1:05:20, 14.91s/batch, batch_loss=10.1, bat

Validation:  65%|▋| 480/743 [2:20:54<1:05:20, 14.91s/batch, batch_loss=11.4, bat

Validation:  65%|▋| 481/743 [2:20:54<1:04:42, 14.82s/batch, batch_loss=11.4, bat

Validation:  65%|▋| 481/743 [2:21:12<1:04:42, 14.82s/batch, batch_loss=6.96e+3, 

Validation:  65%|▋| 482/743 [2:21:12<1:08:16, 15.70s/batch, batch_loss=6.96e+3, 

Validation:  65%|▋| 482/743 [2:21:27<1:08:16, 15.70s/batch, batch_loss=17.6, bat

Validation:  65%|▋| 483/743 [2:21:27<1:07:46, 15.64s/batch, batch_loss=17.6, bat

Validation:  65%|▋| 483/743 [2:21:42<1:07:46, 15.64s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:21:42<1:06:21, 15.37s/batch, batch_loss=2.31e+4, 

Validation:  65%|▋| 484/743 [2:21:58<1:06:21, 15.37s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:21:58<1:06:04, 15.36s/batch, batch_loss=3.13e+4, 

Validation:  65%|▋| 485/743 [2:22:12<1:06:04, 15.36s/batch, batch_loss=15.1, bat

Validation:  65%|▋| 486/743 [2:22:12<1:04:57, 15.17s/batch, batch_loss=15.1, bat

Validation:  65%|▋| 486/743 [2:22:27<1:04:57, 15.17s/batch, batch_loss=34.1, bat

Validation:  66%|▋| 487/743 [2:22:27<1:04:38, 15.15s/batch, batch_loss=34.1, bat

Validation:  66%|▋| 487/743 [2:22:45<1:04:38, 15.15s/batch, batch_loss=22.8, bat

Validation:  66%|▋| 488/743 [2:22:45<1:07:11, 15.81s/batch, batch_loss=22.8, bat

Validation:  66%|▋| 488/743 [2:23:00<1:07:11, 15.81s/batch, batch_loss=11.4, bat

Validation:  66%|▋| 489/743 [2:23:00<1:05:37, 15.50s/batch, batch_loss=11.4, bat

Validation:  66%|▋| 489/743 [2:23:15<1:05:37, 15.50s/batch, batch_loss=19, batch

Validation:  66%|▋| 490/743 [2:23:15<1:04:49, 15.37s/batch, batch_loss=19, batch

Validation:  66%|▋| 490/743 [2:23:30<1:04:49, 15.37s/batch, batch_loss=16.7, bat

Validation:  66%|▋| 491/743 [2:23:30<1:04:15, 15.30s/batch, batch_loss=16.7, bat

Validation:  66%|▋| 491/743 [2:23:46<1:04:15, 15.30s/batch, batch_loss=1.04e+3, 

Validation:  66%|▋| 492/743 [2:23:46<1:04:44, 15.47s/batch, batch_loss=1.04e+3, 

Validation:  66%|▋| 492/743 [2:24:01<1:04:44, 15.47s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:24:01<1:03:48, 15.31s/batch, batch_loss=1.43e+4, 

Validation:  66%|▋| 493/743 [2:24:16<1:03:48, 15.31s/batch, batch_loss=8.6, batc

Validation:  66%|▋| 494/743 [2:24:16<1:04:18, 15.50s/batch, batch_loss=8.6, batc

Validation:  66%|▋| 494/743 [2:24:35<1:04:18, 15.50s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:24:35<1:07:42, 16.38s/batch, batch_loss=1.18e+4, 

Validation:  67%|▋| 495/743 [2:24:50<1:07:42, 16.38s/batch, batch_loss=16.3, bat

Validation:  67%|▋| 496/743 [2:24:50<1:05:58, 16.02s/batch, batch_loss=16.3, bat

Validation:  67%|▋| 496/743 [2:25:05<1:05:58, 16.02s/batch, batch_loss=13.4, bat

Validation:  67%|▋| 497/743 [2:25:05<1:04:15, 15.67s/batch, batch_loss=13.4, bat

Validation:  67%|▋| 497/743 [2:25:20<1:04:15, 15.67s/batch, batch_loss=13.8, bat

Validation:  67%|▋| 498/743 [2:25:20<1:03:21, 15.52s/batch, batch_loss=13.8, bat

Validation:  67%|▋| 498/743 [2:25:35<1:03:21, 15.52s/batch, batch_loss=4.62, bat

Validation:  67%|▋| 499/743 [2:25:35<1:02:16, 15.31s/batch, batch_loss=4.62, bat

Validation:  67%|▋| 499/743 [2:25:50<1:02:16, 15.31s/batch, batch_loss=2.51e+4, 

Validation:  67%|▋| 500/743 [2:25:50<1:01:51, 15.27s/batch, batch_loss=2.51e+4, 

Validation:  67%|▋| 500/743 [2:26:06<1:01:51, 15.27s/batch, batch_loss=19, batch

Validation:  67%|▋| 501/743 [2:26:06<1:01:52, 15.34s/batch, batch_loss=19, batch

Validation:  67%|▋| 501/743 [2:26:21<1:01:52, 15.34s/batch, batch_loss=3.15e+3, 

Validation:  68%|▋| 502/743 [2:26:21<1:01:15, 15.25s/batch, batch_loss=3.15e+3, 

Validation:  68%|▋| 502/743 [2:26:37<1:01:15, 15.25s/batch, batch_loss=14.9, bat

Validation:  68%|▋| 503/743 [2:26:37<1:01:48, 15.45s/batch, batch_loss=14.9, bat

Validation:  68%|▋| 503/743 [2:26:52<1:01:48, 15.45s/batch, batch_loss=11.4, bat

Validation:  68%|▋| 504/743 [2:26:52<1:01:16, 15.38s/batch, batch_loss=11.4, bat

Validation:  68%|▋| 504/743 [2:27:07<1:01:16, 15.38s/batch, batch_loss=21.5, bat

Validation:  68%|▋| 505/743 [2:27:07<1:00:37, 15.28s/batch, batch_loss=21.5, bat

Validation:  68%|▋| 505/743 [2:27:22<1:00:37, 15.28s/batch, batch_loss=2.84e+3, 

Validation:  68%|▋| 506/743 [2:27:22<59:43, 15.12s/batch, batch_loss=2.84e+3, ba

Validation:  68%|▋| 506/743 [2:27:37<59:43, 15.12s/batch, batch_loss=1.99e+3, ba

Validation:  68%|▋| 507/743 [2:27:37<1:00:04, 15.27s/batch, batch_loss=1.99e+3, 

Validation:  68%|▋| 507/743 [2:27:51<1:00:04, 15.27s/batch, batch_loss=8.36e+3, 

Validation:  68%|▋| 508/743 [2:27:51<58:02, 14.82s/batch, batch_loss=8.36e+3, ba

Validation:  68%|▋| 508/743 [2:28:05<58:02, 14.82s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:28:05<56:38, 14.52s/batch, batch_loss=8.47e+3, ba

Validation:  69%|▋| 509/743 [2:28:18<56:38, 14.52s/batch, batch_loss=12, batch_i

Validation:  69%|▋| 510/743 [2:28:18<55:19, 14.25s/batch, batch_loss=12, batch_i

Validation:  69%|▋| 510/743 [2:28:34<55:19, 14.25s/batch, batch_loss=18.4, batch

Validation:  69%|▋| 511/743 [2:28:34<56:04, 14.50s/batch, batch_loss=18.4, batch

Validation:  69%|▋| 511/743 [2:28:56<56:04, 14.50s/batch, batch_loss=15.6, batch

Validation:  69%|▋| 512/743 [2:28:56<1:05:25, 16.99s/batch, batch_loss=15.6, bat

Validation:  69%|▋| 512/743 [2:29:33<1:05:25, 16.99s/batch, batch_loss=16.9, bat

Validation:  69%|▋| 513/743 [2:29:33<1:27:14, 22.76s/batch, batch_loss=16.9, bat

Validation:  69%|▋| 513/743 [2:30:09<1:27:14, 22.76s/batch, batch_loss=14.9, bat

Validation:  69%|▋| 514/743 [2:30:09<1:42:03, 26.74s/batch, batch_loss=14.9, bat

Validation:  69%|▋| 514/743 [2:30:45<1:42:03, 26.74s/batch, batch_loss=12.4, bat

Validation:  69%|▋| 515/743 [2:30:45<1:53:07, 29.77s/batch, batch_loss=12.4, bat

Validation:  69%|▋| 515/743 [2:31:22<1:53:07, 29.77s/batch, batch_loss=13.6, bat

Validation:  69%|▋| 516/743 [2:31:22<2:00:34, 31.87s/batch, batch_loss=13.6, bat

Validation:  69%|▋| 516/743 [2:32:00<2:00:34, 31.87s/batch, batch_loss=6.16e+4, 

Validation:  70%|▋| 517/743 [2:32:00<2:06:47, 33.66s/batch, batch_loss=6.16e+4, 

Validation:  70%|▋| 517/743 [2:32:36<2:06:47, 33.66s/batch, batch_loss=502, batc

Validation:  70%|▋| 518/743 [2:32:36<2:08:58, 34.39s/batch, batch_loss=502, batc

Validation:  70%|▋| 518/743 [2:33:12<2:08:58, 34.39s/batch, batch_loss=10.3, bat

Validation:  70%|▋| 519/743 [2:33:12<2:10:12, 34.88s/batch, batch_loss=10.3, bat

Validation:  70%|▋| 519/743 [2:33:48<2:10:12, 34.88s/batch, batch_loss=16.7, bat

Validation:  70%|▋| 520/743 [2:33:48<2:11:08, 35.28s/batch, batch_loss=16.7, bat

Validation:  70%|▋| 520/743 [2:34:24<2:11:08, 35.28s/batch, batch_loss=13.2, bat

Validation:  70%|▋| 521/743 [2:34:24<2:11:04, 35.43s/batch, batch_loss=13.2, bat

Validation:  70%|▋| 521/743 [2:35:00<2:11:04, 35.43s/batch, batch_loss=11.9, bat

Validation:  70%|▋| 522/743 [2:35:00<2:11:18, 35.65s/batch, batch_loss=11.9, bat

Validation:  70%|▋| 522/743 [2:35:39<2:11:18, 35.65s/batch, batch_loss=427, batc

Validation:  70%|▋| 523/743 [2:35:39<2:13:50, 36.50s/batch, batch_loss=427, batc

Validation:  70%|▋| 523/743 [2:36:22<2:13:50, 36.50s/batch, batch_loss=14.1, bat

Validation:  71%|▋| 524/743 [2:36:22<2:20:54, 38.60s/batch, batch_loss=14.1, bat

Validation:  71%|▋| 524/743 [2:36:57<2:20:54, 38.60s/batch, batch_loss=22.7, bat

Validation:  71%|▋| 525/743 [2:36:57<2:16:14, 37.50s/batch, batch_loss=22.7, bat

Validation:  71%|▋| 525/743 [2:37:10<2:16:14, 37.50s/batch, batch_loss=10.3, bat

Validation:  71%|▋| 526/743 [2:37:10<1:49:18, 30.22s/batch, batch_loss=10.3, bat

Validation:  71%|▋| 526/743 [2:37:27<1:49:18, 30.22s/batch, batch_loss=3.75e+3, 

Validation:  71%|▋| 527/743 [2:37:27<1:33:43, 26.04s/batch, batch_loss=3.75e+3, 

Validation:  71%|▋| 527/743 [2:37:40<1:33:43, 26.04s/batch, batch_loss=509, batc

Validation:  71%|▋| 528/743 [2:37:40<1:19:42, 22.24s/batch, batch_loss=509, batc

Validation:  71%|▋| 528/743 [2:37:53<1:19:42, 22.24s/batch, batch_loss=6.51e+3, 

Validation:  71%|▋| 529/743 [2:37:53<1:09:35, 19.51s/batch, batch_loss=6.51e+3, 

Validation:  71%|▋| 529/743 [2:38:07<1:09:35, 19.51s/batch, batch_loss=206, batc

Validation:  71%|▋| 530/743 [2:38:07<1:02:37, 17.64s/batch, batch_loss=206, batc

Validation:  71%|▋| 530/743 [2:38:20<1:02:37, 17.64s/batch, batch_loss=39.8, bat

Validation:  71%|▋| 531/743 [2:38:20<57:43, 16.34s/batch, batch_loss=39.8, batch

Validation:  71%|▋| 531/743 [2:38:33<57:43, 16.34s/batch, batch_loss=257, batch_

Validation:  72%|▋| 532/743 [2:38:33<54:07, 15.39s/batch, batch_loss=257, batch_

Validation:  72%|▋| 532/743 [2:38:48<54:07, 15.39s/batch, batch_loss=7.33, batch

Validation:  72%|▋| 533/743 [2:38:48<53:58, 15.42s/batch, batch_loss=7.33, batch

Validation:  72%|▋| 533/743 [2:39:02<53:58, 15.42s/batch, batch_loss=11.4, batch

Validation:  72%|▋| 534/743 [2:39:02<51:22, 14.75s/batch, batch_loss=11.4, batch

Validation:  72%|▋| 534/743 [2:39:15<51:22, 14.75s/batch, batch_loss=16.6, batch

Validation:  72%|▋| 535/743 [2:39:15<49:41, 14.34s/batch, batch_loss=16.6, batch

Validation:  72%|▋| 535/743 [2:39:28<49:41, 14.34s/batch, batch_loss=17.5, batch

Validation:  72%|▋| 536/743 [2:39:28<48:23, 14.03s/batch, batch_loss=17.5, batch

Validation:  72%|▋| 536/743 [2:39:42<48:23, 14.03s/batch, batch_loss=11.5, batch

Validation:  72%|▋| 537/743 [2:39:42<47:21, 13.80s/batch, batch_loss=11.5, batch

Validation:  72%|▋| 537/743 [2:39:55<47:21, 13.80s/batch, batch_loss=15.7, batch

Validation:  72%|▋| 538/743 [2:39:55<46:52, 13.72s/batch, batch_loss=15.7, batch

Validation:  72%|▋| 538/743 [2:40:09<46:52, 13.72s/batch, batch_loss=249, batch_

Validation:  73%|▋| 539/743 [2:40:09<46:36, 13.71s/batch, batch_loss=249, batch_

Validation:  73%|▋| 539/743 [2:40:22<46:36, 13.71s/batch, batch_loss=17.2, batch

Validation:  73%|▋| 540/743 [2:40:22<45:59, 13.59s/batch, batch_loss=17.2, batch

Validation:  73%|▋| 540/743 [2:40:36<45:59, 13.59s/batch, batch_loss=26.5, batch

Validation:  73%|▋| 541/743 [2:40:36<45:48, 13.61s/batch, batch_loss=26.5, batch

Validation:  73%|▋| 541/743 [2:40:52<45:48, 13.61s/batch, batch_loss=1.94e+3, ba

Validation:  73%|▋| 542/743 [2:40:52<48:04, 14.35s/batch, batch_loss=1.94e+3, ba

Validation:  73%|▋| 542/743 [2:41:05<48:04, 14.35s/batch, batch_loss=19.1, batch

Validation:  73%|▋| 543/743 [2:41:05<46:53, 14.07s/batch, batch_loss=19.1, batch

Validation:  73%|▋| 543/743 [2:41:19<46:53, 14.07s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:41:19<45:58, 13.86s/batch, batch_loss=1.09e+4, ba

Validation:  73%|▋| 544/743 [2:41:32<45:58, 13.86s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:41:32<45:28, 13.78s/batch, batch_loss=2.75e+3, ba

Validation:  73%|▋| 545/743 [2:41:46<45:28, 13.78s/batch, batch_loss=7.04, batch

Validation:  73%|▋| 546/743 [2:41:46<45:27, 13.85s/batch, batch_loss=7.04, batch

Validation:  73%|▋| 546/743 [2:42:00<45:27, 13.85s/batch, batch_loss=259, batch_

Validation:  74%|▋| 547/743 [2:42:00<45:05, 13.80s/batch, batch_loss=259, batch_

Validation:  74%|▋| 547/743 [2:42:14<45:05, 13.80s/batch, batch_loss=24.9, batch

Validation:  74%|▋| 548/743 [2:42:14<45:00, 13.85s/batch, batch_loss=24.9, batch

Validation:  74%|▋| 548/743 [2:42:28<45:00, 13.85s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:42:28<44:45, 13.84s/batch, batch_loss=4.11e+3, ba

Validation:  74%|▋| 549/743 [2:42:42<44:45, 13.84s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:42:42<44:40, 13.89s/batch, batch_loss=1.28e+4, ba

Validation:  74%|▋| 550/743 [2:42:58<44:40, 13.89s/batch, batch_loss=15, batch_i

Validation:  74%|▋| 551/743 [2:42:58<46:45, 14.61s/batch, batch_loss=15, batch_i

Validation:  74%|▋| 551/743 [2:43:12<46:45, 14.61s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:43:12<45:40, 14.35s/batch, batch_loss=6.77e+3, ba

Validation:  74%|▋| 552/743 [2:43:25<45:40, 14.35s/batch, batch_loss=20.3, batch

Validation:  74%|▋| 553/743 [2:43:25<44:26, 14.03s/batch, batch_loss=20.3, batch

Validation:  74%|▋| 553/743 [2:43:38<44:26, 14.03s/batch, batch_loss=20.8, batch

Validation:  75%|▋| 554/743 [2:43:38<43:37, 13.85s/batch, batch_loss=20.8, batch

Validation:  75%|▋| 554/743 [2:43:52<43:37, 13.85s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:43:52<43:10, 13.78s/batch, batch_loss=2.47e+3, ba

Validation:  75%|▋| 555/743 [2:44:05<43:10, 13.78s/batch, batch_loss=32, batch_i

Validation:  75%|▋| 556/743 [2:44:05<42:31, 13.64s/batch, batch_loss=32, batch_i

Validation:  75%|▋| 556/743 [2:44:19<42:31, 13.64s/batch, batch_loss=8.24, batch

Validation:  75%|▋| 557/743 [2:44:19<42:09, 13.60s/batch, batch_loss=8.24, batch

Validation:  75%|▋| 557/743 [2:44:32<42:09, 13.60s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:44:32<41:49, 13.57s/batch, batch_loss=1.52e+4, ba

Validation:  75%|▊| 558/743 [2:44:46<41:49, 13.57s/batch, batch_loss=3.59e+3, ba

Validation:  75%|▊| 559/743 [2:44:46<41:21, 13.49s/batch, batch_loss=3.59e+3, ba

Validation:  75%|▊| 559/743 [2:45:02<41:21, 13.49s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:45:02<43:19, 14.21s/batch, batch_loss=2.93e+3, ba

Validation:  75%|▊| 560/743 [2:45:15<43:19, 14.21s/batch, batch_loss=9.91, batch

Validation:  76%|▊| 561/743 [2:45:15<42:12, 13.91s/batch, batch_loss=9.91, batch

Validation:  76%|▊| 561/743 [2:45:28<42:12, 13.91s/batch, batch_loss=14.2, batch

Validation:  76%|▊| 562/743 [2:45:28<40:54, 13.56s/batch, batch_loss=14.2, batch

Validation:  76%|▊| 562/743 [2:45:40<40:54, 13.56s/batch, batch_loss=17.5, batch

Validation:  76%|▊| 563/743 [2:45:40<40:02, 13.35s/batch, batch_loss=17.5, batch

Validation:  76%|▊| 563/743 [2:45:54<40:02, 13.35s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:45:54<39:43, 13.32s/batch, batch_loss=1.09e+3, ba

Validation:  76%|▊| 564/743 [2:46:07<39:43, 13.32s/batch, batch_loss=3.68e+3, ba

Validation:  76%|▊| 565/743 [2:46:07<39:31, 13.32s/batch, batch_loss=3.68e+3, ba

Validation:  76%|▊| 565/743 [2:46:21<39:31, 13.32s/batch, batch_loss=11.1, batch

Validation:  76%|▊| 566/743 [2:46:21<39:31, 13.40s/batch, batch_loss=11.1, batch

Validation:  76%|▊| 566/743 [2:46:34<39:31, 13.40s/batch, batch_loss=16.1, batch

Validation:  76%|▊| 567/743 [2:46:34<38:58, 13.29s/batch, batch_loss=16.1, batch

Validation:  76%|▊| 567/743 [2:46:47<38:58, 13.29s/batch, batch_loss=12.3, batch

Validation:  76%|▊| 568/743 [2:46:47<38:38, 13.25s/batch, batch_loss=12.3, batch

Validation:  76%|▊| 568/743 [2:47:00<38:38, 13.25s/batch, batch_loss=15.5, batch

Validation:  77%|▊| 569/743 [2:47:00<38:37, 13.32s/batch, batch_loss=15.5, batch

Validation:  77%|▊| 569/743 [2:47:16<38:37, 13.32s/batch, batch_loss=19.5, batch

Validation:  77%|▊| 570/743 [2:47:16<40:54, 14.19s/batch, batch_loss=19.5, batch

Validation:  77%|▊| 570/743 [2:47:30<40:54, 14.19s/batch, batch_loss=10.8, batch

Validation:  77%|▊| 571/743 [2:47:30<39:52, 13.91s/batch, batch_loss=10.8, batch

Validation:  77%|▊| 571/743 [2:47:43<39:52, 13.91s/batch, batch_loss=21.6, batch

Validation:  77%|▊| 572/743 [2:47:43<39:07, 13.73s/batch, batch_loss=21.6, batch

Validation:  77%|▊| 572/743 [2:47:56<39:07, 13.73s/batch, batch_loss=13.3, batch

Validation:  77%|▊| 573/743 [2:47:56<38:23, 13.55s/batch, batch_loss=13.3, batch

Validation:  77%|▊| 573/743 [2:48:09<38:23, 13.55s/batch, batch_loss=14.9, batch

Validation:  77%|▊| 574/743 [2:48:09<37:56, 13.47s/batch, batch_loss=14.9, batch

Validation:  77%|▊| 574/743 [2:48:23<37:56, 13.47s/batch, batch_loss=14.7, batch

Validation:  77%|▊| 575/743 [2:48:23<37:27, 13.38s/batch, batch_loss=14.7, batch

Validation:  77%|▊| 575/743 [2:48:36<37:27, 13.38s/batch, batch_loss=21.1, batch

Validation:  78%|▊| 576/743 [2:48:36<37:07, 13.34s/batch, batch_loss=21.1, batch

Validation:  78%|▊| 576/743 [2:48:50<37:07, 13.34s/batch, batch_loss=18.9, batch

Validation:  78%|▊| 577/743 [2:48:50<37:09, 13.43s/batch, batch_loss=18.9, batch

Validation:  78%|▊| 577/743 [2:49:06<37:09, 13.43s/batch, batch_loss=24.6, batch

Validation:  78%|▊| 578/743 [2:49:06<39:03, 14.20s/batch, batch_loss=24.6, batch

Validation:  78%|▊| 578/743 [2:49:19<39:03, 14.20s/batch, batch_loss=314, batch_

Validation:  78%|▊| 579/743 [2:49:19<38:23, 14.05s/batch, batch_loss=314, batch_

Validation:  78%|▊| 579/743 [2:49:32<38:23, 14.05s/batch, batch_loss=6.55, batch

Validation:  78%|▊| 580/743 [2:49:32<37:22, 13.76s/batch, batch_loss=6.55, batch

Validation:  78%|▊| 580/743 [2:49:46<37:22, 13.76s/batch, batch_loss=8.74, batch

Validation:  78%|▊| 581/743 [2:49:46<36:44, 13.61s/batch, batch_loss=8.74, batch

Validation:  78%|▊| 581/743 [2:49:59<36:44, 13.61s/batch, batch_loss=16.5, batch

Validation:  78%|▊| 582/743 [2:49:59<36:18, 13.53s/batch, batch_loss=16.5, batch

Validation:  78%|▊| 582/743 [2:50:12<36:18, 13.53s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:50:12<35:59, 13.50s/batch, batch_loss=2.4e+3, bat

Validation:  78%|▊| 583/743 [2:50:25<35:59, 13.50s/batch, batch_loss=2.05, batch

Validation:  79%|▊| 584/743 [2:50:25<35:14, 13.30s/batch, batch_loss=2.05, batch

Validation:  79%|▊| 584/743 [2:50:38<35:14, 13.30s/batch, batch_loss=19.6, batch

Validation:  79%|▊| 585/743 [2:50:38<34:58, 13.28s/batch, batch_loss=19.6, batch

Validation:  79%|▊| 585/743 [2:50:52<34:58, 13.28s/batch, batch_loss=553, batch_

Validation:  79%|▊| 586/743 [2:50:52<34:53, 13.33s/batch, batch_loss=553, batch_

Validation:  79%|▊| 586/743 [2:51:05<34:53, 13.33s/batch, batch_loss=7.58, batch

Validation:  79%|▊| 587/743 [2:51:05<34:28, 13.26s/batch, batch_loss=7.58, batch

Validation:  79%|▊| 587/743 [2:51:21<34:28, 13.26s/batch, batch_loss=403, batch_

Validation:  79%|▊| 588/743 [2:51:21<36:32, 14.15s/batch, batch_loss=403, batch_

Validation:  79%|▊| 588/743 [2:51:34<36:32, 14.15s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:51:34<35:41, 13.91s/batch, batch_loss=2.51e+4, ba

Validation:  79%|▊| 589/743 [2:51:48<35:41, 13.91s/batch, batch_loss=17.1, batch

Validation:  79%|▊| 590/743 [2:51:48<35:09, 13.79s/batch, batch_loss=17.1, batch

Validation:  79%|▊| 590/743 [2:52:01<35:09, 13.79s/batch, batch_loss=13.3, batch

Validation:  80%|▊| 591/743 [2:52:01<34:30, 13.62s/batch, batch_loss=13.3, batch

Validation:  80%|▊| 591/743 [2:52:14<34:30, 13.62s/batch, batch_loss=10.5, batch

Validation:  80%|▊| 592/743 [2:52:14<33:59, 13.50s/batch, batch_loss=10.5, batch

Validation:  80%|▊| 592/743 [2:52:28<33:59, 13.50s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:52:28<33:51, 13.54s/batch, batch_loss=2.39e+4, ba

Validation:  80%|▊| 593/743 [2:52:41<33:51, 13.54s/batch, batch_loss=3.62, batch

Validation:  80%|▊| 594/743 [2:52:41<33:16, 13.40s/batch, batch_loss=3.62, batch

Validation:  80%|▊| 594/743 [2:52:54<33:16, 13.40s/batch, batch_loss=4.28, batch

Validation:  80%|▊| 595/743 [2:52:54<32:57, 13.36s/batch, batch_loss=4.28, batch

Validation:  80%|▊| 595/743 [2:53:08<32:57, 13.36s/batch, batch_loss=5.96, batch

Validation:  80%|▊| 596/743 [2:53:08<32:48, 13.39s/batch, batch_loss=5.96, batch

Validation:  80%|▊| 596/743 [2:53:21<32:48, 13.39s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:53:21<32:23, 13.31s/batch, batch_loss=1.79e+3, ba

Validation:  80%|▊| 597/743 [2:53:34<32:23, 13.31s/batch, batch_loss=14, batch_i

Validation:  80%|▊| 598/743 [2:53:34<32:03, 13.26s/batch, batch_loss=14, batch_i

Validation:  80%|▊| 598/743 [2:53:47<32:03, 13.26s/batch, batch_loss=14, batch_i

Validation:  81%|▊| 599/743 [2:53:47<31:44, 13.22s/batch, batch_loss=14, batch_i

Validation:  81%|▊| 599/743 [2:54:01<31:44, 13.22s/batch, batch_loss=20.2, batch

Validation:  81%|▊| 600/743 [2:54:01<31:43, 13.31s/batch, batch_loss=20.2, batch

Validation:  81%|▊| 600/743 [2:54:14<31:43, 13.31s/batch, batch_loss=13.2, batch

Validation:  81%|▊| 601/743 [2:54:14<31:25, 13.28s/batch, batch_loss=13.2, batch

Validation:  81%|▊| 601/743 [2:54:27<31:25, 13.28s/batch, batch_loss=16.3, batch

Validation:  81%|▊| 602/743 [2:54:27<31:07, 13.24s/batch, batch_loss=16.3, batch

Validation:  81%|▊| 602/743 [2:54:40<31:07, 13.24s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:54:40<30:40, 13.15s/batch, batch_loss=1.15e+4, ba

Validation:  81%|▊| 603/743 [2:54:53<30:40, 13.15s/batch, batch_loss=20.1, batch

Validation:  81%|▊| 604/743 [2:54:53<30:03, 12.97s/batch, batch_loss=20.1, batch

Validation:  81%|▊| 604/743 [2:55:05<30:03, 12.97s/batch, batch_loss=24.8, batch

Validation:  81%|▊| 605/743 [2:55:05<29:39, 12.90s/batch, batch_loss=24.8, batch

Validation:  81%|▊| 605/743 [2:55:18<29:39, 12.90s/batch, batch_loss=251, batch_

Validation:  82%|▊| 606/743 [2:55:18<29:34, 12.95s/batch, batch_loss=251, batch_

Validation:  82%|▊| 606/743 [2:55:32<29:34, 12.95s/batch, batch_loss=27.5, batch

Validation:  82%|▊| 607/743 [2:55:32<29:26, 12.99s/batch, batch_loss=27.5, batch

Validation:  82%|▊| 607/743 [2:55:45<29:26, 12.99s/batch, batch_loss=20.3, batch

Validation:  82%|▊| 608/743 [2:55:45<29:24, 13.07s/batch, batch_loss=20.3, batch

Validation:  82%|▊| 608/743 [2:55:58<29:24, 13.07s/batch, batch_loss=15.2, batch

Validation:  82%|▊| 609/743 [2:55:58<29:16, 13.11s/batch, batch_loss=15.2, batch

Validation:  82%|▊| 609/743 [2:56:11<29:16, 13.11s/batch, batch_loss=17.7, batch

Validation:  82%|▊| 610/743 [2:56:11<28:57, 13.06s/batch, batch_loss=17.7, batch

Validation:  82%|▊| 610/743 [2:56:24<28:57, 13.06s/batch, batch_loss=18.2, batch

Validation:  82%|▊| 611/743 [2:56:24<28:32, 12.97s/batch, batch_loss=18.2, batch

Validation:  82%|▊| 611/743 [2:56:36<28:32, 12.97s/batch, batch_loss=9.31, batch

Validation:  82%|▊| 612/743 [2:56:36<28:09, 12.89s/batch, batch_loss=9.31, batch

Validation:  82%|▊| 612/743 [2:56:49<28:09, 12.89s/batch, batch_loss=14.1, batch

Validation:  83%|▊| 613/743 [2:56:49<27:43, 12.79s/batch, batch_loss=14.1, batch

Validation:  83%|▊| 613/743 [2:57:04<27:43, 12.79s/batch, batch_loss=5.63e+3, ba

Validation:  83%|▊| 614/743 [2:57:04<29:04, 13.52s/batch, batch_loss=5.63e+3, ba

Validation:  83%|▊| 614/743 [2:57:17<29:04, 13.52s/batch, batch_loss=12.3, batch

Validation:  83%|▊| 615/743 [2:57:17<28:37, 13.42s/batch, batch_loss=12.3, batch

Validation:  83%|▊| 615/743 [2:57:30<28:37, 13.42s/batch, batch_loss=13.2, batch

Validation:  83%|▊| 616/743 [2:57:30<27:39, 13.06s/batch, batch_loss=13.2, batch

Validation:  83%|▊| 616/743 [2:57:42<27:39, 13.06s/batch, batch_loss=6.08, batch

Validation:  83%|▊| 617/743 [2:57:42<27:17, 13.00s/batch, batch_loss=6.08, batch

Validation:  83%|▊| 617/743 [2:57:55<27:17, 13.00s/batch, batch_loss=9.62, batch

Validation:  83%|▊| 618/743 [2:57:55<27:01, 12.97s/batch, batch_loss=9.62, batch

Validation:  83%|▊| 618/743 [2:58:08<27:01, 12.97s/batch, batch_loss=341, batch_

Validation:  83%|▊| 619/743 [2:58:08<26:29, 12.82s/batch, batch_loss=341, batch_

Validation:  83%|▊| 619/743 [2:58:20<26:29, 12.82s/batch, batch_loss=14, batch_i

Validation:  83%|▊| 620/743 [2:58:20<26:07, 12.74s/batch, batch_loss=14, batch_i

Validation:  83%|▊| 620/743 [2:58:33<26:07, 12.74s/batch, batch_loss=7.37, batch

Validation:  84%|▊| 621/743 [2:58:33<25:51, 12.71s/batch, batch_loss=7.37, batch

Validation:  84%|▊| 621/743 [2:58:46<25:51, 12.71s/batch, batch_loss=11.8, batch

Validation:  84%|▊| 622/743 [2:58:46<25:46, 12.78s/batch, batch_loss=11.8, batch

Validation:  84%|▊| 622/743 [2:59:01<25:46, 12.78s/batch, batch_loss=190, batch_

Validation:  84%|▊| 623/743 [2:59:01<27:06, 13.55s/batch, batch_loss=190, batch_

Validation:  84%|▊| 623/743 [2:59:14<27:06, 13.55s/batch, batch_loss=13.4, batch

Validation:  84%|▊| 624/743 [2:59:14<26:33, 13.39s/batch, batch_loss=13.4, batch

Validation:  84%|▊| 624/743 [2:59:27<26:33, 13.39s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:59:27<25:49, 13.13s/batch, batch_loss=2.39e+3, ba

Validation:  84%|▊| 625/743 [2:59:42<25:49, 13.13s/batch, batch_loss=18.9, batch

Validation:  84%|▊| 626/743 [2:59:42<26:30, 13.59s/batch, batch_loss=18.9, batch

Validation:  84%|▊| 626/743 [2:59:57<26:30, 13.59s/batch, batch_loss=15.4, batch

Validation:  84%|▊| 627/743 [2:59:57<27:14, 14.09s/batch, batch_loss=15.4, batch

Validation:  84%|▊| 627/743 [3:00:12<27:14, 14.09s/batch, batch_loss=15.1, batch

Validation:  85%|▊| 628/743 [3:00:12<27:27, 14.33s/batch, batch_loss=15.1, batch

Validation:  85%|▊| 628/743 [3:00:27<27:27, 14.33s/batch, batch_loss=11.5, batch

Validation:  85%|▊| 629/743 [3:00:27<27:41, 14.58s/batch, batch_loss=11.5, batch

Validation:  85%|▊| 629/743 [3:00:42<27:41, 14.58s/batch, batch_loss=15, batch_i

Validation:  85%|▊| 630/743 [3:00:42<27:41, 14.71s/batch, batch_loss=15, batch_i

Validation:  85%|▊| 630/743 [3:00:59<27:41, 14.71s/batch, batch_loss=242, batch_

Validation:  85%|▊| 631/743 [3:00:59<28:48, 15.43s/batch, batch_loss=242, batch_

Validation:  85%|▊| 631/743 [3:01:14<28:48, 15.43s/batch, batch_loss=17.3, batch

Validation:  85%|▊| 632/743 [3:01:14<28:13, 15.26s/batch, batch_loss=17.3, batch

Validation:  85%|▊| 632/743 [3:01:28<28:13, 15.26s/batch, batch_loss=13.4, batch

Validation:  85%|▊| 633/743 [3:01:28<27:31, 15.01s/batch, batch_loss=13.4, batch

Validation:  85%|▊| 633/743 [3:01:43<27:31, 15.01s/batch, batch_loss=10.2, batch

Validation:  85%|▊| 634/743 [3:01:43<27:12, 14.98s/batch, batch_loss=10.2, batch

Validation:  85%|▊| 634/743 [3:01:58<27:12, 14.98s/batch, batch_loss=7.65, batch

Validation:  85%|▊| 635/743 [3:01:58<26:45, 14.86s/batch, batch_loss=7.65, batch

Validation:  85%|▊| 635/743 [3:02:13<26:45, 14.86s/batch, batch_loss=801, batch_

Validation:  86%|▊| 636/743 [3:02:13<26:47, 15.02s/batch, batch_loss=801, batch_

Validation:  86%|▊| 636/743 [3:02:27<26:47, 15.02s/batch, batch_loss=709, batch_

Validation:  86%|▊| 637/743 [3:02:27<25:59, 14.71s/batch, batch_loss=709, batch_

Validation:  86%|▊| 637/743 [3:02:42<25:59, 14.71s/batch, batch_loss=18.8, batch

Validation:  86%|▊| 638/743 [3:02:42<25:52, 14.79s/batch, batch_loss=18.8, batch

Validation:  86%|▊| 638/743 [3:02:57<25:52, 14.79s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [3:02:57<25:35, 14.76s/batch, batch_loss=1.21e+4, ba

Validation:  86%|▊| 639/743 [3:03:12<25:35, 14.76s/batch, batch_loss=21.2, batch

Validation:  86%|▊| 640/743 [3:03:12<25:39, 14.95s/batch, batch_loss=21.2, batch

Validation:  86%|▊| 640/743 [3:03:27<25:39, 14.95s/batch, batch_loss=30.3, batch

Validation:  86%|▊| 641/743 [3:03:27<25:33, 15.04s/batch, batch_loss=30.3, batch

Validation:  86%|▊| 641/743 [3:03:43<25:33, 15.04s/batch, batch_loss=28.2, batch

Validation:  86%|▊| 642/743 [3:03:43<25:23, 15.08s/batch, batch_loss=28.2, batch

Validation:  86%|▊| 642/743 [3:03:58<25:23, 15.08s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [3:03:58<25:13, 15.14s/batch, batch_loss=1.03e+3, ba

Validation:  87%|▊| 643/743 [3:04:13<25:13, 15.14s/batch, batch_loss=17.3, batch

Validation:  87%|▊| 644/743 [3:04:13<24:59, 15.15s/batch, batch_loss=17.3, batch

Validation:  87%|▊| 644/743 [3:04:28<24:59, 15.15s/batch, batch_loss=19.5, batch

Validation:  87%|▊| 645/743 [3:04:28<24:33, 15.03s/batch, batch_loss=19.5, batch

Validation:  87%|▊| 645/743 [3:04:46<24:33, 15.03s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [3:04:46<26:03, 16.12s/batch, batch_loss=6.24e+3, ba

Validation:  87%|▊| 646/743 [3:05:01<26:03, 16.12s/batch, batch_loss=16.6, batch

Validation:  87%|▊| 647/743 [3:05:01<25:11, 15.75s/batch, batch_loss=16.6, batch

Validation:  87%|▊| 647/743 [3:05:17<25:11, 15.75s/batch, batch_loss=5.14, batch

Validation:  87%|▊| 648/743 [3:05:17<24:44, 15.62s/batch, batch_loss=5.14, batch

Validation:  87%|▊| 648/743 [3:05:31<24:44, 15.62s/batch, batch_loss=9.47, batch

Validation:  87%|▊| 649/743 [3:05:31<24:01, 15.33s/batch, batch_loss=9.47, batch

Validation:  87%|▊| 649/743 [3:05:46<24:01, 15.33s/batch, batch_loss=14.3, batch

Validation:  87%|▊| 650/743 [3:05:46<23:22, 15.08s/batch, batch_loss=14.3, batch

Validation:  87%|▊| 650/743 [3:05:59<23:22, 15.08s/batch, batch_loss=22.7, batch

Validation:  88%|▉| 651/743 [3:05:59<22:25, 14.63s/batch, batch_loss=22.7, batch

Validation:  88%|▉| 651/743 [3:06:13<22:25, 14.63s/batch, batch_loss=23.1, batch

Validation:  88%|▉| 652/743 [3:06:13<21:43, 14.33s/batch, batch_loss=23.1, batch

Validation:  88%|▉| 652/743 [3:06:27<21:43, 14.33s/batch, batch_loss=13.7, batch

Validation:  88%|▉| 653/743 [3:06:27<21:07, 14.08s/batch, batch_loss=13.7, batch

Validation:  88%|▉| 653/743 [3:06:41<21:07, 14.08s/batch, batch_loss=19, batch_i

Validation:  88%|▉| 654/743 [3:06:41<21:07, 14.25s/batch, batch_loss=19, batch_i

Validation:  88%|▉| 654/743 [3:06:56<21:07, 14.25s/batch, batch_loss=24, batch_i

Validation:  88%|▉| 655/743 [3:06:56<21:07, 14.41s/batch, batch_loss=24, batch_i

Validation:  88%|▉| 655/743 [3:07:12<21:07, 14.41s/batch, batch_loss=18, batch_i

Validation:  88%|▉| 656/743 [3:07:12<21:25, 14.78s/batch, batch_loss=18, batch_i

Validation:  88%|▉| 656/743 [3:07:27<21:25, 14.78s/batch, batch_loss=13.8, batch

Validation:  88%|▉| 657/743 [3:07:27<21:20, 14.89s/batch, batch_loss=13.8, batch

Validation:  88%|▉| 657/743 [3:07:42<21:20, 14.89s/batch, batch_loss=15.4, batch

Validation:  89%|▉| 658/743 [3:07:42<21:06, 14.90s/batch, batch_loss=15.4, batch

Validation:  89%|▉| 658/743 [3:07:57<21:06, 14.90s/batch, batch_loss=22.3, batch

Validation:  89%|▉| 659/743 [3:07:57<20:52, 14.91s/batch, batch_loss=22.3, batch

Validation:  89%|▉| 659/743 [3:08:12<20:52, 14.91s/batch, batch_loss=20.6, batch

Validation:  89%|▉| 660/743 [3:08:12<20:47, 15.03s/batch, batch_loss=20.6, batch

Validation:  89%|▉| 660/743 [3:08:27<20:47, 15.03s/batch, batch_loss=17.2, batch

Validation:  89%|▉| 661/743 [3:08:27<20:42, 15.16s/batch, batch_loss=17.2, batch

Validation:  89%|▉| 661/743 [3:08:42<20:42, 15.16s/batch, batch_loss=6.45, batch

Validation:  89%|▉| 662/743 [3:08:42<20:23, 15.10s/batch, batch_loss=6.45, batch

Validation:  89%|▉| 662/743 [3:08:58<20:23, 15.10s/batch, batch_loss=3.58e+3, ba

Validation:  89%|▉| 663/743 [3:08:58<20:15, 15.19s/batch, batch_loss=3.58e+3, ba

Validation:  89%|▉| 663/743 [3:09:16<20:15, 15.19s/batch, batch_loss=13.4, batch

Validation:  89%|▉| 664/743 [3:09:16<21:07, 16.04s/batch, batch_loss=13.4, batch

Validation:  89%|▉| 664/743 [3:09:31<21:07, 16.04s/batch, batch_loss=20.3, batch

Validation:  90%|▉| 665/743 [3:09:31<20:24, 15.70s/batch, batch_loss=20.3, batch

Validation:  90%|▉| 665/743 [3:09:46<20:24, 15.70s/batch, batch_loss=13, batch_i

Validation:  90%|▉| 666/743 [3:09:46<20:00, 15.59s/batch, batch_loss=13, batch_i

Validation:  90%|▉| 666/743 [3:10:01<20:00, 15.59s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [3:10:01<19:28, 15.38s/batch, batch_loss=2.09e+4, ba

Validation:  90%|▉| 667/743 [3:10:15<19:28, 15.38s/batch, batch_loss=19.2, batch

Validation:  90%|▉| 668/743 [3:10:15<18:53, 15.12s/batch, batch_loss=19.2, batch

Validation:  90%|▉| 668/743 [3:10:30<18:53, 15.12s/batch, batch_loss=23.6, batch

Validation:  90%|▉| 669/743 [3:10:30<18:20, 14.87s/batch, batch_loss=23.6, batch

Validation:  90%|▉| 669/743 [3:10:44<18:20, 14.87s/batch, batch_loss=19.6, batch

Validation:  90%|▉| 670/743 [3:10:44<17:56, 14.75s/batch, batch_loss=19.6, batch

Validation:  90%|▉| 670/743 [3:11:01<17:56, 14.75s/batch, batch_loss=3.11e+3, ba

Validation:  90%|▉| 671/743 [3:11:01<18:29, 15.41s/batch, batch_loss=3.11e+3, ba

Validation:  90%|▉| 671/743 [3:11:16<18:29, 15.41s/batch, batch_loss=20.8, batch

Validation:  90%|▉| 672/743 [3:11:16<18:10, 15.36s/batch, batch_loss=20.8, batch

Validation:  90%|▉| 672/743 [3:11:32<18:10, 15.36s/batch, batch_loss=13.9, batch

Validation:  91%|▉| 673/743 [3:11:32<17:56, 15.38s/batch, batch_loss=13.9, batch

Validation:  91%|▉| 673/743 [3:11:48<17:56, 15.38s/batch, batch_loss=11.1, batch

Validation:  91%|▉| 674/743 [3:11:48<17:51, 15.53s/batch, batch_loss=11.1, batch

Validation:  91%|▉| 674/743 [3:12:04<17:51, 15.53s/batch, batch_loss=20, batch_i

Validation:  91%|▉| 675/743 [3:12:04<17:45, 15.67s/batch, batch_loss=20, batch_i

Validation:  91%|▉| 675/743 [3:12:19<17:45, 15.67s/batch, batch_loss=17.2, batch

Validation:  91%|▉| 676/743 [3:12:19<17:25, 15.61s/batch, batch_loss=17.2, batch

Validation:  91%|▉| 676/743 [3:12:34<17:25, 15.61s/batch, batch_loss=20.2, batch

Validation:  91%|▉| 677/743 [3:12:34<16:51, 15.33s/batch, batch_loss=20.2, batch

Validation:  91%|▉| 677/743 [3:12:49<16:51, 15.33s/batch, batch_loss=15.6, batch

Validation:  91%|▉| 678/743 [3:12:49<16:36, 15.33s/batch, batch_loss=15.6, batch

Validation:  91%|▉| 678/743 [3:13:08<16:36, 15.33s/batch, batch_loss=14.2, batch

Validation:  91%|▉| 679/743 [3:13:08<17:26, 16.36s/batch, batch_loss=14.2, batch

Validation:  91%|▉| 679/743 [3:13:23<17:26, 16.36s/batch, batch_loss=20.9, batch

Validation:  92%|▉| 680/743 [3:13:23<16:46, 15.98s/batch, batch_loss=20.9, batch

Validation:  92%|▉| 680/743 [3:13:38<16:46, 15.98s/batch, batch_loss=21.2, batch

Validation:  92%|▉| 681/743 [3:13:38<16:13, 15.70s/batch, batch_loss=21.2, batch

Validation:  92%|▉| 681/743 [3:13:53<16:13, 15.70s/batch, batch_loss=24.7, batch

Validation:  92%|▉| 682/743 [3:13:53<15:45, 15.51s/batch, batch_loss=24.7, batch

Validation:  92%|▉| 682/743 [3:14:08<15:45, 15.51s/batch, batch_loss=16.7, batch

Validation:  92%|▉| 683/743 [3:14:08<15:15, 15.25s/batch, batch_loss=16.7, batch

Validation:  92%|▉| 683/743 [3:14:23<15:15, 15.25s/batch, batch_loss=13.7, batch

Validation:  92%|▉| 684/743 [3:14:23<14:57, 15.21s/batch, batch_loss=13.7, batch

Validation:  92%|▉| 684/743 [3:14:37<14:57, 15.21s/batch, batch_loss=14.7, batch

Validation:  92%|▉| 685/743 [3:14:37<14:30, 15.00s/batch, batch_loss=14.7, batch

Validation:  92%|▉| 685/743 [3:14:53<14:30, 15.00s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [3:14:53<14:20, 15.11s/batch, batch_loss=1.66e+3, ba

Validation:  92%|▉| 686/743 [3:15:08<14:20, 15.11s/batch, batch_loss=21.3, batch

Validation:  92%|▉| 687/743 [3:15:08<14:13, 15.24s/batch, batch_loss=21.3, batch

Validation:  92%|▉| 687/743 [3:15:22<14:13, 15.24s/batch, batch_loss=12.7, batch

Validation:  93%|▉| 688/743 [3:15:22<13:41, 14.93s/batch, batch_loss=12.7, batch

Validation:  93%|▉| 688/743 [3:15:37<13:41, 14.93s/batch, batch_loss=13.5, batch

Validation:  93%|▉| 689/743 [3:15:37<13:26, 14.94s/batch, batch_loss=13.5, batch

Validation:  93%|▉| 689/743 [3:15:51<13:26, 14.94s/batch, batch_loss=19.1, batch

Validation:  93%|▉| 690/743 [3:15:51<12:57, 14.67s/batch, batch_loss=19.1, batch

Validation:  93%|▉| 690/743 [3:16:05<12:57, 14.67s/batch, batch_loss=13.2, batch

Validation:  93%|▉| 691/743 [3:16:05<12:30, 14.44s/batch, batch_loss=13.2, batch

Validation:  93%|▉| 691/743 [3:16:21<12:30, 14.44s/batch, batch_loss=17.7, batch

Validation:  93%|▉| 692/743 [3:16:21<12:28, 14.68s/batch, batch_loss=17.7, batch

Validation:  93%|▉| 692/743 [3:16:35<12:28, 14.68s/batch, batch_loss=19.5, batch

Validation:  93%|▉| 693/743 [3:16:35<12:14, 14.69s/batch, batch_loss=19.5, batch

Validation:  93%|▉| 693/743 [3:16:50<12:14, 14.69s/batch, batch_loss=23.2, batch

Validation:  93%|▉| 694/743 [3:16:50<11:55, 14.59s/batch, batch_loss=23.2, batch

Validation:  93%|▉| 694/743 [3:17:07<11:55, 14.59s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [3:17:07<12:20, 15.42s/batch, batch_loss=3.12e+3, ba

Validation:  94%|▉| 695/743 [3:17:22<12:20, 15.42s/batch, batch_loss=7.07, batch

Validation:  94%|▉| 696/743 [3:17:22<11:58, 15.29s/batch, batch_loss=7.07, batch

Validation:  94%|▉| 696/743 [3:17:37<11:58, 15.29s/batch, batch_loss=34.5, batch

Validation:  94%|▉| 697/743 [3:17:37<11:39, 15.22s/batch, batch_loss=34.5, batch

Validation:  94%|▉| 697/743 [3:17:52<11:39, 15.22s/batch, batch_loss=755, batch_

Validation:  94%|▉| 698/743 [3:17:52<11:16, 15.03s/batch, batch_loss=755, batch_

Validation:  94%|▉| 698/743 [3:18:05<11:16, 15.03s/batch, batch_loss=6.05, batch

Validation:  94%|▉| 699/743 [3:18:05<10:39, 14.53s/batch, batch_loss=6.05, batch

Validation:  94%|▉| 699/743 [3:18:20<10:39, 14.53s/batch, batch_loss=944, batch_

Validation:  94%|▉| 700/743 [3:18:20<10:32, 14.71s/batch, batch_loss=944, batch_

Validation:  94%|▉| 700/743 [3:18:35<10:32, 14.71s/batch, batch_loss=6.07, batch

Validation:  94%|▉| 701/743 [3:18:35<10:18, 14.71s/batch, batch_loss=6.07, batch

Validation:  94%|▉| 701/743 [3:18:48<10:18, 14.71s/batch, batch_loss=6.62, batch

Validation:  94%|▉| 702/743 [3:18:48<09:47, 14.34s/batch, batch_loss=6.62, batch

Validation:  94%|▉| 702/743 [3:19:02<09:47, 14.34s/batch, batch_loss=175, batch_

Validation:  95%|▉| 703/743 [3:19:02<09:30, 14.26s/batch, batch_loss=175, batch_

Validation:  95%|▉| 703/743 [3:19:16<09:30, 14.26s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [3:19:16<09:12, 14.17s/batch, batch_loss=471, batch_

Validation:  95%|▉| 704/743 [3:19:32<09:12, 14.17s/batch, batch_loss=8.02, batch

Validation:  95%|▉| 705/743 [3:19:32<09:10, 14.48s/batch, batch_loss=8.02, batch

Validation:  95%|▉| 705/743 [3:19:46<09:10, 14.48s/batch, batch_loss=16.1, batch

Validation:  95%|▉| 706/743 [3:19:46<08:56, 14.50s/batch, batch_loss=16.1, batch

Validation:  95%|▉| 706/743 [3:20:01<08:56, 14.50s/batch, batch_loss=414, batch_

Validation:  95%|▉| 707/743 [3:20:01<08:43, 14.55s/batch, batch_loss=414, batch_

Validation:  95%|▉| 707/743 [3:20:16<08:43, 14.55s/batch, batch_loss=15.7, batch

Validation:  95%|▉| 708/743 [3:20:16<08:33, 14.67s/batch, batch_loss=15.7, batch

Validation:  95%|▉| 708/743 [3:20:30<08:33, 14.67s/batch, batch_loss=23.4, batch

Validation:  95%|▉| 709/743 [3:20:30<08:16, 14.60s/batch, batch_loss=23.4, batch

Validation:  95%|▉| 709/743 [3:20:45<08:16, 14.60s/batch, batch_loss=16.4, batch

Validation:  96%|▉| 710/743 [3:20:45<08:00, 14.57s/batch, batch_loss=16.4, batch

Validation:  96%|▉| 710/743 [3:20:59<08:00, 14.57s/batch, batch_loss=15.3, batch

Validation:  96%|▉| 711/743 [3:20:59<07:44, 14.52s/batch, batch_loss=15.3, batch

Validation:  96%|▉| 711/743 [3:21:16<07:44, 14.52s/batch, batch_loss=19.3, batch

Validation:  96%|▉| 712/743 [3:21:16<07:53, 15.27s/batch, batch_loss=19.3, batch

Validation:  96%|▉| 712/743 [3:21:31<07:53, 15.27s/batch, batch_loss=16.1, batch

Validation:  96%|▉| 713/743 [3:21:31<07:30, 15.01s/batch, batch_loss=16.1, batch

Validation:  96%|▉| 713/743 [3:21:44<07:30, 15.01s/batch, batch_loss=5.69, batch

Validation:  96%|▉| 714/743 [3:21:44<07:05, 14.67s/batch, batch_loss=5.69, batch

Validation:  96%|▉| 714/743 [3:21:59<07:05, 14.67s/batch, batch_loss=9.7, batch_

Validation:  96%|▉| 715/743 [3:21:59<06:51, 14.71s/batch, batch_loss=9.7, batch_

Validation:  96%|▉| 715/743 [3:22:13<06:51, 14.71s/batch, batch_loss=19.6, batch

Validation:  96%|▉| 716/743 [3:22:13<06:33, 14.58s/batch, batch_loss=19.6, batch

Validation:  96%|▉| 716/743 [3:22:28<06:33, 14.58s/batch, batch_loss=387, batch_

Validation:  97%|▉| 717/743 [3:22:28<06:17, 14.53s/batch, batch_loss=387, batch_

Validation:  97%|▉| 717/743 [3:22:45<06:17, 14.53s/batch, batch_loss=18.6, batch

Validation:  97%|▉| 718/743 [3:22:45<06:25, 15.43s/batch, batch_loss=18.6, batch

Validation:  97%|▉| 718/743 [3:23:00<06:25, 15.43s/batch, batch_loss=16.1, batch

Validation:  97%|▉| 719/743 [3:23:00<06:02, 15.12s/batch, batch_loss=16.1, batch

Validation:  97%|▉| 719/743 [3:23:14<06:02, 15.12s/batch, batch_loss=17.2, batch

Validation:  97%|▉| 720/743 [3:23:14<05:42, 14.90s/batch, batch_loss=17.2, batch

Validation:  97%|▉| 720/743 [3:23:29<05:42, 14.90s/batch, batch_loss=10.3, batch

Validation:  97%|▉| 721/743 [3:23:29<05:26, 14.83s/batch, batch_loss=10.3, batch

Validation:  97%|▉| 721/743 [3:23:43<05:26, 14.83s/batch, batch_loss=23.4, batch

Validation:  97%|▉| 722/743 [3:23:43<05:09, 14.75s/batch, batch_loss=23.4, batch

Validation:  97%|▉| 722/743 [3:23:58<05:09, 14.75s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [3:23:58<04:56, 14.84s/batch, batch_loss=5.29e+3, ba

Validation:  97%|▉| 723/743 [3:24:16<04:56, 14.84s/batch, batch_loss=16, batch_i

Validation:  97%|▉| 724/743 [3:24:16<04:59, 15.78s/batch, batch_loss=16, batch_i

Validation:  97%|▉| 724/743 [3:24:32<04:59, 15.78s/batch, batch_loss=14.5, batch

Validation:  98%|▉| 725/743 [3:24:32<04:41, 15.63s/batch, batch_loss=14.5, batch

Validation:  98%|▉| 725/743 [3:24:47<04:41, 15.63s/batch, batch_loss=19.4, batch

Validation:  98%|▉| 726/743 [3:24:47<04:22, 15.46s/batch, batch_loss=19.4, batch

Validation:  98%|▉| 726/743 [3:25:01<04:22, 15.46s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:25:01<04:02, 15.16s/batch, batch_loss=2.41e+4, ba

Validation:  98%|▉| 727/743 [3:25:15<04:02, 15.16s/batch, batch_loss=28, batch_i

Validation:  98%|▉| 728/743 [3:25:15<03:42, 14.85s/batch, batch_loss=28, batch_i

Validation:  98%|▉| 728/743 [3:25:30<03:42, 14.85s/batch, batch_loss=31.9, batch

Validation:  98%|▉| 729/743 [3:25:30<03:26, 14.77s/batch, batch_loss=31.9, batch

Validation:  98%|▉| 729/743 [3:25:44<03:26, 14.77s/batch, batch_loss=19.1, batch

Validation:  98%|▉| 730/743 [3:25:44<03:09, 14.61s/batch, batch_loss=19.1, batch

Validation:  98%|▉| 730/743 [3:25:59<03:09, 14.61s/batch, batch_loss=14.1, batch

Validation:  98%|▉| 731/743 [3:25:59<02:57, 14.79s/batch, batch_loss=14.1, batch

Validation:  98%|▉| 731/743 [3:26:15<02:57, 14.79s/batch, batch_loss=9.47, batch

Validation:  99%|▉| 732/743 [3:26:15<02:45, 15.06s/batch, batch_loss=9.47, batch

Validation:  99%|▉| 732/743 [3:26:30<02:45, 15.06s/batch, batch_loss=26.6, batch

Validation:  99%|▉| 733/743 [3:26:30<02:31, 15.10s/batch, batch_loss=26.6, batch

Validation:  99%|▉| 733/743 [3:26:45<02:31, 15.10s/batch, batch_loss=3.42, batch

Validation:  99%|▉| 734/743 [3:26:45<02:15, 15.04s/batch, batch_loss=3.42, batch

Validation:  99%|▉| 734/743 [3:27:01<02:15, 15.04s/batch, batch_loss=6.94, batch

Validation:  99%|▉| 735/743 [3:27:01<02:01, 15.22s/batch, batch_loss=6.94, batch

Validation:  99%|▉| 735/743 [3:27:16<02:01, 15.22s/batch, batch_loss=1.13, batch

Validation:  99%|▉| 736/743 [3:27:16<01:46, 15.17s/batch, batch_loss=1.13, batch

Validation:  99%|▉| 736/743 [3:27:29<01:46, 15.17s/batch, batch_loss=0.0427, bat

Validation:  99%|▉| 737/743 [3:27:29<01:27, 14.62s/batch, batch_loss=0.0427, bat

Validation:  99%|▉| 737/743 [3:27:42<01:27, 14.62s/batch, batch_loss=0.0427, bat

Validation:  99%|▉| 738/743 [3:27:42<01:10, 14.14s/batch, batch_loss=0.0427, bat

Validation:  99%|▉| 738/743 [3:27:55<01:10, 14.14s/batch, batch_loss=0.0427, bat

Validation:  99%|▉| 739/743 [3:27:55<00:55, 13.80s/batch, batch_loss=0.0427, bat

Validation:  99%|▉| 739/743 [3:28:08<00:55, 13.80s/batch, batch_loss=0.0427, bat

Validation: 100%|▉| 740/743 [3:28:08<00:40, 13.46s/batch, batch_loss=0.0427, bat

Validation: 100%|▉| 740/743 [3:28:21<00:40, 13.46s/batch, batch_loss=0.0427, bat

Validation: 100%|▉| 741/743 [3:28:21<00:26, 13.24s/batch, batch_loss=0.0427, bat

Validation: 100%|▉| 741/743 [3:28:33<00:26, 13.24s/batch, batch_loss=0.0427, bat

Validation: 100%|▉| 742/743 [3:28:33<00:13, 13.11s/batch, batch_loss=0.0427, bat

Validation: 100%|▉| 742/743 [3:28:45<00:13, 13.11s/batch, batch_loss=0.0426, bat

Validation: 100%|█| 743/743 [3:28:45<00:00, 12.74s/batch, batch_loss=0.0426, bat

Validation: 100%|█| 743/743 [3:28:45<00:00, 16.86s/batch, batch_loss=0.0426, bat




Val Loss: 1295.8576


In [13]:
# Evaluate the saved seq2seq checkpoint on the held-out test run and report
# the mean per-batch MSE over the batches that were actually evaluated.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = torch.nn.MSELoss()
model = NeuralNetwork(batch_size, input_window, prediction_window, device=device).to(device)
# map_location keeps the load working when the checkpoint was written on a GPU
# but this cell runs on the CPU fallback selected above.
model.load_state_dict(
    torch.load("multimodal_seq2seq.pth", map_location=device, weights_only=True)
)

test_dataset = DoomMotionDataset(coco_test, TEST_RUN, input_window, prediction_window)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

model.eval()  # Set the model to evaluation mode
running_loss = 0.0
evaluated_batches = 0  # batches that contributed to running_loss
skipped_batches = 0    # batches dropped by one of the shape guards below

progress_bar = tqdm(test_loader, desc="Testing", unit="batch")

with torch.no_grad():  # Disable gradient calculations for evaluation
    for batch_idx, batch in enumerate(progress_bar):
        prev_motion = batch["prev_motion"].to(device)
        next_motion = batch["next_motion"].to(device)
        previous_seg = batch["previous_seg"].to(device)
        previous_dep = batch["previous_dep"].to(device)

        # Skip the batch unless ALL four tensors agree on their leading dimension.
        # (A chained `a != b != c != d` only compares adjacent pairs joined by
        # `and`, so it would miss most mismatches.)
        leading_dims = {
            tensor.size(0)
            for tensor in (prev_motion, next_motion, previous_seg, previous_dep)
        }
        if len(leading_dims) != 1:
            skipped_batches += 1
            continue

        outputs = model(previous_seg, previous_dep, prev_motion)
        # Swap the first two axes so axis 0 lines up with next_motion's axis 0.
        # NOTE(review): presumably the model emits (seq, batch, features) -- confirm.
        outputs = outputs.permute(1, 0, 2)

        # The model output may not match the target's leading dimension
        # (e.g. a short final batch); such batches cannot be scored.
        if outputs.size(0) != next_motion.size(0):
            skipped_batches += 1
            continue

        batch_loss = criterion(outputs, next_motion).item()
        running_loss += batch_loss
        evaluated_batches += 1

        progress_bar.set_postfix({
            "batch_loss": batch_loss,
            "batch_index": batch_idx + 1,
            "batch_size": prev_motion.size(0)
        })

# Average over the batches that were actually scored; dividing by
# len(test_loader) would bias the result downward whenever batches were skipped.
test_loss = running_loss / evaluated_batches if evaluated_batches else float("nan")
print(f"Test Loss: {test_loss:.4f}")
if skipped_batches:
    print(f"Skipped {skipped_batches} of {len(test_loader)} batches due to shape mismatch")

Testing:   0%|                                       | 0/462 [00:00<?, ?batch/s]

  depth_mask = torch.tensor(depth_mask, dtype=torch.float32)


Testing:   0%| | 0/462 [00:15<?, ?batch/s, batch_loss=32.5, batch_index=1, batch

Testing:   0%| | 1/462 [00:15<1:55:16, 15.00s/batch, batch_loss=32.5, batch_inde

Testing:   0%| | 1/462 [00:30<1:55:16, 15.00s/batch, batch_loss=12, batch_index=

Testing:   0%| | 2/462 [00:30<1:57:55, 15.38s/batch, batch_loss=12, batch_index=

Testing:   0%| | 2/462 [00:45<1:57:55, 15.38s/batch, batch_loss=20.2, batch_inde

Testing:   1%| | 3/462 [00:45<1:57:20, 15.34s/batch, batch_loss=20.2, batch_inde

Testing:   1%| | 3/462 [01:02<1:57:20, 15.34s/batch, batch_loss=39.4, batch_inde

Testing:   1%| | 4/462 [01:02<2:02:04, 15.99s/batch, batch_loss=39.4, batch_inde

Testing:   1%| | 4/462 [01:18<2:02:04, 15.99s/batch, batch_loss=27.6, batch_inde

Testing:   1%| | 5/462 [01:18<1:59:43, 15.72s/batch, batch_loss=27.6, batch_inde

Testing:   1%| | 5/462 [01:33<1:59:43, 15.72s/batch, batch_loss=485, batch_index

Testing:   1%| | 6/462 [01:33<1:57:55, 15.52s/batch, batch_loss=485, batch_index

Testing:   1%| | 6/462 [01:49<1:57:55, 15.52s/batch, batch_loss=23.8, batch_inde

Testing:   2%| | 7/462 [01:49<1:58:37, 15.64s/batch, batch_loss=23.8, batch_inde

Testing:   2%| | 7/462 [02:04<1:58:37, 15.64s/batch, batch_loss=20.7, batch_inde

Testing:   2%| | 8/462 [02:04<1:57:07, 15.48s/batch, batch_loss=20.7, batch_inde

Testing:   2%| | 8/462 [02:19<1:57:07, 15.48s/batch, batch_loss=17.6, batch_inde

Testing:   2%| | 9/462 [02:19<1:57:11, 15.52s/batch, batch_loss=17.6, batch_inde

Testing:   2%| | 9/462 [02:35<1:57:11, 15.52s/batch, batch_loss=21.3, batch_inde

Testing:   2%| | 10/462 [02:35<1:56:41, 15.49s/batch, batch_loss=21.3, batch_ind

Testing:   2%| | 10/462 [02:51<1:56:41, 15.49s/batch, batch_loss=24.2, batch_ind

Testing:   2%| | 11/462 [02:51<1:57:46, 15.67s/batch, batch_loss=24.2, batch_ind

Testing:   2%| | 11/462 [03:06<1:57:46, 15.67s/batch, batch_loss=9.25e+3, batch_

Testing:   3%| | 12/462 [03:06<1:55:38, 15.42s/batch, batch_loss=9.25e+3, batch_

Testing:   3%| | 12/462 [03:22<1:55:38, 15.42s/batch, batch_loss=26.5, batch_ind

Testing:   3%| | 13/462 [03:22<1:56:24, 15.56s/batch, batch_loss=26.5, batch_ind

Testing:   3%| | 13/462 [03:37<1:56:24, 15.56s/batch, batch_loss=922, batch_inde

Testing:   3%| | 14/462 [03:37<1:54:50, 15.38s/batch, batch_loss=922, batch_inde

Testing:   3%| | 14/462 [03:52<1:54:50, 15.38s/batch, batch_loss=26.2, batch_ind

Testing:   3%| | 15/462 [03:52<1:54:59, 15.43s/batch, batch_loss=26.2, batch_ind

Testing:   3%| | 15/462 [04:08<1:54:59, 15.43s/batch, batch_loss=1.77e+3, batch_

Testing:   3%| | 16/462 [04:08<1:54:34, 15.41s/batch, batch_loss=1.77e+3, batch_

Testing:   3%| | 16/462 [04:24<1:54:34, 15.41s/batch, batch_loss=21.1, batch_ind

Testing:   4%| | 17/462 [04:24<1:55:58, 15.64s/batch, batch_loss=21.1, batch_ind

Testing:   4%| | 17/462 [04:39<1:55:58, 15.64s/batch, batch_loss=1.73e+3, batch_

Testing:   4%| | 18/462 [04:39<1:55:02, 15.55s/batch, batch_loss=1.73e+3, batch_

Testing:   4%| | 18/462 [04:56<1:55:02, 15.55s/batch, batch_loss=16.7, batch_ind

Testing:   4%| | 19/462 [04:56<1:57:08, 15.87s/batch, batch_loss=16.7, batch_ind

Testing:   4%| | 19/462 [05:12<1:57:08, 15.87s/batch, batch_loss=1.19e+4, batch_

Testing:   4%| | 20/462 [05:12<1:56:59, 15.88s/batch, batch_loss=1.19e+4, batch_

Testing:   4%| | 20/462 [05:27<1:56:59, 15.88s/batch, batch_loss=21.3, batch_ind

Testing:   5%| | 21/462 [05:27<1:56:02, 15.79s/batch, batch_loss=21.3, batch_ind

Testing:   5%| | 21/462 [05:42<1:56:02, 15.79s/batch, batch_loss=19.5, batch_ind

Testing:   5%| | 22/462 [05:42<1:54:25, 15.60s/batch, batch_loss=19.5, batch_ind

Testing:   5%| | 22/462 [05:58<1:54:25, 15.60s/batch, batch_loss=21.8, batch_ind

Testing:   5%| | 23/462 [05:58<1:54:43, 15.68s/batch, batch_loss=21.8, batch_ind

Testing:   5%| | 23/462 [06:13<1:54:43, 15.68s/batch, batch_loss=129, batch_inde

Testing:   5%| | 24/462 [06:13<1:52:59, 15.48s/batch, batch_loss=129, batch_inde

Testing:   5%| | 24/462 [06:32<1:52:59, 15.48s/batch, batch_loss=78.9, batch_ind

Testing:   5%| | 25/462 [06:32<1:59:34, 16.42s/batch, batch_loss=78.9, batch_ind

Testing:   5%| | 25/462 [06:47<1:59:34, 16.42s/batch, batch_loss=8.79e+3, batch_

Testing:   6%| | 26/462 [06:47<1:57:33, 16.18s/batch, batch_loss=8.79e+3, batch_

Testing:   6%| | 26/462 [07:02<1:57:33, 16.18s/batch, batch_loss=16.1, batch_ind

Testing:   6%| | 27/462 [07:02<1:53:52, 15.71s/batch, batch_loss=16.1, batch_ind

Testing:   6%| | 27/462 [07:16<1:53:52, 15.71s/batch, batch_loss=24.9, batch_ind

Testing:   6%| | 28/462 [07:16<1:49:13, 15.10s/batch, batch_loss=24.9, batch_ind

Testing:   6%| | 28/462 [07:31<1:49:13, 15.10s/batch, batch_loss=25.2, batch_ind

Testing:   6%| | 29/462 [07:31<1:49:05, 15.12s/batch, batch_loss=25.2, batch_ind

Testing:   6%| | 29/462 [07:46<1:49:05, 15.12s/batch, batch_loss=22.6, batch_ind

Testing:   6%| | 30/462 [07:46<1:49:27, 15.20s/batch, batch_loss=22.6, batch_ind

Testing:   6%| | 30/462 [08:03<1:49:27, 15.20s/batch, batch_loss=27.6, batch_ind

Testing:   7%| | 31/462 [08:03<1:53:16, 15.77s/batch, batch_loss=27.6, batch_ind

Testing:   7%| | 31/462 [08:19<1:53:16, 15.77s/batch, batch_loss=23.8, batch_ind

Testing:   7%| | 32/462 [08:19<1:52:16, 15.67s/batch, batch_loss=23.8, batch_ind

Testing:   7%| | 32/462 [08:34<1:52:16, 15.67s/batch, batch_loss=24.2, batch_ind

Testing:   7%| | 33/462 [08:34<1:51:40, 15.62s/batch, batch_loss=24.2, batch_ind

Testing:   7%| | 33/462 [08:49<1:51:40, 15.62s/batch, batch_loss=18.5, batch_ind

Testing:   7%| | 34/462 [08:49<1:50:23, 15.48s/batch, batch_loss=18.5, batch_ind

Testing:   7%| | 34/462 [09:05<1:50:23, 15.48s/batch, batch_loss=18.1, batch_ind

Testing:   8%| | 35/462 [09:05<1:49:43, 15.42s/batch, batch_loss=18.1, batch_ind

Testing:   8%| | 35/462 [09:20<1:49:43, 15.42s/batch, batch_loss=35, batch_index

Testing:   8%| | 36/462 [09:20<1:48:40, 15.31s/batch, batch_loss=35, batch_index

Testing:   8%| | 36/462 [09:35<1:48:40, 15.31s/batch, batch_loss=23.6, batch_ind

Testing:   8%| | 37/462 [09:35<1:49:14, 15.42s/batch, batch_loss=23.6, batch_ind

Testing:   8%| | 37/462 [09:54<1:49:14, 15.42s/batch, batch_loss=3.84e+3, batch_

Testing:   8%| | 38/462 [09:54<1:56:26, 16.48s/batch, batch_loss=3.84e+3, batch_

Testing:   8%| | 38/462 [10:10<1:56:26, 16.48s/batch, batch_loss=28.6, batch_ind

Testing:   8%| | 39/462 [10:10<1:54:24, 16.23s/batch, batch_loss=28.6, batch_ind

Testing:   8%| | 39/462 [10:25<1:54:24, 16.23s/batch, batch_loss=182, batch_inde

Testing:   9%| | 40/462 [10:25<1:52:26, 15.99s/batch, batch_loss=182, batch_inde

Testing:   9%| | 40/462 [10:40<1:52:26, 15.99s/batch, batch_loss=14.4, batch_ind

Testing:   9%| | 41/462 [10:40<1:49:06, 15.55s/batch, batch_loss=14.4, batch_ind

Testing:   9%| | 41/462 [10:55<1:49:06, 15.55s/batch, batch_loss=15.9, batch_ind

Testing:   9%| | 42/462 [10:55<1:48:29, 15.50s/batch, batch_loss=15.9, batch_ind

Testing:   9%| | 42/462 [11:12<1:48:29, 15.50s/batch, batch_loss=20.1, batch_ind

Testing:   9%| | 43/462 [11:12<1:50:19, 15.80s/batch, batch_loss=20.1, batch_ind

Testing:   9%| | 43/462 [11:28<1:50:19, 15.80s/batch, batch_loss=15.9, batch_ind

Testing:  10%| | 44/462 [11:28<1:51:11, 15.96s/batch, batch_loss=15.9, batch_ind

Testing:  10%| | 44/462 [11:44<1:51:11, 15.96s/batch, batch_loss=19.3, batch_ind

Testing:  10%| | 45/462 [11:44<1:50:14, 15.86s/batch, batch_loss=19.3, batch_ind

Testing:  10%| | 45/462 [12:03<1:50:14, 15.86s/batch, batch_loss=21.7, batch_ind

Testing:  10%| | 46/462 [12:03<1:57:41, 16.97s/batch, batch_loss=21.7, batch_ind

Testing:  10%| | 46/462 [12:19<1:57:41, 16.97s/batch, batch_loss=24.5, batch_ind

Testing:  10%| | 47/462 [12:19<1:53:44, 16.44s/batch, batch_loss=24.5, batch_ind

Testing:  10%| | 47/462 [12:34<1:53:44, 16.44s/batch, batch_loss=21.3, batch_ind

Testing:  10%| | 48/462 [12:34<1:51:08, 16.11s/batch, batch_loss=21.3, batch_ind

Testing:  10%| | 48/462 [12:50<1:51:08, 16.11s/batch, batch_loss=15.3, batch_ind

Testing:  11%| | 49/462 [12:50<1:51:45, 16.24s/batch, batch_loss=15.3, batch_ind

Testing:  11%| | 49/462 [13:06<1:51:45, 16.24s/batch, batch_loss=18.1, batch_ind

Testing:  11%| | 50/462 [13:06<1:50:37, 16.11s/batch, batch_loss=18.1, batch_ind

Testing:  11%| | 50/462 [13:23<1:50:37, 16.11s/batch, batch_loss=233, batch_inde

Testing:  11%| | 51/462 [13:23<1:51:00, 16.21s/batch, batch_loss=233, batch_inde

Testing:  11%| | 51/462 [13:40<1:51:00, 16.21s/batch, batch_loss=1.26e+3, batch_

Testing:  11%| | 52/462 [13:40<1:52:15, 16.43s/batch, batch_loss=1.26e+3, batch_

Testing:  11%| | 52/462 [13:59<1:52:15, 16.43s/batch, batch_loss=25, batch_index

Testing:  11%| | 53/462 [13:59<1:57:40, 17.26s/batch, batch_loss=25, batch_index

Testing:  11%| | 53/462 [14:15<1:57:40, 17.26s/batch, batch_loss=21, batch_index

Testing:  12%| | 54/462 [14:15<1:56:02, 17.07s/batch, batch_loss=21, batch_index

Testing:  12%| | 54/462 [14:31<1:56:02, 17.07s/batch, batch_loss=25, batch_index

Testing:  12%| | 55/462 [14:31<1:53:15, 16.70s/batch, batch_loss=25, batch_index

Testing:  12%| | 55/462 [14:48<1:53:15, 16.70s/batch, batch_loss=22.5, batch_ind

Testing:  12%| | 56/462 [14:48<1:53:23, 16.76s/batch, batch_loss=22.5, batch_ind

Testing:  12%| | 56/462 [15:05<1:53:23, 16.76s/batch, batch_loss=22.1, batch_ind

Testing:  12%| | 57/462 [15:05<1:52:45, 16.70s/batch, batch_loss=22.1, batch_ind

Testing:  12%| | 57/462 [15:22<1:52:45, 16.70s/batch, batch_loss=2.84e+4, batch_

Testing:  13%|▏| 58/462 [15:22<1:52:29, 16.71s/batch, batch_loss=2.84e+4, batch_

Testing:  13%|▏| 58/462 [15:37<1:52:29, 16.71s/batch, batch_loss=28.6, batch_ind

Testing:  13%|▏| 59/462 [15:37<1:50:44, 16.49s/batch, batch_loss=28.6, batch_ind

Testing:  13%|▏| 59/462 [15:54<1:50:44, 16.49s/batch, batch_loss=27.1, batch_ind

Testing:  13%|▏| 60/462 [15:54<1:50:09, 16.44s/batch, batch_loss=27.1, batch_ind

Testing:  13%|▏| 60/462 [16:12<1:50:09, 16.44s/batch, batch_loss=38.7, batch_ind

Testing:  13%|▏| 61/462 [16:12<1:53:44, 17.02s/batch, batch_loss=38.7, batch_ind

Testing:  13%|▏| 61/462 [16:27<1:53:44, 17.02s/batch, batch_loss=27.8, batch_ind

Testing:  13%|▏| 62/462 [16:27<1:49:30, 16.43s/batch, batch_loss=27.8, batch_ind

Testing:  13%|▏| 62/462 [16:42<1:49:30, 16.43s/batch, batch_loss=35.1, batch_ind

Testing:  14%|▏| 63/462 [16:42<1:45:19, 15.84s/batch, batch_loss=35.1, batch_ind

Testing:  14%|▏| 63/462 [16:58<1:45:19, 15.84s/batch, batch_loss=27.7, batch_ind

Testing:  14%|▏| 64/462 [16:58<1:45:21, 15.88s/batch, batch_loss=27.7, batch_ind

Testing:  14%|▏| 64/462 [17:13<1:45:21, 15.88s/batch, batch_loss=22.7, batch_ind

Testing:  14%|▏| 65/462 [17:13<1:43:18, 15.61s/batch, batch_loss=22.7, batch_ind

Testing:  14%|▏| 65/462 [17:28<1:43:18, 15.61s/batch, batch_loss=26.1, batch_ind

Testing:  14%|▏| 66/462 [17:28<1:42:21, 15.51s/batch, batch_loss=26.1, batch_ind

Testing:  14%|▏| 66/462 [17:47<1:42:21, 15.51s/batch, batch_loss=3.6e+3, batch_i

Testing:  15%|▏| 67/462 [17:47<1:49:20, 16.61s/batch, batch_loss=3.6e+3, batch_i

Testing:  15%|▏| 67/462 [18:02<1:49:20, 16.61s/batch, batch_loss=23.9, batch_ind

Testing:  15%|▏| 68/462 [18:02<1:46:28, 16.22s/batch, batch_loss=23.9, batch_ind

Testing:  15%|▏| 68/462 [18:17<1:46:28, 16.22s/batch, batch_loss=22.2, batch_ind

Testing:  15%|▏| 69/462 [18:17<1:43:18, 15.77s/batch, batch_loss=22.2, batch_ind

Testing:  15%|▏| 69/462 [18:32<1:43:18, 15.77s/batch, batch_loss=3.48e+3, batch_

Testing:  15%|▏| 70/462 [18:32<1:41:06, 15.48s/batch, batch_loss=3.48e+3, batch_

Testing:  15%|▏| 70/462 [18:48<1:41:06, 15.48s/batch, batch_loss=181, batch_inde

Testing:  15%|▏| 71/462 [18:48<1:41:06, 15.51s/batch, batch_loss=181, batch_inde

Testing:  15%|▏| 71/462 [19:03<1:41:06, 15.51s/batch, batch_loss=31.8, batch_ind

Testing:  16%|▏| 72/462 [19:03<1:41:09, 15.56s/batch, batch_loss=31.8, batch_ind

Testing:  16%|▏| 72/462 [19:18<1:41:09, 15.56s/batch, batch_loss=28.6, batch_ind

Testing:  16%|▏| 73/462 [19:18<1:39:07, 15.29s/batch, batch_loss=28.6, batch_ind

Testing:  16%|▏| 73/462 [19:32<1:39:07, 15.29s/batch, batch_loss=5.58e+3, batch_

Testing:  16%|▏| 74/462 [19:32<1:37:05, 15.01s/batch, batch_loss=5.58e+3, batch_

Testing:  16%|▏| 74/462 [19:48<1:37:05, 15.01s/batch, batch_loss=23.2, batch_ind

Testing:  16%|▏| 75/462 [19:48<1:38:26, 15.26s/batch, batch_loss=23.2, batch_ind

Testing:  16%|▏| 75/462 [20:06<1:38:26, 15.26s/batch, batch_loss=25.2, batch_ind

Testing:  16%|▏| 76/462 [20:06<1:44:00, 16.17s/batch, batch_loss=25.2, batch_ind

Testing:  16%|▏| 76/462 [20:22<1:44:00, 16.17s/batch, batch_loss=33.8, batch_ind

Testing:  17%|▏| 77/462 [20:22<1:42:08, 15.92s/batch, batch_loss=33.8, batch_ind

Testing:  17%|▏| 77/462 [20:37<1:42:08, 15.92s/batch, batch_loss=16.8, batch_ind

Testing:  17%|▏| 78/462 [20:37<1:40:36, 15.72s/batch, batch_loss=16.8, batch_ind

Testing:  17%|▏| 78/462 [20:53<1:40:36, 15.72s/batch, batch_loss=18.7, batch_ind

Testing:  17%|▏| 79/462 [20:53<1:41:06, 15.84s/batch, batch_loss=18.7, batch_ind

Testing:  17%|▏| 79/462 [21:08<1:41:06, 15.84s/batch, batch_loss=20.4, batch_ind

Testing:  17%|▏| 80/462 [21:08<1:38:52, 15.53s/batch, batch_loss=20.4, batch_ind

Testing:  17%|▏| 80/462 [21:23<1:38:52, 15.53s/batch, batch_loss=28.5, batch_ind

Testing:  18%|▏| 81/462 [21:23<1:38:09, 15.46s/batch, batch_loss=28.5, batch_ind

Testing:  18%|▏| 81/462 [21:38<1:38:09, 15.46s/batch, batch_loss=22, batch_index

Testing:  18%|▏| 82/462 [21:38<1:37:27, 15.39s/batch, batch_loss=22, batch_index

Testing:  18%|▏| 82/462 [21:54<1:37:27, 15.39s/batch, batch_loss=17.6, batch_ind

Testing:  18%|▏| 83/462 [21:54<1:37:02, 15.36s/batch, batch_loss=17.6, batch_ind

Testing:  18%|▏| 83/462 [22:10<1:37:02, 15.36s/batch, batch_loss=20.4, batch_ind

Testing:  18%|▏| 84/462 [22:10<1:37:45, 15.52s/batch, batch_loss=20.4, batch_ind

Testing:  18%|▏| 84/462 [22:29<1:37:45, 15.52s/batch, batch_loss=28.7, batch_ind

Testing:  18%|▏| 85/462 [22:29<1:43:59, 16.55s/batch, batch_loss=28.7, batch_ind

Testing:  18%|▏| 85/462 [22:44<1:43:59, 16.55s/batch, batch_loss=25.8, batch_ind

Testing:  19%|▏| 86/462 [22:44<1:41:24, 16.18s/batch, batch_loss=25.8, batch_ind

Testing:  19%|▏| 86/462 [22:59<1:41:24, 16.18s/batch, batch_loss=27.8, batch_ind

Testing:  19%|▏| 87/462 [22:59<1:40:02, 16.01s/batch, batch_loss=27.8, batch_ind

Testing:  19%|▏| 87/462 [23:15<1:40:02, 16.01s/batch, batch_loss=22.8, batch_ind

Testing:  19%|▏| 88/462 [23:16<1:39:51, 16.02s/batch, batch_loss=22.8, batch_ind

Testing:  19%|▏| 88/462 [23:31<1:39:51, 16.02s/batch, batch_loss=201, batch_inde

Testing:  19%|▏| 89/462 [23:31<1:38:02, 15.77s/batch, batch_loss=201, batch_inde

Testing:  19%|▏| 89/462 [23:46<1:38:02, 15.77s/batch, batch_loss=22.8, batch_ind

Testing:  19%|▏| 90/462 [23:46<1:36:45, 15.61s/batch, batch_loss=22.8, batch_ind

Testing:  19%|▏| 90/462 [24:01<1:36:45, 15.61s/batch, batch_loss=19.4, batch_ind

Testing:  20%|▏| 91/462 [24:01<1:35:16, 15.41s/batch, batch_loss=19.4, batch_ind

Testing:  20%|▏| 91/462 [24:16<1:35:16, 15.41s/batch, batch_loss=15.8, batch_ind

Testing:  20%|▏| 92/462 [24:16<1:33:46, 15.21s/batch, batch_loss=15.8, batch_ind

Testing:  20%|▏| 92/462 [24:31<1:33:46, 15.21s/batch, batch_loss=17.8, batch_ind

Testing:  20%|▏| 93/462 [24:31<1:33:20, 15.18s/batch, batch_loss=17.8, batch_ind

Testing:  20%|▏| 93/462 [24:46<1:33:20, 15.18s/batch, batch_loss=41.1, batch_ind

Testing:  20%|▏| 94/462 [24:46<1:34:13, 15.36s/batch, batch_loss=41.1, batch_ind

Testing:  20%|▏| 94/462 [25:02<1:34:13, 15.36s/batch, batch_loss=3.21e+4, batch_

Testing:  21%|▏| 95/462 [25:02<1:34:20, 15.42s/batch, batch_loss=3.21e+4, batch_

Testing:  21%|▏| 95/462 [25:18<1:34:20, 15.42s/batch, batch_loss=36, batch_index

Testing:  21%|▏| 96/462 [25:18<1:34:15, 15.45s/batch, batch_loss=36, batch_index

Testing:  21%|▏| 96/462 [25:33<1:34:15, 15.45s/batch, batch_loss=36.6, batch_ind

Testing:  21%|▏| 97/462 [25:33<1:33:02, 15.29s/batch, batch_loss=36.6, batch_ind

Testing:  21%|▏| 97/462 [25:48<1:33:02, 15.29s/batch, batch_loss=30, batch_index

Testing:  21%|▏| 98/462 [25:48<1:33:22, 15.39s/batch, batch_loss=30, batch_index

Testing:  21%|▏| 98/462 [26:05<1:33:22, 15.39s/batch, batch_loss=29.7, batch_ind

Testing:  21%|▏| 99/462 [26:05<1:35:51, 15.84s/batch, batch_loss=29.7, batch_ind

Testing:  21%|▏| 99/462 [26:21<1:35:51, 15.84s/batch, batch_loss=2.28e+4, batch_

Testing:  22%|▏| 100/462 [26:21<1:35:28, 15.82s/batch, batch_loss=2.28e+4, batch

Testing:  22%|▏| 100/462 [26:36<1:35:28, 15.82s/batch, batch_loss=26.6, batch_in

Testing:  22%|▏| 101/462 [26:36<1:33:56, 15.61s/batch, batch_loss=26.6, batch_in

Testing:  22%|▏| 101/462 [26:52<1:33:56, 15.61s/batch, batch_loss=27.9, batch_in

Testing:  22%|▏| 102/462 [26:52<1:34:55, 15.82s/batch, batch_loss=27.9, batch_in

Testing:  22%|▏| 102/462 [27:08<1:34:55, 15.82s/batch, batch_loss=27.4, batch_in

Testing:  22%|▏| 103/462 [27:08<1:34:58, 15.87s/batch, batch_loss=27.4, batch_in

Testing:  22%|▏| 103/462 [27:23<1:34:58, 15.87s/batch, batch_loss=21, batch_inde

Testing:  23%|▏| 104/462 [27:23<1:33:08, 15.61s/batch, batch_loss=21, batch_inde

Testing:  23%|▏| 104/462 [27:38<1:33:08, 15.61s/batch, batch_loss=26.9, batch_in

Testing:  23%|▏| 105/462 [27:38<1:32:11, 15.49s/batch, batch_loss=26.9, batch_in

Testing:  23%|▏| 105/462 [27:55<1:32:11, 15.49s/batch, batch_loss=20.7, batch_in

Testing:  23%|▏| 106/462 [27:55<1:33:22, 15.74s/batch, batch_loss=20.7, batch_in

Testing:  23%|▏| 106/462 [28:13<1:33:22, 15.74s/batch, batch_loss=27.4, batch_in

Testing:  23%|▏| 107/462 [28:13<1:37:32, 16.49s/batch, batch_loss=27.4, batch_in

Testing:  23%|▏| 107/462 [28:30<1:37:32, 16.49s/batch, batch_loss=24.1, batch_in

Testing:  23%|▏| 108/462 [28:30<1:37:42, 16.56s/batch, batch_loss=24.1, batch_in

Testing:  23%|▏| 108/462 [28:46<1:37:42, 16.56s/batch, batch_loss=21.7, batch_in

Testing:  24%|▏| 109/462 [28:46<1:37:09, 16.51s/batch, batch_loss=21.7, batch_in

Testing:  24%|▏| 109/462 [29:02<1:37:09, 16.51s/batch, batch_loss=27, batch_inde

Testing:  24%|▏| 110/462 [29:02<1:35:09, 16.22s/batch, batch_loss=27, batch_inde

Testing:  24%|▏| 110/462 [29:15<1:35:09, 16.22s/batch, batch_loss=28, batch_inde

Testing:  24%|▏| 111/462 [29:15<1:30:42, 15.51s/batch, batch_loss=28, batch_inde

Testing:  24%|▏| 111/462 [29:31<1:30:42, 15.51s/batch, batch_loss=19.5, batch_in

Testing:  24%|▏| 112/462 [29:31<1:31:17, 15.65s/batch, batch_loss=19.5, batch_in

Testing:  24%|▏| 112/462 [29:48<1:31:17, 15.65s/batch, batch_loss=20.7, batch_in

Testing:  24%|▏| 113/462 [29:48<1:31:49, 15.79s/batch, batch_loss=20.7, batch_in

Testing:  24%|▏| 113/462 [30:03<1:31:49, 15.79s/batch, batch_loss=19.9, batch_in

Testing:  25%|▏| 114/462 [30:03<1:31:40, 15.81s/batch, batch_loss=19.9, batch_in

Testing:  25%|▏| 114/462 [30:19<1:31:40, 15.81s/batch, batch_loss=29.8, batch_in

Testing:  25%|▏| 115/462 [30:19<1:31:02, 15.74s/batch, batch_loss=29.8, batch_in

Testing:  25%|▏| 115/462 [30:34<1:31:02, 15.74s/batch, batch_loss=27.5, batch_in

Testing:  25%|▎| 116/462 [30:34<1:30:13, 15.65s/batch, batch_loss=27.5, batch_in

Testing:  25%|▎| 116/462 [30:49<1:30:13, 15.65s/batch, batch_loss=30.2, batch_in

Testing:  25%|▎| 117/462 [30:49<1:28:40, 15.42s/batch, batch_loss=30.2, batch_in

Testing:  25%|▎| 117/462 [31:04<1:28:40, 15.42s/batch, batch_loss=6.73e+3, batch

Testing:  26%|▎| 118/462 [31:04<1:27:03, 15.18s/batch, batch_loss=6.73e+3, batch

Testing:  26%|▎| 118/462 [31:20<1:27:03, 15.18s/batch, batch_loss=21, batch_inde

Testing:  26%|▎| 119/462 [31:20<1:28:05, 15.41s/batch, batch_loss=21, batch_inde

Testing:  26%|▎| 119/462 [31:35<1:28:05, 15.41s/batch, batch_loss=22.9, batch_in

Testing:  26%|▎| 120/462 [31:35<1:27:34, 15.36s/batch, batch_loss=22.9, batch_in

Testing:  26%|▎| 120/462 [31:50<1:27:34, 15.36s/batch, batch_loss=28.2, batch_in

Testing:  26%|▎| 121/462 [31:50<1:26:41, 15.26s/batch, batch_loss=28.2, batch_in

Testing:  26%|▎| 121/462 [32:08<1:26:41, 15.26s/batch, batch_loss=35.3, batch_in

Testing:  26%|▎| 122/462 [32:08<1:31:35, 16.16s/batch, batch_loss=35.3, batch_in

Testing:  26%|▎| 122/462 [32:23<1:31:35, 16.16s/batch, batch_loss=23.2, batch_in

Testing:  27%|▎| 123/462 [32:23<1:29:12, 15.79s/batch, batch_loss=23.2, batch_in

Testing:  27%|▎| 123/462 [32:38<1:29:12, 15.79s/batch, batch_loss=20.9, batch_in

Testing:  27%|▎| 124/462 [32:38<1:27:16, 15.49s/batch, batch_loss=20.9, batch_in

Testing:  27%|▎| 124/462 [32:54<1:27:16, 15.49s/batch, batch_loss=280, batch_ind

Testing:  27%|▎| 125/462 [32:54<1:27:48, 15.63s/batch, batch_loss=280, batch_ind

Testing:  27%|▎| 125/462 [33:09<1:27:48, 15.63s/batch, batch_loss=26.3, batch_in

Testing:  27%|▎| 126/462 [33:09<1:25:26, 15.26s/batch, batch_loss=26.3, batch_in

Testing:  27%|▎| 126/462 [33:23<1:25:26, 15.26s/batch, batch_loss=25.4, batch_in

Testing:  27%|▎| 127/462 [33:23<1:24:11, 15.08s/batch, batch_loss=25.4, batch_in

Testing:  27%|▎| 127/462 [33:38<1:24:11, 15.08s/batch, batch_loss=21.4, batch_in

Testing:  28%|▎| 128/462 [33:38<1:23:30, 15.00s/batch, batch_loss=21.4, batch_in

Testing:  28%|▎| 128/462 [33:53<1:23:30, 15.00s/batch, batch_loss=18.8, batch_in

Testing:  28%|▎| 129/462 [33:53<1:23:37, 15.07s/batch, batch_loss=18.8, batch_in

Testing:  28%|▎| 129/462 [34:08<1:23:37, 15.07s/batch, batch_loss=24.6, batch_in

Testing:  28%|▎| 130/462 [34:08<1:23:25, 15.08s/batch, batch_loss=24.6, batch_in

Testing:  28%|▎| 130/462 [34:24<1:23:25, 15.08s/batch, batch_loss=10.6, batch_in

Testing:  28%|▎| 131/462 [34:24<1:23:21, 15.11s/batch, batch_loss=10.6, batch_in

Testing:  28%|▎| 131/462 [34:38<1:23:21, 15.11s/batch, batch_loss=454, batch_ind

Testing:  29%|▎| 132/462 [34:38<1:21:25, 14.80s/batch, batch_loss=454, batch_ind

Testing:  29%|▎| 132/462 [34:52<1:21:25, 14.80s/batch, batch_loss=46.7, batch_in

Testing:  29%|▎| 133/462 [34:52<1:20:51, 14.75s/batch, batch_loss=46.7, batch_in

Testing:  29%|▎| 133/462 [35:06<1:20:51, 14.75s/batch, batch_loss=23.9, batch_in

Testing:  29%|▎| 134/462 [35:06<1:19:15, 14.50s/batch, batch_loss=23.9, batch_in

Testing:  29%|▎| 134/462 [35:20<1:19:15, 14.50s/batch, batch_loss=7.99, batch_in

Testing:  29%|▎| 135/462 [35:20<1:18:44, 14.45s/batch, batch_loss=7.99, batch_in

Testing:  29%|▎| 135/462 [35:36<1:18:44, 14.45s/batch, batch_loss=497, batch_ind

Testing:  29%|▎| 136/462 [35:36<1:20:23, 14.80s/batch, batch_loss=497, batch_ind

Testing:  29%|▎| 136/462 [35:54<1:20:23, 14.80s/batch, batch_loss=20.9, batch_in

Testing:  30%|▎| 137/462 [35:54<1:25:45, 15.83s/batch, batch_loss=20.9, batch_in

Testing:  30%|▎| 137/462 [36:08<1:25:45, 15.83s/batch, batch_loss=29.9, batch_in

Testing:  30%|▎| 138/462 [36:08<1:22:23, 15.26s/batch, batch_loss=29.9, batch_in

Testing:  30%|▎| 138/462 [36:24<1:22:23, 15.26s/batch, batch_loss=26.9, batch_in

Testing:  30%|▎| 139/462 [36:24<1:22:53, 15.40s/batch, batch_loss=26.9, batch_in

Testing:  30%|▎| 139/462 [36:40<1:22:53, 15.40s/batch, batch_loss=25.2, batch_in

Testing:  30%|▎| 140/462 [36:40<1:24:11, 15.69s/batch, batch_loss=25.2, batch_in

Testing:  30%|▎| 140/462 [36:54<1:24:11, 15.69s/batch, batch_loss=21.9, batch_in

Testing:  31%|▎| 141/462 [36:54<1:21:00, 15.14s/batch, batch_loss=21.9, batch_in

Testing:  31%|▎| 141/462 [37:08<1:21:00, 15.14s/batch, batch_loss=1.13e+3, batch

Testing:  31%|▎| 142/462 [37:08<1:18:35, 14.74s/batch, batch_loss=1.13e+3, batch

Testing:  31%|▎| 142/462 [37:25<1:18:35, 14.74s/batch, batch_loss=6.02e+3, batch

Testing:  31%|▎| 143/462 [37:25<1:21:13, 15.28s/batch, batch_loss=6.02e+3, batch

Testing:  31%|▎| 143/462 [37:39<1:21:13, 15.28s/batch, batch_loss=25, batch_inde

Testing:  31%|▎| 144/462 [37:39<1:20:07, 15.12s/batch, batch_loss=25, batch_inde

Testing:  31%|▎| 144/462 [37:55<1:20:07, 15.12s/batch, batch_loss=23.3, batch_in

Testing:  31%|▎| 145/462 [37:55<1:20:39, 15.27s/batch, batch_loss=23.3, batch_in

Testing:  31%|▎| 145/462 [38:10<1:20:39, 15.27s/batch, batch_loss=30.5, batch_in

Testing:  32%|▎| 146/462 [38:10<1:20:17, 15.25s/batch, batch_loss=30.5, batch_in

Testing:  32%|▎| 146/462 [38:26<1:20:17, 15.25s/batch, batch_loss=24.4, batch_in

Testing:  32%|▎| 147/462 [38:26<1:21:13, 15.47s/batch, batch_loss=24.4, batch_in

Testing:  32%|▎| 147/462 [38:42<1:21:13, 15.47s/batch, batch_loss=25, batch_inde

Testing:  32%|▎| 148/462 [38:42<1:21:25, 15.56s/batch, batch_loss=25, batch_inde

Testing:  32%|▎| 148/462 [38:57<1:21:25, 15.56s/batch, batch_loss=23.1, batch_in

Testing:  32%|▎| 149/462 [38:57<1:21:19, 15.59s/batch, batch_loss=23.1, batch_in

Testing:  32%|▎| 149/462 [39:13<1:21:19, 15.59s/batch, batch_loss=3.78e+4, batch

Testing:  32%|▎| 150/462 [39:13<1:20:42, 15.52s/batch, batch_loss=3.78e+4, batch

Testing:  32%|▎| 150/462 [39:28<1:20:42, 15.52s/batch, batch_loss=29.3, batch_in

Testing:  33%|▎| 151/462 [39:28<1:20:13, 15.48s/batch, batch_loss=29.3, batch_in

Testing:  33%|▎| 151/462 [39:44<1:20:13, 15.48s/batch, batch_loss=22, batch_inde

Testing:  33%|▎| 152/462 [39:44<1:21:06, 15.70s/batch, batch_loss=22, batch_inde

Testing:  33%|▎| 152/462 [40:00<1:21:06, 15.70s/batch, batch_loss=18.9, batch_in

Testing:  33%|▎| 153/462 [40:00<1:20:26, 15.62s/batch, batch_loss=18.9, batch_in

Testing:  33%|▎| 153/462 [40:17<1:20:26, 15.62s/batch, batch_loss=2.2e+3, batch_

Testing:  33%|▎| 154/462 [40:17<1:22:38, 16.10s/batch, batch_loss=2.2e+3, batch_

Testing:  33%|▎| 154/462 [40:33<1:22:38, 16.10s/batch, batch_loss=28.5, batch_in

Testing:  34%|▎| 155/462 [40:33<1:21:34, 15.94s/batch, batch_loss=28.5, batch_in

Testing:  34%|▎| 155/462 [40:48<1:21:34, 15.94s/batch, batch_loss=3.1, batch_ind

Testing:  34%|▎| 156/462 [40:48<1:20:02, 15.70s/batch, batch_loss=3.1, batch_ind

Testing:  34%|▎| 156/462 [41:03<1:20:02, 15.70s/batch, batch_loss=20.3, batch_in

Testing:  34%|▎| 157/462 [41:03<1:18:43, 15.49s/batch, batch_loss=20.3, batch_in

Testing:  34%|▎| 157/462 [41:17<1:18:43, 15.49s/batch, batch_loss=4.44e+3, batch

Testing:  34%|▎| 158/462 [41:17<1:17:11, 15.23s/batch, batch_loss=4.44e+3, batch

Testing:  34%|▎| 158/462 [41:31<1:17:11, 15.23s/batch, batch_loss=19.4, batch_in

Testing:  34%|▎| 159/462 [41:31<1:15:04, 14.87s/batch, batch_loss=19.4, batch_in

Testing:  34%|▎| 159/462 [41:46<1:15:04, 14.87s/batch, batch_loss=30.1, batch_in

Testing:  35%|▎| 160/462 [41:46<1:14:18, 14.76s/batch, batch_loss=30.1, batch_in

Testing:  35%|▎| 160/462 [42:01<1:14:18, 14.76s/batch, batch_loss=27.9, batch_in

Testing:  35%|▎| 161/462 [42:01<1:13:47, 14.71s/batch, batch_loss=27.9, batch_in

Testing:  35%|▎| 161/462 [42:17<1:13:47, 14.71s/batch, batch_loss=32.2, batch_in

Testing:  35%|▎| 162/462 [42:17<1:16:33, 15.31s/batch, batch_loss=32.2, batch_in

Testing:  35%|▎| 162/462 [42:32<1:16:33, 15.31s/batch, batch_loss=4.94e+3, batch

Testing:  35%|▎| 163/462 [42:32<1:15:21, 15.12s/batch, batch_loss=4.94e+3, batch

Testing:  35%|▎| 163/462 [42:48<1:15:21, 15.12s/batch, batch_loss=2.66e+3, batch

Testing:  35%|▎| 164/462 [42:48<1:16:02, 15.31s/batch, batch_loss=2.66e+3, batch

Testing:  35%|▎| 164/462 [43:02<1:16:02, 15.31s/batch, batch_loss=26.1, batch_in

Testing:  36%|▎| 165/462 [43:02<1:14:58, 15.15s/batch, batch_loss=26.1, batch_in

Testing:  36%|▎| 165/462 [43:17<1:14:58, 15.15s/batch, batch_loss=24.6, batch_in

Testing:  36%|▎| 166/462 [43:17<1:14:26, 15.09s/batch, batch_loss=24.6, batch_in

Testing:  36%|▎| 166/462 [43:33<1:14:26, 15.09s/batch, batch_loss=19.6, batch_in

Testing:  36%|▎| 167/462 [43:33<1:14:23, 15.13s/batch, batch_loss=19.6, batch_in

Testing:  36%|▎| 167/462 [43:48<1:14:23, 15.13s/batch, batch_loss=16, batch_inde

Testing:  36%|▎| 168/462 [43:48<1:14:50, 15.27s/batch, batch_loss=16, batch_inde

Testing:  36%|▎| 168/462 [44:03<1:14:50, 15.27s/batch, batch_loss=25, batch_inde

Testing:  37%|▎| 169/462 [44:03<1:14:26, 15.24s/batch, batch_loss=25, batch_inde

Testing:  37%|▎| 169/462 [44:21<1:14:26, 15.24s/batch, batch_loss=11.8, batch_in

Testing:  37%|▎| 170/462 [44:21<1:17:46, 15.98s/batch, batch_loss=11.8, batch_in

Testing:  37%|▎| 170/462 [44:36<1:17:46, 15.98s/batch, batch_loss=5.65, batch_in

Testing:  37%|▎| 171/462 [44:36<1:15:27, 15.56s/batch, batch_loss=5.65, batch_in

Testing:  37%|▎| 171/462 [44:50<1:15:27, 15.56s/batch, batch_loss=17.7, batch_in

Testing:  37%|▎| 172/462 [44:50<1:13:54, 15.29s/batch, batch_loss=17.7, batch_in

Testing:  37%|▎| 172/462 [45:06<1:13:54, 15.29s/batch, batch_loss=23.1, batch_in

Testing:  37%|▎| 173/462 [45:06<1:13:53, 15.34s/batch, batch_loss=23.1, batch_in

Testing:  37%|▎| 173/462 [45:22<1:13:53, 15.34s/batch, batch_loss=100, batch_ind

Testing:  38%|▍| 174/462 [45:22<1:14:20, 15.49s/batch, batch_loss=100, batch_ind

Testing:  38%|▍| 174/462 [45:38<1:14:20, 15.49s/batch, batch_loss=19.7, batch_in

Testing:  38%|▍| 175/462 [45:38<1:14:45, 15.63s/batch, batch_loss=19.7, batch_in

Testing:  38%|▍| 175/462 [45:53<1:14:45, 15.63s/batch, batch_loss=24.4, batch_in

Testing:  38%|▍| 176/462 [45:53<1:14:00, 15.53s/batch, batch_loss=24.4, batch_in

Testing:  38%|▍| 176/462 [46:07<1:14:00, 15.53s/batch, batch_loss=22.4, batch_in

Testing:  38%|▍| 177/462 [46:07<1:12:14, 15.21s/batch, batch_loss=22.4, batch_in

Testing:  38%|▍| 177/462 [46:23<1:12:14, 15.21s/batch, batch_loss=34.1, batch_in

Testing:  39%|▍| 178/462 [46:23<1:12:39, 15.35s/batch, batch_loss=34.1, batch_in

Testing:  39%|▍| 178/462 [46:39<1:12:39, 15.35s/batch, batch_loss=36.5, batch_in

Testing:  39%|▍| 179/462 [46:39<1:13:13, 15.52s/batch, batch_loss=36.5, batch_in

Testing:  39%|▍| 179/462 [46:55<1:13:13, 15.52s/batch, batch_loss=22.3, batch_in

Testing:  39%|▍| 180/462 [46:55<1:13:15, 15.59s/batch, batch_loss=22.3, batch_in

Testing:  39%|▍| 180/462 [47:12<1:13:15, 15.59s/batch, batch_loss=23.6, batch_in

Testing:  39%|▍| 181/462 [47:12<1:14:51, 15.99s/batch, batch_loss=23.6, batch_in

Testing:  39%|▍| 181/462 [47:27<1:14:51, 15.99s/batch, batch_loss=24, batch_inde

Testing:  39%|▍| 182/462 [47:27<1:13:47, 15.81s/batch, batch_loss=24, batch_inde

Testing:  39%|▍| 182/462 [47:43<1:13:47, 15.81s/batch, batch_loss=25.9, batch_in

Testing:  40%|▍| 183/462 [47:43<1:13:02, 15.71s/batch, batch_loss=25.9, batch_in

Testing:  40%|▍| 183/462 [47:57<1:13:02, 15.71s/batch, batch_loss=21.1, batch_in

Testing:  40%|▍| 184/462 [47:57<1:11:28, 15.43s/batch, batch_loss=21.1, batch_in

Testing:  40%|▍| 184/462 [48:15<1:11:28, 15.43s/batch, batch_loss=17089.0, batch

Testing:  40%|▍| 185/462 [48:15<1:14:49, 16.21s/batch, batch_loss=17089.0, batch

Testing:  40%|▍| 185/462 [48:31<1:14:49, 16.21s/batch, batch_loss=40.5, batch_in

Testing:  40%|▍| 186/462 [48:31<1:13:48, 16.05s/batch, batch_loss=40.5, batch_in

Testing:  40%|▍| 186/462 [48:46<1:13:48, 16.05s/batch, batch_loss=38.8, batch_in

Testing:  40%|▍| 187/462 [48:46<1:12:14, 15.76s/batch, batch_loss=38.8, batch_in

Testing:  40%|▍| 187/462 [49:01<1:12:14, 15.76s/batch, batch_loss=537, batch_ind

Testing:  41%|▍| 188/462 [49:01<1:10:54, 15.53s/batch, batch_loss=537, batch_ind

Testing:  41%|▍| 188/462 [49:17<1:10:54, 15.53s/batch, batch_loss=1.04e+3, batch

Testing:  41%|▍| 189/462 [49:17<1:11:06, 15.63s/batch, batch_loss=1.04e+3, batch

Testing:  41%|▍| 189/462 [49:33<1:11:06, 15.63s/batch, batch_loss=27.3, batch_in

Testing:  41%|▍| 190/462 [49:33<1:11:24, 15.75s/batch, batch_loss=27.3, batch_in

Testing:  41%|▍| 190/462 [49:49<1:11:24, 15.75s/batch, batch_loss=301, batch_ind

Testing:  41%|▍| 191/462 [49:49<1:11:16, 15.78s/batch, batch_loss=301, batch_ind

Testing:  41%|▍| 191/462 [50:03<1:11:16, 15.78s/batch, batch_loss=22.9, batch_in

Testing:  42%|▍| 192/462 [50:03<1:09:18, 15.40s/batch, batch_loss=22.9, batch_in

Testing:  42%|▍| 192/462 [50:19<1:09:18, 15.40s/batch, batch_loss=233, batch_ind

Testing:  42%|▍| 193/462 [50:19<1:08:49, 15.35s/batch, batch_loss=233, batch_ind

Testing:  42%|▍| 193/462 [50:34<1:08:49, 15.35s/batch, batch_loss=1.05e+3, batch

Testing:  42%|▍| 194/462 [50:34<1:08:19, 15.30s/batch, batch_loss=1.05e+3, batch

Testing:  42%|▍| 194/462 [50:49<1:08:19, 15.30s/batch, batch_loss=19.2, batch_in

Testing:  42%|▍| 195/462 [50:49<1:08:12, 15.33s/batch, batch_loss=19.2, batch_in

Testing:  42%|▍| 195/462 [51:04<1:08:12, 15.33s/batch, batch_loss=28.3, batch_in

Testing:  42%|▍| 196/462 [51:04<1:07:52, 15.31s/batch, batch_loss=28.3, batch_in

Testing:  42%|▍| 196/462 [51:20<1:07:52, 15.31s/batch, batch_loss=461, batch_ind

Testing:  43%|▍| 197/462 [51:20<1:08:15, 15.45s/batch, batch_loss=461, batch_ind

Testing:  43%|▍| 197/462 [51:36<1:08:15, 15.45s/batch, batch_loss=36.8, batch_in

Testing:  43%|▍| 198/462 [51:36<1:08:43, 15.62s/batch, batch_loss=36.8, batch_in

Testing:  43%|▍| 198/462 [51:52<1:08:43, 15.62s/batch, batch_loss=19.5, batch_in

Testing:  43%|▍| 199/462 [51:52<1:09:16, 15.81s/batch, batch_loss=19.5, batch_in

Testing:  43%|▍| 199/462 [52:10<1:09:16, 15.81s/batch, batch_loss=30.2, batch_in

Testing:  43%|▍| 200/462 [52:10<1:11:28, 16.37s/batch, batch_loss=30.2, batch_in

Testing:  43%|▍| 200/462 [52:26<1:11:28, 16.37s/batch, batch_loss=13.8, batch_in

Testing:  44%|▍| 201/462 [52:26<1:10:38, 16.24s/batch, batch_loss=13.8, batch_in

Testing:  44%|▍| 201/462 [52:42<1:10:38, 16.24s/batch, batch_loss=13.3, batch_in

Testing:  44%|▍| 202/462 [52:42<1:09:35, 16.06s/batch, batch_loss=13.3, batch_in

Testing:  44%|▍| 202/462 [52:58<1:09:35, 16.06s/batch, batch_loss=28.9, batch_in

Testing:  44%|▍| 203/462 [52:58<1:09:06, 16.01s/batch, batch_loss=28.9, batch_in

Testing:  44%|▍| 203/462 [53:13<1:09:06, 16.01s/batch, batch_loss=29, batch_inde

Testing:  44%|▍| 204/462 [53:13<1:08:21, 15.90s/batch, batch_loss=29, batch_inde

Testing:  44%|▍| 204/462 [53:29<1:08:21, 15.90s/batch, batch_loss=35.9, batch_in

Testing:  44%|▍| 205/462 [53:29<1:07:29, 15.76s/batch, batch_loss=35.9, batch_in

Testing:  44%|▍| 205/462 [53:44<1:07:29, 15.76s/batch, batch_loss=26.2, batch_in

Testing:  45%|▍| 206/462 [53:44<1:07:12, 15.75s/batch, batch_loss=26.2, batch_in

Testing:  45%|▍| 206/462 [54:02<1:07:12, 15.75s/batch, batch_loss=24.7, batch_in

Testing:  45%|▍| 207/462 [54:02<1:09:29, 16.35s/batch, batch_loss=24.7, batch_in

Testing:  45%|▍| 207/462 [54:18<1:09:29, 16.35s/batch, batch_loss=1.07e+3, batch

Testing:  45%|▍| 208/462 [54:18<1:08:36, 16.21s/batch, batch_loss=1.07e+3, batch

Testing:  45%|▍| 208/462 [54:33<1:08:36, 16.21s/batch, batch_loss=18.2, batch_in

Testing:  45%|▍| 209/462 [54:33<1:06:23, 15.74s/batch, batch_loss=18.2, batch_in

Testing:  45%|▍| 209/462 [54:47<1:06:23, 15.74s/batch, batch_loss=32.2, batch_in

Testing:  45%|▍| 210/462 [54:47<1:04:26, 15.34s/batch, batch_loss=32.2, batch_in

Testing:  45%|▍| 210/462 [55:03<1:04:26, 15.34s/batch, batch_loss=15.5, batch_in

Testing:  46%|▍| 211/462 [55:03<1:04:44, 15.47s/batch, batch_loss=15.5, batch_in

Testing:  46%|▍| 211/462 [55:18<1:04:44, 15.47s/batch, batch_loss=18.6, batch_in

Testing:  46%|▍| 212/462 [55:18<1:04:35, 15.50s/batch, batch_loss=18.6, batch_in

Testing:  46%|▍| 212/462 [55:34<1:04:35, 15.50s/batch, batch_loss=34.9, batch_in

Testing:  46%|▍| 213/462 [55:34<1:04:33, 15.56s/batch, batch_loss=34.9, batch_in

Testing:  46%|▍| 213/462 [55:49<1:04:33, 15.56s/batch, batch_loss=1.16e+4, batch

Testing:  46%|▍| 214/462 [55:49<1:04:00, 15.49s/batch, batch_loss=1.16e+4, batch

Testing:  46%|▍| 214/462 [56:05<1:04:00, 15.49s/batch, batch_loss=30.1, batch_in

Testing:  47%|▍| 215/462 [56:05<1:04:13, 15.60s/batch, batch_loss=30.1, batch_in

Testing:  47%|▍| 215/462 [56:21<1:04:13, 15.60s/batch, batch_loss=24.5, batch_in

Testing:  47%|▍| 216/462 [56:21<1:03:45, 15.55s/batch, batch_loss=24.5, batch_in

Testing:  47%|▍| 216/462 [56:35<1:03:45, 15.55s/batch, batch_loss=34.6, batch_in

Testing:  47%|▍| 217/462 [56:35<1:02:15, 15.25s/batch, batch_loss=34.6, batch_in

Testing:  47%|▍| 217/462 [56:50<1:02:15, 15.25s/batch, batch_loss=45.6, batch_in

Testing:  47%|▍| 218/462 [56:50<1:01:34, 15.14s/batch, batch_loss=45.6, batch_in

Testing:  47%|▍| 218/462 [57:05<1:01:34, 15.14s/batch, batch_loss=31, batch_inde

Testing:  47%|▍| 219/462 [57:05<1:01:02, 15.07s/batch, batch_loss=31, batch_inde

Testing:  47%|▍| 219/462 [57:20<1:01:02, 15.07s/batch, batch_loss=33.5, batch_in

Testing:  48%|▍| 220/462 [57:20<1:00:16, 14.94s/batch, batch_loss=33.5, batch_in

Testing:  48%|▍| 220/462 [57:35<1:00:16, 14.94s/batch, batch_loss=35.5, batch_in

Testing:  48%|▍| 221/462 [57:35<1:00:19, 15.02s/batch, batch_loss=35.5, batch_in

Testing:  48%|▍| 221/462 [57:53<1:00:19, 15.02s/batch, batch_loss=24.6, batch_in

Testing:  48%|▍| 222/462 [57:53<1:03:25, 15.86s/batch, batch_loss=24.6, batch_in

Testing:  48%|▍| 222/462 [58:08<1:03:25, 15.86s/batch, batch_loss=37.4, batch_in

Testing:  48%|▍| 223/462 [58:08<1:01:57, 15.55s/batch, batch_loss=37.4, batch_in

Testing:  48%|▍| 223/462 [58:23<1:01:57, 15.55s/batch, batch_loss=26.9, batch_in

Testing:  48%|▍| 224/462 [58:23<1:01:09, 15.42s/batch, batch_loss=26.9, batch_in

Testing:  48%|▍| 224/462 [58:38<1:01:09, 15.42s/batch, batch_loss=37.8, batch_in

Testing:  49%|▍| 225/462 [58:38<1:00:12, 15.24s/batch, batch_loss=37.8, batch_in

Testing:  49%|▍| 225/462 [58:52<1:00:12, 15.24s/batch, batch_loss=26.2, batch_in

Testing:  49%|▍| 226/462 [58:52<59:08, 15.04s/batch, batch_loss=26.2, batch_inde

Testing:  49%|▍| 226/462 [59:07<59:08, 15.04s/batch, batch_loss=24.9, batch_inde

Testing:  49%|▍| 227/462 [59:07<58:26, 14.92s/batch, batch_loss=24.9, batch_inde

Testing:  49%|▍| 227/462 [59:22<58:26, 14.92s/batch, batch_loss=3.94e+4, batch_i

Testing:  49%|▍| 228/462 [59:22<58:04, 14.89s/batch, batch_loss=3.94e+4, batch_i

Testing:  49%|▍| 228/462 [59:36<58:04, 14.89s/batch, batch_loss=30.4, batch_inde

Testing:  50%|▍| 229/462 [59:36<57:29, 14.81s/batch, batch_loss=30.4, batch_inde

Testing:  50%|▍| 229/462 [59:51<57:29, 14.81s/batch, batch_loss=29.8, batch_inde

Testing:  50%|▍| 230/462 [59:51<57:13, 14.80s/batch, batch_loss=29.8, batch_inde

Testing:  50%|▍| 230/462 [1:00:05<57:13, 14.80s/batch, batch_loss=41.1, batch_in

Testing:  50%|▌| 231/462 [1:00:05<56:37, 14.71s/batch, batch_loss=41.1, batch_in

Testing:  50%|▌| 231/462 [1:00:20<56:37, 14.71s/batch, batch_loss=27.2, batch_in

Testing:  50%|▌| 232/462 [1:00:20<56:26, 14.72s/batch, batch_loss=27.2, batch_in

Testing:  50%|▌| 232/462 [1:00:34<56:26, 14.72s/batch, batch_loss=5.87e+4, batch

Testing:  50%|▌| 233/462 [1:00:34<55:42, 14.60s/batch, batch_loss=5.87e+4, batch

Testing:  50%|▌| 233/462 [1:00:49<55:42, 14.60s/batch, batch_loss=29.2, batch_in

Testing:  51%|▌| 234/462 [1:00:49<54:50, 14.43s/batch, batch_loss=29.2, batch_in

Testing:  51%|▌| 234/462 [1:01:03<54:50, 14.43s/batch, batch_loss=35.8, batch_in

Testing:  51%|▌| 235/462 [1:01:03<54:52, 14.50s/batch, batch_loss=35.8, batch_in

Testing:  51%|▌| 235/462 [1:01:18<54:52, 14.50s/batch, batch_loss=21.8, batch_in

Testing:  51%|▌| 236/462 [1:01:18<54:27, 14.46s/batch, batch_loss=21.8, batch_in

Testing:  51%|▌| 236/462 [1:01:33<54:27, 14.46s/batch, batch_loss=28.2, batch_in

Testing:  51%|▌| 237/462 [1:01:33<54:46, 14.61s/batch, batch_loss=28.2, batch_in

Testing:  51%|▌| 237/462 [1:01:48<54:46, 14.61s/batch, batch_loss=20, batch_inde

Testing:  52%|▌| 238/462 [1:01:48<55:04, 14.75s/batch, batch_loss=20, batch_inde

Testing:  52%|▌| 238/462 [1:02:02<55:04, 14.75s/batch, batch_loss=18.3, batch_in

Testing:  52%|▌| 239/462 [1:02:02<54:43, 14.73s/batch, batch_loss=18.3, batch_in

Testing:  52%|▌| 239/462 [1:02:18<54:43, 14.73s/batch, batch_loss=22.2, batch_in

Testing:  52%|▌| 240/462 [1:02:18<55:55, 15.11s/batch, batch_loss=22.2, batch_in

Testing:  52%|▌| 240/462 [1:02:34<55:55, 15.11s/batch, batch_loss=35.7, batch_in

Testing:  52%|▌| 241/462 [1:02:34<56:01, 15.21s/batch, batch_loss=35.7, batch_in

Testing:  52%|▌| 241/462 [1:02:48<56:01, 15.21s/batch, batch_loss=26.5, batch_in

Testing:  52%|▌| 242/462 [1:02:48<55:12, 15.06s/batch, batch_loss=26.5, batch_in

Testing:  52%|▌| 242/462 [1:03:03<55:12, 15.06s/batch, batch_loss=20.4, batch_in

Testing:  53%|▌| 243/462 [1:03:03<54:08, 14.84s/batch, batch_loss=20.4, batch_in

Testing:  53%|▌| 243/462 [1:03:17<54:08, 14.84s/batch, batch_loss=48, batch_inde

Testing:  53%|▌| 244/462 [1:03:17<53:24, 14.70s/batch, batch_loss=48, batch_inde

Testing:  53%|▌| 244/462 [1:03:33<53:24, 14.70s/batch, batch_loss=18.7, batch_in

Testing:  53%|▌| 245/462 [1:03:33<53:57, 14.92s/batch, batch_loss=18.7, batch_in

Testing:  53%|▌| 245/462 [1:03:46<53:57, 14.92s/batch, batch_loss=34.5, batch_in

Testing:  53%|▌| 246/462 [1:03:46<52:30, 14.59s/batch, batch_loss=34.5, batch_in

Testing:  53%|▌| 246/462 [1:04:01<52:30, 14.59s/batch, batch_loss=34.1, batch_in

Testing:  53%|▌| 247/462 [1:04:01<52:30, 14.65s/batch, batch_loss=34.1, batch_in

Testing:  53%|▌| 247/462 [1:04:16<52:30, 14.65s/batch, batch_loss=8.65e+4, batch

Testing:  54%|▌| 248/462 [1:04:16<52:55, 14.84s/batch, batch_loss=8.65e+4, batch

Testing:  54%|▌| 248/462 [1:04:31<52:55, 14.84s/batch, batch_loss=192, batch_ind

Testing:  54%|▌| 249/462 [1:04:31<52:42, 14.85s/batch, batch_loss=192, batch_ind

Testing:  54%|▌| 249/462 [1:04:46<52:42, 14.85s/batch, batch_loss=29, batch_inde

Testing:  54%|▌| 250/462 [1:04:46<52:45, 14.93s/batch, batch_loss=29, batch_inde

Testing:  54%|▌| 250/462 [1:05:01<52:45, 14.93s/batch, batch_loss=17.4, batch_in

Testing:  54%|▌| 251/462 [1:05:01<52:32, 14.94s/batch, batch_loss=17.4, batch_in

Testing:  54%|▌| 251/462 [1:05:17<52:32, 14.94s/batch, batch_loss=16.6, batch_in

Testing:  55%|▌| 252/462 [1:05:17<53:13, 15.21s/batch, batch_loss=16.6, batch_in

Testing:  55%|▌| 252/462 [1:05:33<53:13, 15.21s/batch, batch_loss=30.2, batch_in

Testing:  55%|▌| 253/462 [1:05:33<53:23, 15.33s/batch, batch_loss=30.2, batch_in

Testing:  55%|▌| 253/462 [1:05:48<53:23, 15.33s/batch, batch_loss=14.6, batch_in

Testing:  55%|▌| 254/462 [1:05:48<53:04, 15.31s/batch, batch_loss=14.6, batch_in

Testing:  55%|▌| 254/462 [1:06:03<53:04, 15.31s/batch, batch_loss=8.78, batch_in

Testing:  55%|▌| 255/462 [1:06:03<52:44, 15.29s/batch, batch_loss=8.78, batch_in

Testing:  55%|▌| 255/462 [1:06:20<52:44, 15.29s/batch, batch_loss=23.8, batch_in

Testing:  55%|▌| 256/462 [1:06:20<54:02, 15.74s/batch, batch_loss=23.8, batch_in

Testing:  55%|▌| 256/462 [1:06:35<54:02, 15.74s/batch, batch_loss=25.7, batch_in

Testing:  56%|▌| 257/462 [1:06:35<53:10, 15.56s/batch, batch_loss=25.7, batch_in

Testing:  56%|▌| 257/462 [1:06:51<53:10, 15.56s/batch, batch_loss=24.2, batch_in

Testing:  56%|▌| 258/462 [1:06:51<53:14, 15.66s/batch, batch_loss=24.2, batch_in

Testing:  56%|▌| 258/462 [1:07:05<53:14, 15.66s/batch, batch_loss=23.9, batch_in

Testing:  56%|▌| 259/462 [1:07:05<51:28, 15.22s/batch, batch_loss=23.9, batch_in

Testing:  56%|▌| 259/462 [1:07:18<51:28, 15.22s/batch, batch_loss=26.1, batch_in

Testing:  56%|▌| 260/462 [1:07:18<48:36, 14.44s/batch, batch_loss=26.1, batch_in

Testing:  56%|▌| 260/462 [1:07:33<48:36, 14.44s/batch, batch_loss=28.3, batch_in

Testing:  56%|▌| 261/462 [1:07:33<49:22, 14.74s/batch, batch_loss=28.3, batch_in

Testing:  56%|▌| 261/462 [1:07:52<49:22, 14.74s/batch, batch_loss=18.7, batch_in

Testing:  57%|▌| 262/462 [1:07:52<53:02, 15.91s/batch, batch_loss=18.7, batch_in

Testing:  57%|▌| 262/462 [1:08:07<53:02, 15.91s/batch, batch_loss=25.7, batch_in

Testing:  57%|▌| 263/462 [1:08:07<52:13, 15.75s/batch, batch_loss=25.7, batch_in

Testing:  57%|▌| 263/462 [1:08:27<52:13, 15.75s/batch, batch_loss=24.8, batch_in

Testing:  57%|▌| 264/462 [1:08:27<56:10, 17.03s/batch, batch_loss=24.8, batch_in

Testing:  57%|▌| 264/462 [1:08:48<56:10, 17.03s/batch, batch_loss=18.8, batch_in

Testing:  57%|▌| 265/462 [1:08:48<59:05, 18.00s/batch, batch_loss=18.8, batch_in

Testing:  57%|▌| 265/462 [1:09:07<59:05, 18.00s/batch, batch_loss=17.3, batch_in

Testing:  58%|▌| 266/462 [1:09:07<1:00:27, 18.51s/batch, batch_loss=17.3, batch_

Testing:  58%|▌| 266/462 [1:09:26<1:00:27, 18.51s/batch, batch_loss=26.3, batch_

Testing:  58%|▌| 267/462 [1:09:26<59:52, 18.42s/batch, batch_loss=26.3, batch_in

Testing:  58%|▌| 267/462 [1:09:44<59:52, 18.42s/batch, batch_loss=16.9, batch_in

Testing:  58%|▌| 268/462 [1:09:44<59:16, 18.33s/batch, batch_loss=16.9, batch_in

Testing:  58%|▌| 268/462 [1:10:03<59:16, 18.33s/batch, batch_loss=22, batch_inde

Testing:  58%|▌| 269/462 [1:10:03<1:00:06, 18.69s/batch, batch_loss=22, batch_in

Testing:  58%|▌| 269/462 [1:10:21<1:00:06, 18.69s/batch, batch_loss=2.41e+3, bat

Testing:  58%|▌| 270/462 [1:10:21<58:39, 18.33s/batch, batch_loss=2.41e+3, batch

Testing:  58%|▌| 270/462 [1:10:39<58:39, 18.33s/batch, batch_loss=23.1, batch_in

Testing:  59%|▌| 271/462 [1:10:39<57:50, 18.17s/batch, batch_loss=23.1, batch_in

Testing:  59%|▌| 271/462 [1:10:56<57:50, 18.17s/batch, batch_loss=4.2e+3, batch_

Testing:  59%|▌| 272/462 [1:10:56<57:07, 18.04s/batch, batch_loss=4.2e+3, batch_

Testing:  59%|▌| 272/462 [1:11:13<57:07, 18.04s/batch, batch_loss=22.7, batch_in

Testing:  59%|▌| 273/462 [1:11:13<55:57, 17.76s/batch, batch_loss=22.7, batch_in

Testing:  59%|▌| 273/462 [1:11:31<55:57, 17.76s/batch, batch_loss=34.3, batch_in

Testing:  59%|▌| 274/462 [1:11:31<55:45, 17.79s/batch, batch_loss=34.3, batch_in

Testing:  59%|▌| 274/462 [1:11:50<55:45, 17.79s/batch, batch_loss=20.3, batch_in

Testing:  60%|▌| 275/462 [1:11:50<55:58, 17.96s/batch, batch_loss=20.3, batch_in

Testing:  60%|▌| 275/462 [1:12:13<55:58, 17.96s/batch, batch_loss=1.38e+4, batch

Testing:  60%|▌| 276/462 [1:12:13<1:00:42, 19.58s/batch, batch_loss=1.38e+4, bat

Testing:  60%|▌| 276/462 [1:12:32<1:00:42, 19.58s/batch, batch_loss=22.1, batch_

Testing:  60%|▌| 277/462 [1:12:32<1:00:06, 19.49s/batch, batch_loss=22.1, batch_

Testing:  60%|▌| 277/462 [1:12:50<1:00:06, 19.49s/batch, batch_loss=27.7, batch_

Testing:  60%|▌| 278/462 [1:12:50<58:23, 19.04s/batch, batch_loss=27.7, batch_in

Testing:  60%|▌| 278/462 [1:13:09<58:23, 19.04s/batch, batch_loss=23.7, batch_in

Testing:  60%|▌| 279/462 [1:13:09<58:04, 19.04s/batch, batch_loss=23.7, batch_in

Testing:  60%|▌| 279/462 [1:13:28<58:04, 19.04s/batch, batch_loss=1.27e+3, batch

Testing:  61%|▌| 280/462 [1:13:28<57:21, 18.91s/batch, batch_loss=1.27e+3, batch

Testing:  61%|▌| 280/462 [1:13:46<57:21, 18.91s/batch, batch_loss=320, batch_ind

Testing:  61%|▌| 281/462 [1:13:46<56:34, 18.76s/batch, batch_loss=320, batch_ind

Testing:  61%|▌| 281/462 [1:14:07<56:34, 18.76s/batch, batch_loss=346, batch_ind

Testing:  61%|▌| 282/462 [1:14:07<58:27, 19.48s/batch, batch_loss=346, batch_ind

Testing:  61%|▌| 282/462 [1:14:26<58:27, 19.48s/batch, batch_loss=28.5, batch_in

Testing:  61%|▌| 283/462 [1:14:26<56:54, 19.08s/batch, batch_loss=28.5, batch_in

Testing:  61%|▌| 283/462 [1:14:44<56:54, 19.08s/batch, batch_loss=4.81e+3, batch

Testing:  61%|▌| 284/462 [1:14:44<55:36, 18.74s/batch, batch_loss=4.81e+3, batch

Testing:  61%|▌| 284/462 [1:15:00<55:36, 18.74s/batch, batch_loss=2134.25, batch

Testing:  62%|▌| 285/462 [1:15:00<53:28, 18.12s/batch, batch_loss=2134.25, batch

Testing:  62%|▌| 285/462 [1:15:19<53:28, 18.12s/batch, batch_loss=42.7, batch_in

Testing:  62%|▌| 286/462 [1:15:19<53:51, 18.36s/batch, batch_loss=42.7, batch_in

Testing:  62%|▌| 286/462 [1:15:37<53:51, 18.36s/batch, batch_loss=38.5, batch_in

Testing:  62%|▌| 287/462 [1:15:37<52:38, 18.05s/batch, batch_loss=38.5, batch_in

Testing:  62%|▌| 287/462 [1:15:56<52:38, 18.05s/batch, batch_loss=198, batch_ind

Testing:  62%|▌| 288/462 [1:15:56<53:17, 18.37s/batch, batch_loss=198, batch_ind

Testing:  62%|▌| 288/462 [1:16:14<53:17, 18.37s/batch, batch_loss=1.39e+3, batch

Testing:  63%|▋| 289/462 [1:16:14<52:53, 18.34s/batch, batch_loss=1.39e+3, batch

Testing:  63%|▋| 289/462 [1:16:33<52:53, 18.34s/batch, batch_loss=22.6, batch_in

Testing:  63%|▋| 290/462 [1:16:33<53:07, 18.53s/batch, batch_loss=22.6, batch_in

Testing:  63%|▋| 290/462 [1:16:52<53:07, 18.53s/batch, batch_loss=24.9, batch_in

Testing:  63%|▋| 291/462 [1:16:52<53:00, 18.60s/batch, batch_loss=24.9, batch_in

Testing:  63%|▋| 291/462 [1:17:13<53:00, 18.60s/batch, batch_loss=16.3, batch_in

Testing:  63%|▋| 292/462 [1:17:13<55:20, 19.53s/batch, batch_loss=16.3, batch_in

Testing:  63%|▋| 292/462 [1:17:31<55:20, 19.53s/batch, batch_loss=3.4e+3, batch_

Testing:  63%|▋| 293/462 [1:17:31<53:50, 19.12s/batch, batch_loss=3.4e+3, batch_

Testing:  63%|▋| 293/462 [1:17:51<53:50, 19.12s/batch, batch_loss=21.1, batch_in

Testing:  64%|▋| 294/462 [1:17:51<53:44, 19.20s/batch, batch_loss=21.1, batch_in

Testing:  64%|▋| 294/462 [1:18:09<53:44, 19.20s/batch, batch_loss=23.3, batch_in

Testing:  64%|▋| 295/462 [1:18:09<52:24, 18.83s/batch, batch_loss=23.3, batch_in

Testing:  64%|▋| 295/462 [1:18:27<52:24, 18.83s/batch, batch_loss=18.8, batch_in

Testing:  64%|▋| 296/462 [1:18:27<51:17, 18.54s/batch, batch_loss=18.8, batch_in

Testing:  64%|▋| 296/462 [1:18:43<51:17, 18.54s/batch, batch_loss=21.4, batch_in

Testing:  64%|▋| 297/462 [1:18:43<49:12, 17.89s/batch, batch_loss=21.4, batch_in

Testing:  64%|▋| 297/462 [1:19:00<49:12, 17.89s/batch, batch_loss=24.8, batch_in

Testing:  65%|▋| 298/462 [1:19:00<48:30, 17.75s/batch, batch_loss=24.8, batch_in

Testing:  65%|▋| 298/462 [1:19:17<48:30, 17.75s/batch, batch_loss=17.2, batch_in

Testing:  65%|▋| 299/462 [1:19:17<47:11, 17.37s/batch, batch_loss=17.2, batch_in

Testing:  65%|▋| 299/462 [1:19:34<47:11, 17.37s/batch, batch_loss=23.7, batch_in

Testing:  65%|▋| 300/462 [1:19:34<46:33, 17.24s/batch, batch_loss=23.7, batch_in

Testing:  65%|▋| 300/462 [1:19:51<46:33, 17.24s/batch, batch_loss=23.2, batch_in

Testing:  65%|▋| 301/462 [1:19:51<46:15, 17.24s/batch, batch_loss=23.2, batch_in

Testing:  65%|▋| 301/462 [1:20:11<46:15, 17.24s/batch, batch_loss=31.4, batch_in

Testing:  65%|▋| 302/462 [1:20:11<48:25, 18.16s/batch, batch_loss=31.4, batch_in

Testing:  65%|▋| 302/462 [1:20:30<48:25, 18.16s/batch, batch_loss=2e+3, batch_in

Testing:  66%|▋| 303/462 [1:20:30<48:04, 18.14s/batch, batch_loss=2e+3, batch_in

Testing:  66%|▋| 303/462 [1:20:47<48:04, 18.14s/batch, batch_loss=4.63e+3, batch

Testing:  66%|▋| 304/462 [1:20:47<47:02, 17.86s/batch, batch_loss=4.63e+3, batch

Testing:  66%|▋| 304/462 [1:21:03<47:02, 17.86s/batch, batch_loss=17.9, batch_in

Testing:  66%|▋| 305/462 [1:21:03<45:29, 17.39s/batch, batch_loss=17.9, batch_in

Testing:  66%|▋| 305/462 [1:21:20<45:29, 17.39s/batch, batch_loss=27.8, batch_in

Testing:  66%|▋| 306/462 [1:21:20<45:10, 17.37s/batch, batch_loss=27.8, batch_in

Testing:  66%|▋| 306/462 [1:21:38<45:10, 17.37s/batch, batch_loss=29.4, batch_in

Testing:  66%|▋| 307/462 [1:21:38<45:11, 17.49s/batch, batch_loss=29.4, batch_in

Testing:  66%|▋| 307/462 [1:21:56<45:11, 17.49s/batch, batch_loss=1.74e+4, batch

Testing:  67%|▋| 308/462 [1:21:56<45:32, 17.74s/batch, batch_loss=1.74e+4, batch

Testing:  67%|▋| 308/462 [1:22:15<45:32, 17.74s/batch, batch_loss=35.6, batch_in

Testing:  67%|▋| 309/462 [1:22:15<45:46, 17.95s/batch, batch_loss=35.6, batch_in

Testing:  67%|▋| 309/462 [1:22:36<45:46, 17.95s/batch, batch_loss=873, batch_ind

Testing:  67%|▋| 310/462 [1:22:36<48:04, 18.98s/batch, batch_loss=873, batch_ind

Testing:  67%|▋| 310/462 [1:22:53<48:04, 18.98s/batch, batch_loss=24.2, batch_in

Testing:  67%|▋| 311/462 [1:22:53<46:11, 18.35s/batch, batch_loss=24.2, batch_in

Testing:  67%|▋| 311/462 [1:23:11<46:11, 18.35s/batch, batch_loss=20.9, batch_in

Testing:  68%|▋| 312/462 [1:23:11<45:32, 18.22s/batch, batch_loss=20.9, batch_in

Testing:  68%|▋| 312/462 [1:23:31<45:32, 18.22s/batch, batch_loss=14.6, batch_in

Testing:  68%|▋| 313/462 [1:23:31<46:21, 18.67s/batch, batch_loss=14.6, batch_in

Testing:  68%|▋| 313/462 [1:23:50<46:21, 18.67s/batch, batch_loss=3.16e+3, batch

Testing:  68%|▋| 314/462 [1:23:50<46:45, 18.96s/batch, batch_loss=3.16e+3, batch

Testing:  68%|▋| 314/462 [1:24:10<46:45, 18.96s/batch, batch_loss=1.69e+3, batch

Testing:  68%|▋| 315/462 [1:24:10<46:46, 19.09s/batch, batch_loss=1.69e+3, batch

Testing:  68%|▋| 315/462 [1:24:30<46:46, 19.09s/batch, batch_loss=16.8, batch_in

Testing:  68%|▋| 316/462 [1:24:30<47:03, 19.34s/batch, batch_loss=16.8, batch_in

Testing:  68%|▋| 316/462 [1:24:49<47:03, 19.34s/batch, batch_loss=2.23e+3, batch

Testing:  69%|▋| 317/462 [1:24:49<46:45, 19.35s/batch, batch_loss=2.23e+3, batch

Testing:  69%|▋| 317/462 [1:25:07<46:45, 19.35s/batch, batch_loss=587, batch_ind

Testing:  69%|▋| 318/462 [1:25:07<45:26, 18.93s/batch, batch_loss=587, batch_ind

Testing:  69%|▋| 318/462 [1:25:25<45:26, 18.93s/batch, batch_loss=4.31e+4, batch

Testing:  69%|▋| 319/462 [1:25:25<44:28, 18.66s/batch, batch_loss=4.31e+4, batch

Testing:  69%|▋| 319/462 [1:25:43<44:28, 18.66s/batch, batch_loss=38.4, batch_in

Testing:  69%|▋| 320/462 [1:25:43<43:33, 18.41s/batch, batch_loss=38.4, batch_in

Testing:  69%|▋| 320/462 [1:26:02<43:33, 18.41s/batch, batch_loss=38.8, batch_in

Testing:  69%|▋| 321/462 [1:26:02<43:23, 18.47s/batch, batch_loss=38.8, batch_in

Testing:  69%|▋| 321/462 [1:26:21<43:23, 18.47s/batch, batch_loss=28.2, batch_in

Testing:  70%|▋| 322/462 [1:26:21<43:39, 18.71s/batch, batch_loss=28.2, batch_in

Testing:  70%|▋| 322/462 [1:26:40<43:39, 18.71s/batch, batch_loss=1.58e+4, batch

Testing:  70%|▋| 323/462 [1:26:40<43:51, 18.94s/batch, batch_loss=1.58e+4, batch

Testing:  70%|▋| 323/462 [1:27:00<43:51, 18.94s/batch, batch_loss=18.2, batch_in

Testing:  70%|▋| 324/462 [1:27:00<43:48, 19.05s/batch, batch_loss=18.2, batch_in

Testing:  70%|▋| 324/462 [1:27:18<43:48, 19.05s/batch, batch_loss=1.69e+4, batch

Testing:  70%|▋| 325/462 [1:27:18<43:19, 18.98s/batch, batch_loss=1.69e+4, batch

Testing:  70%|▋| 325/462 [1:27:38<43:19, 18.98s/batch, batch_loss=3.32e+3, batch

Testing:  71%|▋| 326/462 [1:27:38<43:20, 19.12s/batch, batch_loss=3.32e+3, batch

Testing:  71%|▋| 326/462 [1:27:56<43:20, 19.12s/batch, batch_loss=218, batch_ind

Testing:  71%|▋| 327/462 [1:27:56<42:39, 18.96s/batch, batch_loss=218, batch_ind

Testing:  71%|▋| 327/462 [1:28:17<42:39, 18.96s/batch, batch_loss=807, batch_ind

Testing:  71%|▋| 328/462 [1:28:17<43:44, 19.58s/batch, batch_loss=807, batch_ind

Testing:  71%|▋| 328/462 [1:28:37<43:44, 19.58s/batch, batch_loss=4.53e+3, batch

Testing:  71%|▋| 329/462 [1:28:37<43:36, 19.68s/batch, batch_loss=4.53e+3, batch

Testing:  71%|▋| 329/462 [1:28:58<43:36, 19.68s/batch, batch_loss=7.48e+3, batch

Testing:  71%|▋| 330/462 [1:28:58<43:44, 19.88s/batch, batch_loss=7.48e+3, batch

Testing:  71%|▋| 330/462 [1:29:20<43:44, 19.88s/batch, batch_loss=29.9, batch_in

Testing:  72%|▋| 331/462 [1:29:20<44:49, 20.53s/batch, batch_loss=29.9, batch_in

Testing:  72%|▋| 331/462 [1:29:41<44:49, 20.53s/batch, batch_loss=26.4, batch_in

Testing:  72%|▋| 332/462 [1:29:41<45:01, 20.78s/batch, batch_loss=26.4, batch_in

Testing:  72%|▋| 332/462 [1:30:01<45:01, 20.78s/batch, batch_loss=17.3, batch_in

Testing:  72%|▋| 333/462 [1:30:01<44:21, 20.63s/batch, batch_loss=17.3, batch_in

Testing:  72%|▋| 333/462 [1:30:25<44:21, 20.63s/batch, batch_loss=30.3, batch_in

Testing:  72%|▋| 334/462 [1:30:25<45:52, 21.50s/batch, batch_loss=30.3, batch_in

Testing:  72%|▋| 334/462 [1:30:44<45:52, 21.50s/batch, batch_loss=7.49e+4, batch

Testing:  73%|▋| 335/462 [1:30:44<44:12, 20.88s/batch, batch_loss=7.49e+4, batch

Testing:  73%|▋| 335/462 [1:31:03<44:12, 20.88s/batch, batch_loss=183, batch_ind

Testing:  73%|▋| 336/462 [1:31:03<42:41, 20.33s/batch, batch_loss=183, batch_ind

Testing:  73%|▋| 336/462 [1:31:23<42:41, 20.33s/batch, batch_loss=34.7, batch_in

Testing:  73%|▋| 337/462 [1:31:23<41:36, 19.97s/batch, batch_loss=34.7, batch_in

Testing:  73%|▋| 337/462 [1:31:40<41:36, 19.97s/batch, batch_loss=848, batch_ind

Testing:  73%|▋| 338/462 [1:31:40<39:53, 19.31s/batch, batch_loss=848, batch_ind

Testing:  73%|▋| 338/462 [1:31:59<39:53, 19.31s/batch, batch_loss=18, batch_inde

Testing:  73%|▋| 339/462 [1:31:59<38:58, 19.01s/batch, batch_loss=18, batch_inde

Testing:  73%|▋| 339/462 [1:32:18<38:58, 19.01s/batch, batch_loss=419, batch_ind

Testing:  74%|▋| 340/462 [1:32:18<38:39, 19.01s/batch, batch_loss=419, batch_ind

Testing:  74%|▋| 340/462 [1:32:36<38:39, 19.01s/batch, batch_loss=6.74e+3, batch

Testing:  74%|▋| 341/462 [1:32:36<38:01, 18.85s/batch, batch_loss=6.74e+3, batch

Testing:  74%|▋| 341/462 [1:32:55<38:01, 18.85s/batch, batch_loss=457, batch_ind

Testing:  74%|▋| 342/462 [1:32:55<37:51, 18.93s/batch, batch_loss=457, batch_ind

Testing:  74%|▋| 342/462 [1:33:13<37:51, 18.93s/batch, batch_loss=1.1e+3, batch_

Testing:  74%|▋| 343/462 [1:33:13<36:48, 18.56s/batch, batch_loss=1.1e+3, batch_

Testing:  74%|▋| 343/462 [1:33:30<36:48, 18.56s/batch, batch_loss=21.9, batch_in

Testing:  74%|▋| 344/462 [1:33:30<35:39, 18.13s/batch, batch_loss=21.9, batch_in

Testing:  74%|▋| 344/462 [1:33:47<35:39, 18.13s/batch, batch_loss=82.7, batch_in

Testing:  75%|▋| 345/462 [1:33:47<34:26, 17.66s/batch, batch_loss=82.7, batch_in

Testing:  75%|▋| 345/462 [1:34:03<34:26, 17.66s/batch, batch_loss=14.5, batch_in

Testing:  75%|▋| 346/462 [1:34:03<33:38, 17.40s/batch, batch_loss=14.5, batch_in

Testing:  75%|▋| 346/462 [1:34:22<33:38, 17.40s/batch, batch_loss=29.7, batch_in

Testing:  75%|▊| 347/462 [1:34:22<34:09, 17.82s/batch, batch_loss=29.7, batch_in

Testing:  75%|▊| 347/462 [1:34:41<34:09, 17.82s/batch, batch_loss=3.58e+3, batch

Testing:  75%|▊| 348/462 [1:34:41<34:17, 18.05s/batch, batch_loss=3.58e+3, batch

Testing:  75%|▊| 348/462 [1:34:58<34:17, 18.05s/batch, batch_loss=6.31e+3, batch

Testing:  76%|▊| 349/462 [1:34:58<33:42, 17.90s/batch, batch_loss=6.31e+3, batch

Testing:  76%|▊| 349/462 [1:35:16<33:42, 17.90s/batch, batch_loss=14.4, batch_in

Testing:  76%|▊| 350/462 [1:35:16<33:14, 17.81s/batch, batch_loss=14.4, batch_in

Testing:  76%|▊| 350/462 [1:35:34<33:14, 17.81s/batch, batch_loss=8.18e+3, batch

Testing:  76%|▊| 351/462 [1:35:34<33:10, 17.93s/batch, batch_loss=8.18e+3, batch

Testing:  76%|▊| 351/462 [1:35:56<33:10, 17.93s/batch, batch_loss=1.22e+4, batch

Testing:  76%|▊| 352/462 [1:35:56<34:45, 18.96s/batch, batch_loss=1.22e+4, batch

Testing:  76%|▊| 352/462 [1:36:13<34:45, 18.96s/batch, batch_loss=7.08e+3, batch

Testing:  76%|▊| 353/462 [1:36:13<33:44, 18.58s/batch, batch_loss=7.08e+3, batch

Testing:  76%|▊| 353/462 [1:36:30<33:44, 18.58s/batch, batch_loss=33.1, batch_in

Testing:  77%|▊| 354/462 [1:36:30<32:38, 18.13s/batch, batch_loss=33.1, batch_in

Testing:  77%|▊| 354/462 [1:36:47<32:38, 18.13s/batch, batch_loss=1.17e+4, batch

Testing:  77%|▊| 355/462 [1:36:47<31:43, 17.79s/batch, batch_loss=1.17e+4, batch

Testing:  77%|▊| 355/462 [1:37:07<31:43, 17.79s/batch, batch_loss=1.27e+3, batch

Testing:  77%|▊| 356/462 [1:37:07<32:28, 18.38s/batch, batch_loss=1.27e+3, batch

Testing:  77%|▊| 356/462 [1:37:22<32:28, 18.38s/batch, batch_loss=869, batch_ind

Testing:  77%|▊| 357/462 [1:37:22<30:33, 17.46s/batch, batch_loss=869, batch_ind

Testing:  77%|▊| 357/462 [1:37:39<30:33, 17.46s/batch, batch_loss=29.8, batch_in

Testing:  77%|▊| 358/462 [1:37:39<29:35, 17.07s/batch, batch_loss=29.8, batch_in

Testing:  77%|▊| 358/462 [1:37:58<29:35, 17.07s/batch, batch_loss=31.4, batch_in

Testing:  78%|▊| 359/462 [1:37:58<30:46, 17.93s/batch, batch_loss=31.4, batch_in

Testing:  78%|▊| 359/462 [1:38:16<30:46, 17.93s/batch, batch_loss=4.46e+3, batch

Testing:  78%|▊| 360/462 [1:38:16<30:29, 17.94s/batch, batch_loss=4.46e+3, batch

Testing:  78%|▊| 360/462 [1:38:34<30:29, 17.94s/batch, batch_loss=23, batch_inde

Testing:  78%|▊| 361/462 [1:38:34<29:57, 17.80s/batch, batch_loss=23, batch_inde

Testing:  78%|▊| 361/462 [1:38:51<29:57, 17.80s/batch, batch_loss=21.5, batch_in

Testing:  78%|▊| 362/462 [1:38:51<29:12, 17.52s/batch, batch_loss=21.5, batch_in

Testing:  78%|▊| 362/462 [1:39:07<29:12, 17.52s/batch, batch_loss=19.1, batch_in

Testing:  79%|▊| 363/462 [1:39:07<28:11, 17.08s/batch, batch_loss=19.1, batch_in

Testing:  79%|▊| 363/462 [1:39:24<28:11, 17.08s/batch, batch_loss=14, batch_inde

Testing:  79%|▊| 364/462 [1:39:24<27:44, 16.99s/batch, batch_loss=14, batch_inde

Testing:  79%|▊| 364/462 [1:39:41<27:44, 16.99s/batch, batch_loss=29.2, batch_in

Testing:  79%|▊| 365/462 [1:39:41<27:41, 17.13s/batch, batch_loss=29.2, batch_in

Testing:  79%|▊| 365/462 [1:39:57<27:41, 17.13s/batch, batch_loss=16.9, batch_in

Testing:  79%|▊| 366/462 [1:39:57<26:55, 16.83s/batch, batch_loss=16.9, batch_in

Testing:  79%|▊| 366/462 [1:40:20<26:55, 16.83s/batch, batch_loss=29.5, batch_in

Testing:  79%|▊| 367/462 [1:40:20<29:27, 18.61s/batch, batch_loss=29.5, batch_in

Testing:  79%|▊| 367/462 [1:40:40<29:27, 18.61s/batch, batch_loss=34.2, batch_in

Testing:  80%|▊| 368/462 [1:40:40<29:43, 18.98s/batch, batch_loss=34.2, batch_in

Testing:  80%|▊| 368/462 [1:40:59<29:43, 18.98s/batch, batch_loss=27.9, batch_in

Testing:  80%|▊| 369/462 [1:40:59<29:43, 19.17s/batch, batch_loss=27.9, batch_in

Testing:  80%|▊| 369/462 [1:41:20<29:43, 19.17s/batch, batch_loss=16.5, batch_in

Testing:  80%|▊| 370/462 [1:41:20<29:53, 19.49s/batch, batch_loss=16.5, batch_in

Testing:  80%|▊| 370/462 [1:41:40<29:53, 19.49s/batch, batch_loss=15.8, batch_in

Testing:  80%|▊| 371/462 [1:41:40<29:52, 19.70s/batch, batch_loss=15.8, batch_in

Testing:  80%|▊| 371/462 [1:41:59<29:52, 19.70s/batch, batch_loss=309, batch_ind

Testing:  81%|▊| 372/462 [1:41:59<29:23, 19.59s/batch, batch_loss=309, batch_ind

Testing:  81%|▊| 372/462 [1:42:20<29:23, 19.59s/batch, batch_loss=6.99e+3, batch

Testing:  81%|▊| 373/462 [1:42:20<29:40, 20.00s/batch, batch_loss=6.99e+3, batch

Testing:  81%|▊| 373/462 [1:42:39<29:40, 20.00s/batch, batch_loss=422, batch_ind

Testing:  81%|▊| 374/462 [1:42:39<28:53, 19.70s/batch, batch_loss=422, batch_ind

Testing:  81%|▊| 374/462 [1:42:58<28:53, 19.70s/batch, batch_loss=2.49e+4, batch

Testing:  81%|▊| 375/462 [1:42:58<28:13, 19.47s/batch, batch_loss=2.49e+4, batch

Testing:  81%|▊| 375/462 [1:43:17<28:13, 19.47s/batch, batch_loss=27.7, batch_in

Testing:  81%|▊| 376/462 [1:43:17<27:34, 19.24s/batch, batch_loss=27.7, batch_in

Testing:  81%|▊| 376/462 [1:43:36<27:34, 19.24s/batch, batch_loss=2.14e+4, batch

Testing:  82%|▊| 377/462 [1:43:36<27:21, 19.31s/batch, batch_loss=2.14e+4, batch

Testing:  82%|▊| 377/462 [1:43:56<27:21, 19.31s/batch, batch_loss=2.06e+3, batch

Testing:  82%|▊| 378/462 [1:43:56<27:17, 19.49s/batch, batch_loss=2.06e+3, batch

Testing:  82%|▊| 378/462 [1:44:17<27:17, 19.49s/batch, batch_loss=2.69e+3, batch

Testing:  82%|▊| 379/462 [1:44:17<27:43, 20.05s/batch, batch_loss=2.69e+3, batch

Testing:  82%|▊| 379/462 [1:44:37<27:43, 20.05s/batch, batch_loss=21.8, batch_in

Testing:  82%|▊| 380/462 [1:44:37<27:01, 19.78s/batch, batch_loss=21.8, batch_in

Testing:  82%|▊| 380/462 [1:44:57<27:01, 19.78s/batch, batch_loss=26.5, batch_in

Testing:  82%|▊| 381/462 [1:44:57<26:47, 19.85s/batch, batch_loss=26.5, batch_in

Testing:  82%|▊| 381/462 [1:45:16<26:47, 19.85s/batch, batch_loss=310, batch_ind

Testing:  83%|▊| 382/462 [1:45:16<26:07, 19.59s/batch, batch_loss=310, batch_ind

Testing:  83%|▊| 382/462 [1:45:35<26:07, 19.59s/batch, batch_loss=34.1, batch_in

Testing:  83%|▊| 383/462 [1:45:35<25:37, 19.46s/batch, batch_loss=34.1, batch_in

Testing:  83%|▊| 383/462 [1:45:55<25:37, 19.46s/batch, batch_loss=25.4, batch_in

Testing:  83%|▊| 384/462 [1:45:55<25:36, 19.70s/batch, batch_loss=25.4, batch_in

Testing:  83%|▊| 384/462 [1:46:16<25:36, 19.70s/batch, batch_loss=24.3, batch_in

Testing:  83%|▊| 385/462 [1:46:16<25:54, 20.19s/batch, batch_loss=24.3, batch_in

Testing:  83%|▊| 385/462 [1:46:35<25:54, 20.19s/batch, batch_loss=12.8, batch_in

Testing:  84%|▊| 386/462 [1:46:35<25:02, 19.77s/batch, batch_loss=12.8, batch_in

Testing:  84%|▊| 386/462 [1:46:54<25:02, 19.77s/batch, batch_loss=1.59e+4, batch

Testing:  84%|▊| 387/462 [1:46:54<24:11, 19.36s/batch, batch_loss=1.59e+4, batch

Testing:  84%|▊| 387/462 [1:47:12<24:11, 19.36s/batch, batch_loss=399, batch_ind

Testing:  84%|▊| 388/462 [1:47:12<23:36, 19.14s/batch, batch_loss=399, batch_ind

Testing:  84%|▊| 388/462 [1:47:32<23:36, 19.14s/batch, batch_loss=30, batch_inde

Testing:  84%|▊| 389/462 [1:47:32<23:35, 19.40s/batch, batch_loss=30, batch_inde

Testing:  84%|▊| 389/462 [1:47:52<23:35, 19.40s/batch, batch_loss=23, batch_inde

Testing:  84%|▊| 390/462 [1:47:52<23:26, 19.54s/batch, batch_loss=23, batch_inde

Testing:  84%|▊| 390/462 [1:48:14<23:26, 19.54s/batch, batch_loss=3.51e+3, batch

Testing:  85%|▊| 391/462 [1:48:14<24:06, 20.37s/batch, batch_loss=3.51e+3, batch

Testing:  85%|▊| 391/462 [1:48:34<24:06, 20.37s/batch, batch_loss=25.6, batch_in

Testing:  85%|▊| 392/462 [1:48:34<23:27, 20.11s/batch, batch_loss=25.6, batch_in

Testing:  85%|▊| 392/462 [1:48:55<23:27, 20.11s/batch, batch_loss=30.7, batch_in

Testing:  85%|▊| 393/462 [1:48:55<23:22, 20.33s/batch, batch_loss=30.7, batch_in

Testing:  85%|▊| 393/462 [1:49:16<23:22, 20.33s/batch, batch_loss=15.7, batch_in

Testing:  85%|▊| 394/462 [1:49:16<23:23, 20.64s/batch, batch_loss=15.7, batch_in

Testing:  85%|▊| 394/462 [1:49:35<23:23, 20.64s/batch, batch_loss=200, batch_ind

Testing:  85%|▊| 395/462 [1:49:35<22:37, 20.27s/batch, batch_loss=200, batch_ind

Testing:  85%|▊| 395/462 [1:49:54<22:37, 20.27s/batch, batch_loss=25.8, batch_in

Testing:  86%|▊| 396/462 [1:49:54<21:45, 19.78s/batch, batch_loss=25.8, batch_in

Testing:  86%|▊| 396/462 [1:50:13<21:45, 19.78s/batch, batch_loss=916, batch_ind

Testing:  86%|▊| 397/462 [1:50:13<21:17, 19.65s/batch, batch_loss=916, batch_ind

Testing:  86%|▊| 397/462 [1:50:33<21:17, 19.65s/batch, batch_loss=24.7, batch_in

Testing:  86%|▊| 398/462 [1:50:33<20:51, 19.56s/batch, batch_loss=24.7, batch_in

Testing:  86%|▊| 398/462 [1:50:53<20:51, 19.56s/batch, batch_loss=22.9, batch_in

Testing:  86%|▊| 399/462 [1:50:53<20:52, 19.88s/batch, batch_loss=22.9, batch_in

Testing:  86%|▊| 399/462 [1:51:13<20:52, 19.88s/batch, batch_loss=1.16e+4, batch

Testing:  87%|▊| 400/462 [1:51:13<20:30, 19.85s/batch, batch_loss=1.16e+4, batch

Testing:  87%|▊| 400/462 [1:51:33<20:30, 19.85s/batch, batch_loss=19.6, batch_in

Testing:  87%|▊| 401/462 [1:51:33<20:04, 19.74s/batch, batch_loss=19.6, batch_in

Testing:  87%|▊| 401/462 [1:51:53<20:04, 19.74s/batch, batch_loss=21.1, batch_in

Testing:  87%|▊| 402/462 [1:51:53<19:47, 19.79s/batch, batch_loss=21.1, batch_in

Testing:  87%|▊| 402/462 [1:52:15<19:47, 19.79s/batch, batch_loss=33.6, batch_in

Testing:  87%|▊| 403/462 [1:52:15<20:17, 20.64s/batch, batch_loss=33.6, batch_in

Testing:  87%|▊| 403/462 [1:52:35<20:17, 20.64s/batch, batch_loss=43.7, batch_in

Testing:  87%|▊| 404/462 [1:52:35<19:37, 20.30s/batch, batch_loss=43.7, batch_in

Testing:  87%|▊| 404/462 [1:52:55<19:37, 20.30s/batch, batch_loss=19, batch_inde

Testing:  88%|▉| 405/462 [1:52:55<19:11, 20.21s/batch, batch_loss=19, batch_inde

Testing:  88%|▉| 405/462 [1:53:14<19:11, 20.21s/batch, batch_loss=18.9, batch_in

Testing:  88%|▉| 406/462 [1:53:14<18:39, 19.98s/batch, batch_loss=18.9, batch_in

Testing:  88%|▉| 406/462 [1:53:35<18:39, 19.98s/batch, batch_loss=3.75e+3, batch

Testing:  88%|▉| 407/462 [1:53:35<18:24, 20.08s/batch, batch_loss=3.75e+3, batch

Testing:  88%|▉| 407/462 [1:53:57<18:24, 20.08s/batch, batch_loss=23.2, batch_in

Testing:  88%|▉| 408/462 [1:53:57<18:39, 20.73s/batch, batch_loss=23.2, batch_in

Testing:  88%|▉| 408/462 [1:54:17<18:39, 20.73s/batch, batch_loss=25.8, batch_in

Testing:  89%|▉| 409/462 [1:54:17<18:11, 20.59s/batch, batch_loss=25.8, batch_in

Testing:  89%|▉| 409/462 [1:54:35<18:11, 20.59s/batch, batch_loss=277, batch_ind

Testing:  89%|▉| 410/462 [1:54:35<17:17, 19.95s/batch, batch_loss=277, batch_ind

Testing:  89%|▉| 410/462 [1:54:55<17:17, 19.95s/batch, batch_loss=32.2, batch_in

Testing:  89%|▉| 411/462 [1:54:55<16:46, 19.74s/batch, batch_loss=32.2, batch_in

Testing:  89%|▉| 411/462 [1:55:16<16:46, 19.74s/batch, batch_loss=35.5, batch_in

Testing:  89%|▉| 412/462 [1:55:16<16:46, 20.12s/batch, batch_loss=35.5, batch_in

Testing:  89%|▉| 412/462 [1:55:34<16:46, 20.12s/batch, batch_loss=29.3, batch_in

Testing:  89%|▉| 413/462 [1:55:34<15:56, 19.52s/batch, batch_loss=29.3, batch_in

Testing:  89%|▉| 413/462 [1:55:53<15:56, 19.52s/batch, batch_loss=23.7, batch_in

Testing:  90%|▉| 414/462 [1:55:53<15:28, 19.35s/batch, batch_loss=23.7, batch_in

Testing:  90%|▉| 414/462 [1:56:12<15:28, 19.35s/batch, batch_loss=31.4, batch_in

Testing:  90%|▉| 415/462 [1:56:12<15:01, 19.18s/batch, batch_loss=31.4, batch_in

Testing:  90%|▉| 415/462 [1:56:31<15:01, 19.18s/batch, batch_loss=1.01e+3, batch

Testing:  90%|▉| 416/462 [1:56:31<14:41, 19.15s/batch, batch_loss=1.01e+3, batch

Testing:  90%|▉| 416/462 [1:56:49<14:41, 19.15s/batch, batch_loss=38.7, batch_in

Testing:  90%|▉| 417/462 [1:56:49<14:13, 18.96s/batch, batch_loss=38.7, batch_in

Testing:  90%|▉| 417/462 [1:57:10<14:13, 18.96s/batch, batch_loss=3.88e+3, batch

Testing:  90%|▉| 418/462 [1:57:10<14:17, 19.48s/batch, batch_loss=3.88e+3, batch

Testing:  90%|▉| 418/462 [1:57:29<14:17, 19.48s/batch, batch_loss=33.3, batch_in

Testing:  91%|▉| 419/462 [1:57:29<13:50, 19.32s/batch, batch_loss=33.3, batch_in

Testing:  91%|▉| 419/462 [1:57:48<13:50, 19.32s/batch, batch_loss=21.1, batch_in

Testing:  91%|▉| 420/462 [1:57:48<13:24, 19.16s/batch, batch_loss=21.1, batch_in

Testing:  91%|▉| 420/462 [1:58:06<13:24, 19.16s/batch, batch_loss=31.1, batch_in

Testing:  91%|▉| 421/462 [1:58:06<12:52, 18.84s/batch, batch_loss=31.1, batch_in

Testing:  91%|▉| 421/462 [1:58:23<12:52, 18.84s/batch, batch_loss=31.9, batch_in

Testing:  91%|▉| 422/462 [1:58:23<12:20, 18.50s/batch, batch_loss=31.9, batch_in

Testing:  91%|▉| 422/462 [1:58:41<12:20, 18.50s/batch, batch_loss=1.65e+3, batch

Testing:  92%|▉| 423/462 [1:58:41<11:47, 18.14s/batch, batch_loss=1.65e+3, batch

Testing:  92%|▉| 423/462 [1:58:59<11:47, 18.14s/batch, batch_loss=30.4, batch_in

Testing:  92%|▉| 424/462 [1:58:59<11:31, 18.20s/batch, batch_loss=30.4, batch_in

Testing:  92%|▉| 424/462 [1:59:19<11:31, 18.20s/batch, batch_loss=34.2, batch_in

Testing:  92%|▉| 425/462 [1:59:19<11:30, 18.65s/batch, batch_loss=34.2, batch_in

Testing:  92%|▉| 425/462 [1:59:37<11:30, 18.65s/batch, batch_loss=23.6, batch_in

Testing:  92%|▉| 426/462 [1:59:37<11:03, 18.43s/batch, batch_loss=23.6, batch_in

Testing:  92%|▉| 426/462 [1:59:55<11:03, 18.43s/batch, batch_loss=29.1, batch_in

Testing:  92%|▉| 427/462 [1:59:55<10:47, 18.50s/batch, batch_loss=29.1, batch_in

Testing:  92%|▉| 427/462 [2:00:14<10:47, 18.50s/batch, batch_loss=36.6, batch_in

Testing:  93%|▉| 428/462 [2:00:14<10:27, 18.46s/batch, batch_loss=36.6, batch_in

Testing:  93%|▉| 428/462 [2:00:32<10:27, 18.46s/batch, batch_loss=26.6, batch_in

Testing:  93%|▉| 429/462 [2:00:32<10:05, 18.36s/batch, batch_loss=26.6, batch_in

Testing:  93%|▉| 429/462 [2:00:51<10:05, 18.36s/batch, batch_loss=39.4, batch_in

Testing:  93%|▉| 430/462 [2:00:51<09:56, 18.64s/batch, batch_loss=39.4, batch_in

Testing:  93%|▉| 430/462 [2:01:09<09:56, 18.64s/batch, batch_loss=28.9, batch_in

Testing:  93%|▉| 431/462 [2:01:09<09:35, 18.55s/batch, batch_loss=28.9, batch_in

Testing:  93%|▉| 431/462 [2:01:28<09:35, 18.55s/batch, batch_loss=25.5, batch_in

Testing:  94%|▉| 432/462 [2:01:28<09:15, 18.52s/batch, batch_loss=25.5, batch_in

Testing:  94%|▉| 432/462 [2:01:46<09:15, 18.52s/batch, batch_loss=2.74e+4, batch

Testing:  94%|▉| 433/462 [2:01:46<08:52, 18.37s/batch, batch_loss=2.74e+4, batch

Testing:  94%|▉| 433/462 [2:02:05<08:52, 18.37s/batch, batch_loss=1.03e+3, batch

Testing:  94%|▉| 434/462 [2:02:05<08:41, 18.62s/batch, batch_loss=1.03e+3, batch

Testing:  94%|▉| 434/462 [2:02:24<08:41, 18.62s/batch, batch_loss=15.7, batch_in

Testing:  94%|▉| 435/462 [2:02:24<08:23, 18.66s/batch, batch_loss=15.7, batch_in

Testing:  94%|▉| 435/462 [2:02:42<08:23, 18.66s/batch, batch_loss=194, batch_ind

Testing:  94%|▉| 436/462 [2:02:42<08:03, 18.59s/batch, batch_loss=194, batch_ind

Testing:  94%|▉| 436/462 [2:03:01<08:03, 18.59s/batch, batch_loss=1.24e+3, batch

Testing:  95%|▉| 437/462 [2:03:01<07:47, 18.69s/batch, batch_loss=1.24e+3, batch

Testing:  95%|▉| 437/462 [2:03:20<07:47, 18.69s/batch, batch_loss=20.1, batch_in

Testing:  95%|▉| 438/462 [2:03:20<07:26, 18.62s/batch, batch_loss=20.1, batch_in

Testing:  95%|▉| 438/462 [2:03:38<07:26, 18.62s/batch, batch_loss=555, batch_ind

Testing:  95%|▉| 439/462 [2:03:38<07:08, 18.64s/batch, batch_loss=555, batch_ind

Testing:  95%|▉| 439/462 [2:03:58<07:08, 18.64s/batch, batch_loss=27.1, batch_in

Testing:  95%|▉| 440/462 [2:03:58<06:57, 18.98s/batch, batch_loss=27.1, batch_in

Testing:  95%|▉| 440/462 [2:04:18<06:57, 18.98s/batch, batch_loss=21, batch_inde

Testing:  95%|▉| 441/462 [2:04:18<06:42, 19.15s/batch, batch_loss=21, batch_inde

Testing:  95%|▉| 441/462 [2:04:37<06:42, 19.15s/batch, batch_loss=22.7, batch_in

Testing:  96%|▉| 442/462 [2:04:37<06:24, 19.21s/batch, batch_loss=22.7, batch_in

Testing:  96%|▉| 442/462 [2:04:57<06:24, 19.21s/batch, batch_loss=14.5, batch_in

Testing:  96%|▉| 443/462 [2:04:57<06:09, 19.47s/batch, batch_loss=14.5, batch_in

Testing:  96%|▉| 443/462 [2:05:17<06:09, 19.47s/batch, batch_loss=471, batch_ind

Testing:  96%|▉| 444/462 [2:05:17<05:54, 19.68s/batch, batch_loss=471, batch_ind

Testing:  96%|▉| 444/462 [2:05:38<05:54, 19.68s/batch, batch_loss=34.5, batch_in

Testing:  96%|▉| 445/462 [2:05:38<05:38, 19.93s/batch, batch_loss=34.5, batch_in

Testing:  96%|▉| 445/462 [2:05:58<05:38, 19.93s/batch, batch_loss=39.2, batch_in

Testing:  97%|▉| 446/462 [2:05:58<05:18, 19.89s/batch, batch_loss=39.2, batch_in

Testing:  97%|▉| 446/462 [2:06:20<05:18, 19.89s/batch, batch_loss=19.6, batch_in

Testing:  97%|▉| 447/462 [2:06:20<05:10, 20.69s/batch, batch_loss=19.6, batch_in

Testing:  97%|▉| 447/462 [2:06:40<05:10, 20.69s/batch, batch_loss=8.03e+3, batch

Testing:  97%|▉| 448/462 [2:06:40<04:45, 20.41s/batch, batch_loss=8.03e+3, batch

Testing:  97%|▉| 448/462 [2:07:00<04:45, 20.41s/batch, batch_loss=28.9, batch_in

Testing:  97%|▉| 449/462 [2:07:00<04:24, 20.35s/batch, batch_loss=28.9, batch_in

Testing:  97%|▉| 449/462 [2:07:19<04:24, 20.35s/batch, batch_loss=7.06e+3, batch

Testing:  97%|▉| 450/462 [2:07:19<03:57, 19.77s/batch, batch_loss=7.06e+3, batch

Testing:  97%|▉| 450/462 [2:07:39<03:57, 19.77s/batch, batch_loss=28.8, batch_in

Testing:  98%|▉| 451/462 [2:07:39<03:38, 19.82s/batch, batch_loss=28.8, batch_in

Testing:  98%|▉| 451/462 [2:07:58<03:38, 19.82s/batch, batch_loss=19, batch_inde

Testing:  98%|▉| 452/462 [2:07:58<03:18, 19.87s/batch, batch_loss=19, batch_inde

Testing:  98%|▉| 452/462 [2:08:21<03:18, 19.87s/batch, batch_loss=3.97, batch_in

Testing:  98%|▉| 453/462 [2:08:21<03:06, 20.73s/batch, batch_loss=3.97, batch_in

Testing:  98%|▉| 453/462 [2:08:38<03:06, 20.73s/batch, batch_loss=0.782, batch_i

Testing:  98%|▉| 454/462 [2:08:38<02:36, 19.50s/batch, batch_loss=0.782, batch_i

Testing:  98%|▉| 454/462 [2:08:51<02:36, 19.50s/batch, batch_loss=0.1, batch_ind

Testing:  98%|▉| 455/462 [2:08:51<02:03, 17.67s/batch, batch_loss=0.1, batch_ind

Testing:  98%|▉| 455/462 [2:09:04<02:03, 17.67s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 456/462 [2:09:04<01:36, 16.15s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 456/462 [2:09:17<01:36, 16.15s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 457/462 [2:09:17<01:16, 15.28s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 457/462 [2:09:30<01:16, 15.28s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 458/462 [2:09:30<00:58, 14.58s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 458/462 [2:09:44<00:58, 14.58s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 459/462 [2:09:44<00:43, 14.34s/batch, batch_loss=0.1, batch_ind

Testing:  99%|▉| 459/462 [2:09:59<00:43, 14.34s/batch, batch_loss=0.1, batch_ind

Testing: 100%|▉| 460/462 [2:09:59<00:29, 14.67s/batch, batch_loss=0.1, batch_ind

Testing: 100%|▉| 460/462 [2:10:12<00:29, 14.67s/batch, batch_loss=0.1, batch_ind

Testing: 100%|▉| 461/462 [2:10:12<00:14, 14.10s/batch, batch_loss=0.1, batch_ind

Testing: 100%|▉| 461/462 [2:10:20<00:14, 14.10s/batch, batch_loss=0.098, batch_i

Testing: 100%|█| 462/462 [2:10:20<00:00, 12.31s/batch, batch_loss=0.098, batch_i

Testing: 100%|█| 462/462 [2:10:20<00:00, 16.93s/batch, batch_loss=0.098, batch_i

Test Loss: 1901.0149



