<h1>NN Model</h1>

In [1]:
from pycocotools.coco import COCO
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as functions
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
#import torchvision
from torchvision import transforms
import re

In [2]:
print("cuda" if torch.cuda.is_available() else "cpu")

cuda


In [3]:
DATADIR = "cocodoom/"
USED_RUNS = ["run1", "run2", "run3"]

dataSplit, TRAIN_RUN = "run-full-train", "run1"

annFile = '{}{}.json'.format(DATADIR,dataSplit)

In [4]:
coco_train = COCO(annFile)

loading annotations into memory...


Done (t=18.76s)
creating index...


index created!


In [5]:
dataSplit, VAL_RUN = "run-full-val", "run2"

annFile = '{}{}.json'.format(DATADIR,dataSplit)

In [6]:
coco_val = COCO(annFile)

loading annotations into memory...


Done (t=17.34s)
creating index...


index created!


In [7]:
dataSplit, TEST_RUN = "run-full-test", "run3"

annFile = '{}{}.json'.format(DATADIR,dataSplit)

In [8]:
coco_test = COCO(annFile)

loading annotations into memory...


Done (t=9.66s)
creating index...


index created!


In [9]:
player_positions = {"run1":[], "run2":[], "run3":[]}
motion_vectors = {"run1":[], "run2":[], "run3":[]}

for run in USED_RUNS:
    with open(DATADIR+run+"/log.txt", 'r') as log_file:
        for line in log_file:
            if "player" in line:
                line = line.strip()
                tic, stats = line.split("player:")
                x, y, z, angle = stats.split(",")
    
                # Store position in the dictionary
                player_positions[run].append((float(x), float(y), float(z), float(angle)))
                if len(player_positions[run]) >= 2:
                    player_position = player_positions[run][-1]
                    prev_player_position = player_positions[run][-2]
                    
                    dx = player_position[0] - prev_player_position[0]
                    dy = player_position[1] - prev_player_position[1]
                    dz = player_position[2] - prev_player_position[2]
                    dangle = np.pi - abs(abs(player_position[3] - prev_player_position[3]) - np.pi)
                    
                    dx_relative = dx * np.cos(2 * np.pi - prev_player_position[3]) + dy * np.cos(prev_player_position[3] - 1/2 * np.pi)
                    dy_relative = dx * np.sin(2 * np.pi - prev_player_position[3]) + dy * np.sin(prev_player_position[3] - 1/2 * np.pi)
                    motion_vector = (dx_relative, dy_relative, dz, dangle)
                    motion_vectors[run].append(motion_vector)

In [10]:
class DoomMotionDataset(Dataset):
    def __init__(self, coco, run, input_window, prediction_window, transform=None):
        self.coco = coco
        self.run = run
        self.img_ids = self.coco.getImgIds()
        self.transform = transform
        self.input_window = input_window
        self.prediction_window = prediction_window

    def __len__(self):
        return len(self.img_ids)

    def fullSegmentationFormat(self, rgb_filename):
        seg_image = self.load_image(self.getSegmentationMask(DATADIR + rgb_filename))
        if seg_image == None:
            return seg_image
        seg_class_map = self.color_to_index(seg_image)
        seg_class_one_hot = functions.one_hot(seg_class_map, num_classes=4).to(dtype=torch.float).permute(2, 0, 1)
        return seg_class_one_hot

    def fullDepthFormat(self, rgb_filename):
        depth_mask = self.load_image(self.getDepthMask(DATADIR + rgb_filename))
        if depth_mask == None:
            return depth_mask
        depth_mask = torch.tensor(depth_mask, dtype=torch.float32)
        return depth_mask

    def getSegmentationMask(self, rgb_filename):
        return rgb_filename.replace("rgb", "objects")

    def getDepthMask(self, rgb_filename):
        return rgb_filename.replace("rgb", "depth")

    def color_to_index(self, segmentation_image):
        # Map colors to class indices
        r, g, b = segmentation_image
        pixel_values = r + (g *  2**8) + (b * 2**16)  # From cocodoom documentation, converts to an object id

        class_map = torch.full_like(pixel_values, 3, dtype=torch.long)

        sky = (1 << 23) + 0
        horizontal = (1 << 23) + 1
        vertical = (1 << 23) + 2
        
        class_map[x == sky] = 0
        class_map[x == horizontal] = 1
        class_map[x == vertical] = 2
        return class_map

    def load_image(self, path):
        if os.path.exists(path):
            img = Image.open(path)
            return transforms.ToTensor()(img)
        return None

    def __getitem__(self, idx):
        # Load the RGB image
        rgb_filename = self.coco.loadImgs(self.img_ids[idx])[0]['file_name']
        #print(rgb_filename)
        tic = int(rgb_filename.replace(".png", "").split("/")[-1])
        next_tic = tic+1
        previous_tic = tic-1
        prev_motion_vectors = []
        next_motion_vectors = []
        prev_seg = []
        prev_dep = []

        for t in range(input_window, 0, -1):
            if tic-t < 0:
                prev_motion_vectors.append(motion_vectors[self.run][0])
                prev_filename = self.coco.loadImgs(self.img_ids[0])[0]['file_name']
                seg = self.fullSegmentationFormat(prev_filename)
                dep = self.fullDepthFormat(prev_filename)
                prev_seg.append(seg)
                prev_dep.append(dep)
                continue
            elif tic-t >= len(motion_vectors[self.run]):
                prev_motion_vectors.append(motion_vectors[self.run][-1])
                prev_filename = self.coco.loadImgs(self.img_ids[-1])[0]['file_name']
                seg = self.fullSegmentationFormat(prev_filename)
                dep = self.fullDepthFormat(prev_filename)
                prev_seg.append(seg)
                prev_dep.append(dep)
                continue
            prev_motion_vectors.append(motion_vectors[self.run][tic-t])
            prev_filename = rgb_filename[:-10] + str(max(tic - t, 2)).rjust(6, "0") + ".png"
            # run1/map01/rgb/000002.png
            if os.path.exists(DATADIR + prev_filename):
                seg = self.fullSegmentationFormat(prev_filename)
                #print(f"seg shape: {seg.shape}")
                dep = self.fullDepthFormat(prev_filename)
                #print(f"dep shape: {dep.shape}")
                prev_seg.append(seg)
                prev_dep.append(dep)
            else:
                prev_seg.append(torch.zeros((4, 200, 320)))
                prev_dep.append(torch.zeros((1, 200, 320)))
                

        for t in range(1, prediction_window+1):
            if tic+t >= len(motion_vectors[self.run]):
                next_motion_vectors.append(motion_vectors[self.run][-1])
                continue
            next_motion_vectors.append(motion_vectors[self.run][tic+t])

        # if dx > 1000:
        #     print(f"idx: {idx}")
        #     print(f"rgb_filename: {rgb_filename}")
        #     print(f"tic: {tic}")
        #     print(f"next_tic: {next_tic}")
        #     print(f"previous_tic: {previous_tic}")
        #     print(f"Sus {idx}")
        #     print(f"prev_player_position: {prev_player_position}")
        #     print(f"player_position: {player_position}")
        #     print(f"next_player_position: {next_player_position}")
        #     print(f"prev_motion_vector: {prev_motion_vector}")
        #     print(f"next_motion_vector: {next_motion_vector}")

        #print(prev_motion_vectors)
        #print(next_motion_vectors)
            
        prev_motion_vectors = torch.tensor(prev_motion_vectors, dtype=torch.float32)
        next_motion_vectors = torch.tensor(next_motion_vectors, dtype=torch.float32)
        #print(len(prev_seg))
        prev_seg = torch.stack(prev_seg)
        prev_dep = torch.stack(prev_dep)
        
        return {"prev_motion" : prev_motion_vectors, "next_motion" : next_motion_vectors, "previous_seg" : prev_seg, "previous_dep" : prev_dep}


In [11]:
class NeuralNetwork(nn.Module):
  def __init__(self, batch_size, input_length, sequence_length, activation_function=functions.relu, device=torch.device("cpu")):
    super(NeuralNetwork, self).__init__()
    self.batch_size = batch_size
    self.input_length = input_length
    self.sequence_length = sequence_length

    # Encoder
    # Conv layers
    self.conv_seg = nn.Conv2d(4, 1, kernel_size=3, stride=2, padding=1, bias=False).to(device)
    self.conv_dep = nn.Conv2d(1, 1, kernel_size=3, stride=2, padding=1, bias=False).to(device)

    self.motion_fc = nn.Linear(4, 32).to(device)
      
    # Pre-fusion LSTMs
    self.vis_LSTM = nn.LSTM(input_size=32000, hidden_size=256, batch_first=True).to(device)
    self.inertia_LSTM = nn.LSTM(input_size=32, hidden_size=256, batch_first=True).to(device)

    # Fusion LSTM
    self.fusion_LSTM = nn.LSTM(input_size=512, hidden_size=256, batch_first=True).to(device)

    # Decoder
    self.de_motion_fc = nn.Linear(4, 32).to(device)
    self.de_vis_LSTM = nn.LSTM(input_size=32, hidden_size=256, batch_first=True).to(device) #Unsure what the input size of this should be as it actually receives nothing
    self.de_inertia_LSTM = nn.LSTM(input_size=32, hidden_size=256, batch_first=True).to(device)
    self.de_fusion_LSTM = nn.LSTM(input_size=512, hidden_size=256, batch_first=True).to(device)
    self.output_fc = nn.Linear(256, 4).to(device)

  def forward(self, segmentation, depth, prev_motion):
    hidden_vis = None
    hidden_inert = None
    hidden_fus = None
    
    for t in range(self.input_length):
        #print(segmentation.shape)
        seg = self.conv_seg(segmentation[:,t])
        #print(seg.shape)
        dep = self.conv_dep(depth[:,t])
        #print(dep.shape)
        mot = self.motion_fc(prev_motion[:,t])
        vis = torch.cat((seg, dep), dim=1)
        vis = torch.flatten(vis, start_dim=1)
        #print(vis.shape)
        if hidden_vis != None:
            output_vis, hidden_vis = self.vis_LSTM(vis, hidden_vis)
        else:
            output_vis, hidden_vis = self.vis_LSTM(vis)
        if hidden_inert != None:
            output_inert, hidden_inert = self.inertia_LSTM(mot, hidden_inert)
        else:
            output_inert, hidden_inert = self.inertia_LSTM(mot)
        combined = torch.cat((output_vis, output_inert), dim=1)
        if hidden_fus != None:
            _, hidden_fus = self.fusion_LSTM(combined, hidden_fus)
        else:
            _, hidden_fus = self.fusion_LSTM(combined)

    #print("Prev motion: " + str(prev_motion.shape))
    de_mot = prev_motion[:,-1]
    output_tensor = torch.zeros(self.sequence_length, segmentation.size(0), 4).to(segmentation.device)
    for t in range(self.sequence_length):
        #print(de_mot.shape)
        de_mot = self.de_motion_fc(de_mot)
        de_output_inert, hidden_inert = self.de_inertia_LSTM(de_mot, hidden_inert)
        de_output_vis, hidden_vis = self.de_vis_LSTM(torch.zeros(segmentation.size(0), 32).to(segmentation.device), hidden_vis)
        #print(de_output_vis.shape, de_output_inert.shape)
        combined = torch.cat((de_output_vis, de_output_inert), dim=1)
        de_output_fus, hidden_fus = self.de_fusion_LSTM(combined, hidden_fus)
        #print("de_output_fus: " + str(de_output_fus.shape))
        output_t = self.output_fc(de_output_fus)
        #print("output_t: " + str(output_t.shape))
        #output_t = output_t.unsqueeze(0)
        de_mot = output_t
        output_tensor[t] = output_t.unsqueeze(0)
        
    return output_tensor

In [12]:
torch.cuda.empty_cache()
batch_size = 256
learning_rate = 1e-3
num_epochs = 10
input_window = 5
prediction_window = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("cuda" if torch.cuda.is_available() else "cpu")
model = NeuralNetwork(batch_size, input_window, prediction_window, device=device).to(device)

train_dataset = DoomMotionDataset(coco_train, TRAIN_RUN, input_window, prediction_window)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

val_dataset = DoomMotionDataset(coco_val, VAL_RUN, input_window, prediction_window)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
    for batch_idx, batch in enumerate(progress_bar):
        prev_motion, next_motion, previous_seg, previous_dep = batch["prev_motion"], batch["next_motion"], batch["previous_seg"], batch["previous_dep"]
        prev_motion, next_motion, previous_seg, previous_dep = prev_motion.to(device), next_motion.to(device), previous_seg.to(device), previous_dep.to(device)

        if prev_motion.size(0) != next_motion.size(0) != previous_seg.size(0) != previous_dep.size(0):
                continue

        optimizer.zero_grad()

        outputs = model(previous_seg, previous_dep, prev_motion)
        outputs = outputs.permute(1, 0, 2)

        if outputs.size(0) != next_motion.size(0):
            continue
        
        loss = criterion(outputs, next_motion)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

        progress_bar.set_postfix({
            "batch_loss": loss.item(),
            "batch_index": batch_idx + 1,
            "batch_size": prev_motion.size(0)
        })

    # Average loss per epoch
    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    
    
    progress_bar = tqdm(val_loader, desc="Validation", unit="batch")
    
    with torch.no_grad():  # Disable gradient calculations for evaluation
        for batch_idx, batch in enumerate(progress_bar):
            prev_motion, next_motion, previous_seg, previous_dep = batch["prev_motion"], batch["next_motion"], batch["previous_seg"], batch["previous_dep"]
            prev_motion, next_motion, previous_seg, previous_dep = prev_motion.to(device), next_motion.to(device), previous_seg.to(device), previous_dep.to(device)

            if prev_motion.size(0) != next_motion.size(0) != previous_seg.size(0) != previous_dep.size(0):
                continue
                
            outputs = model(previous_seg, previous_dep, prev_motion)
            outputs = outputs.permute(1, 0, 2)

            if outputs.size(0) != next_motion.size(0):
                continue
            
            loss = criterion(outputs, next_motion)
            
            running_loss += loss.item()
            
            progress_bar.set_postfix({
                "batch_loss": loss.item(),
                "batch_index": batch_idx + 1,
                "batch_size": prev_motion.size(0)
            })
    
    # Average loss over all batches
    val_loss = running_loss / len(val_loader)
    print(f"Val Loss: {val_loss:.4f}")

# Save the trained model
torch.save(model.state_dict(), "multimodal_seq2seq.pth")

cuda


Epoch 1/10:   0%|                                                                            | 0/991 [00:00<?, ?batch/s]

  depth_mask = torch.tensor(depth_mask, dtype=torch.float32)


Epoch 1/10:   0%|                            | 0/991 [00:15<?, ?batch/s, batch_loss=32.5, batch_index=1, batch_size=256]

Epoch 1/10:   0%|                  | 1/991 [00:15<4:08:34, 15.06s/batch, batch_loss=32.5, batch_index=1, batch_size=256]

Epoch 1/10:   0%|                  | 1/991 [00:31<4:08:34, 15.06s/batch, batch_loss=32.5, batch_index=2, batch_size=256]

Epoch 1/10:   0%|                  | 2/991 [00:31<4:20:06, 15.78s/batch, batch_loss=32.5, batch_index=2, batch_size=256]

Epoch 1/10:   0%|                  | 2/991 [00:45<4:20:06, 15.78s/batch, batch_loss=26.6, batch_index=3, batch_size=256]

Epoch 1/10:   0%|                  | 3/991 [00:45<4:08:42, 15.10s/batch, batch_loss=26.6, batch_index=3, batch_size=256]

Epoch 1/10:   0%|                  | 3/991 [00:59<4:08:42, 15.10s/batch, batch_loss=12.4, batch_index=4, batch_size=256]

Epoch 1/10:   0%|                  | 4/991 [00:59<4:03:22, 14.80s/batch, batch_loss=12.4, batch_index=4, batch_size=256]

Epoch 1/10:   0%|                  | 4/991 [01:14<4:03:22, 14.80s/batch, batch_loss=40.9, batch_index=5, batch_size=256]

Epoch 1/10:   1%|                  | 5/991 [01:14<4:00:35, 14.64s/batch, batch_loss=40.9, batch_index=5, batch_size=256]

Epoch 1/10:   1%|                  | 5/991 [01:29<4:00:35, 14.64s/batch, batch_loss=33.9, batch_index=6, batch_size=256]

Epoch 1/10:   1%|                  | 6/991 [01:29<4:04:08, 14.87s/batch, batch_loss=33.9, batch_index=6, batch_size=256]

Epoch 1/10:   1%|                  | 6/991 [01:43<4:04:08, 14.87s/batch, batch_loss=30.4, batch_index=7, batch_size=256]

Epoch 1/10:   1%|▏                 | 7/991 [01:43<3:59:40, 14.61s/batch, batch_loss=30.4, batch_index=7, batch_size=256]

Epoch 1/10:   1%|▏                  | 7/991 [01:58<3:59:40, 14.61s/batch, batch_loss=608, batch_index=8, batch_size=256]

Epoch 1/10:   1%|▏                  | 8/991 [01:58<3:57:36, 14.50s/batch, batch_loss=608, batch_index=8, batch_size=256]

Epoch 1/10:   1%|▏                 | 8/991 [02:13<3:57:36, 14.50s/batch, batch_loss=18.6, batch_index=9, batch_size=256]

Epoch 1/10:   1%|▏                 | 9/991 [02:13<4:00:14, 14.68s/batch, batch_loss=18.6, batch_index=9, batch_size=256]

Epoch 1/10:   1%|▏                | 9/991 [02:28<4:00:14, 14.68s/batch, batch_loss=22.2, batch_index=10, batch_size=256]

Epoch 1/10:   1%|▏               | 10/991 [02:28<4:01:21, 14.76s/batch, batch_loss=22.2, batch_index=10, batch_size=256]

Epoch 1/10:   1%|▏               | 10/991 [02:42<4:01:21, 14.76s/batch, batch_loss=18.5, batch_index=11, batch_size=256]

Epoch 1/10:   1%|▏               | 11/991 [02:42<4:02:07, 14.82s/batch, batch_loss=18.5, batch_index=11, batch_size=256]

Epoch 1/10:   1%|▏            | 11/991 [02:57<4:02:07, 14.82s/batch, batch_loss=2.01e+3, batch_index=12, batch_size=256]

Epoch 1/10:   1%|▏            | 12/991 [02:57<4:01:34, 14.81s/batch, batch_loss=2.01e+3, batch_index=12, batch_size=256]

Epoch 1/10:   1%|▏               | 12/991 [03:11<4:01:34, 14.81s/batch, batch_loss=28.7, batch_index=13, batch_size=256]

Epoch 1/10:   1%|▏               | 13/991 [03:11<3:56:53, 14.53s/batch, batch_loss=28.7, batch_index=13, batch_size=256]

Epoch 1/10:   1%|▏               | 13/991 [03:25<3:56:53, 14.53s/batch, batch_loss=14.6, batch_index=14, batch_size=256]

Epoch 1/10:   1%|▏               | 14/991 [03:25<3:54:29, 14.40s/batch, batch_loss=14.6, batch_index=14, batch_size=256]

Epoch 1/10:   1%|▏               | 14/991 [03:40<3:54:29, 14.40s/batch, batch_loss=18.6, batch_index=15, batch_size=256]

Epoch 1/10:   2%|▏               | 15/991 [03:40<3:55:19, 14.47s/batch, batch_loss=18.6, batch_index=15, batch_size=256]

Epoch 1/10:   2%|▏               | 15/991 [03:55<3:55:19, 14.47s/batch, batch_loss=18.5, batch_index=16, batch_size=256]

Epoch 1/10:   2%|▎               | 16/991 [03:55<3:58:34, 14.68s/batch, batch_loss=18.5, batch_index=16, batch_size=256]

Epoch 1/10:   2%|▎               | 16/991 [04:11<3:58:34, 14.68s/batch, batch_loss=21.7, batch_index=17, batch_size=256]

Epoch 1/10:   2%|▎               | 17/991 [04:11<4:03:54, 15.03s/batch, batch_loss=21.7, batch_index=17, batch_size=256]

Epoch 1/10:   2%|▎               | 17/991 [04:24<4:03:54, 15.03s/batch, batch_loss=15.8, batch_index=18, batch_size=256]

Epoch 1/10:   2%|▎               | 18/991 [04:24<3:54:13, 14.44s/batch, batch_loss=15.8, batch_index=18, batch_size=256]

Epoch 1/10:   2%|▏            | 18/991 [04:37<3:54:13, 14.44s/batch, batch_loss=9.02e+3, batch_index=19, batch_size=256]

Epoch 1/10:   2%|▏            | 19/991 [04:37<3:48:48, 14.12s/batch, batch_loss=9.02e+3, batch_index=19, batch_size=256]

Epoch 1/10:   2%|▎               | 19/991 [04:52<3:48:48, 14.12s/batch, batch_loss=17.8, batch_index=20, batch_size=256]

Epoch 1/10:   2%|▎               | 20/991 [04:52<3:50:18, 14.23s/batch, batch_loss=17.8, batch_index=20, batch_size=256]

Epoch 1/10:   2%|▎                 | 20/991 [05:05<3:50:18, 14.23s/batch, batch_loss=29, batch_index=21, batch_size=256]

Epoch 1/10:   2%|▍                 | 21/991 [05:05<3:47:11, 14.05s/batch, batch_loss=29, batch_index=21, batch_size=256]

Epoch 1/10:   2%|▎            | 21/991 [05:20<3:47:11, 14.05s/batch, batch_loss=1.07e+3, batch_index=22, batch_size=256]

Epoch 1/10:   2%|▎            | 22/991 [05:20<3:49:03, 14.18s/batch, batch_loss=1.07e+3, batch_index=22, batch_size=256]

Epoch 1/10:   2%|▎               | 22/991 [05:37<3:49:03, 14.18s/batch, batch_loss=10.2, batch_index=23, batch_size=256]

Epoch 1/10:   2%|▎               | 23/991 [05:37<4:01:54, 14.99s/batch, batch_loss=10.2, batch_index=23, batch_size=256]

Epoch 1/10:   2%|▍                 | 23/991 [05:52<4:01:54, 14.99s/batch, batch_loss=22, batch_index=24, batch_size=256]

Epoch 1/10:   2%|▍                 | 24/991 [05:52<4:03:04, 15.08s/batch, batch_loss=22, batch_index=24, batch_size=256]

Epoch 1/10:   2%|▍               | 24/991 [06:06<4:03:04, 15.08s/batch, batch_loss=16.1, batch_index=25, batch_size=256]

Epoch 1/10:   3%|▍               | 25/991 [06:06<3:56:26, 14.69s/batch, batch_loss=16.1, batch_index=25, batch_size=256]

Epoch 1/10:   3%|▍               | 25/991 [06:20<3:56:26, 14.69s/batch, batch_loss=30.4, batch_index=26, batch_size=256]

Epoch 1/10:   3%|▍               | 26/991 [06:20<3:54:31, 14.58s/batch, batch_loss=30.4, batch_index=26, batch_size=256]

Epoch 1/10:   3%|▍               | 26/991 [06:35<3:54:31, 14.58s/batch, batch_loss=24.5, batch_index=27, batch_size=256]

Epoch 1/10:   3%|▍               | 27/991 [06:35<3:54:12, 14.58s/batch, batch_loss=24.5, batch_index=27, batch_size=256]

Epoch 1/10:   3%|▎            | 27/991 [06:50<3:54:12, 14.58s/batch, batch_loss=1.13e+3, batch_index=28, batch_size=256]

Epoch 1/10:   3%|▎            | 28/991 [06:50<3:55:12, 14.66s/batch, batch_loss=1.13e+3, batch_index=28, batch_size=256]

Epoch 1/10:   3%|▍               | 28/991 [07:04<3:55:12, 14.66s/batch, batch_loss=17.7, batch_index=29, batch_size=256]

Epoch 1/10:   3%|▍               | 29/991 [07:04<3:55:15, 14.67s/batch, batch_loss=17.7, batch_index=29, batch_size=256]

Epoch 1/10:   3%|▍               | 29/991 [07:18<3:55:15, 14.67s/batch, batch_loss=15.8, batch_index=30, batch_size=256]

Epoch 1/10:   3%|▍               | 30/991 [07:18<3:52:23, 14.51s/batch, batch_loss=15.8, batch_index=30, batch_size=256]

Epoch 1/10:   3%|▍               | 30/991 [07:33<3:52:23, 14.51s/batch, batch_loss=13.4, batch_index=31, batch_size=256]

Epoch 1/10:   3%|▌               | 31/991 [07:33<3:51:33, 14.47s/batch, batch_loss=13.4, batch_index=31, batch_size=256]

Epoch 1/10:   3%|▍             | 31/991 [07:47<3:51:33, 14.47s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 1/10:   3%|▍             | 32/991 [07:47<3:51:15, 14.47s/batch, batch_loss=1.3e+4, batch_index=32, batch_size=256]

Epoch 1/10:   3%|▌               | 32/991 [08:03<3:51:15, 14.47s/batch, batch_loss=24.8, batch_index=33, batch_size=256]

Epoch 1/10:   3%|▌               | 33/991 [08:03<3:55:20, 14.74s/batch, batch_loss=24.8, batch_index=33, batch_size=256]

Epoch 1/10:   3%|▌               | 33/991 [08:17<3:55:20, 14.74s/batch, batch_loss=13.5, batch_index=34, batch_size=256]

Epoch 1/10:   3%|▌               | 34/991 [08:17<3:55:02, 14.74s/batch, batch_loss=13.5, batch_index=34, batch_size=256]

Epoch 1/10:   3%|▌               | 34/991 [08:35<3:55:02, 14.74s/batch, batch_loss=19.1, batch_index=35, batch_size=256]

Epoch 1/10:   4%|▌               | 35/991 [08:35<4:08:12, 15.58s/batch, batch_loss=19.1, batch_index=35, batch_size=256]

Epoch 1/10:   4%|▌               | 35/991 [08:49<4:08:12, 15.58s/batch, batch_loss=12.3, batch_index=36, batch_size=256]

Epoch 1/10:   4%|▌               | 36/991 [08:49<3:58:15, 14.97s/batch, batch_loss=12.3, batch_index=36, batch_size=256]

Epoch 1/10:   4%|▌               | 36/991 [09:03<3:58:15, 14.97s/batch, batch_loss=16.8, batch_index=37, batch_size=256]

Epoch 1/10:   4%|▌               | 37/991 [09:03<3:55:21, 14.80s/batch, batch_loss=16.8, batch_index=37, batch_size=256]

Epoch 1/10:   4%|▌               | 37/991 [09:18<3:55:21, 14.80s/batch, batch_loss=13.2, batch_index=38, batch_size=256]

Epoch 1/10:   4%|▌               | 38/991 [09:18<3:56:49, 14.91s/batch, batch_loss=13.2, batch_index=38, batch_size=256]

Epoch 1/10:   4%|▍            | 38/991 [09:33<3:56:49, 14.91s/batch, batch_loss=1.73e+3, batch_index=39, batch_size=256]

Epoch 1/10:   4%|▌            | 39/991 [09:33<3:56:00, 14.87s/batch, batch_loss=1.73e+3, batch_index=39, batch_size=256]

Epoch 1/10:   4%|▋               | 39/991 [09:46<3:56:00, 14.87s/batch, batch_loss=18.5, batch_index=40, batch_size=256]

Epoch 1/10:   4%|▋               | 40/991 [09:46<3:49:25, 14.47s/batch, batch_loss=18.5, batch_index=40, batch_size=256]

Epoch 1/10:   4%|▌             | 40/991 [10:00<3:49:25, 14.47s/batch, batch_loss=6.2e+3, batch_index=41, batch_size=256]

Epoch 1/10:   4%|▌             | 41/991 [10:00<3:45:49, 14.26s/batch, batch_loss=6.2e+3, batch_index=41, batch_size=256]

Epoch 1/10:   4%|▋               | 41/991 [10:14<3:45:49, 14.26s/batch, batch_loss=21.6, batch_index=42, batch_size=256]

Epoch 1/10:   4%|▋               | 42/991 [10:14<3:44:32, 14.20s/batch, batch_loss=21.6, batch_index=42, batch_size=256]

Epoch 1/10:   4%|▋               | 42/991 [10:29<3:44:32, 14.20s/batch, batch_loss=13.3, batch_index=43, batch_size=256]

Epoch 1/10:   4%|▋               | 43/991 [10:29<3:47:37, 14.41s/batch, batch_loss=13.3, batch_index=43, batch_size=256]

Epoch 1/10:   4%|▋               | 43/991 [10:47<3:47:37, 14.41s/batch, batch_loss=20.7, batch_index=44, batch_size=256]

Epoch 1/10:   4%|▋               | 44/991 [10:47<4:04:25, 15.49s/batch, batch_loss=20.7, batch_index=44, batch_size=256]

Epoch 1/10:   4%|▋               | 44/991 [11:01<4:04:25, 15.49s/batch, batch_loss=24.9, batch_index=45, batch_size=256]

Epoch 1/10:   5%|▋               | 45/991 [11:01<3:57:52, 15.09s/batch, batch_loss=24.9, batch_index=45, batch_size=256]

Epoch 1/10:   5%|▋               | 45/991 [11:14<3:57:52, 15.09s/batch, batch_loss=17.9, batch_index=46, batch_size=256]

Epoch 1/10:   5%|▋               | 46/991 [11:14<3:47:49, 14.47s/batch, batch_loss=17.9, batch_index=46, batch_size=256]

Epoch 1/10:   5%|▋               | 46/991 [11:29<3:47:49, 14.47s/batch, batch_loss=9.56, batch_index=47, batch_size=256]

Epoch 1/10:   5%|▊               | 47/991 [11:29<3:47:48, 14.48s/batch, batch_loss=9.56, batch_index=47, batch_size=256]

Epoch 1/10:   5%|▊               | 47/991 [11:44<3:47:48, 14.48s/batch, batch_loss=14.8, batch_index=48, batch_size=256]

Epoch 1/10:   5%|▊               | 48/991 [11:44<3:49:19, 14.59s/batch, batch_loss=14.8, batch_index=48, batch_size=256]

Epoch 1/10:   5%|▊               | 48/991 [11:58<3:49:19, 14.59s/batch, batch_loss=15.4, batch_index=49, batch_size=256]

Epoch 1/10:   5%|▊               | 49/991 [11:58<3:47:52, 14.51s/batch, batch_loss=15.4, batch_index=49, batch_size=256]

Epoch 1/10:   5%|▊               | 49/991 [12:13<3:47:52, 14.51s/batch, batch_loss=27.5, batch_index=50, batch_size=256]

Epoch 1/10:   5%|▊               | 50/991 [12:13<3:48:00, 14.54s/batch, batch_loss=27.5, batch_index=50, batch_size=256]

Epoch 1/10:   5%|▊               | 50/991 [12:26<3:48:00, 14.54s/batch, batch_loss=9.89, batch_index=51, batch_size=256]

Epoch 1/10:   5%|▊               | 51/991 [12:26<3:41:42, 14.15s/batch, batch_loss=9.89, batch_index=51, batch_size=256]

Epoch 1/10:   5%|▊               | 51/991 [12:42<3:41:42, 14.15s/batch, batch_loss=20.5, batch_index=52, batch_size=256]

Epoch 1/10:   5%|▊               | 52/991 [12:42<3:49:30, 14.67s/batch, batch_loss=20.5, batch_index=52, batch_size=256]

Epoch 1/10:   5%|▊               | 52/991 [12:56<3:49:30, 14.67s/batch, batch_loss=20.4, batch_index=53, batch_size=256]

Epoch 1/10:   5%|▊               | 53/991 [12:56<3:48:56, 14.64s/batch, batch_loss=20.4, batch_index=53, batch_size=256]

Epoch 1/10:   5%|▊               | 53/991 [13:10<3:48:56, 14.64s/batch, batch_loss=13.3, batch_index=54, batch_size=256]

Epoch 1/10:   5%|▊               | 54/991 [13:10<3:43:54, 14.34s/batch, batch_loss=13.3, batch_index=54, batch_size=256]

Epoch 1/10:   5%|▊               | 54/991 [13:23<3:43:54, 14.34s/batch, batch_loss=15.8, batch_index=55, batch_size=256]

Epoch 1/10:   6%|▉               | 55/991 [13:24<3:40:08, 14.11s/batch, batch_loss=15.8, batch_index=55, batch_size=256]

Epoch 1/10:   6%|▉               | 55/991 [13:37<3:40:08, 14.11s/batch, batch_loss=14.4, batch_index=56, batch_size=256]

Epoch 1/10:   6%|▉               | 56/991 [13:37<3:38:57, 14.05s/batch, batch_loss=14.4, batch_index=56, batch_size=256]

Epoch 1/10:   6%|▉               | 56/991 [13:52<3:38:57, 14.05s/batch, batch_loss=10.8, batch_index=57, batch_size=256]

Epoch 1/10:   6%|▉               | 57/991 [13:52<3:39:12, 14.08s/batch, batch_loss=10.8, batch_index=57, batch_size=256]

Epoch 1/10:   6%|▉               | 57/991 [14:05<3:39:12, 14.08s/batch, batch_loss=32.4, batch_index=58, batch_size=256]

Epoch 1/10:   6%|▉               | 58/991 [14:05<3:37:12, 13.97s/batch, batch_loss=32.4, batch_index=58, batch_size=256]

Epoch 1/10:   6%|▉               | 58/991 [14:20<3:37:12, 13.97s/batch, batch_loss=12.2, batch_index=59, batch_size=256]

Epoch 1/10:   6%|▉               | 59/991 [14:20<3:39:18, 14.12s/batch, batch_loss=12.2, batch_index=59, batch_size=256]

Epoch 1/10:   6%|▉               | 59/991 [14:35<3:39:18, 14.12s/batch, batch_loss=34.2, batch_index=60, batch_size=256]

Epoch 1/10:   6%|▉               | 60/991 [14:35<3:45:02, 14.50s/batch, batch_loss=34.2, batch_index=60, batch_size=256]

Epoch 1/10:   6%|▉               | 60/991 [14:50<3:45:02, 14.50s/batch, batch_loss=17.6, batch_index=61, batch_size=256]

Epoch 1/10:   6%|▉               | 61/991 [14:50<3:45:46, 14.57s/batch, batch_loss=17.6, batch_index=61, batch_size=256]

Epoch 1/10:   6%|▉               | 61/991 [15:04<3:45:46, 14.57s/batch, batch_loss=17.5, batch_index=62, batch_size=256]

Epoch 1/10:   6%|█               | 62/991 [15:04<3:43:52, 14.46s/batch, batch_loss=17.5, batch_index=62, batch_size=256]

Epoch 1/10:   6%|█                | 62/991 [15:18<3:43:52, 14.46s/batch, batch_loss=430, batch_index=63, batch_size=256]

Epoch 1/10:   6%|█                | 63/991 [15:18<3:42:34, 14.39s/batch, batch_loss=430, batch_index=63, batch_size=256]

Epoch 1/10:   6%|█                | 63/991 [15:33<3:42:34, 14.39s/batch, batch_loss=792, batch_index=64, batch_size=256]

Epoch 1/10:   6%|█                | 64/991 [15:33<3:42:17, 14.39s/batch, batch_loss=792, batch_index=64, batch_size=256]

Epoch 1/10:   6%|▊            | 64/991 [15:47<3:42:17, 14.39s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 1/10:   7%|▊            | 65/991 [15:47<3:39:43, 14.24s/batch, batch_loss=2.55e+3, batch_index=65, batch_size=256]

Epoch 1/10:   7%|█               | 65/991 [16:01<3:39:43, 14.24s/batch, batch_loss=6.87, batch_index=66, batch_size=256]

Epoch 1/10:   7%|█               | 66/991 [16:01<3:40:57, 14.33s/batch, batch_loss=6.87, batch_index=66, batch_size=256]

Epoch 1/10:   7%|█               | 66/991 [16:16<3:40:57, 14.33s/batch, batch_loss=17.3, batch_index=67, batch_size=256]

Epoch 1/10:   7%|█               | 67/991 [16:16<3:41:54, 14.41s/batch, batch_loss=17.3, batch_index=67, batch_size=256]

Epoch 1/10:   7%|█               | 67/991 [16:30<3:41:54, 14.41s/batch, batch_loss=13.6, batch_index=68, batch_size=256]

Epoch 1/10:   7%|█               | 68/991 [16:30<3:42:42, 14.48s/batch, batch_loss=13.6, batch_index=68, batch_size=256]

Epoch 1/10:   7%|█               | 68/991 [16:48<3:42:42, 14.48s/batch, batch_loss=34.5, batch_index=69, batch_size=256]

Epoch 1/10:   7%|█               | 69/991 [16:48<3:57:48, 15.48s/batch, batch_loss=34.5, batch_index=69, batch_size=256]

Epoch 1/10:   7%|█               | 69/991 [17:02<3:57:48, 15.48s/batch, batch_loss=9.55, batch_index=70, batch_size=256]

Epoch 1/10:   7%|█▏              | 70/991 [17:02<3:51:03, 15.05s/batch, batch_loss=9.55, batch_index=70, batch_size=256]

Epoch 1/10:   7%|█▏              | 70/991 [17:16<3:51:03, 15.05s/batch, batch_loss=26.5, batch_index=71, batch_size=256]

Epoch 1/10:   7%|█▏              | 71/991 [17:16<3:46:58, 14.80s/batch, batch_loss=26.5, batch_index=71, batch_size=256]

Epoch 1/10:   7%|█▎                | 71/991 [17:32<3:46:58, 14.80s/batch, batch_loss=19, batch_index=72, batch_size=256]

Epoch 1/10:   7%|█▎                | 72/991 [17:32<3:48:20, 14.91s/batch, batch_loss=19, batch_index=72, batch_size=256]

Epoch 1/10:   7%|█▎                | 72/991 [17:45<3:48:20, 14.91s/batch, batch_loss=31, batch_index=73, batch_size=256]

Epoch 1/10:   7%|█▎                | 73/991 [17:45<3:42:36, 14.55s/batch, batch_loss=31, batch_index=73, batch_size=256]

Epoch 1/10:   7%|▉            | 73/991 [18:00<3:42:36, 14.55s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 1/10:   7%|▉            | 74/991 [18:00<3:42:24, 14.55s/batch, batch_loss=1.74e+3, batch_index=74, batch_size=256]

Epoch 1/10:   7%|█▏              | 74/991 [18:15<3:42:24, 14.55s/batch, batch_loss=24.5, batch_index=75, batch_size=256]

Epoch 1/10:   8%|█▏              | 75/991 [18:15<3:43:22, 14.63s/batch, batch_loss=24.5, batch_index=75, batch_size=256]

Epoch 1/10:   8%|█▏              | 75/991 [18:29<3:43:22, 14.63s/batch, batch_loss=15.4, batch_index=76, batch_size=256]

Epoch 1/10:   8%|█▏              | 76/991 [18:29<3:41:57, 14.56s/batch, batch_loss=15.4, batch_index=76, batch_size=256]

Epoch 1/10:   8%|█▏              | 76/991 [18:44<3:41:57, 14.56s/batch, batch_loss=14.9, batch_index=77, batch_size=256]

Epoch 1/10:   8%|█▏              | 77/991 [18:44<3:43:45, 14.69s/batch, batch_loss=14.9, batch_index=77, batch_size=256]

Epoch 1/10:   8%|█▏              | 77/991 [18:59<3:43:45, 14.69s/batch, batch_loss=13.2, batch_index=78, batch_size=256]

Epoch 1/10:   8%|█▎              | 78/991 [18:59<3:42:30, 14.62s/batch, batch_loss=13.2, batch_index=78, batch_size=256]

Epoch 1/10:   8%|█▎              | 78/991 [19:13<3:42:30, 14.62s/batch, batch_loss=11.6, batch_index=79, batch_size=256]

Epoch 1/10:   8%|█▎              | 79/991 [19:13<3:40:01, 14.48s/batch, batch_loss=11.6, batch_index=79, batch_size=256]

Epoch 1/10:   8%|█▎              | 79/991 [19:27<3:40:01, 14.48s/batch, batch_loss=15.3, batch_index=80, batch_size=256]

Epoch 1/10:   8%|█▎              | 80/991 [19:27<3:40:14, 14.51s/batch, batch_loss=15.3, batch_index=80, batch_size=256]

Epoch 1/10:   8%|█▎              | 80/991 [19:42<3:40:14, 14.51s/batch, batch_loss=19.9, batch_index=81, batch_size=256]

Epoch 1/10:   8%|█▎              | 81/991 [19:42<3:42:37, 14.68s/batch, batch_loss=19.9, batch_index=81, batch_size=256]

Epoch 1/10:   8%|█▎              | 81/991 [19:57<3:42:37, 14.68s/batch, batch_loss=20.8, batch_index=82, batch_size=256]

Epoch 1/10:   8%|█▎              | 82/991 [19:57<3:41:45, 14.64s/batch, batch_loss=20.8, batch_index=82, batch_size=256]

Epoch 1/10:   8%|█▎              | 82/991 [20:12<3:41:45, 14.64s/batch, batch_loss=13.8, batch_index=83, batch_size=256]

Epoch 1/10:   8%|█▎              | 83/991 [20:12<3:45:31, 14.90s/batch, batch_loss=13.8, batch_index=83, batch_size=256]

Epoch 1/10:   8%|█▎              | 83/991 [20:30<3:45:31, 14.90s/batch, batch_loss=16.3, batch_index=84, batch_size=256]

Epoch 1/10:   8%|█▎              | 84/991 [20:30<3:57:17, 15.70s/batch, batch_loss=16.3, batch_index=84, batch_size=256]

Epoch 1/10:   8%|█▎              | 84/991 [20:45<3:57:17, 15.70s/batch, batch_loss=13.4, batch_index=85, batch_size=256]

Epoch 1/10:   9%|█▎              | 85/991 [20:45<3:52:52, 15.42s/batch, batch_loss=13.4, batch_index=85, batch_size=256]

Epoch 1/10:   9%|█▎              | 85/991 [21:00<3:52:52, 15.42s/batch, batch_loss=15.9, batch_index=86, batch_size=256]

Epoch 1/10:   9%|█▍              | 86/991 [21:00<3:51:56, 15.38s/batch, batch_loss=15.9, batch_index=86, batch_size=256]

Epoch 1/10:   9%|█▍              | 86/991 [21:15<3:51:56, 15.38s/batch, batch_loss=12.3, batch_index=87, batch_size=256]

Epoch 1/10:   9%|█▍              | 87/991 [21:15<3:50:42, 15.31s/batch, batch_loss=12.3, batch_index=87, batch_size=256]

Epoch 1/10:   9%|█▌                | 87/991 [21:29<3:50:42, 15.31s/batch, batch_loss=17, batch_index=88, batch_size=256]

Epoch 1/10:   9%|█▌                | 88/991 [21:29<3:44:58, 14.95s/batch, batch_loss=17, batch_index=88, batch_size=256]

Epoch 1/10:   9%|█▍              | 88/991 [21:43<3:44:58, 14.95s/batch, batch_loss=8.16, batch_index=89, batch_size=256]

Epoch 1/10:   9%|█▍              | 89/991 [21:43<3:40:06, 14.64s/batch, batch_loss=8.16, batch_index=89, batch_size=256]

Epoch 1/10:   9%|█▌               | 89/991 [22:01<3:40:06, 14.64s/batch, batch_loss=245, batch_index=90, batch_size=256]

Epoch 1/10:   9%|█▌               | 90/991 [22:01<3:54:10, 15.59s/batch, batch_loss=245, batch_index=90, batch_size=256]

Epoch 1/10:   9%|█▏           | 90/991 [22:15<3:54:10, 15.59s/batch, batch_loss=1.56e+3, batch_index=91, batch_size=256]

Epoch 1/10:   9%|█▏           | 91/991 [22:15<3:45:56, 15.06s/batch, batch_loss=1.56e+3, batch_index=91, batch_size=256]

Epoch 1/10:   9%|█▋                | 91/991 [22:30<3:45:56, 15.06s/batch, batch_loss=34, batch_index=92, batch_size=256]

Epoch 1/10:   9%|█▋                | 92/991 [22:30<3:44:55, 15.01s/batch, batch_loss=34, batch_index=92, batch_size=256]

Epoch 1/10:   9%|█▍              | 92/991 [22:45<3:44:55, 15.01s/batch, batch_loss=34.6, batch_index=93, batch_size=256]

Epoch 1/10:   9%|█▌              | 93/991 [22:45<3:44:32, 15.00s/batch, batch_loss=34.6, batch_index=93, batch_size=256]

Epoch 1/10:   9%|█▌              | 93/991 [22:59<3:44:32, 15.00s/batch, batch_loss=35.7, batch_index=94, batch_size=256]

Epoch 1/10:   9%|█▌              | 94/991 [22:59<3:39:09, 14.66s/batch, batch_loss=35.7, batch_index=94, batch_size=256]

Epoch 1/10:   9%|█▌              | 94/991 [23:13<3:39:09, 14.66s/batch, batch_loss=30.2, batch_index=95, batch_size=256]

Epoch 1/10:  10%|█▌              | 95/991 [23:13<3:36:55, 14.53s/batch, batch_loss=30.2, batch_index=95, batch_size=256]

Epoch 1/10:  10%|█▋                | 95/991 [23:27<3:36:55, 14.53s/batch, batch_loss=28, batch_index=96, batch_size=256]

Epoch 1/10:  10%|█▋                | 96/991 [23:27<3:37:04, 14.55s/batch, batch_loss=28, batch_index=96, batch_size=256]

Epoch 1/10:  10%|█▌              | 96/991 [23:42<3:37:04, 14.55s/batch, batch_loss=25.1, batch_index=97, batch_size=256]

Epoch 1/10:  10%|█▌              | 97/991 [23:42<3:38:22, 14.66s/batch, batch_loss=25.1, batch_index=97, batch_size=256]

Epoch 1/10:  10%|█▌              | 97/991 [23:57<3:38:22, 14.66s/batch, batch_loss=33.4, batch_index=98, batch_size=256]

Epoch 1/10:  10%|█▌              | 98/991 [23:57<3:39:04, 14.72s/batch, batch_loss=33.4, batch_index=98, batch_size=256]

Epoch 1/10:  10%|█▊                | 98/991 [24:11<3:39:04, 14.72s/batch, batch_loss=27, batch_index=99, batch_size=256]

Epoch 1/10:  10%|█▊                | 99/991 [24:11<3:35:07, 14.47s/batch, batch_loss=27, batch_index=99, batch_size=256]

Epoch 1/10:  10%|█▋               | 99/991 [24:26<3:35:07, 14.47s/batch, batch_loss=31, batch_index=100, batch_size=256]

Epoch 1/10:  10%|█▌              | 100/991 [24:26<3:36:03, 14.55s/batch, batch_loss=31, batch_index=100, batch_size=256]

Epoch 1/10:  10%|█▍            | 100/991 [24:41<3:36:03, 14.55s/batch, batch_loss=26.2, batch_index=101, batch_size=256]

Epoch 1/10:  10%|█▍            | 101/991 [24:41<3:38:20, 14.72s/batch, batch_loss=26.2, batch_index=101, batch_size=256]

Epoch 1/10:  10%|█▍            | 101/991 [24:56<3:38:20, 14.72s/batch, batch_loss=61.4, batch_index=102, batch_size=256]

Epoch 1/10:  10%|█▍            | 102/991 [24:56<3:40:00, 14.85s/batch, batch_loss=61.4, batch_index=102, batch_size=256]

Epoch 1/10:  10%|█▌             | 102/991 [25:11<3:40:00, 14.85s/batch, batch_loss=939, batch_index=103, batch_size=256]

Epoch 1/10:  10%|█▌             | 103/991 [25:11<3:40:42, 14.91s/batch, batch_loss=939, batch_index=103, batch_size=256]

Epoch 1/10:  10%|█▍            | 103/991 [25:26<3:40:42, 14.91s/batch, batch_loss=25.6, batch_index=104, batch_size=256]

Epoch 1/10:  10%|█▍            | 104/991 [25:26<3:40:25, 14.91s/batch, batch_loss=25.6, batch_index=104, batch_size=256]

Epoch 1/10:  10%|█▍            | 104/991 [25:41<3:40:25, 14.91s/batch, batch_loss=14.4, batch_index=105, batch_size=256]

Epoch 1/10:  11%|█▍            | 105/991 [25:41<3:42:12, 15.05s/batch, batch_loss=14.4, batch_index=105, batch_size=256]

Epoch 1/10:  11%|█▍            | 105/991 [25:55<3:42:12, 15.05s/batch, batch_loss=20.4, batch_index=106, batch_size=256]

Epoch 1/10:  11%|█▍            | 106/991 [25:55<3:37:57, 14.78s/batch, batch_loss=20.4, batch_index=106, batch_size=256]

Epoch 1/10:  11%|█▍            | 106/991 [26:10<3:37:57, 14.78s/batch, batch_loss=26.9, batch_index=107, batch_size=256]

Epoch 1/10:  11%|█▌            | 107/991 [26:10<3:35:12, 14.61s/batch, batch_loss=26.9, batch_index=107, batch_size=256]

Epoch 1/10:  11%|█▌            | 107/991 [26:24<3:35:12, 14.61s/batch, batch_loss=36.2, batch_index=108, batch_size=256]

Epoch 1/10:  11%|█▌            | 108/991 [26:24<3:35:32, 14.65s/batch, batch_loss=36.2, batch_index=108, batch_size=256]

Epoch 1/10:  11%|█▌            | 108/991 [26:39<3:35:32, 14.65s/batch, batch_loss=26.2, batch_index=109, batch_size=256]

Epoch 1/10:  11%|█▌            | 109/991 [26:39<3:34:19, 14.58s/batch, batch_loss=26.2, batch_index=109, batch_size=256]

Epoch 1/10:  11%|█▌            | 109/991 [26:54<3:34:19, 14.58s/batch, batch_loss=23.2, batch_index=110, batch_size=256]

Epoch 1/10:  11%|█▌            | 110/991 [26:54<3:35:43, 14.69s/batch, batch_loss=23.2, batch_index=110, batch_size=256]

Epoch 1/10:  11%|█▌            | 110/991 [27:09<3:35:43, 14.69s/batch, batch_loss=39.4, batch_index=111, batch_size=256]

Epoch 1/10:  11%|█▌            | 111/991 [27:09<3:37:41, 14.84s/batch, batch_loss=39.4, batch_index=111, batch_size=256]

Epoch 1/10:  11%|█▌            | 111/991 [27:24<3:37:41, 14.84s/batch, batch_loss=33.1, batch_index=112, batch_size=256]

Epoch 1/10:  11%|█▌            | 112/991 [27:24<3:36:29, 14.78s/batch, batch_loss=33.1, batch_index=112, batch_size=256]

Epoch 1/10:  11%|█▌            | 112/991 [27:39<3:36:29, 14.78s/batch, batch_loss=22.2, batch_index=113, batch_size=256]

Epoch 1/10:  11%|█▌            | 113/991 [27:39<3:36:44, 14.81s/batch, batch_loss=22.2, batch_index=113, batch_size=256]

Epoch 1/10:  11%|█▌            | 113/991 [27:57<3:36:44, 14.81s/batch, batch_loss=20.7, batch_index=114, batch_size=256]

Epoch 1/10:  12%|█▌            | 114/991 [27:57<3:51:07, 15.81s/batch, batch_loss=20.7, batch_index=114, batch_size=256]

Epoch 1/10:  12%|█▌            | 114/991 [28:11<3:51:07, 15.81s/batch, batch_loss=49.3, batch_index=115, batch_size=256]

Epoch 1/10:  12%|█▌            | 115/991 [28:11<3:45:00, 15.41s/batch, batch_loss=49.3, batch_index=115, batch_size=256]

Epoch 1/10:  12%|█▌            | 115/991 [28:26<3:45:00, 15.41s/batch, batch_loss=18.4, batch_index=116, batch_size=256]

Epoch 1/10:  12%|█▋            | 116/991 [28:26<3:41:33, 15.19s/batch, batch_loss=18.4, batch_index=116, batch_size=256]

Epoch 1/10:  12%|█▋            | 116/991 [28:40<3:41:33, 15.19s/batch, batch_loss=21.1, batch_index=117, batch_size=256]

Epoch 1/10:  12%|█▋            | 117/991 [28:40<3:37:26, 14.93s/batch, batch_loss=21.1, batch_index=117, batch_size=256]

Epoch 1/10:  12%|█▋            | 117/991 [28:55<3:37:26, 14.93s/batch, batch_loss=32.6, batch_index=118, batch_size=256]

Epoch 1/10:  12%|█▋            | 118/991 [28:55<3:35:12, 14.79s/batch, batch_loss=32.6, batch_index=118, batch_size=256]

Epoch 1/10:  12%|█▋            | 118/991 [29:10<3:35:12, 14.79s/batch, batch_loss=48.8, batch_index=119, batch_size=256]

Epoch 1/10:  12%|█▋            | 119/991 [29:10<3:35:49, 14.85s/batch, batch_loss=48.8, batch_index=119, batch_size=256]

Epoch 1/10:  12%|█▋            | 119/991 [29:24<3:35:49, 14.85s/batch, batch_loss=19.1, batch_index=120, batch_size=256]

Epoch 1/10:  12%|█▋            | 120/991 [29:24<3:32:10, 14.62s/batch, batch_loss=19.1, batch_index=120, batch_size=256]

Epoch 1/10:  12%|█▋            | 120/991 [29:38<3:32:10, 14.62s/batch, batch_loss=47.1, batch_index=121, batch_size=256]

Epoch 1/10:  12%|█▋            | 121/991 [29:38<3:28:59, 14.41s/batch, batch_loss=47.1, batch_index=121, batch_size=256]

Epoch 1/10:  12%|█▋            | 121/991 [29:52<3:28:59, 14.41s/batch, batch_loss=14.6, batch_index=122, batch_size=256]

Epoch 1/10:  12%|█▋            | 122/991 [29:52<3:29:50, 14.49s/batch, batch_loss=14.6, batch_index=122, batch_size=256]

Epoch 1/10:  12%|█▋            | 122/991 [30:07<3:29:50, 14.49s/batch, batch_loss=29.1, batch_index=123, batch_size=256]

Epoch 1/10:  12%|█▋            | 123/991 [30:07<3:29:08, 14.46s/batch, batch_loss=29.1, batch_index=123, batch_size=256]

Epoch 1/10:  12%|█▎         | 123/991 [30:21<3:29:08, 14.46s/batch, batch_loss=3.51e+3, batch_index=124, batch_size=256]

Epoch 1/10:  13%|█▍         | 124/991 [30:21<3:28:38, 14.44s/batch, batch_loss=3.51e+3, batch_index=124, batch_size=256]

Epoch 1/10:  13%|█▊            | 124/991 [30:35<3:28:38, 14.44s/batch, batch_loss=12.6, batch_index=125, batch_size=256]

Epoch 1/10:  13%|█▊            | 125/991 [30:35<3:26:45, 14.33s/batch, batch_loss=12.6, batch_index=125, batch_size=256]

Epoch 1/10:  13%|█▊            | 125/991 [30:50<3:26:45, 14.33s/batch, batch_loss=17.4, batch_index=126, batch_size=256]

Epoch 1/10:  13%|█▊            | 126/991 [30:50<3:28:29, 14.46s/batch, batch_loss=17.4, batch_index=126, batch_size=256]

Epoch 1/10:  13%|█▌          | 126/991 [31:05<3:28:29, 14.46s/batch, batch_loss=1.9e+3, batch_index=127, batch_size=256]

Epoch 1/10:  13%|█▌          | 127/991 [31:05<3:29:41, 14.56s/batch, batch_loss=1.9e+3, batch_index=127, batch_size=256]

Epoch 1/10:  13%|█▍         | 127/991 [31:19<3:29:41, 14.56s/batch, batch_loss=1.59e+3, batch_index=128, batch_size=256]

Epoch 1/10:  13%|█▍         | 128/991 [31:19<3:28:29, 14.50s/batch, batch_loss=1.59e+3, batch_index=128, batch_size=256]

Epoch 1/10:  13%|█▉             | 128/991 [31:33<3:28:29, 14.50s/batch, batch_loss=233, batch_index=129, batch_size=256]

Epoch 1/10:  13%|█▉             | 129/991 [31:33<3:27:33, 14.45s/batch, batch_loss=233, batch_index=129, batch_size=256]

Epoch 1/10:  13%|█▍         | 129/991 [31:49<3:27:33, 14.45s/batch, batch_loss=1.02e+3, batch_index=130, batch_size=256]

Epoch 1/10:  13%|█▍         | 130/991 [31:49<3:30:18, 14.66s/batch, batch_loss=1.02e+3, batch_index=130, batch_size=256]

Epoch 1/10:  13%|█▍         | 130/991 [32:03<3:30:18, 14.66s/batch, batch_loss=8.35e+3, batch_index=131, batch_size=256]

Epoch 1/10:  13%|█▍         | 131/991 [32:03<3:28:08, 14.52s/batch, batch_loss=8.35e+3, batch_index=131, batch_size=256]

Epoch 1/10:  13%|█▊            | 131/991 [32:16<3:28:08, 14.52s/batch, batch_loss=24.1, batch_index=132, batch_size=256]

Epoch 1/10:  13%|█▊            | 132/991 [32:16<3:23:19, 14.20s/batch, batch_loss=24.1, batch_index=132, batch_size=256]

Epoch 1/10:  13%|█▊            | 132/991 [32:31<3:23:19, 14.20s/batch, batch_loss=10.3, batch_index=133, batch_size=256]

Epoch 1/10:  13%|█▉            | 133/991 [32:31<3:24:48, 14.32s/batch, batch_loss=10.3, batch_index=133, batch_size=256]

Epoch 1/10:  13%|█▉            | 133/991 [32:45<3:24:48, 14.32s/batch, batch_loss=15.5, batch_index=134, batch_size=256]

Epoch 1/10:  14%|█▉            | 134/991 [32:45<3:23:18, 14.23s/batch, batch_loss=15.5, batch_index=134, batch_size=256]

Epoch 1/10:  14%|█▉            | 134/991 [32:59<3:23:18, 14.23s/batch, batch_loss=47.2, batch_index=135, batch_size=256]

Epoch 1/10:  14%|█▉            | 135/991 [32:59<3:24:19, 14.32s/batch, batch_loss=47.2, batch_index=135, batch_size=256]

Epoch 1/10:  14%|█▉            | 135/991 [33:13<3:24:19, 14.32s/batch, batch_loss=19.5, batch_index=136, batch_size=256]

Epoch 1/10:  14%|█▉            | 136/991 [33:13<3:21:46, 14.16s/batch, batch_loss=19.5, batch_index=136, batch_size=256]

Epoch 1/10:  14%|█▉            | 136/991 [33:28<3:21:46, 14.16s/batch, batch_loss=15.4, batch_index=137, batch_size=256]

Epoch 1/10:  14%|█▉            | 137/991 [33:28<3:22:42, 14.24s/batch, batch_loss=15.4, batch_index=137, batch_size=256]

Epoch 1/10:  14%|█▉            | 137/991 [33:43<3:22:42, 14.24s/batch, batch_loss=19.9, batch_index=138, batch_size=256]

Epoch 1/10:  14%|█▉            | 138/991 [33:43<3:26:03, 14.49s/batch, batch_loss=19.9, batch_index=138, batch_size=256]

Epoch 1/10:  14%|█▉            | 138/991 [33:56<3:26:03, 14.49s/batch, batch_loss=8.03, batch_index=139, batch_size=256]

Epoch 1/10:  14%|█▉            | 139/991 [33:56<3:21:24, 14.18s/batch, batch_loss=8.03, batch_index=139, batch_size=256]

Epoch 1/10:  14%|█▉            | 139/991 [34:12<3:21:24, 14.18s/batch, batch_loss=14.3, batch_index=140, batch_size=256]

Epoch 1/10:  14%|█▉            | 140/991 [34:12<3:28:43, 14.72s/batch, batch_loss=14.3, batch_index=140, batch_size=256]

Epoch 1/10:  14%|█▉            | 140/991 [34:26<3:28:43, 14.72s/batch, batch_loss=6.57, batch_index=141, batch_size=256]

Epoch 1/10:  14%|█▉            | 141/991 [34:26<3:23:21, 14.35s/batch, batch_loss=6.57, batch_index=141, batch_size=256]

Epoch 1/10:  14%|█▉            | 141/991 [34:39<3:23:21, 14.35s/batch, batch_loss=7.92, batch_index=142, batch_size=256]

Epoch 1/10:  14%|██            | 142/991 [34:39<3:17:54, 13.99s/batch, batch_loss=7.92, batch_index=142, batch_size=256]

Epoch 1/10:  14%|██▎             | 142/991 [34:53<3:17:54, 13.99s/batch, batch_loss=14, batch_index=143, batch_size=256]

Epoch 1/10:  14%|██▎             | 143/991 [34:53<3:17:05, 13.94s/batch, batch_loss=14, batch_index=143, batch_size=256]

Epoch 1/10:  14%|██            | 143/991 [35:08<3:17:05, 13.94s/batch, batch_loss=28.8, batch_index=144, batch_size=256]

Epoch 1/10:  15%|██            | 144/991 [35:08<3:22:55, 14.37s/batch, batch_loss=28.8, batch_index=144, batch_size=256]

Epoch 1/10:  15%|██            | 144/991 [35:22<3:22:55, 14.37s/batch, batch_loss=22.2, batch_index=145, batch_size=256]

Epoch 1/10:  15%|██            | 145/991 [35:22<3:22:37, 14.37s/batch, batch_loss=22.2, batch_index=145, batch_size=256]

Epoch 1/10:  15%|██            | 145/991 [35:37<3:22:37, 14.37s/batch, batch_loss=19.5, batch_index=146, batch_size=256]

Epoch 1/10:  15%|██            | 146/991 [35:37<3:23:30, 14.45s/batch, batch_loss=19.5, batch_index=146, batch_size=256]

Epoch 1/10:  15%|██            | 146/991 [35:51<3:23:30, 14.45s/batch, batch_loss=9.41, batch_index=147, batch_size=256]

Epoch 1/10:  15%|██            | 147/991 [35:51<3:23:47, 14.49s/batch, batch_loss=9.41, batch_index=147, batch_size=256]

Epoch 1/10:  15%|██            | 147/991 [36:09<3:23:47, 14.49s/batch, batch_loss=35.2, batch_index=148, batch_size=256]

Epoch 1/10:  15%|██            | 148/991 [36:09<3:35:07, 15.31s/batch, batch_loss=35.2, batch_index=148, batch_size=256]

Epoch 1/10:  15%|██            | 148/991 [36:23<3:35:07, 15.31s/batch, batch_loss=22.4, batch_index=149, batch_size=256]

Epoch 1/10:  15%|██            | 149/991 [36:23<3:29:26, 14.92s/batch, batch_loss=22.4, batch_index=149, batch_size=256]

Epoch 1/10:  15%|██            | 149/991 [36:38<3:29:26, 14.92s/batch, batch_loss=16.2, batch_index=150, batch_size=256]

Epoch 1/10:  15%|██            | 150/991 [36:38<3:30:56, 15.05s/batch, batch_loss=16.2, batch_index=150, batch_size=256]

Epoch 1/10:  15%|██            | 150/991 [36:53<3:30:56, 15.05s/batch, batch_loss=27.6, batch_index=151, batch_size=256]

Epoch 1/10:  15%|██▏           | 151/991 [36:53<3:30:06, 15.01s/batch, batch_loss=27.6, batch_index=151, batch_size=256]

Epoch 1/10:  15%|██▏           | 151/991 [37:08<3:30:06, 15.01s/batch, batch_loss=19.9, batch_index=152, batch_size=256]

Epoch 1/10:  15%|██▏           | 152/991 [37:08<3:31:25, 15.12s/batch, batch_loss=19.9, batch_index=152, batch_size=256]

Epoch 1/10:  15%|██▏           | 152/991 [37:23<3:31:25, 15.12s/batch, batch_loss=28.3, batch_index=153, batch_size=256]

Epoch 1/10:  15%|██▏           | 153/991 [37:23<3:28:51, 14.95s/batch, batch_loss=28.3, batch_index=153, batch_size=256]

Epoch 1/10:  15%|██▏           | 153/991 [37:38<3:28:51, 14.95s/batch, batch_loss=26.1, batch_index=154, batch_size=256]

Epoch 1/10:  16%|██▏           | 154/991 [37:38<3:29:43, 15.03s/batch, batch_loss=26.1, batch_index=154, batch_size=256]

Epoch 1/10:  16%|██▏           | 154/991 [37:53<3:29:43, 15.03s/batch, batch_loss=35.1, batch_index=155, batch_size=256]

Epoch 1/10:  16%|██▏           | 155/991 [37:53<3:29:02, 15.00s/batch, batch_loss=35.1, batch_index=155, batch_size=256]

Epoch 1/10:  16%|██▏           | 155/991 [38:10<3:29:02, 15.00s/batch, batch_loss=9.36, batch_index=156, batch_size=256]

Epoch 1/10:  16%|██▏           | 156/991 [38:10<3:36:34, 15.56s/batch, batch_loss=9.36, batch_index=156, batch_size=256]

Epoch 1/10:  16%|██▏           | 156/991 [38:25<3:36:34, 15.56s/batch, batch_loss=37.2, batch_index=157, batch_size=256]

Epoch 1/10:  16%|██▏           | 157/991 [38:25<3:32:29, 15.29s/batch, batch_loss=37.2, batch_index=157, batch_size=256]

Epoch 1/10:  16%|██▏           | 157/991 [38:38<3:32:29, 15.29s/batch, batch_loss=16.1, batch_index=158, batch_size=256]

Epoch 1/10:  16%|██▏           | 158/991 [38:38<3:26:18, 14.86s/batch, batch_loss=16.1, batch_index=158, batch_size=256]

Epoch 1/10:  16%|██▏           | 158/991 [38:53<3:26:18, 14.86s/batch, batch_loss=10.5, batch_index=159, batch_size=256]

Epoch 1/10:  16%|██▏           | 159/991 [38:53<3:24:57, 14.78s/batch, batch_loss=10.5, batch_index=159, batch_size=256]

Epoch 1/10:  16%|██▏           | 159/991 [39:07<3:24:57, 14.78s/batch, batch_loss=12.7, batch_index=160, batch_size=256]

Epoch 1/10:  16%|██▎           | 160/991 [39:08<3:23:16, 14.68s/batch, batch_loss=12.7, batch_index=160, batch_size=256]

Epoch 1/10:  16%|██▍            | 160/991 [39:22<3:23:16, 14.68s/batch, batch_loss=463, batch_index=161, batch_size=256]

Epoch 1/10:  16%|██▍            | 161/991 [39:22<3:22:09, 14.61s/batch, batch_loss=463, batch_index=161, batch_size=256]

Epoch 1/10:  16%|██▌             | 161/991 [39:37<3:22:09, 14.61s/batch, batch_loss=24, batch_index=162, batch_size=256]

Epoch 1/10:  16%|██▌             | 162/991 [39:37<3:22:41, 14.67s/batch, batch_loss=24, batch_index=162, batch_size=256]

Epoch 1/10:  16%|██▎           | 162/991 [39:51<3:22:41, 14.67s/batch, batch_loss=9.49, batch_index=163, batch_size=256]

Epoch 1/10:  16%|██▎           | 163/991 [39:51<3:19:43, 14.47s/batch, batch_loss=9.49, batch_index=163, batch_size=256]

Epoch 1/10:  16%|██▎           | 163/991 [40:06<3:19:43, 14.47s/batch, batch_loss=18.3, batch_index=164, batch_size=256]

Epoch 1/10:  17%|██▎           | 164/991 [40:06<3:21:25, 14.61s/batch, batch_loss=18.3, batch_index=164, batch_size=256]

Epoch 1/10:  17%|██▎           | 164/991 [40:19<3:21:25, 14.61s/batch, batch_loss=19.6, batch_index=165, batch_size=256]

Epoch 1/10:  17%|██▎           | 165/991 [40:19<3:16:37, 14.28s/batch, batch_loss=19.6, batch_index=165, batch_size=256]

Epoch 1/10:  17%|██▎           | 165/991 [40:35<3:16:37, 14.28s/batch, batch_loss=12.4, batch_index=166, batch_size=256]

Epoch 1/10:  17%|██▎           | 166/991 [40:35<3:24:27, 14.87s/batch, batch_loss=12.4, batch_index=166, batch_size=256]

Epoch 1/10:  17%|██▎           | 166/991 [40:50<3:24:27, 14.87s/batch, batch_loss=35.2, batch_index=167, batch_size=256]

Epoch 1/10:  17%|██▎           | 167/991 [40:50<3:22:14, 14.73s/batch, batch_loss=35.2, batch_index=167, batch_size=256]

Epoch 1/10:  17%|██▎           | 167/991 [41:04<3:22:14, 14.73s/batch, batch_loss=20.4, batch_index=168, batch_size=256]

Epoch 1/10:  17%|██▎           | 168/991 [41:04<3:19:00, 14.51s/batch, batch_loss=20.4, batch_index=168, batch_size=256]

Epoch 1/10:  17%|██▎           | 168/991 [41:18<3:19:00, 14.51s/batch, batch_loss=25.2, batch_index=169, batch_size=256]

Epoch 1/10:  17%|██▍           | 169/991 [41:18<3:18:29, 14.49s/batch, batch_loss=25.2, batch_index=169, batch_size=256]

Epoch 1/10:  17%|██▍           | 169/991 [41:34<3:18:29, 14.49s/batch, batch_loss=13.5, batch_index=170, batch_size=256]

Epoch 1/10:  17%|██▍           | 170/991 [41:34<3:22:34, 14.80s/batch, batch_loss=13.5, batch_index=170, batch_size=256]

Epoch 1/10:  17%|██▌            | 170/991 [41:48<3:22:34, 14.80s/batch, batch_loss=8.9, batch_index=171, batch_size=256]

Epoch 1/10:  17%|██▌            | 171/991 [41:48<3:19:26, 14.59s/batch, batch_loss=8.9, batch_index=171, batch_size=256]

Epoch 1/10:  17%|██▍           | 171/991 [42:05<3:19:26, 14.59s/batch, batch_loss=12.4, batch_index=172, batch_size=256]

Epoch 1/10:  17%|██▍           | 172/991 [42:05<3:29:37, 15.36s/batch, batch_loss=12.4, batch_index=172, batch_size=256]

Epoch 1/10:  17%|██▍           | 172/991 [42:20<3:29:37, 15.36s/batch, batch_loss=8.68, batch_index=173, batch_size=256]

Epoch 1/10:  17%|██▍           | 173/991 [42:20<3:26:26, 15.14s/batch, batch_loss=8.68, batch_index=173, batch_size=256]

Epoch 1/10:  17%|█▉         | 173/991 [42:35<3:26:26, 15.14s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 1/10:  18%|█▉         | 174/991 [42:35<3:25:30, 15.09s/batch, batch_loss=3.02e+4, batch_index=174, batch_size=256]

Epoch 1/10:  18%|██▍           | 174/991 [42:50<3:25:30, 15.09s/batch, batch_loss=28.9, batch_index=175, batch_size=256]

Epoch 1/10:  18%|██▍           | 175/991 [42:50<3:27:26, 15.25s/batch, batch_loss=28.9, batch_index=175, batch_size=256]

Epoch 1/10:  18%|██▍           | 175/991 [43:05<3:27:26, 15.25s/batch, batch_loss=52.6, batch_index=176, batch_size=256]

Epoch 1/10:  18%|██▍           | 176/991 [43:05<3:24:46, 15.08s/batch, batch_loss=52.6, batch_index=176, batch_size=256]

Epoch 1/10:  18%|██▍           | 176/991 [43:20<3:24:46, 15.08s/batch, batch_loss=45.4, batch_index=177, batch_size=256]

Epoch 1/10:  18%|██▌           | 177/991 [43:20<3:25:00, 15.11s/batch, batch_loss=45.4, batch_index=177, batch_size=256]

Epoch 1/10:  18%|██▌           | 177/991 [43:36<3:25:00, 15.11s/batch, batch_loss=31.8, batch_index=178, batch_size=256]

Epoch 1/10:  18%|██▌           | 178/991 [43:36<3:27:19, 15.30s/batch, batch_loss=31.8, batch_index=178, batch_size=256]

Epoch 1/10:  18%|██▌           | 178/991 [43:51<3:27:19, 15.30s/batch, batch_loss=20.2, batch_index=179, batch_size=256]

Epoch 1/10:  18%|██▌           | 179/991 [43:51<3:24:32, 15.11s/batch, batch_loss=20.2, batch_index=179, batch_size=256]

Epoch 1/10:  18%|██▌           | 179/991 [44:06<3:24:32, 15.11s/batch, batch_loss=9.72, batch_index=180, batch_size=256]

Epoch 1/10:  18%|██▌           | 180/991 [44:06<3:23:30, 15.06s/batch, batch_loss=9.72, batch_index=180, batch_size=256]

Epoch 1/10:  18%|█▉         | 180/991 [44:23<3:23:30, 15.06s/batch, batch_loss=2.51e+4, batch_index=181, batch_size=256]

Epoch 1/10:  18%|██         | 181/991 [44:23<3:32:29, 15.74s/batch, batch_loss=2.51e+4, batch_index=181, batch_size=256]

Epoch 1/10:  18%|██▌           | 181/991 [44:37<3:32:29, 15.74s/batch, batch_loss=16.1, batch_index=182, batch_size=256]

Epoch 1/10:  18%|██▌           | 182/991 [44:37<3:25:51, 15.27s/batch, batch_loss=16.1, batch_index=182, batch_size=256]

Epoch 1/10:  18%|██▌           | 182/991 [44:52<3:25:51, 15.27s/batch, batch_loss=22.7, batch_index=183, batch_size=256]

Epoch 1/10:  18%|██▌           | 183/991 [44:52<3:22:57, 15.07s/batch, batch_loss=22.7, batch_index=183, batch_size=256]

Epoch 1/10:  18%|██▌           | 183/991 [45:05<3:22:57, 15.07s/batch, batch_loss=23.7, batch_index=184, batch_size=256]

Epoch 1/10:  19%|██▌           | 184/991 [45:05<3:15:48, 14.56s/batch, batch_loss=23.7, batch_index=184, batch_size=256]

Epoch 1/10:  19%|██▌           | 184/991 [45:20<3:15:48, 14.56s/batch, batch_loss=23.5, batch_index=185, batch_size=256]

Epoch 1/10:  19%|██▌           | 185/991 [45:20<3:18:59, 14.81s/batch, batch_loss=23.5, batch_index=185, batch_size=256]

Epoch 1/10:  19%|██▌           | 185/991 [45:36<3:18:59, 14.81s/batch, batch_loss=22.2, batch_index=186, batch_size=256]

Epoch 1/10:  19%|██▋           | 186/991 [45:36<3:20:40, 14.96s/batch, batch_loss=22.2, batch_index=186, batch_size=256]

Epoch 1/10:  19%|██▋           | 186/991 [45:50<3:20:40, 14.96s/batch, batch_loss=17.9, batch_index=187, batch_size=256]

Epoch 1/10:  19%|██▋           | 187/991 [45:50<3:17:47, 14.76s/batch, batch_loss=17.9, batch_index=187, batch_size=256]

Epoch 1/10:  19%|██▋           | 187/991 [46:04<3:17:47, 14.76s/batch, batch_loss=17.6, batch_index=188, batch_size=256]

Epoch 1/10:  19%|██▋           | 188/991 [46:04<3:14:36, 14.54s/batch, batch_loss=17.6, batch_index=188, batch_size=256]

Epoch 1/10:  19%|██▋           | 188/991 [46:18<3:14:36, 14.54s/batch, batch_loss=30.9, batch_index=189, batch_size=256]

Epoch 1/10:  19%|██▋           | 189/991 [46:18<3:13:08, 14.45s/batch, batch_loss=30.9, batch_index=189, batch_size=256]

Epoch 1/10:  19%|██▋           | 189/991 [46:33<3:13:08, 14.45s/batch, batch_loss=28.3, batch_index=190, batch_size=256]

Epoch 1/10:  19%|██▋           | 190/991 [46:33<3:13:24, 14.49s/batch, batch_loss=28.3, batch_index=190, batch_size=256]

Epoch 1/10:  19%|██▋           | 190/991 [46:48<3:13:24, 14.49s/batch, batch_loss=25.2, batch_index=191, batch_size=256]

Epoch 1/10:  19%|██▋           | 191/991 [46:48<3:15:27, 14.66s/batch, batch_loss=25.2, batch_index=191, batch_size=256]

Epoch 1/10:  19%|██▋           | 191/991 [47:03<3:15:27, 14.66s/batch, batch_loss=21.6, batch_index=192, batch_size=256]

Epoch 1/10:  19%|██▋           | 192/991 [47:03<3:16:22, 14.75s/batch, batch_loss=21.6, batch_index=192, batch_size=256]

Epoch 1/10:  19%|██▋           | 192/991 [47:17<3:16:22, 14.75s/batch, batch_loss=28.7, batch_index=193, batch_size=256]

Epoch 1/10:  19%|██▋           | 193/991 [47:17<3:13:59, 14.59s/batch, batch_loss=28.7, batch_index=193, batch_size=256]

Epoch 1/10:  19%|██▋           | 193/991 [47:32<3:13:59, 14.59s/batch, batch_loss=13.2, batch_index=194, batch_size=256]

Epoch 1/10:  20%|██▋           | 194/991 [47:32<3:13:48, 14.59s/batch, batch_loss=13.2, batch_index=194, batch_size=256]

Epoch 1/10:  20%|██▋           | 194/991 [47:45<3:13:48, 14.59s/batch, batch_loss=4.74, batch_index=195, batch_size=256]

Epoch 1/10:  20%|██▊           | 195/991 [47:45<3:09:37, 14.29s/batch, batch_loss=4.74, batch_index=195, batch_size=256]

Epoch 1/10:  20%|██▊           | 195/991 [48:00<3:09:37, 14.29s/batch, batch_loss=11.2, batch_index=196, batch_size=256]

Epoch 1/10:  20%|██▊           | 196/991 [48:00<3:11:57, 14.49s/batch, batch_loss=11.2, batch_index=196, batch_size=256]

Epoch 1/10:  20%|██▊           | 196/991 [48:17<3:11:57, 14.49s/batch, batch_loss=17.8, batch_index=197, batch_size=256]

Epoch 1/10:  20%|██▊           | 197/991 [48:17<3:19:20, 15.06s/batch, batch_loss=17.8, batch_index=197, batch_size=256]

Epoch 1/10:  20%|██▊           | 197/991 [48:31<3:19:20, 15.06s/batch, batch_loss=14.2, batch_index=198, batch_size=256]

Epoch 1/10:  20%|██▊           | 198/991 [48:31<3:15:51, 14.82s/batch, batch_loss=14.2, batch_index=198, batch_size=256]

Epoch 1/10:  20%|██▊           | 198/991 [48:46<3:15:51, 14.82s/batch, batch_loss=21.5, batch_index=199, batch_size=256]

Epoch 1/10:  20%|██▊           | 199/991 [48:46<3:17:34, 14.97s/batch, batch_loss=21.5, batch_index=199, batch_size=256]

Epoch 1/10:  20%|██▊           | 199/991 [49:01<3:17:34, 14.97s/batch, batch_loss=12.5, batch_index=200, batch_size=256]

Epoch 1/10:  20%|██▊           | 200/991 [49:01<3:17:41, 15.00s/batch, batch_loss=12.5, batch_index=200, batch_size=256]

Epoch 1/10:  20%|██▊           | 200/991 [49:16<3:17:41, 15.00s/batch, batch_loss=19.6, batch_index=201, batch_size=256]

Epoch 1/10:  20%|██▊           | 201/991 [49:16<3:15:29, 14.85s/batch, batch_loss=19.6, batch_index=201, batch_size=256]

Epoch 1/10:  20%|██▊           | 201/991 [49:31<3:15:29, 14.85s/batch, batch_loss=14.1, batch_index=202, batch_size=256]

Epoch 1/10:  20%|██▊           | 202/991 [49:31<3:16:24, 14.94s/batch, batch_loss=14.1, batch_index=202, batch_size=256]

Epoch 1/10:  20%|██▊           | 202/991 [49:45<3:16:24, 14.94s/batch, batch_loss=17.6, batch_index=203, batch_size=256]

Epoch 1/10:  20%|██▊           | 203/991 [49:45<3:13:19, 14.72s/batch, batch_loss=17.6, batch_index=203, batch_size=256]

Epoch 1/10:  20%|██▊           | 203/991 [50:01<3:13:19, 14.72s/batch, batch_loss=43.4, batch_index=204, batch_size=256]

Epoch 1/10:  21%|██▉           | 204/991 [50:01<3:17:02, 15.02s/batch, batch_loss=43.4, batch_index=204, batch_size=256]

Epoch 1/10:  21%|██▉           | 204/991 [50:16<3:17:02, 15.02s/batch, batch_loss=28.8, batch_index=205, batch_size=256]

Epoch 1/10:  21%|██▉           | 205/991 [50:16<3:15:57, 14.96s/batch, batch_loss=28.8, batch_index=205, batch_size=256]

Epoch 1/10:  21%|██▉           | 205/991 [50:33<3:15:57, 14.96s/batch, batch_loss=10.7, batch_index=206, batch_size=256]

Epoch 1/10:  21%|██▉           | 206/991 [50:33<3:24:54, 15.66s/batch, batch_loss=10.7, batch_index=206, batch_size=256]

Epoch 1/10:  21%|██▉           | 206/991 [50:48<3:24:54, 15.66s/batch, batch_loss=11.1, batch_index=207, batch_size=256]

Epoch 1/10:  21%|██▉           | 207/991 [50:48<3:21:25, 15.41s/batch, batch_loss=11.1, batch_index=207, batch_size=256]

Epoch 1/10:  21%|██▉           | 207/991 [51:03<3:21:25, 15.41s/batch, batch_loss=13.8, batch_index=208, batch_size=256]

Epoch 1/10:  21%|██▉           | 208/991 [51:03<3:18:37, 15.22s/batch, batch_loss=13.8, batch_index=208, batch_size=256]

Epoch 1/10:  21%|███▎            | 208/991 [51:17<3:18:37, 15.22s/batch, batch_loss=11, batch_index=209, batch_size=256]

Epoch 1/10:  21%|███▎            | 209/991 [51:17<3:16:38, 15.09s/batch, batch_loss=11, batch_index=209, batch_size=256]

Epoch 1/10:  21%|██▉           | 209/991 [51:32<3:16:38, 15.09s/batch, batch_loss=29.3, batch_index=210, batch_size=256]

Epoch 1/10:  21%|██▉           | 210/991 [51:32<3:14:58, 14.98s/batch, batch_loss=29.3, batch_index=210, batch_size=256]

Epoch 1/10:  21%|██▉           | 210/991 [51:47<3:14:58, 14.98s/batch, batch_loss=14.4, batch_index=211, batch_size=256]

Epoch 1/10:  21%|██▉           | 211/991 [51:47<3:16:20, 15.10s/batch, batch_loss=14.4, batch_index=211, batch_size=256]

Epoch 1/10:  21%|██▉           | 211/991 [52:03<3:16:20, 15.10s/batch, batch_loss=27.6, batch_index=212, batch_size=256]

Epoch 1/10:  21%|██▉           | 212/991 [52:03<3:17:11, 15.19s/batch, batch_loss=27.6, batch_index=212, batch_size=256]

Epoch 1/10:  21%|██▉           | 212/991 [52:18<3:17:11, 15.19s/batch, batch_loss=4.47, batch_index=213, batch_size=256]

Epoch 1/10:  21%|███           | 213/991 [52:18<3:16:37, 15.16s/batch, batch_loss=4.47, batch_index=213, batch_size=256]

Epoch 1/10:  21%|███           | 213/991 [52:34<3:16:37, 15.16s/batch, batch_loss=16.4, batch_index=214, batch_size=256]

Epoch 1/10:  22%|███           | 214/991 [52:34<3:18:16, 15.31s/batch, batch_loss=16.4, batch_index=214, batch_size=256]

Epoch 1/10:  22%|███           | 214/991 [52:49<3:18:16, 15.31s/batch, batch_loss=23.3, batch_index=215, batch_size=256]

Epoch 1/10:  22%|███           | 215/991 [52:49<3:18:13, 15.33s/batch, batch_loss=23.3, batch_index=215, batch_size=256]

Epoch 1/10:  22%|███           | 215/991 [53:04<3:18:13, 15.33s/batch, batch_loss=14.7, batch_index=216, batch_size=256]

Epoch 1/10:  22%|███           | 216/991 [53:04<3:16:04, 15.18s/batch, batch_loss=14.7, batch_index=216, batch_size=256]

Epoch 1/10:  22%|███           | 216/991 [53:18<3:16:04, 15.18s/batch, batch_loss=17.6, batch_index=217, batch_size=256]

Epoch 1/10:  22%|███           | 217/991 [53:18<3:10:22, 14.76s/batch, batch_loss=17.6, batch_index=217, batch_size=256]

Epoch 1/10:  22%|███           | 217/991 [53:32<3:10:22, 14.76s/batch, batch_loss=27.3, batch_index=218, batch_size=256]

Epoch 1/10:  22%|███           | 218/991 [53:32<3:09:48, 14.73s/batch, batch_loss=27.3, batch_index=218, batch_size=256]

Epoch 1/10:  22%|███           | 218/991 [53:46<3:09:48, 14.73s/batch, batch_loss=34.5, batch_index=219, batch_size=256]

Epoch 1/10:  22%|███           | 219/991 [53:46<3:05:51, 14.44s/batch, batch_loss=34.5, batch_index=219, batch_size=256]

Epoch 1/10:  22%|███▌            | 219/991 [54:01<3:05:51, 14.44s/batch, batch_loss=35, batch_index=220, batch_size=256]

Epoch 1/10:  22%|███▌            | 220/991 [54:01<3:06:20, 14.50s/batch, batch_loss=35, batch_index=220, batch_size=256]

Epoch 1/10:  22%|███           | 220/991 [54:15<3:06:20, 14.50s/batch, batch_loss=28.3, batch_index=221, batch_size=256]

Epoch 1/10:  22%|███           | 221/991 [54:15<3:07:25, 14.60s/batch, batch_loss=28.3, batch_index=221, batch_size=256]

Epoch 1/10:  22%|███           | 221/991 [54:31<3:07:25, 14.60s/batch, batch_loss=21.8, batch_index=222, batch_size=256]

Epoch 1/10:  22%|███▏          | 222/991 [54:31<3:09:32, 14.79s/batch, batch_loss=21.8, batch_index=222, batch_size=256]

Epoch 1/10:  22%|███▏          | 222/991 [54:45<3:09:32, 14.79s/batch, batch_loss=36.6, batch_index=223, batch_size=256]

Epoch 1/10:  23%|███▏          | 223/991 [54:45<3:08:03, 14.69s/batch, batch_loss=36.6, batch_index=223, batch_size=256]

Epoch 1/10:  23%|███▏          | 223/991 [55:00<3:08:03, 14.69s/batch, batch_loss=18.2, batch_index=224, batch_size=256]

Epoch 1/10:  23%|███▏          | 224/991 [55:00<3:07:25, 14.66s/batch, batch_loss=18.2, batch_index=224, batch_size=256]

Epoch 1/10:  23%|███▏          | 224/991 [55:14<3:07:25, 14.66s/batch, batch_loss=12.7, batch_index=225, batch_size=256]

Epoch 1/10:  23%|███▏          | 225/991 [55:14<3:06:39, 14.62s/batch, batch_loss=12.7, batch_index=225, batch_size=256]

Epoch 1/10:  23%|███▏          | 225/991 [55:28<3:06:39, 14.62s/batch, batch_loss=31.6, batch_index=226, batch_size=256]

Epoch 1/10:  23%|███▏          | 226/991 [55:28<3:04:35, 14.48s/batch, batch_loss=31.6, batch_index=226, batch_size=256]

Epoch 1/10:  23%|██▌        | 226/991 [55:43<3:04:35, 14.48s/batch, batch_loss=2.42e+3, batch_index=227, batch_size=256]

Epoch 1/10:  23%|██▌        | 227/991 [55:43<3:05:51, 14.60s/batch, batch_loss=2.42e+3, batch_index=227, batch_size=256]

Epoch 1/10:  23%|██▌        | 227/991 [55:58<3:05:51, 14.60s/batch, batch_loss=3.61e+3, batch_index=228, batch_size=256]

Epoch 1/10:  23%|██▌        | 228/991 [55:58<3:06:15, 14.65s/batch, batch_loss=3.61e+3, batch_index=228, batch_size=256]

Epoch 1/10:  23%|███▏          | 228/991 [56:12<3:06:15, 14.65s/batch, batch_loss=15.4, batch_index=229, batch_size=256]

Epoch 1/10:  23%|███▏          | 229/991 [56:12<3:03:13, 14.43s/batch, batch_loss=15.4, batch_index=229, batch_size=256]

Epoch 1/10:  23%|███▏          | 229/991 [56:28<3:03:13, 14.43s/batch, batch_loss=11.6, batch_index=230, batch_size=256]

Epoch 1/10:  23%|███▏          | 230/991 [56:28<3:10:47, 15.04s/batch, batch_loss=11.6, batch_index=230, batch_size=256]

Epoch 1/10:  23%|███▏          | 230/991 [56:43<3:10:47, 15.04s/batch, batch_loss=16.2, batch_index=231, batch_size=256]

Epoch 1/10:  23%|███▎          | 231/991 [56:43<3:08:55, 14.91s/batch, batch_loss=16.2, batch_index=231, batch_size=256]

Epoch 1/10:  23%|███▎          | 231/991 [56:58<3:08:55, 14.91s/batch, batch_loss=9.96, batch_index=232, batch_size=256]

Epoch 1/10:  23%|███▎          | 232/991 [56:58<3:10:10, 15.03s/batch, batch_loss=9.96, batch_index=232, batch_size=256]

Epoch 1/10:  23%|███▎          | 232/991 [57:13<3:10:10, 15.03s/batch, batch_loss=8.62, batch_index=233, batch_size=256]

Epoch 1/10:  24%|███▎          | 233/991 [57:13<3:09:51, 15.03s/batch, batch_loss=8.62, batch_index=233, batch_size=256]

Epoch 1/10:  24%|███▎          | 233/991 [57:28<3:09:51, 15.03s/batch, batch_loss=19.2, batch_index=234, batch_size=256]

Epoch 1/10:  24%|███▎          | 234/991 [57:28<3:08:44, 14.96s/batch, batch_loss=19.2, batch_index=234, batch_size=256]

Epoch 1/10:  24%|███▎          | 234/991 [57:42<3:08:44, 14.96s/batch, batch_loss=19.6, batch_index=235, batch_size=256]

Epoch 1/10:  24%|███▎          | 235/991 [57:42<3:03:59, 14.60s/batch, batch_loss=19.6, batch_index=235, batch_size=256]

Epoch 1/10:  24%|███▎          | 235/991 [57:57<3:03:59, 14.60s/batch, batch_loss=28.2, batch_index=236, batch_size=256]

Epoch 1/10:  24%|███▎          | 236/991 [57:57<3:05:13, 14.72s/batch, batch_loss=28.2, batch_index=236, batch_size=256]

Epoch 1/10:  24%|███▎          | 236/991 [58:12<3:05:13, 14.72s/batch, batch_loss=34.8, batch_index=237, batch_size=256]

Epoch 1/10:  24%|███▎          | 237/991 [58:12<3:06:38, 14.85s/batch, batch_loss=34.8, batch_index=237, batch_size=256]

Epoch 1/10:  24%|███▎          | 237/991 [58:27<3:06:38, 14.85s/batch, batch_loss=26.1, batch_index=238, batch_size=256]

Epoch 1/10:  24%|███▎          | 238/991 [58:27<3:04:39, 14.71s/batch, batch_loss=26.1, batch_index=238, batch_size=256]

Epoch 1/10:  24%|███▎          | 238/991 [58:41<3:04:39, 14.71s/batch, batch_loss=6.84, batch_index=239, batch_size=256]

Epoch 1/10:  24%|███▍          | 239/991 [58:41<3:02:03, 14.53s/batch, batch_loss=6.84, batch_index=239, batch_size=256]

Epoch 1/10:  24%|███▍          | 239/991 [58:58<3:02:03, 14.53s/batch, batch_loss=9.33, batch_index=240, batch_size=256]

Epoch 1/10:  24%|███▍          | 240/991 [58:58<3:12:11, 15.36s/batch, batch_loss=9.33, batch_index=240, batch_size=256]

Epoch 1/10:  24%|███▍          | 240/991 [59:13<3:12:11, 15.36s/batch, batch_loss=10.2, batch_index=241, batch_size=256]

Epoch 1/10:  24%|███▍          | 241/991 [59:13<3:10:17, 15.22s/batch, batch_loss=10.2, batch_index=241, batch_size=256]

Epoch 1/10:  24%|███▍          | 241/991 [59:27<3:10:17, 15.22s/batch, batch_loss=39.9, batch_index=242, batch_size=256]

Epoch 1/10:  24%|███▍          | 242/991 [59:27<3:05:56, 14.90s/batch, batch_loss=39.9, batch_index=242, batch_size=256]

Epoch 1/10:  24%|███▋           | 242/991 [59:42<3:05:56, 14.90s/batch, batch_loss=272, batch_index=243, batch_size=256]

Epoch 1/10:  25%|███▋           | 243/991 [59:42<3:05:44, 14.90s/batch, batch_loss=272, batch_index=243, batch_size=256]

Epoch 1/10:  25%|███▍          | 243/991 [59:56<3:05:44, 14.90s/batch, batch_loss=27.7, batch_index=244, batch_size=256]

Epoch 1/10:  25%|███▍          | 244/991 [59:56<3:04:23, 14.81s/batch, batch_loss=27.7, batch_index=244, batch_size=256]

Epoch 1/10:  25%|██▉         | 244/991 [1:00:11<3:04:23, 14.81s/batch, batch_loss=8.76, batch_index=245, batch_size=256]

Epoch 1/10:  25%|██▉         | 245/991 [1:00:11<3:03:24, 14.75s/batch, batch_loss=8.76, batch_index=245, batch_size=256]

Epoch 1/10:  25%|██▉         | 245/991 [1:00:28<3:03:24, 14.75s/batch, batch_loss=7.43, batch_index=246, batch_size=256]

Epoch 1/10:  25%|██▉         | 246/991 [1:00:28<3:11:23, 15.41s/batch, batch_loss=7.43, batch_index=246, batch_size=256]

Epoch 1/10:  25%|██▉         | 246/991 [1:00:43<3:11:23, 15.41s/batch, batch_loss=20.8, batch_index=247, batch_size=256]

Epoch 1/10:  25%|██▉         | 247/991 [1:00:43<3:08:58, 15.24s/batch, batch_loss=20.8, batch_index=247, batch_size=256]

Epoch 1/10:  25%|██▉         | 247/991 [1:00:58<3:08:58, 15.24s/batch, batch_loss=4.67, batch_index=248, batch_size=256]

Epoch 1/10:  25%|███         | 248/991 [1:00:58<3:08:35, 15.23s/batch, batch_loss=4.67, batch_index=248, batch_size=256]

Epoch 1/10:  25%|███         | 248/991 [1:01:12<3:08:35, 15.23s/batch, batch_loss=15.3, batch_index=249, batch_size=256]

Epoch 1/10:  25%|███         | 249/991 [1:01:12<3:04:18, 14.90s/batch, batch_loss=15.3, batch_index=249, batch_size=256]

Epoch 1/10:  25%|███         | 249/991 [1:01:27<3:04:18, 14.90s/batch, batch_loss=10.9, batch_index=250, batch_size=256]

Epoch 1/10:  25%|███         | 250/991 [1:01:27<3:05:11, 14.99s/batch, batch_loss=10.9, batch_index=250, batch_size=256]

Epoch 1/10:  25%|███         | 250/991 [1:01:41<3:05:11, 14.99s/batch, batch_loss=6.73, batch_index=251, batch_size=256]

Epoch 1/10:  25%|███         | 251/991 [1:01:41<3:00:59, 14.68s/batch, batch_loss=6.73, batch_index=251, batch_size=256]

Epoch 1/10:  25%|███         | 251/991 [1:01:56<3:00:59, 14.68s/batch, batch_loss=18.9, batch_index=252, batch_size=256]

Epoch 1/10:  25%|███         | 252/991 [1:01:56<3:00:29, 14.65s/batch, batch_loss=18.9, batch_index=252, batch_size=256]

Epoch 1/10:  25%|███         | 252/991 [1:02:10<3:00:29, 14.65s/batch, batch_loss=8.54, batch_index=253, batch_size=256]

Epoch 1/10:  26%|███         | 253/991 [1:02:10<2:58:19, 14.50s/batch, batch_loss=8.54, batch_index=253, batch_size=256]

Epoch 1/10:  26%|███         | 253/991 [1:02:25<2:58:19, 14.50s/batch, batch_loss=21.9, batch_index=254, batch_size=256]

Epoch 1/10:  26%|███         | 254/991 [1:02:25<2:57:58, 14.49s/batch, batch_loss=21.9, batch_index=254, batch_size=256]

Epoch 1/10:  26%|███         | 254/991 [1:02:39<2:57:58, 14.49s/batch, batch_loss=17.4, batch_index=255, batch_size=256]

Epoch 1/10:  26%|███         | 255/991 [1:02:39<2:57:11, 14.44s/batch, batch_loss=17.4, batch_index=255, batch_size=256]

Epoch 1/10:  26%|███▎         | 255/991 [1:02:52<2:57:11, 14.44s/batch, batch_loss=694, batch_index=256, batch_size=256]

Epoch 1/10:  26%|███▎         | 256/991 [1:02:52<2:52:40, 14.10s/batch, batch_loss=694, batch_index=256, batch_size=256]

Epoch 1/10:  26%|███         | 256/991 [1:03:06<2:52:40, 14.10s/batch, batch_loss=25.6, batch_index=257, batch_size=256]

Epoch 1/10:  26%|███         | 257/991 [1:03:06<2:51:36, 14.03s/batch, batch_loss=25.6, batch_index=257, batch_size=256]

Epoch 1/10:  26%|███▎         | 257/991 [1:03:21<2:51:36, 14.03s/batch, batch_loss=210, batch_index=258, batch_size=256]

Epoch 1/10:  26%|███▍         | 258/991 [1:03:21<2:53:06, 14.17s/batch, batch_loss=210, batch_index=258, batch_size=256]

Epoch 1/10:  26%|███         | 258/991 [1:03:35<2:53:06, 14.17s/batch, batch_loss=18.2, batch_index=259, batch_size=256]

Epoch 1/10:  26%|███▏        | 259/991 [1:03:35<2:54:13, 14.28s/batch, batch_loss=18.2, batch_index=259, batch_size=256]

Epoch 1/10:  26%|███▏        | 259/991 [1:03:49<2:54:13, 14.28s/batch, batch_loss=50.5, batch_index=260, batch_size=256]

Epoch 1/10:  26%|███▏        | 260/991 [1:03:49<2:51:16, 14.06s/batch, batch_loss=50.5, batch_index=260, batch_size=256]

Epoch 1/10:  26%|███▏        | 260/991 [1:04:06<2:51:16, 14.06s/batch, batch_loss=21.7, batch_index=261, batch_size=256]

Epoch 1/10:  26%|███▏        | 261/991 [1:04:06<3:02:26, 15.00s/batch, batch_loss=21.7, batch_index=261, batch_size=256]

Epoch 1/10:  26%|███▏        | 261/991 [1:04:19<3:02:26, 15.00s/batch, batch_loss=12.1, batch_index=262, batch_size=256]

Epoch 1/10:  26%|███▏        | 262/991 [1:04:19<2:57:19, 14.60s/batch, batch_loss=12.1, batch_index=262, batch_size=256]

Epoch 1/10:  26%|███▏        | 262/991 [1:04:34<2:57:19, 14.60s/batch, batch_loss=14.7, batch_index=263, batch_size=256]

Epoch 1/10:  27%|███▏        | 263/991 [1:04:34<2:55:25, 14.46s/batch, batch_loss=14.7, batch_index=263, batch_size=256]

Epoch 1/10:  27%|███▏        | 263/991 [1:04:47<2:55:25, 14.46s/batch, batch_loss=21.3, batch_index=264, batch_size=256]

Epoch 1/10:  27%|███▏        | 264/991 [1:04:47<2:50:58, 14.11s/batch, batch_loss=21.3, batch_index=264, batch_size=256]

Epoch 1/10:  27%|███▏        | 264/991 [1:05:01<2:50:58, 14.11s/batch, batch_loss=19.8, batch_index=265, batch_size=256]

Epoch 1/10:  27%|███▏        | 265/991 [1:05:01<2:50:07, 14.06s/batch, batch_loss=19.8, batch_index=265, batch_size=256]

Epoch 1/10:  27%|███▏        | 265/991 [1:05:15<2:50:07, 14.06s/batch, batch_loss=18.8, batch_index=266, batch_size=256]

Epoch 1/10:  27%|███▏        | 266/991 [1:05:15<2:50:58, 14.15s/batch, batch_loss=18.8, batch_index=266, batch_size=256]

Epoch 1/10:  27%|███▏        | 266/991 [1:05:30<2:50:58, 14.15s/batch, batch_loss=16.5, batch_index=267, batch_size=256]

Epoch 1/10:  27%|███▏        | 267/991 [1:05:30<2:52:18, 14.28s/batch, batch_loss=16.5, batch_index=267, batch_size=256]

Epoch 1/10:  27%|███▏        | 267/991 [1:05:44<2:52:18, 14.28s/batch, batch_loss=9.41, batch_index=268, batch_size=256]

Epoch 1/10:  27%|███▏        | 268/991 [1:05:44<2:53:06, 14.37s/batch, batch_loss=9.41, batch_index=268, batch_size=256]

Epoch 1/10:  27%|███▏        | 268/991 [1:05:59<2:53:06, 14.37s/batch, batch_loss=19.8, batch_index=269, batch_size=256]

Epoch 1/10:  27%|███▎        | 269/991 [1:05:59<2:54:44, 14.52s/batch, batch_loss=19.8, batch_index=269, batch_size=256]

Epoch 1/10:  27%|███▎        | 269/991 [1:06:13<2:54:44, 14.52s/batch, batch_loss=2.05, batch_index=270, batch_size=256]

Epoch 1/10:  27%|███▎        | 270/991 [1:06:13<2:51:54, 14.31s/batch, batch_loss=2.05, batch_index=270, batch_size=256]

Epoch 1/10:  27%|███▎        | 270/991 [1:06:28<2:51:54, 14.31s/batch, batch_loss=18.5, batch_index=271, batch_size=256]

Epoch 1/10:  27%|███▎        | 271/991 [1:06:28<2:54:37, 14.55s/batch, batch_loss=18.5, batch_index=271, batch_size=256]

Epoch 1/10:  27%|███▎        | 271/991 [1:06:43<2:54:37, 14.55s/batch, batch_loss=16.3, batch_index=272, batch_size=256]

Epoch 1/10:  27%|███▎        | 272/991 [1:06:43<2:55:08, 14.62s/batch, batch_loss=16.3, batch_index=272, batch_size=256]

Epoch 1/10:  27%|███▊          | 272/991 [1:06:57<2:55:08, 14.62s/batch, batch_loss=34, batch_index=273, batch_size=256]

Epoch 1/10:  28%|███▊          | 273/991 [1:06:57<2:53:57, 14.54s/batch, batch_loss=34, batch_index=273, batch_size=256]

Epoch 1/10:  28%|███▎        | 273/991 [1:07:12<2:53:57, 14.54s/batch, batch_loss=14.7, batch_index=274, batch_size=256]

Epoch 1/10:  28%|███▎        | 274/991 [1:07:12<2:53:52, 14.55s/batch, batch_loss=14.7, batch_index=274, batch_size=256]

Epoch 1/10:  28%|██▍      | 274/991 [1:07:27<2:53:52, 14.55s/batch, batch_loss=3.32e+3, batch_index=275, batch_size=256]

Epoch 1/10:  28%|██▍      | 275/991 [1:07:27<2:54:19, 14.61s/batch, batch_loss=3.32e+3, batch_index=275, batch_size=256]

Epoch 1/10:  28%|███▎        | 275/991 [1:07:42<2:54:19, 14.61s/batch, batch_loss=17.8, batch_index=276, batch_size=256]

Epoch 1/10:  28%|███▎        | 276/991 [1:07:42<2:56:55, 14.85s/batch, batch_loss=17.8, batch_index=276, batch_size=256]

Epoch 1/10:  28%|██▌      | 276/991 [1:07:57<2:56:55, 14.85s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 1/10:  28%|██▌      | 277/991 [1:07:57<2:55:23, 14.74s/batch, batch_loss=4.79e+3, batch_index=277, batch_size=256]

Epoch 1/10:  28%|███▎        | 277/991 [1:08:11<2:55:23, 14.74s/batch, batch_loss=19.2, batch_index=278, batch_size=256]

Epoch 1/10:  28%|███▎        | 278/991 [1:08:11<2:54:00, 14.64s/batch, batch_loss=19.2, batch_index=278, batch_size=256]

Epoch 1/10:  28%|███▎        | 278/991 [1:08:26<2:54:00, 14.64s/batch, batch_loss=27.1, batch_index=279, batch_size=256]

Epoch 1/10:  28%|███▍        | 279/991 [1:08:26<2:53:50, 14.65s/batch, batch_loss=27.1, batch_index=279, batch_size=256]

Epoch 1/10:  28%|███▍        | 279/991 [1:08:40<2:53:50, 14.65s/batch, batch_loss=14.5, batch_index=280, batch_size=256]

Epoch 1/10:  28%|███▍        | 280/991 [1:08:40<2:51:32, 14.48s/batch, batch_loss=14.5, batch_index=280, batch_size=256]

Epoch 1/10:  28%|███▍        | 280/991 [1:08:54<2:51:32, 14.48s/batch, batch_loss=15.2, batch_index=281, batch_size=256]

Epoch 1/10:  28%|███▍        | 281/991 [1:08:54<2:51:39, 14.51s/batch, batch_loss=15.2, batch_index=281, batch_size=256]

Epoch 1/10:  28%|████▎          | 281/991 [1:09:08<2:51:39, 14.51s/batch, batch_loss=8, batch_index=282, batch_size=256]

Epoch 1/10:  28%|████▎          | 282/991 [1:09:08<2:50:15, 14.41s/batch, batch_loss=8, batch_index=282, batch_size=256]

Epoch 1/10:  28%|███▉          | 282/991 [1:09:24<2:50:15, 14.41s/batch, batch_loss=21, batch_index=283, batch_size=256]

Epoch 1/10:  29%|███▉          | 283/991 [1:09:24<2:54:16, 14.77s/batch, batch_loss=21, batch_index=283, batch_size=256]

Epoch 1/10:  29%|███▍        | 283/991 [1:09:39<2:54:16, 14.77s/batch, batch_loss=50.7, batch_index=284, batch_size=256]

Epoch 1/10:  29%|███▍        | 284/991 [1:09:39<2:54:38, 14.82s/batch, batch_loss=50.7, batch_index=284, batch_size=256]

Epoch 1/10:  29%|███▍        | 284/991 [1:09:53<2:54:38, 14.82s/batch, batch_loss=16.1, batch_index=285, batch_size=256]

Epoch 1/10:  29%|███▍        | 285/991 [1:09:53<2:53:18, 14.73s/batch, batch_loss=16.1, batch_index=285, batch_size=256]

Epoch 1/10:  29%|███▍        | 285/991 [1:10:08<2:53:18, 14.73s/batch, batch_loss=9.66, batch_index=286, batch_size=256]

Epoch 1/10:  29%|███▍        | 286/991 [1:10:08<2:50:46, 14.53s/batch, batch_loss=9.66, batch_index=286, batch_size=256]

Epoch 1/10:  29%|███▍        | 286/991 [1:10:24<2:50:46, 14.53s/batch, batch_loss=8.58, batch_index=287, batch_size=256]

Epoch 1/10:  29%|███▍        | 287/991 [1:10:24<2:58:30, 15.21s/batch, batch_loss=8.58, batch_index=287, batch_size=256]

Epoch 1/10:  29%|██▌      | 287/991 [1:10:39<2:58:30, 15.21s/batch, batch_loss=2.59e+3, batch_index=288, batch_size=256]

Epoch 1/10:  29%|██▌      | 288/991 [1:10:39<2:54:46, 14.92s/batch, batch_loss=2.59e+3, batch_index=288, batch_size=256]

Epoch 1/10:  29%|██▌      | 288/991 [1:10:54<2:54:46, 14.92s/batch, batch_loss=1.26e+3, batch_index=289, batch_size=256]

Epoch 1/10:  29%|██▌      | 289/991 [1:10:54<2:55:05, 14.96s/batch, batch_loss=1.26e+3, batch_index=289, batch_size=256]

Epoch 1/10:  29%|███▍        | 289/991 [1:11:08<2:55:05, 14.96s/batch, batch_loss=18.1, batch_index=290, batch_size=256]

Epoch 1/10:  29%|███▌        | 290/991 [1:11:08<2:53:33, 14.86s/batch, batch_loss=18.1, batch_index=290, batch_size=256]

Epoch 1/10:  29%|███▌        | 290/991 [1:11:22<2:53:33, 14.86s/batch, batch_loss=7.55, batch_index=291, batch_size=256]

Epoch 1/10:  29%|███▌        | 291/991 [1:11:22<2:50:38, 14.63s/batch, batch_loss=7.55, batch_index=291, batch_size=256]

Epoch 1/10:  29%|███▌        | 291/991 [1:11:38<2:50:38, 14.63s/batch, batch_loss=15.6, batch_index=292, batch_size=256]

Epoch 1/10:  29%|███▌        | 292/991 [1:11:38<2:52:51, 14.84s/batch, batch_loss=15.6, batch_index=292, batch_size=256]

Epoch 1/10:  29%|███▌        | 292/991 [1:11:52<2:52:51, 14.84s/batch, batch_loss=17.4, batch_index=293, batch_size=256]

Epoch 1/10:  30%|███▌        | 293/991 [1:11:52<2:49:32, 14.57s/batch, batch_loss=17.4, batch_index=293, batch_size=256]

Epoch 1/10:  30%|████▏         | 293/991 [1:12:06<2:49:32, 14.57s/batch, batch_loss=17, batch_index=294, batch_size=256]

Epoch 1/10:  30%|████▏         | 294/991 [1:12:06<2:47:24, 14.41s/batch, batch_loss=17, batch_index=294, batch_size=256]

Epoch 1/10:  30%|███▌        | 294/991 [1:12:23<2:47:24, 14.41s/batch, batch_loss=12.4, batch_index=295, batch_size=256]

Epoch 1/10:  30%|███▌        | 295/991 [1:12:23<2:55:58, 15.17s/batch, batch_loss=12.4, batch_index=295, batch_size=256]

Epoch 1/10:  30%|███▌        | 295/991 [1:12:37<2:55:58, 15.17s/batch, batch_loss=23.3, batch_index=296, batch_size=256]

Epoch 1/10:  30%|███▌        | 296/991 [1:12:37<2:53:25, 14.97s/batch, batch_loss=23.3, batch_index=296, batch_size=256]

Epoch 1/10:  30%|███▌        | 296/991 [1:12:51<2:53:25, 14.97s/batch, batch_loss=19.9, batch_index=297, batch_size=256]

Epoch 1/10:  30%|███▌        | 297/991 [1:12:51<2:50:38, 14.75s/batch, batch_loss=19.9, batch_index=297, batch_size=256]

Epoch 1/10:  30%|██▋      | 297/991 [1:13:06<2:50:38, 14.75s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 1/10:  30%|██▋      | 298/991 [1:13:06<2:50:32, 14.77s/batch, batch_loss=3.24e+4, batch_index=298, batch_size=256]

Epoch 1/10:  30%|███▌        | 298/991 [1:13:21<2:50:32, 14.77s/batch, batch_loss=27.9, batch_index=299, batch_size=256]

Epoch 1/10:  30%|███▌        | 299/991 [1:13:21<2:50:44, 14.80s/batch, batch_loss=27.9, batch_index=299, batch_size=256]

Epoch 1/10:  30%|███▌        | 299/991 [1:13:36<2:50:44, 14.80s/batch, batch_loss=9.65, batch_index=300, batch_size=256]

Epoch 1/10:  30%|███▋        | 300/991 [1:13:36<2:50:13, 14.78s/batch, batch_loss=9.65, batch_index=300, batch_size=256]

Epoch 1/10:  30%|███▋        | 300/991 [1:13:50<2:50:13, 14.78s/batch, batch_loss=11.1, batch_index=301, batch_size=256]

Epoch 1/10:  30%|███▋        | 301/991 [1:13:50<2:47:17, 14.55s/batch, batch_loss=11.1, batch_index=301, batch_size=256]

Epoch 1/10:  30%|███▋        | 301/991 [1:14:04<2:47:17, 14.55s/batch, batch_loss=12.6, batch_index=302, batch_size=256]

Epoch 1/10:  30%|███▋        | 302/991 [1:14:04<2:46:43, 14.52s/batch, batch_loss=12.6, batch_index=302, batch_size=256]

Epoch 1/10:  30%|███▋        | 302/991 [1:14:19<2:46:43, 14.52s/batch, batch_loss=12.3, batch_index=303, batch_size=256]

Epoch 1/10:  31%|███▋        | 303/991 [1:14:19<2:48:40, 14.71s/batch, batch_loss=12.3, batch_index=303, batch_size=256]

Epoch 1/10:  31%|███▋        | 303/991 [1:14:34<2:48:40, 14.71s/batch, batch_loss=4.07, batch_index=304, batch_size=256]

Epoch 1/10:  31%|███▋        | 304/991 [1:14:34<2:48:46, 14.74s/batch, batch_loss=4.07, batch_index=304, batch_size=256]

Epoch 1/10:  31%|███▋        | 304/991 [1:14:49<2:48:46, 14.74s/batch, batch_loss=16.4, batch_index=305, batch_size=256]

Epoch 1/10:  31%|███▋        | 305/991 [1:14:49<2:49:54, 14.86s/batch, batch_loss=16.4, batch_index=305, batch_size=256]

Epoch 1/10:  31%|███▋        | 305/991 [1:15:04<2:49:54, 14.86s/batch, batch_loss=11.9, batch_index=306, batch_size=256]

Epoch 1/10:  31%|███▋        | 306/991 [1:15:04<2:47:47, 14.70s/batch, batch_loss=11.9, batch_index=306, batch_size=256]

Epoch 1/10:  31%|██▊      | 306/991 [1:15:18<2:47:47, 14.70s/batch, batch_loss=6.28e+3, batch_index=307, batch_size=256]

Epoch 1/10:  31%|██▊      | 307/991 [1:15:18<2:45:09, 14.49s/batch, batch_loss=6.28e+3, batch_index=307, batch_size=256]

Epoch 1/10:  31%|████▎         | 307/991 [1:15:32<2:45:09, 14.49s/batch, batch_loss=16, batch_index=308, batch_size=256]

Epoch 1/10:  31%|████▎         | 308/991 [1:15:32<2:45:52, 14.57s/batch, batch_loss=16, batch_index=308, batch_size=256]

Epoch 1/10:  31%|███▋        | 308/991 [1:15:47<2:45:52, 14.57s/batch, batch_loss=34.1, batch_index=309, batch_size=256]

Epoch 1/10:  31%|███▋        | 309/991 [1:15:47<2:44:59, 14.52s/batch, batch_loss=34.1, batch_index=309, batch_size=256]

Epoch 1/10:  31%|███▋        | 309/991 [1:16:01<2:44:59, 14.52s/batch, batch_loss=26.1, batch_index=310, batch_size=256]

Epoch 1/10:  31%|███▊        | 310/991 [1:16:01<2:42:50, 14.35s/batch, batch_loss=26.1, batch_index=310, batch_size=256]

Epoch 1/10:  31%|███▊        | 310/991 [1:16:18<2:42:50, 14.35s/batch, batch_loss=42.1, batch_index=311, batch_size=256]

Epoch 1/10:  31%|███▊        | 311/991 [1:16:18<2:52:55, 15.26s/batch, batch_loss=42.1, batch_index=311, batch_size=256]

Epoch 1/10:  31%|███▊        | 311/991 [1:16:32<2:52:55, 15.26s/batch, batch_loss=17.6, batch_index=312, batch_size=256]

Epoch 1/10:  31%|███▊        | 312/991 [1:16:32<2:47:36, 14.81s/batch, batch_loss=17.6, batch_index=312, batch_size=256]

Epoch 1/10:  31%|██▊      | 312/991 [1:16:46<2:47:36, 14.81s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 1/10:  32%|██▊      | 313/991 [1:16:46<2:45:30, 14.65s/batch, batch_loss=1.06e+4, batch_index=313, batch_size=256]

Epoch 1/10:  32%|███▊        | 313/991 [1:17:00<2:45:30, 14.65s/batch, batch_loss=10.5, batch_index=314, batch_size=256]

Epoch 1/10:  32%|███▊        | 314/991 [1:17:00<2:40:50, 14.25s/batch, batch_loss=10.5, batch_index=314, batch_size=256]

Epoch 1/10:  32%|███▊        | 314/991 [1:17:13<2:40:50, 14.25s/batch, batch_loss=17.3, batch_index=315, batch_size=256]

Epoch 1/10:  32%|███▊        | 315/991 [1:17:13<2:38:47, 14.09s/batch, batch_loss=17.3, batch_index=315, batch_size=256]

Epoch 1/10:  32%|███▊        | 315/991 [1:17:27<2:38:47, 14.09s/batch, batch_loss=27.4, batch_index=316, batch_size=256]

Epoch 1/10:  32%|███▊        | 316/991 [1:17:27<2:36:15, 13.89s/batch, batch_loss=27.4, batch_index=316, batch_size=256]

Epoch 1/10:  32%|███▊        | 316/991 [1:17:39<2:36:15, 13.89s/batch, batch_loss=30.7, batch_index=317, batch_size=256]

Epoch 1/10:  32%|███▊        | 317/991 [1:17:39<2:31:37, 13.50s/batch, batch_loss=30.7, batch_index=317, batch_size=256]

Epoch 1/10:  32%|███▊        | 317/991 [1:17:51<2:31:37, 13.50s/batch, batch_loss=29.4, batch_index=318, batch_size=256]

Epoch 1/10:  32%|███▊        | 318/991 [1:17:51<2:24:02, 12.84s/batch, batch_loss=29.4, batch_index=318, batch_size=256]

Epoch 1/10:  32%|███▊        | 318/991 [1:18:03<2:24:02, 12.84s/batch, batch_loss=30.1, batch_index=319, batch_size=256]

Epoch 1/10:  32%|███▊        | 319/991 [1:18:03<2:22:06, 12.69s/batch, batch_loss=30.1, batch_index=319, batch_size=256]

Epoch 1/10:  32%|███▊        | 319/991 [1:18:14<2:22:06, 12.69s/batch, batch_loss=20.8, batch_index=320, batch_size=256]

Epoch 1/10:  32%|███▊        | 320/991 [1:18:14<2:17:23, 12.29s/batch, batch_loss=20.8, batch_index=320, batch_size=256]

Epoch 1/10:  32%|███▊        | 320/991 [1:18:26<2:17:23, 12.29s/batch, batch_loss=36.9, batch_index=321, batch_size=256]

Epoch 1/10:  32%|███▉        | 321/991 [1:18:26<2:14:31, 12.05s/batch, batch_loss=36.9, batch_index=321, batch_size=256]

Epoch 1/10:  32%|███▉        | 321/991 [1:18:38<2:14:31, 12.05s/batch, batch_loss=12.8, batch_index=322, batch_size=256]

Epoch 1/10:  32%|███▉        | 322/991 [1:18:38<2:14:57, 12.10s/batch, batch_loss=12.8, batch_index=322, batch_size=256]

Epoch 1/10:  32%|███▉        | 322/991 [1:18:50<2:14:57, 12.10s/batch, batch_loss=16.4, batch_index=323, batch_size=256]

Epoch 1/10:  33%|███▉        | 323/991 [1:18:50<2:13:44, 12.01s/batch, batch_loss=16.4, batch_index=323, batch_size=256]

Epoch 1/10:  33%|████▌         | 323/991 [1:19:03<2:13:44, 12.01s/batch, batch_loss=30, batch_index=324, batch_size=256]

Epoch 1/10:  33%|████▌         | 324/991 [1:19:03<2:17:12, 12.34s/batch, batch_loss=30, batch_index=324, batch_size=256]

Epoch 1/10:  33%|███▉        | 324/991 [1:19:15<2:17:12, 12.34s/batch, batch_loss=13.3, batch_index=325, batch_size=256]

Epoch 1/10:  33%|███▉        | 325/991 [1:19:15<2:17:23, 12.38s/batch, batch_loss=13.3, batch_index=325, batch_size=256]

Epoch 1/10:  33%|███▉        | 325/991 [1:19:28<2:17:23, 12.38s/batch, batch_loss=43.4, batch_index=326, batch_size=256]

Epoch 1/10:  33%|███▉        | 326/991 [1:19:28<2:18:12, 12.47s/batch, batch_loss=43.4, batch_index=326, batch_size=256]

Epoch 1/10:  33%|██▉      | 326/991 [1:19:40<2:18:12, 12.47s/batch, batch_loss=3.05e+3, batch_index=327, batch_size=256]

Epoch 1/10:  33%|██▉      | 327/991 [1:19:40<2:17:49, 12.45s/batch, batch_loss=3.05e+3, batch_index=327, batch_size=256]

Epoch 1/10:  33%|███▉        | 327/991 [1:19:54<2:17:49, 12.45s/batch, batch_loss=9.06, batch_index=328, batch_size=256]

Epoch 1/10:  33%|███▉        | 328/991 [1:19:54<2:20:10, 12.69s/batch, batch_loss=9.06, batch_index=328, batch_size=256]

Epoch 1/10:  33%|███▉        | 328/991 [1:20:07<2:20:10, 12.69s/batch, batch_loss=29.4, batch_index=329, batch_size=256]

Epoch 1/10:  33%|███▉        | 329/991 [1:20:07<2:20:51, 12.77s/batch, batch_loss=29.4, batch_index=329, batch_size=256]

Epoch 1/10:  33%|███▉        | 329/991 [1:20:21<2:20:51, 12.77s/batch, batch_loss=21.6, batch_index=330, batch_size=256]

Epoch 1/10:  33%|███▉        | 330/991 [1:20:21<2:27:04, 13.35s/batch, batch_loss=21.6, batch_index=330, batch_size=256]

Epoch 1/10:  33%|███▉        | 330/991 [1:20:34<2:27:04, 13.35s/batch, batch_loss=18.6, batch_index=331, batch_size=256]

Epoch 1/10:  33%|████        | 331/991 [1:20:34<2:24:53, 13.17s/batch, batch_loss=18.6, batch_index=331, batch_size=256]

Epoch 1/10:  33%|████        | 331/991 [1:20:47<2:24:53, 13.17s/batch, batch_loss=19.5, batch_index=332, batch_size=256]

Epoch 1/10:  34%|████        | 332/991 [1:20:47<2:24:25, 13.15s/batch, batch_loss=19.5, batch_index=332, batch_size=256]

Epoch 1/10:  34%|████        | 332/991 [1:21:00<2:24:25, 13.15s/batch, batch_loss=26.4, batch_index=333, batch_size=256]

Epoch 1/10:  34%|████        | 333/991 [1:21:00<2:23:24, 13.08s/batch, batch_loss=26.4, batch_index=333, batch_size=256]

Epoch 1/10:  34%|████        | 333/991 [1:21:13<2:23:24, 13.08s/batch, batch_loss=19.5, batch_index=334, batch_size=256]

Epoch 1/10:  34%|████        | 334/991 [1:21:13<2:22:38, 13.03s/batch, batch_loss=19.5, batch_index=334, batch_size=256]

Epoch 1/10:  34%|████        | 334/991 [1:21:27<2:22:38, 13.03s/batch, batch_loss=7.28, batch_index=335, batch_size=256]

Epoch 1/10:  34%|████        | 335/991 [1:21:27<2:24:53, 13.25s/batch, batch_loss=7.28, batch_index=335, batch_size=256]

Epoch 1/10:  34%|███      | 335/991 [1:21:40<2:24:53, 13.25s/batch, batch_loss=8.47e+3, batch_index=336, batch_size=256]

Epoch 1/10:  34%|███      | 336/991 [1:21:40<2:24:45, 13.26s/batch, batch_loss=8.47e+3, batch_index=336, batch_size=256]

Epoch 1/10:  34%|███      | 336/991 [1:21:54<2:24:45, 13.26s/batch, batch_loss=2.37e+3, batch_index=337, batch_size=256]

Epoch 1/10:  34%|███      | 337/991 [1:21:54<2:25:09, 13.32s/batch, batch_loss=2.37e+3, batch_index=337, batch_size=256]

Epoch 1/10:  34%|████        | 337/991 [1:22:09<2:25:09, 13.32s/batch, batch_loss=8.42, batch_index=338, batch_size=256]

Epoch 1/10:  34%|████        | 338/991 [1:22:09<2:31:51, 13.95s/batch, batch_loss=8.42, batch_index=338, batch_size=256]

Epoch 1/10:  34%|████        | 338/991 [1:22:22<2:31:51, 13.95s/batch, batch_loss=39.5, batch_index=339, batch_size=256]

Epoch 1/10:  34%|████        | 339/991 [1:22:22<2:29:36, 13.77s/batch, batch_loss=39.5, batch_index=339, batch_size=256]

Epoch 1/10:  34%|████        | 339/991 [1:22:35<2:29:36, 13.77s/batch, batch_loss=14.1, batch_index=340, batch_size=256]

Epoch 1/10:  34%|████        | 340/991 [1:22:35<2:26:41, 13.52s/batch, batch_loss=14.1, batch_index=340, batch_size=256]

Epoch 1/10:  34%|████        | 340/991 [1:22:48<2:26:41, 13.52s/batch, batch_loss=10.4, batch_index=341, batch_size=256]

Epoch 1/10:  34%|████▏       | 341/991 [1:22:48<2:24:52, 13.37s/batch, batch_loss=10.4, batch_index=341, batch_size=256]

Epoch 1/10:  34%|████▏       | 341/991 [1:23:01<2:24:52, 13.37s/batch, batch_loss=2.18, batch_index=342, batch_size=256]

Epoch 1/10:  35%|████▏       | 342/991 [1:23:01<2:22:26, 13.17s/batch, batch_loss=2.18, batch_index=342, batch_size=256]

Epoch 1/10:  35%|████▏       | 342/991 [1:23:13<2:22:26, 13.17s/batch, batch_loss=9.87, batch_index=343, batch_size=256]

Epoch 1/10:  35%|████▏       | 343/991 [1:23:13<2:19:50, 12.95s/batch, batch_loss=9.87, batch_index=343, batch_size=256]

Epoch 1/10:  35%|████▏       | 343/991 [1:23:27<2:19:50, 12.95s/batch, batch_loss=27.9, batch_index=344, batch_size=256]

Epoch 1/10:  35%|████▏       | 344/991 [1:23:27<2:20:16, 13.01s/batch, batch_loss=27.9, batch_index=344, batch_size=256]

Epoch 1/10:  35%|████▌        | 344/991 [1:23:40<2:20:16, 13.01s/batch, batch_loss=124, batch_index=345, batch_size=256]

Epoch 1/10:  35%|████▌        | 345/991 [1:23:40<2:21:00, 13.10s/batch, batch_loss=124, batch_index=345, batch_size=256]

Epoch 1/10:  35%|████▏       | 345/991 [1:23:53<2:21:00, 13.10s/batch, batch_loss=21.5, batch_index=346, batch_size=256]

Epoch 1/10:  35%|████▏       | 346/991 [1:23:53<2:21:36, 13.17s/batch, batch_loss=21.5, batch_index=346, batch_size=256]

Epoch 1/10:  35%|████▏       | 346/991 [1:24:06<2:21:36, 13.17s/batch, batch_loss=13.7, batch_index=347, batch_size=256]

Epoch 1/10:  35%|████▏       | 347/991 [1:24:06<2:21:43, 13.20s/batch, batch_loss=13.7, batch_index=347, batch_size=256]

Epoch 1/10:  35%|████▏       | 347/991 [1:24:20<2:21:43, 13.20s/batch, batch_loss=18.6, batch_index=348, batch_size=256]

Epoch 1/10:  35%|████▏       | 348/991 [1:24:20<2:22:44, 13.32s/batch, batch_loss=18.6, batch_index=348, batch_size=256]

Epoch 1/10:  35%|████▏       | 348/991 [1:24:34<2:22:44, 13.32s/batch, batch_loss=11.3, batch_index=349, batch_size=256]

Epoch 1/10:  35%|████▏       | 349/991 [1:24:34<2:24:03, 13.46s/batch, batch_loss=11.3, batch_index=349, batch_size=256]

Epoch 1/10:  35%|████▏       | 349/991 [1:24:47<2:24:03, 13.46s/batch, batch_loss=15.3, batch_index=350, batch_size=256]

Epoch 1/10:  35%|████▏       | 350/991 [1:24:47<2:22:54, 13.38s/batch, batch_loss=15.3, batch_index=350, batch_size=256]

Epoch 1/10:  35%|████▏       | 350/991 [1:25:01<2:22:54, 13.38s/batch, batch_loss=8.92, batch_index=351, batch_size=256]

Epoch 1/10:  35%|████▎       | 351/991 [1:25:01<2:23:25, 13.45s/batch, batch_loss=8.92, batch_index=351, batch_size=256]

Epoch 1/10:  35%|████▎       | 351/991 [1:25:14<2:23:25, 13.45s/batch, batch_loss=19.4, batch_index=352, batch_size=256]

Epoch 1/10:  36%|████▎       | 352/991 [1:25:14<2:23:34, 13.48s/batch, batch_loss=19.4, batch_index=352, batch_size=256]

Epoch 1/10:  36%|████▎       | 352/991 [1:25:27<2:23:34, 13.48s/batch, batch_loss=31.2, batch_index=353, batch_size=256]

Epoch 1/10:  36%|████▎       | 353/991 [1:25:27<2:22:27, 13.40s/batch, batch_loss=31.2, batch_index=353, batch_size=256]

Epoch 1/10:  36%|████▎       | 353/991 [1:25:41<2:22:27, 13.40s/batch, batch_loss=28.2, batch_index=354, batch_size=256]

Epoch 1/10:  36%|████▎       | 354/991 [1:25:41<2:21:51, 13.36s/batch, batch_loss=28.2, batch_index=354, batch_size=256]

Epoch 1/10:  36%|████▎       | 354/991 [1:25:54<2:21:51, 13.36s/batch, batch_loss=13.9, batch_index=355, batch_size=256]

Epoch 1/10:  36%|████▎       | 355/991 [1:25:54<2:21:13, 13.32s/batch, batch_loss=13.9, batch_index=355, batch_size=256]

Epoch 1/10:  36%|████▎       | 355/991 [1:26:07<2:21:13, 13.32s/batch, batch_loss=20.4, batch_index=356, batch_size=256]

Epoch 1/10:  36%|████▎       | 356/991 [1:26:07<2:19:06, 13.14s/batch, batch_loss=20.4, batch_index=356, batch_size=256]

Epoch 1/10:  36%|████▎       | 356/991 [1:26:20<2:19:06, 13.14s/batch, batch_loss=26.6, batch_index=357, batch_size=256]

Epoch 1/10:  36%|████▎       | 357/991 [1:26:20<2:18:19, 13.09s/batch, batch_loss=26.6, batch_index=357, batch_size=256]

Epoch 1/10:  36%|████▎       | 357/991 [1:26:34<2:18:19, 13.09s/batch, batch_loss=19.9, batch_index=358, batch_size=256]

Epoch 1/10:  36%|████▎       | 358/991 [1:26:34<2:22:01, 13.46s/batch, batch_loss=19.9, batch_index=358, batch_size=256]

Epoch 1/10:  36%|████▎       | 358/991 [1:26:46<2:22:01, 13.46s/batch, batch_loss=7.26, batch_index=359, batch_size=256]

Epoch 1/10:  36%|████▎       | 359/991 [1:26:46<2:17:15, 13.03s/batch, batch_loss=7.26, batch_index=359, batch_size=256]

Epoch 1/10:  36%|████▎       | 359/991 [1:26:58<2:17:15, 13.03s/batch, batch_loss=12.6, batch_index=360, batch_size=256]

Epoch 1/10:  36%|████▎       | 360/991 [1:26:58<2:13:21, 12.68s/batch, batch_loss=12.6, batch_index=360, batch_size=256]

Epoch 1/10:  36%|████▎       | 360/991 [1:27:10<2:13:21, 12.68s/batch, batch_loss=33.4, batch_index=361, batch_size=256]

Epoch 1/10:  36%|████▎       | 361/991 [1:27:10<2:10:23, 12.42s/batch, batch_loss=33.4, batch_index=361, batch_size=256]

Epoch 1/10:  36%|█████         | 361/991 [1:27:24<2:10:23, 12.42s/batch, batch_loss=26, batch_index=362, batch_size=256]

Epoch 1/10:  37%|█████         | 362/991 [1:27:24<2:17:04, 13.08s/batch, batch_loss=26, batch_index=362, batch_size=256]

Epoch 1/10:  37%|████▍       | 362/991 [1:27:37<2:17:04, 13.08s/batch, batch_loss=14.1, batch_index=363, batch_size=256]

Epoch 1/10:  37%|████▍       | 363/991 [1:27:37<2:16:47, 13.07s/batch, batch_loss=14.1, batch_index=363, batch_size=256]

Epoch 1/10:  37%|████▍       | 363/991 [1:27:50<2:16:47, 13.07s/batch, batch_loss=19.1, batch_index=364, batch_size=256]

Epoch 1/10:  37%|████▍       | 364/991 [1:27:50<2:14:13, 12.84s/batch, batch_loss=19.1, batch_index=364, batch_size=256]

Epoch 1/10:  37%|█████▏        | 364/991 [1:28:03<2:14:13, 12.84s/batch, batch_loss=13, batch_index=365, batch_size=256]

Epoch 1/10:  37%|█████▏        | 365/991 [1:28:03<2:16:42, 13.10s/batch, batch_loss=13, batch_index=365, batch_size=256]

Epoch 1/10:  37%|█████▏        | 365/991 [1:28:17<2:16:42, 13.10s/batch, batch_loss=16, batch_index=366, batch_size=256]

Epoch 1/10:  37%|█████▏        | 366/991 [1:28:17<2:18:56, 13.34s/batch, batch_loss=16, batch_index=366, batch_size=256]

Epoch 1/10:  37%|█████▏        | 366/991 [1:28:30<2:18:56, 13.34s/batch, batch_loss=23, batch_index=367, batch_size=256]

Epoch 1/10:  37%|█████▏        | 367/991 [1:28:30<2:17:46, 13.25s/batch, batch_loss=23, batch_index=367, batch_size=256]

Epoch 1/10:  37%|████▍       | 367/991 [1:28:43<2:17:46, 13.25s/batch, batch_loss=26.5, batch_index=368, batch_size=256]

Epoch 1/10:  37%|████▍       | 368/991 [1:28:43<2:15:57, 13.09s/batch, batch_loss=26.5, batch_index=368, batch_size=256]

Epoch 1/10:  37%|████▍       | 368/991 [1:28:56<2:15:57, 13.09s/batch, batch_loss=14.1, batch_index=369, batch_size=256]

Epoch 1/10:  37%|████▍       | 369/991 [1:28:56<2:14:38, 12.99s/batch, batch_loss=14.1, batch_index=369, batch_size=256]

Epoch 1/10:  37%|███▎     | 369/991 [1:29:10<2:14:38, 12.99s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 1/10:  37%|███▎     | 370/991 [1:29:10<2:17:18, 13.27s/batch, batch_loss=1.21e+4, batch_index=370, batch_size=256]

Epoch 1/10:  37%|████▍       | 370/991 [1:29:25<2:17:18, 13.27s/batch, batch_loss=26.1, batch_index=371, batch_size=256]

Epoch 1/10:  37%|████▍       | 371/991 [1:29:25<2:22:08, 13.76s/batch, batch_loss=26.1, batch_index=371, batch_size=256]

Epoch 1/10:  37%|████▍       | 371/991 [1:29:39<2:22:08, 13.76s/batch, batch_loss=15.8, batch_index=372, batch_size=256]

Epoch 1/10:  38%|████▌       | 372/991 [1:29:39<2:24:11, 13.98s/batch, batch_loss=15.8, batch_index=372, batch_size=256]

Epoch 1/10:  38%|████▌       | 372/991 [1:29:51<2:24:11, 13.98s/batch, batch_loss=26.9, batch_index=373, batch_size=256]

Epoch 1/10:  38%|████▌       | 373/991 [1:29:51<2:19:13, 13.52s/batch, batch_loss=26.9, batch_index=373, batch_size=256]

Epoch 1/10:  38%|████▉        | 373/991 [1:30:03<2:19:13, 13.52s/batch, batch_loss=482, batch_index=374, batch_size=256]

Epoch 1/10:  38%|████▉        | 374/991 [1:30:03<2:13:32, 12.99s/batch, batch_loss=482, batch_index=374, batch_size=256]

Epoch 1/10:  38%|███▍     | 374/991 [1:30:15<2:13:32, 12.99s/batch, batch_loss=1.42e+3, batch_index=375, batch_size=256]

Epoch 1/10:  38%|███▍     | 375/991 [1:30:15<2:08:57, 12.56s/batch, batch_loss=1.42e+3, batch_index=375, batch_size=256]

Epoch 1/10:  38%|███▍     | 375/991 [1:30:26<2:08:57, 12.56s/batch, batch_loss=1.21e+3, batch_index=376, batch_size=256]

Epoch 1/10:  38%|███▍     | 376/991 [1:30:26<2:06:01, 12.30s/batch, batch_loss=1.21e+3, batch_index=376, batch_size=256]

Epoch 1/10:  38%|████▌       | 376/991 [1:30:38<2:06:01, 12.30s/batch, batch_loss=26.6, batch_index=377, batch_size=256]

Epoch 1/10:  38%|████▌       | 377/991 [1:30:38<2:03:39, 12.08s/batch, batch_loss=26.6, batch_index=377, batch_size=256]

Epoch 1/10:  38%|███▍     | 377/991 [1:30:50<2:03:39, 12.08s/batch, batch_loss=1.17e+3, batch_index=378, batch_size=256]

Epoch 1/10:  38%|███▍     | 378/991 [1:30:50<2:01:59, 11.94s/batch, batch_loss=1.17e+3, batch_index=378, batch_size=256]

Epoch 1/10:  38%|████▌       | 378/991 [1:31:01<2:01:59, 11.94s/batch, batch_loss=10.8, batch_index=379, batch_size=256]

Epoch 1/10:  38%|████▌       | 379/991 [1:31:01<2:00:40, 11.83s/batch, batch_loss=10.8, batch_index=379, batch_size=256]

Epoch 1/10:  38%|████▌       | 379/991 [1:31:13<2:00:40, 11.83s/batch, batch_loss=18.8, batch_index=380, batch_size=256]

Epoch 1/10:  38%|████▌       | 380/991 [1:31:13<2:00:40, 11.85s/batch, batch_loss=18.8, batch_index=380, batch_size=256]

Epoch 1/10:  38%|████▌       | 380/991 [1:31:25<2:00:40, 11.85s/batch, batch_loss=21.2, batch_index=381, batch_size=256]

Epoch 1/10:  38%|████▌       | 381/991 [1:31:25<1:59:10, 11.72s/batch, batch_loss=21.2, batch_index=381, batch_size=256]

Epoch 1/10:  38%|████▌       | 381/991 [1:31:36<1:59:10, 11.72s/batch, batch_loss=13.2, batch_index=382, batch_size=256]

Epoch 1/10:  39%|████▋       | 382/991 [1:31:36<1:58:34, 11.68s/batch, batch_loss=13.2, batch_index=382, batch_size=256]

Epoch 1/10:  39%|█████▍        | 382/991 [1:31:49<1:58:34, 11.68s/batch, batch_loss=12, batch_index=383, batch_size=256]

Epoch 1/10:  39%|█████▍        | 383/991 [1:31:49<2:00:53, 11.93s/batch, batch_loss=12, batch_index=383, batch_size=256]

Epoch 1/10:  39%|████▋       | 383/991 [1:32:02<2:00:53, 11.93s/batch, batch_loss=27.7, batch_index=384, batch_size=256]

Epoch 1/10:  39%|████▋       | 384/991 [1:32:02<2:06:19, 12.49s/batch, batch_loss=27.7, batch_index=384, batch_size=256]

Epoch 1/10:  39%|████▋       | 384/991 [1:32:15<2:06:19, 12.49s/batch, batch_loss=8.31, batch_index=385, batch_size=256]

Epoch 1/10:  39%|████▋       | 385/991 [1:32:15<2:07:01, 12.58s/batch, batch_loss=8.31, batch_index=385, batch_size=256]

Epoch 1/10:  39%|████▋       | 385/991 [1:32:28<2:07:01, 12.58s/batch, batch_loss=26.3, batch_index=386, batch_size=256]

Epoch 1/10:  39%|████▋       | 386/991 [1:32:28<2:08:55, 12.79s/batch, batch_loss=26.3, batch_index=386, batch_size=256]

Epoch 1/10:  39%|█████▍        | 386/991 [1:32:42<2:08:55, 12.79s/batch, batch_loss=48, batch_index=387, batch_size=256]

Epoch 1/10:  39%|█████▍        | 387/991 [1:32:42<2:10:06, 12.92s/batch, batch_loss=48, batch_index=387, batch_size=256]

Epoch 1/10:  39%|█████        | 387/991 [1:32:54<2:10:06, 12.92s/batch, batch_loss=817, batch_index=388, batch_size=256]

Epoch 1/10:  39%|█████        | 388/991 [1:32:54<2:08:13, 12.76s/batch, batch_loss=817, batch_index=388, batch_size=256]

Epoch 1/10:  39%|████▋       | 388/991 [1:33:06<2:08:13, 12.76s/batch, batch_loss=17.6, batch_index=389, batch_size=256]

Epoch 1/10:  39%|████▋       | 389/991 [1:33:06<2:04:15, 12.38s/batch, batch_loss=17.6, batch_index=389, batch_size=256]

Epoch 1/10:  39%|█████        | 389/991 [1:33:18<2:04:15, 12.38s/batch, batch_loss=881, batch_index=390, batch_size=256]

Epoch 1/10:  39%|█████        | 390/991 [1:33:18<2:05:26, 12.52s/batch, batch_loss=881, batch_index=390, batch_size=256]

Epoch 1/10:  39%|████▋       | 390/991 [1:33:32<2:05:26, 12.52s/batch, batch_loss=25.8, batch_index=391, batch_size=256]

Epoch 1/10:  39%|████▋       | 391/991 [1:33:32<2:07:02, 12.70s/batch, batch_loss=25.8, batch_index=391, batch_size=256]

Epoch 1/10:  39%|████▋       | 391/991 [1:33:45<2:07:02, 12.70s/batch, batch_loss=25.1, batch_index=392, batch_size=256]

Epoch 1/10:  40%|████▋       | 392/991 [1:33:45<2:09:05, 12.93s/batch, batch_loss=25.1, batch_index=392, batch_size=256]

Epoch 1/10:  40%|████▋       | 392/991 [1:34:01<2:09:05, 12.93s/batch, batch_loss=29.6, batch_index=393, batch_size=256]

Epoch 1/10:  40%|████▊       | 393/991 [1:34:01<2:17:26, 13.79s/batch, batch_loss=29.6, batch_index=393, batch_size=256]

Epoch 1/10:  40%|█████▏       | 393/991 [1:34:14<2:17:26, 13.79s/batch, batch_loss=613, batch_index=394, batch_size=256]

Epoch 1/10:  40%|█████▏       | 394/991 [1:34:14<2:14:21, 13.50s/batch, batch_loss=613, batch_index=394, batch_size=256]

Epoch 1/10:  40%|████▊       | 394/991 [1:34:26<2:14:21, 13.50s/batch, batch_loss=21.3, batch_index=395, batch_size=256]

Epoch 1/10:  40%|████▊       | 395/991 [1:34:26<2:10:28, 13.13s/batch, batch_loss=21.3, batch_index=395, batch_size=256]

Epoch 1/10:  40%|████▊       | 395/991 [1:34:39<2:10:28, 13.13s/batch, batch_loss=13.4, batch_index=396, batch_size=256]

Epoch 1/10:  40%|████▊       | 396/991 [1:34:39<2:09:33, 13.06s/batch, batch_loss=13.4, batch_index=396, batch_size=256]

Epoch 1/10:  40%|█████▌        | 396/991 [1:34:52<2:09:33, 13.06s/batch, batch_loss=19, batch_index=397, batch_size=256]

Epoch 1/10:  40%|█████▌        | 397/991 [1:34:52<2:09:54, 13.12s/batch, batch_loss=19, batch_index=397, batch_size=256]

Epoch 1/10:  40%|████▊       | 397/991 [1:35:05<2:09:54, 13.12s/batch, batch_loss=18.6, batch_index=398, batch_size=256]

Epoch 1/10:  40%|████▊       | 398/991 [1:35:05<2:09:35, 13.11s/batch, batch_loss=18.6, batch_index=398, batch_size=256]

Epoch 1/10:  40%|████▊       | 398/991 [1:35:17<2:09:35, 13.11s/batch, batch_loss=33.2, batch_index=399, batch_size=256]

Epoch 1/10:  40%|████▊       | 399/991 [1:35:17<2:06:03, 12.78s/batch, batch_loss=33.2, batch_index=399, batch_size=256]

Epoch 1/10:  40%|████▊       | 399/991 [1:35:31<2:06:03, 12.78s/batch, batch_loss=13.6, batch_index=400, batch_size=256]

Epoch 1/10:  40%|████▊       | 400/991 [1:35:31<2:07:53, 12.98s/batch, batch_loss=13.6, batch_index=400, batch_size=256]

Epoch 1/10:  40%|████▊       | 400/991 [1:35:43<2:07:53, 12.98s/batch, batch_loss=10.9, batch_index=401, batch_size=256]

Epoch 1/10:  40%|████▊       | 401/991 [1:35:43<2:07:01, 12.92s/batch, batch_loss=10.9, batch_index=401, batch_size=256]

Epoch 1/10:  40%|████▊       | 401/991 [1:35:59<2:07:01, 12.92s/batch, batch_loss=23.6, batch_index=402, batch_size=256]

Epoch 1/10:  41%|████▊       | 402/991 [1:35:59<2:15:11, 13.77s/batch, batch_loss=23.6, batch_index=402, batch_size=256]

Epoch 1/10:  41%|████▊       | 402/991 [1:36:12<2:15:11, 13.77s/batch, batch_loss=18.8, batch_index=403, batch_size=256]

Epoch 1/10:  41%|████▉       | 403/991 [1:36:12<2:10:45, 13.34s/batch, batch_loss=18.8, batch_index=403, batch_size=256]

Epoch 1/10:  41%|████▉       | 403/991 [1:36:23<2:10:45, 13.34s/batch, batch_loss=12.8, batch_index=404, batch_size=256]

Epoch 1/10:  41%|████▉       | 404/991 [1:36:23<2:06:17, 12.91s/batch, batch_loss=12.8, batch_index=404, batch_size=256]

Epoch 1/10:  41%|█████▋        | 404/991 [1:36:37<2:06:17, 12.91s/batch, batch_loss=12, batch_index=405, batch_size=256]

Epoch 1/10:  41%|█████▋        | 405/991 [1:36:37<2:07:15, 13.03s/batch, batch_loss=12, batch_index=405, batch_size=256]

Epoch 1/10:  41%|████▉       | 405/991 [1:36:49<2:07:15, 13.03s/batch, batch_loss=8.81, batch_index=406, batch_size=256]

Epoch 1/10:  41%|████▉       | 406/991 [1:36:49<2:04:34, 12.78s/batch, batch_loss=8.81, batch_index=406, batch_size=256]

Epoch 1/10:  41%|████▉       | 406/991 [1:37:02<2:04:34, 12.78s/batch, batch_loss=24.5, batch_index=407, batch_size=256]

Epoch 1/10:  41%|████▉       | 407/991 [1:37:02<2:05:38, 12.91s/batch, batch_loss=24.5, batch_index=407, batch_size=256]

Epoch 1/10:  41%|████▉       | 407/991 [1:37:15<2:05:38, 12.91s/batch, batch_loss=9.63, batch_index=408, batch_size=256]

Epoch 1/10:  41%|████▉       | 408/991 [1:37:15<2:05:10, 12.88s/batch, batch_loss=9.63, batch_index=408, batch_size=256]

Epoch 1/10:  41%|████▉       | 408/991 [1:37:28<2:05:10, 12.88s/batch, batch_loss=24.1, batch_index=409, batch_size=256]

Epoch 1/10:  41%|████▉       | 409/991 [1:37:28<2:06:18, 13.02s/batch, batch_loss=24.1, batch_index=409, batch_size=256]

Epoch 1/10:  41%|████▉       | 409/991 [1:37:41<2:06:18, 13.02s/batch, batch_loss=36.5, batch_index=410, batch_size=256]

Epoch 1/10:  41%|████▉       | 410/991 [1:37:41<2:05:06, 12.92s/batch, batch_loss=36.5, batch_index=410, batch_size=256]

Epoch 1/10:  41%|████▉       | 410/991 [1:37:53<2:05:06, 12.92s/batch, batch_loss=18.2, batch_index=411, batch_size=256]

Epoch 1/10:  41%|████▉       | 411/991 [1:37:53<2:03:25, 12.77s/batch, batch_loss=18.2, batch_index=411, batch_size=256]

Epoch 1/10:  41%|█████▊        | 411/991 [1:38:10<2:03:25, 12.77s/batch, batch_loss=20, batch_index=412, batch_size=256]

Epoch 1/10:  42%|█████▊        | 412/991 [1:38:10<2:13:28, 13.83s/batch, batch_loss=20, batch_index=412, batch_size=256]

Epoch 1/10:  42%|████▉       | 412/991 [1:38:24<2:13:28, 13.83s/batch, batch_loss=25.9, batch_index=413, batch_size=256]

Epoch 1/10:  42%|█████       | 413/991 [1:38:24<2:14:09, 13.93s/batch, batch_loss=25.9, batch_index=413, batch_size=256]

Epoch 1/10:  42%|█████       | 413/991 [1:38:37<2:14:09, 13.93s/batch, batch_loss=16.9, batch_index=414, batch_size=256]

Epoch 1/10:  42%|█████       | 414/991 [1:38:37<2:12:08, 13.74s/batch, batch_loss=16.9, batch_index=414, batch_size=256]

Epoch 1/10:  42%|█████       | 414/991 [1:38:51<2:12:08, 13.74s/batch, batch_loss=12.5, batch_index=415, batch_size=256]

Epoch 1/10:  42%|█████       | 415/991 [1:38:51<2:11:02, 13.65s/batch, batch_loss=12.5, batch_index=415, batch_size=256]

Epoch 1/10:  42%|█████       | 415/991 [1:39:04<2:11:02, 13.65s/batch, batch_loss=13.4, batch_index=416, batch_size=256]

Epoch 1/10:  42%|█████       | 416/991 [1:39:04<2:08:38, 13.42s/batch, batch_loss=13.4, batch_index=416, batch_size=256]

Epoch 1/10:  42%|█████       | 416/991 [1:39:17<2:08:38, 13.42s/batch, batch_loss=8.66, batch_index=417, batch_size=256]

Epoch 1/10:  42%|█████       | 417/991 [1:39:17<2:09:01, 13.49s/batch, batch_loss=8.66, batch_index=417, batch_size=256]

Epoch 1/10:  42%|█████       | 417/991 [1:39:30<2:09:01, 13.49s/batch, batch_loss=16.2, batch_index=418, batch_size=256]

Epoch 1/10:  42%|█████       | 418/991 [1:39:30<2:06:31, 13.25s/batch, batch_loss=16.2, batch_index=418, batch_size=256]

Epoch 1/10:  42%|████▏     | 418/991 [1:39:43<2:06:31, 13.25s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 1/10:  42%|████▏     | 419/991 [1:39:43<2:06:55, 13.31s/batch, batch_loss=1.3e+3, batch_index=419, batch_size=256]

Epoch 1/10:  42%|█████       | 419/991 [1:39:57<2:06:55, 13.31s/batch, batch_loss=23.8, batch_index=420, batch_size=256]

Epoch 1/10:  42%|█████       | 420/991 [1:39:57<2:06:41, 13.31s/batch, batch_loss=23.8, batch_index=420, batch_size=256]

Epoch 1/10:  42%|█████       | 420/991 [1:40:13<2:06:41, 13.31s/batch, batch_loss=14.7, batch_index=421, batch_size=256]

Epoch 1/10:  42%|█████       | 421/991 [1:40:13<2:13:56, 14.10s/batch, batch_loss=14.7, batch_index=421, batch_size=256]

Epoch 1/10:  42%|█████       | 421/991 [1:40:26<2:13:56, 14.10s/batch, batch_loss=9.93, batch_index=422, batch_size=256]

Epoch 1/10:  43%|█████       | 422/991 [1:40:26<2:11:51, 13.90s/batch, batch_loss=9.93, batch_index=422, batch_size=256]

Epoch 1/10:  43%|█████       | 422/991 [1:40:39<2:11:51, 13.90s/batch, batch_loss=11.8, batch_index=423, batch_size=256]

Epoch 1/10:  43%|█████       | 423/991 [1:40:39<2:09:17, 13.66s/batch, batch_loss=11.8, batch_index=423, batch_size=256]

Epoch 1/10:  43%|█████▉        | 423/991 [1:40:52<2:09:17, 13.66s/batch, batch_loss=13, batch_index=424, batch_size=256]

Epoch 1/10:  43%|█████▉        | 424/991 [1:40:52<2:07:26, 13.49s/batch, batch_loss=13, batch_index=424, batch_size=256]

Epoch 1/10:  43%|█████▏      | 424/991 [1:41:05<2:07:26, 13.49s/batch, batch_loss=8.09, batch_index=425, batch_size=256]

Epoch 1/10:  43%|█████▏      | 425/991 [1:41:05<2:06:31, 13.41s/batch, batch_loss=8.09, batch_index=425, batch_size=256]

Epoch 1/10:  43%|█████▏      | 425/991 [1:41:18<2:06:31, 13.41s/batch, batch_loss=2.69, batch_index=426, batch_size=256]

Epoch 1/10:  43%|█████▏      | 426/991 [1:41:18<2:05:22, 13.31s/batch, batch_loss=2.69, batch_index=426, batch_size=256]

Epoch 1/10:  43%|█████▏      | 426/991 [1:41:32<2:05:22, 13.31s/batch, batch_loss=12.7, batch_index=427, batch_size=256]

Epoch 1/10:  43%|█████▏      | 427/991 [1:41:32<2:05:37, 13.36s/batch, batch_loss=12.7, batch_index=427, batch_size=256]

Epoch 1/10:  43%|██████        | 427/991 [1:41:46<2:05:37, 13.36s/batch, batch_loss=30, batch_index=428, batch_size=256]

Epoch 1/10:  43%|██████        | 428/991 [1:41:46<2:06:51, 13.52s/batch, batch_loss=30, batch_index=428, batch_size=256]

Epoch 1/10:  43%|█████▏      | 428/991 [1:42:03<2:06:51, 13.52s/batch, batch_loss=46.2, batch_index=429, batch_size=256]

Epoch 1/10:  43%|█████▏      | 429/991 [1:42:03<2:15:42, 14.49s/batch, batch_loss=46.2, batch_index=429, batch_size=256]

Epoch 1/10:  43%|███▉     | 429/991 [1:42:15<2:15:42, 14.49s/batch, batch_loss=9.29e+3, batch_index=430, batch_size=256]

Epoch 1/10:  43%|███▉     | 430/991 [1:42:15<2:10:13, 13.93s/batch, batch_loss=9.29e+3, batch_index=430, batch_size=256]

Epoch 1/10:  43%|█████▏      | 430/991 [1:42:27<2:10:13, 13.93s/batch, batch_loss=29.5, batch_index=431, batch_size=256]

Epoch 1/10:  43%|█████▏      | 431/991 [1:42:27<2:04:21, 13.32s/batch, batch_loss=29.5, batch_index=431, batch_size=256]

Epoch 1/10:  43%|█████▏      | 431/991 [1:42:39<2:04:21, 13.32s/batch, batch_loss=29.6, batch_index=432, batch_size=256]

Epoch 1/10:  44%|█████▏      | 432/991 [1:42:39<2:01:02, 12.99s/batch, batch_loss=29.6, batch_index=432, batch_size=256]

Epoch 1/10:  44%|█████▏      | 432/991 [1:42:51<2:01:02, 12.99s/batch, batch_loss=15.3, batch_index=433, batch_size=256]

Epoch 1/10:  44%|█████▏      | 433/991 [1:42:51<1:58:17, 12.72s/batch, batch_loss=15.3, batch_index=433, batch_size=256]

Epoch 1/10:  44%|██████        | 433/991 [1:43:04<1:58:17, 12.72s/batch, batch_loss=30, batch_index=434, batch_size=256]

Epoch 1/10:  44%|██████▏       | 434/991 [1:43:04<1:58:14, 12.74s/batch, batch_loss=30, batch_index=434, batch_size=256]

Epoch 1/10:  44%|█████▎      | 434/991 [1:43:17<1:58:14, 12.74s/batch, batch_loss=15.9, batch_index=435, batch_size=256]

Epoch 1/10:  44%|█████▎      | 435/991 [1:43:17<1:58:55, 12.83s/batch, batch_loss=15.9, batch_index=435, batch_size=256]

Epoch 1/10:  44%|█████▎      | 435/991 [1:43:32<1:58:55, 12.83s/batch, batch_loss=20.5, batch_index=436, batch_size=256]

Epoch 1/10:  44%|█████▎      | 436/991 [1:43:32<2:04:35, 13.47s/batch, batch_loss=20.5, batch_index=436, batch_size=256]

Epoch 1/10:  44%|█████▎      | 436/991 [1:43:46<2:04:35, 13.47s/batch, batch_loss=21.9, batch_index=437, batch_size=256]

Epoch 1/10:  44%|█████▎      | 437/991 [1:43:46<2:04:53, 13.53s/batch, batch_loss=21.9, batch_index=437, batch_size=256]

Epoch 1/10:  44%|█████▎      | 437/991 [1:43:59<2:04:53, 13.53s/batch, batch_loss=40.9, batch_index=438, batch_size=256]

Epoch 1/10:  44%|█████▎      | 438/991 [1:43:59<2:03:51, 13.44s/batch, batch_loss=40.9, batch_index=438, batch_size=256]

Epoch 1/10:  44%|█████▎      | 438/991 [1:44:12<2:03:51, 13.44s/batch, batch_loss=21.2, batch_index=439, batch_size=256]

Epoch 1/10:  44%|█████▎      | 439/991 [1:44:12<2:02:38, 13.33s/batch, batch_loss=21.2, batch_index=439, batch_size=256]

Epoch 1/10:  44%|█████▎      | 439/991 [1:44:28<2:02:38, 13.33s/batch, batch_loss=42.6, batch_index=440, batch_size=256]

Epoch 1/10:  44%|█████▎      | 440/991 [1:44:28<2:09:14, 14.07s/batch, batch_loss=42.6, batch_index=440, batch_size=256]

Epoch 1/10:  44%|█████▎      | 440/991 [1:44:42<2:09:14, 14.07s/batch, batch_loss=29.5, batch_index=441, batch_size=256]

Epoch 1/10:  45%|█████▎      | 441/991 [1:44:42<2:07:27, 13.90s/batch, batch_loss=29.5, batch_index=441, batch_size=256]

Epoch 1/10:  45%|█████▎      | 441/991 [1:44:55<2:07:27, 13.90s/batch, batch_loss=21.3, batch_index=442, batch_size=256]

Epoch 1/10:  45%|█████▎      | 442/991 [1:44:55<2:05:37, 13.73s/batch, batch_loss=21.3, batch_index=442, batch_size=256]

Epoch 1/10:  45%|█████▎      | 442/991 [1:45:08<2:05:37, 13.73s/batch, batch_loss=25.8, batch_index=443, batch_size=256]

Epoch 1/10:  45%|█████▎      | 443/991 [1:45:08<2:04:47, 13.66s/batch, batch_loss=25.8, batch_index=443, batch_size=256]

Epoch 1/10:  45%|█████▎      | 443/991 [1:45:22<2:04:47, 13.66s/batch, batch_loss=22.2, batch_index=444, batch_size=256]

Epoch 1/10:  45%|█████▍      | 444/991 [1:45:22<2:04:38, 13.67s/batch, batch_loss=22.2, batch_index=444, batch_size=256]

Epoch 1/10:  45%|█████▍      | 444/991 [1:45:35<2:04:38, 13.67s/batch, batch_loss=26.6, batch_index=445, batch_size=256]

Epoch 1/10:  45%|█████▍      | 445/991 [1:45:35<2:02:24, 13.45s/batch, batch_loss=26.6, batch_index=445, batch_size=256]

Epoch 1/10:  45%|█████▍      | 445/991 [1:45:48<2:02:24, 13.45s/batch, batch_loss=34.6, batch_index=446, batch_size=256]

Epoch 1/10:  45%|█████▍      | 446/991 [1:45:48<2:02:00, 13.43s/batch, batch_loss=34.6, batch_index=446, batch_size=256]

Epoch 1/10:  45%|██████▎       | 446/991 [1:46:01<2:02:00, 13.43s/batch, batch_loss=16, batch_index=447, batch_size=256]

Epoch 1/10:  45%|██████▎       | 447/991 [1:46:01<2:00:09, 13.25s/batch, batch_loss=16, batch_index=447, batch_size=256]

Epoch 1/10:  45%|██████▎       | 447/991 [1:46:14<2:00:09, 13.25s/batch, batch_loss=25, batch_index=448, batch_size=256]

Epoch 1/10:  45%|██████▎       | 448/991 [1:46:14<1:59:08, 13.17s/batch, batch_loss=25, batch_index=448, batch_size=256]

Epoch 1/10:  45%|█████▍      | 448/991 [1:46:27<1:59:08, 13.17s/batch, batch_loss=27.1, batch_index=449, batch_size=256]

Epoch 1/10:  45%|█████▍      | 449/991 [1:46:27<1:58:47, 13.15s/batch, batch_loss=27.1, batch_index=449, batch_size=256]

Epoch 1/10:  45%|█████▍      | 449/991 [1:46:40<1:58:47, 13.15s/batch, batch_loss=35.6, batch_index=450, batch_size=256]

Epoch 1/10:  45%|█████▍      | 450/991 [1:46:40<1:56:06, 12.88s/batch, batch_loss=35.6, batch_index=450, batch_size=256]

Epoch 1/10:  45%|█████▍      | 450/991 [1:46:52<1:56:06, 12.88s/batch, batch_loss=25.1, batch_index=451, batch_size=256]

Epoch 1/10:  46%|█████▍      | 451/991 [1:46:52<1:54:14, 12.69s/batch, batch_loss=25.1, batch_index=451, batch_size=256]

Epoch 1/10:  46%|█████▍      | 451/991 [1:47:04<1:54:14, 12.69s/batch, batch_loss=26.2, batch_index=452, batch_size=256]

Epoch 1/10:  46%|█████▍      | 452/991 [1:47:04<1:52:22, 12.51s/batch, batch_loss=26.2, batch_index=452, batch_size=256]

Epoch 1/10:  46%|█████▍      | 452/991 [1:47:16<1:52:22, 12.51s/batch, batch_loss=32.5, batch_index=453, batch_size=256]

Epoch 1/10:  46%|█████▍      | 453/991 [1:47:16<1:49:59, 12.27s/batch, batch_loss=32.5, batch_index=453, batch_size=256]

Epoch 1/10:  46%|████     | 453/991 [1:47:27<1:49:59, 12.27s/batch, batch_loss=7.24e+3, batch_index=454, batch_size=256]

Epoch 1/10:  46%|████     | 454/991 [1:47:27<1:48:51, 12.16s/batch, batch_loss=7.24e+3, batch_index=454, batch_size=256]

Epoch 1/10:  46%|█████▍      | 454/991 [1:47:40<1:48:51, 12.16s/batch, batch_loss=62.2, batch_index=455, batch_size=256]

Epoch 1/10:  46%|█████▌      | 455/991 [1:47:40<1:48:43, 12.17s/batch, batch_loss=62.2, batch_index=455, batch_size=256]

Epoch 1/10:  46%|█████▌      | 455/991 [1:47:52<1:48:43, 12.17s/batch, batch_loss=37.5, batch_index=456, batch_size=256]

Epoch 1/10:  46%|█████▌      | 456/991 [1:47:52<1:48:42, 12.19s/batch, batch_loss=37.5, batch_index=456, batch_size=256]

Epoch 1/10:  46%|█████▌      | 456/991 [1:48:04<1:48:42, 12.19s/batch, batch_loss=21.3, batch_index=457, batch_size=256]

Epoch 1/10:  46%|█████▌      | 457/991 [1:48:04<1:47:46, 12.11s/batch, batch_loss=21.3, batch_index=457, batch_size=256]

Epoch 1/10:  46%|█████▌      | 457/991 [1:48:16<1:47:46, 12.11s/batch, batch_loss=28.9, batch_index=458, batch_size=256]

Epoch 1/10:  46%|█████▌      | 458/991 [1:48:16<1:46:52, 12.03s/batch, batch_loss=28.9, batch_index=458, batch_size=256]

Epoch 1/10:  46%|██████▍       | 458/991 [1:48:30<1:46:52, 12.03s/batch, batch_loss=47, batch_index=459, batch_size=256]

Epoch 1/10:  46%|██████▍       | 459/991 [1:48:30<1:53:06, 12.76s/batch, batch_loss=47, batch_index=459, batch_size=256]

Epoch 1/10:  46%|█████▌      | 459/991 [1:48:42<1:53:06, 12.76s/batch, batch_loss=45.2, batch_index=460, batch_size=256]

Epoch 1/10:  46%|█████▌      | 460/991 [1:48:42<1:50:06, 12.44s/batch, batch_loss=45.2, batch_index=460, batch_size=256]

Epoch 1/10:  46%|█████▌      | 460/991 [1:48:54<1:50:06, 12.44s/batch, batch_loss=70.7, batch_index=461, batch_size=256]

Epoch 1/10:  47%|█████▌      | 461/991 [1:48:54<1:48:34, 12.29s/batch, batch_loss=70.7, batch_index=461, batch_size=256]

Epoch 1/10:  47%|██████▌       | 461/991 [1:49:06<1:48:34, 12.29s/batch, batch_loss=21, batch_index=462, batch_size=256]

Epoch 1/10:  47%|██████▌       | 462/991 [1:49:06<1:48:00, 12.25s/batch, batch_loss=21, batch_index=462, batch_size=256]

Epoch 1/10:  47%|████▏    | 462/991 [1:49:18<1:48:00, 12.25s/batch, batch_loss=6.21e+4, batch_index=463, batch_size=256]

Epoch 1/10:  47%|████▏    | 463/991 [1:49:18<1:47:21, 12.20s/batch, batch_loss=6.21e+4, batch_index=463, batch_size=256]

Epoch 1/10:  47%|█████▌      | 463/991 [1:49:31<1:47:21, 12.20s/batch, batch_loss=22.3, batch_index=464, batch_size=256]

Epoch 1/10:  47%|█████▌      | 464/991 [1:49:31<1:47:57, 12.29s/batch, batch_loss=22.3, batch_index=464, batch_size=256]

Epoch 1/10:  47%|█████▌      | 464/991 [1:49:43<1:47:57, 12.29s/batch, batch_loss=23.7, batch_index=465, batch_size=256]

Epoch 1/10:  47%|█████▋      | 465/991 [1:49:43<1:47:45, 12.29s/batch, batch_loss=23.7, batch_index=465, batch_size=256]

Epoch 1/10:  47%|█████▋      | 465/991 [1:49:55<1:47:45, 12.29s/batch, batch_loss=18.2, batch_index=466, batch_size=256]

Epoch 1/10:  47%|█████▋      | 466/991 [1:49:55<1:47:13, 12.25s/batch, batch_loss=18.2, batch_index=466, batch_size=256]

Epoch 1/10:  47%|█████▋      | 466/991 [1:50:07<1:47:13, 12.25s/batch, batch_loss=16.1, batch_index=467, batch_size=256]

Epoch 1/10:  47%|█████▋      | 467/991 [1:50:07<1:46:16, 12.17s/batch, batch_loss=16.1, batch_index=467, batch_size=256]

Epoch 1/10:  47%|█████▋      | 467/991 [1:50:20<1:46:16, 12.17s/batch, batch_loss=25.6, batch_index=468, batch_size=256]

Epoch 1/10:  47%|█████▋      | 468/991 [1:50:20<1:47:09, 12.29s/batch, batch_loss=25.6, batch_index=468, batch_size=256]

Epoch 1/10:  47%|█████▋      | 468/991 [1:50:31<1:47:09, 12.29s/batch, batch_loss=32.9, batch_index=469, batch_size=256]

Epoch 1/10:  47%|█████▋      | 469/991 [1:50:31<1:45:59, 12.18s/batch, batch_loss=32.9, batch_index=469, batch_size=256]

Epoch 1/10:  47%|█████▋      | 469/991 [1:50:46<1:45:59, 12.18s/batch, batch_loss=22.6, batch_index=470, batch_size=256]

Epoch 1/10:  47%|█████▋      | 470/991 [1:50:46<1:52:10, 12.92s/batch, batch_loss=22.6, batch_index=470, batch_size=256]

Epoch 1/10:  47%|█████▋      | 470/991 [1:50:59<1:52:10, 12.92s/batch, batch_loss=42.4, batch_index=471, batch_size=256]

Epoch 1/10:  48%|█████▋      | 471/991 [1:50:59<1:51:25, 12.86s/batch, batch_loss=42.4, batch_index=471, batch_size=256]

Epoch 1/10:  48%|█████▋      | 471/991 [1:51:12<1:51:25, 12.86s/batch, batch_loss=40.7, batch_index=472, batch_size=256]

Epoch 1/10:  48%|█████▋      | 472/991 [1:51:12<1:52:41, 13.03s/batch, batch_loss=40.7, batch_index=472, batch_size=256]

Epoch 1/10:  48%|█████▋      | 472/991 [1:51:25<1:52:41, 13.03s/batch, batch_loss=24.7, batch_index=473, batch_size=256]

Epoch 1/10:  48%|█████▋      | 473/991 [1:51:25<1:52:31, 13.03s/batch, batch_loss=24.7, batch_index=473, batch_size=256]

Epoch 1/10:  48%|█████▋      | 473/991 [1:51:38<1:52:31, 13.03s/batch, batch_loss=18.2, batch_index=474, batch_size=256]

Epoch 1/10:  48%|█████▋      | 474/991 [1:51:38<1:51:52, 12.98s/batch, batch_loss=18.2, batch_index=474, batch_size=256]

Epoch 1/10:  48%|████▎    | 474/991 [1:51:52<1:51:52, 12.98s/batch, batch_loss=2.42e+3, batch_index=475, batch_size=256]

Epoch 1/10:  48%|████▎    | 475/991 [1:51:52<1:54:15, 13.29s/batch, batch_loss=2.42e+3, batch_index=475, batch_size=256]

Epoch 1/10:  48%|██████▋       | 475/991 [1:52:05<1:54:15, 13.29s/batch, batch_loss=35, batch_index=476, batch_size=256]

Epoch 1/10:  48%|██████▋       | 476/991 [1:52:05<1:52:49, 13.14s/batch, batch_loss=35, batch_index=476, batch_size=256]

Epoch 1/10:  48%|█████▊      | 476/991 [1:52:18<1:52:49, 13.14s/batch, batch_loss=36.3, batch_index=477, batch_size=256]

Epoch 1/10:  48%|█████▊      | 477/991 [1:52:18<1:51:25, 13.01s/batch, batch_loss=36.3, batch_index=477, batch_size=256]

Epoch 1/10:  48%|█████▊      | 477/991 [1:52:30<1:51:25, 13.01s/batch, batch_loss=22.5, batch_index=478, batch_size=256]

Epoch 1/10:  48%|█████▊      | 478/991 [1:52:30<1:50:50, 12.96s/batch, batch_loss=22.5, batch_index=478, batch_size=256]

Epoch 1/10:  48%|█████▊      | 478/991 [1:52:43<1:50:50, 12.96s/batch, batch_loss=31.1, batch_index=479, batch_size=256]

Epoch 1/10:  48%|█████▊      | 479/991 [1:52:43<1:50:43, 12.98s/batch, batch_loss=31.1, batch_index=479, batch_size=256]

Epoch 1/10:  48%|█████▊      | 479/991 [1:52:57<1:50:43, 12.98s/batch, batch_loss=25.3, batch_index=480, batch_size=256]

Epoch 1/10:  48%|█████▊      | 480/991 [1:52:57<1:52:29, 13.21s/batch, batch_loss=25.3, batch_index=480, batch_size=256]

Epoch 1/10:  48%|█████▊      | 480/991 [1:53:10<1:52:29, 13.21s/batch, batch_loss=33.7, batch_index=481, batch_size=256]

Epoch 1/10:  49%|█████▊      | 481/991 [1:53:10<1:51:45, 13.15s/batch, batch_loss=33.7, batch_index=481, batch_size=256]

Epoch 1/10:  49%|█████▊      | 481/991 [1:53:23<1:51:45, 13.15s/batch, batch_loss=33.3, batch_index=482, batch_size=256]

Epoch 1/10:  49%|█████▊      | 482/991 [1:53:23<1:50:45, 13.06s/batch, batch_loss=33.3, batch_index=482, batch_size=256]

Epoch 1/10:  49%|█████▊      | 482/991 [1:53:36<1:50:45, 13.06s/batch, batch_loss=31.8, batch_index=483, batch_size=256]

Epoch 1/10:  49%|█████▊      | 483/991 [1:53:36<1:50:08, 13.01s/batch, batch_loss=31.8, batch_index=483, batch_size=256]

Epoch 1/10:  49%|█████▊      | 483/991 [1:53:50<1:50:08, 13.01s/batch, batch_loss=31.2, batch_index=484, batch_size=256]

Epoch 1/10:  49%|█████▊      | 484/991 [1:53:50<1:52:20, 13.30s/batch, batch_loss=31.2, batch_index=484, batch_size=256]

Epoch 1/10:  49%|█████▊      | 484/991 [1:54:04<1:52:20, 13.30s/batch, batch_loss=15.9, batch_index=485, batch_size=256]

Epoch 1/10:  49%|█████▊      | 485/991 [1:54:04<1:53:36, 13.47s/batch, batch_loss=15.9, batch_index=485, batch_size=256]

Epoch 1/10:  49%|██████▊       | 485/991 [1:54:17<1:53:36, 13.47s/batch, batch_loss=65, batch_index=486, batch_size=256]

Epoch 1/10:  49%|██████▊       | 486/991 [1:54:17<1:51:54, 13.30s/batch, batch_loss=65, batch_index=486, batch_size=256]

Epoch 1/10:  49%|█████▉      | 486/991 [1:54:30<1:51:54, 13.30s/batch, batch_loss=16.9, batch_index=487, batch_size=256]

Epoch 1/10:  49%|█████▉      | 487/991 [1:54:30<1:50:47, 13.19s/batch, batch_loss=16.9, batch_index=487, batch_size=256]

Epoch 1/10:  49%|█████▉      | 487/991 [1:54:42<1:50:47, 13.19s/batch, batch_loss=11.4, batch_index=488, batch_size=256]

Epoch 1/10:  49%|█████▉      | 488/991 [1:54:42<1:49:17, 13.04s/batch, batch_loss=11.4, batch_index=488, batch_size=256]

Epoch 1/10:  49%|█████▉      | 488/991 [1:54:57<1:49:17, 13.04s/batch, batch_loss=13.5, batch_index=489, batch_size=256]

Epoch 1/10:  49%|█████▉      | 489/991 [1:54:57<1:53:01, 13.51s/batch, batch_loss=13.5, batch_index=489, batch_size=256]

Epoch 1/10:  49%|█████▉      | 489/991 [1:55:10<1:53:01, 13.51s/batch, batch_loss=12.6, batch_index=490, batch_size=256]

Epoch 1/10:  49%|█████▉      | 490/991 [1:55:10<1:52:51, 13.52s/batch, batch_loss=12.6, batch_index=490, batch_size=256]

Epoch 1/10:  49%|█████▉      | 490/991 [1:55:24<1:52:51, 13.52s/batch, batch_loss=26.9, batch_index=491, batch_size=256]

Epoch 1/10:  50%|█████▉      | 491/991 [1:55:24<1:53:21, 13.60s/batch, batch_loss=26.9, batch_index=491, batch_size=256]

Epoch 1/10:  50%|█████▉      | 491/991 [1:55:38<1:53:21, 13.60s/batch, batch_loss=42.2, batch_index=492, batch_size=256]

Epoch 1/10:  50%|█████▉      | 492/991 [1:55:38<1:52:43, 13.55s/batch, batch_loss=42.2, batch_index=492, batch_size=256]

Epoch 1/10:  50%|█████▉      | 492/991 [1:55:50<1:52:43, 13.55s/batch, batch_loss=49.8, batch_index=493, batch_size=256]

Epoch 1/10:  50%|█████▉      | 493/991 [1:55:50<1:50:30, 13.31s/batch, batch_loss=49.8, batch_index=493, batch_size=256]

Epoch 1/10:  50%|█████▉      | 493/991 [1:56:05<1:50:30, 13.31s/batch, batch_loss=15.8, batch_index=494, batch_size=256]

Epoch 1/10:  50%|█████▉      | 494/991 [1:56:05<1:52:05, 13.53s/batch, batch_loss=15.8, batch_index=494, batch_size=256]

Epoch 1/10:  50%|████▍    | 494/991 [1:56:18<1:52:05, 13.53s/batch, batch_loss=8.53e+4, batch_index=495, batch_size=256]

Epoch 1/10:  50%|████▍    | 495/991 [1:56:18<1:52:14, 13.58s/batch, batch_loss=8.53e+4, batch_index=495, batch_size=256]

Epoch 1/10:  50%|█████▉      | 495/991 [1:56:32<1:52:14, 13.58s/batch, batch_loss=17.5, batch_index=496, batch_size=256]

Epoch 1/10:  50%|██████      | 496/991 [1:56:32<1:52:37, 13.65s/batch, batch_loss=17.5, batch_index=496, batch_size=256]

Epoch 1/10:  50%|██████▌      | 496/991 [1:56:45<1:52:37, 13.65s/batch, batch_loss=175, batch_index=497, batch_size=256]

Epoch 1/10:  50%|██████▌      | 497/991 [1:56:45<1:50:29, 13.42s/batch, batch_loss=175, batch_index=497, batch_size=256]

Epoch 1/10:  50%|██████      | 497/991 [1:56:58<1:50:29, 13.42s/batch, batch_loss=17.8, batch_index=498, batch_size=256]

Epoch 1/10:  50%|██████      | 498/991 [1:56:58<1:50:23, 13.44s/batch, batch_loss=17.8, batch_index=498, batch_size=256]

Epoch 1/10:  50%|██████▌      | 498/991 [1:57:12<1:50:23, 13.44s/batch, batch_loss=411, batch_index=499, batch_size=256]

Epoch 1/10:  50%|██████▌      | 499/991 [1:57:12<1:51:25, 13.59s/batch, batch_loss=411, batch_index=499, batch_size=256]

Epoch 1/10:  50%|██████      | 499/991 [1:57:27<1:51:25, 13.59s/batch, batch_loss=18.2, batch_index=500, batch_size=256]

Epoch 1/10:  50%|██████      | 500/991 [1:57:27<1:54:55, 14.04s/batch, batch_loss=18.2, batch_index=500, batch_size=256]

Epoch 1/10:  50%|██████      | 500/991 [1:57:40<1:54:55, 14.04s/batch, batch_loss=9.33, batch_index=501, batch_size=256]

Epoch 1/10:  51%|██████      | 501/991 [1:57:40<1:52:10, 13.73s/batch, batch_loss=9.33, batch_index=501, batch_size=256]

Epoch 1/10:  51%|██████      | 501/991 [1:57:53<1:52:10, 13.73s/batch, batch_loss=9.95, batch_index=502, batch_size=256]

Epoch 1/10:  51%|██████      | 502/991 [1:57:53<1:50:11, 13.52s/batch, batch_loss=9.95, batch_index=502, batch_size=256]

Epoch 1/10:  51%|██████      | 502/991 [1:58:07<1:50:11, 13.52s/batch, batch_loss=26.9, batch_index=503, batch_size=256]

Epoch 1/10:  51%|██████      | 503/991 [1:58:07<1:50:10, 13.55s/batch, batch_loss=26.9, batch_index=503, batch_size=256]

Epoch 1/10:  51%|██████      | 503/991 [1:58:21<1:50:10, 13.55s/batch, batch_loss=13.2, batch_index=504, batch_size=256]

Epoch 1/10:  51%|██████      | 504/991 [1:58:21<1:49:53, 13.54s/batch, batch_loss=13.2, batch_index=504, batch_size=256]

Epoch 1/10:  51%|██████      | 504/991 [1:58:36<1:49:53, 13.54s/batch, batch_loss=11.6, batch_index=505, batch_size=256]

Epoch 1/10:  51%|██████      | 505/991 [1:58:36<1:54:20, 14.12s/batch, batch_loss=11.6, batch_index=505, batch_size=256]

Epoch 1/10:  51%|██████      | 505/991 [1:58:49<1:54:20, 14.12s/batch, batch_loss=16.7, batch_index=506, batch_size=256]

Epoch 1/10:  51%|██████▏     | 506/991 [1:58:49<1:51:30, 13.79s/batch, batch_loss=16.7, batch_index=506, batch_size=256]

Epoch 1/10:  51%|██████▏     | 506/991 [1:59:03<1:51:30, 13.79s/batch, batch_loss=16.8, batch_index=507, batch_size=256]

Epoch 1/10:  51%|██████▏     | 507/991 [1:59:03<1:51:14, 13.79s/batch, batch_loss=16.8, batch_index=507, batch_size=256]

Epoch 1/10:  51%|██████▏     | 507/991 [1:59:16<1:51:14, 13.79s/batch, batch_loss=19.8, batch_index=508, batch_size=256]

Epoch 1/10:  51%|██████▏     | 508/991 [1:59:16<1:48:40, 13.50s/batch, batch_loss=19.8, batch_index=508, batch_size=256]

Epoch 1/10:  51%|███████▏      | 508/991 [1:59:29<1:48:40, 13.50s/batch, batch_loss=20, batch_index=509, batch_size=256]

Epoch 1/10:  51%|███████▏      | 509/991 [1:59:29<1:48:25, 13.50s/batch, batch_loss=20, batch_index=509, batch_size=256]

Epoch 1/10:  51%|██████▏     | 509/991 [1:59:43<1:48:25, 13.50s/batch, batch_loss=18.7, batch_index=510, batch_size=256]

Epoch 1/10:  51%|██████▏     | 510/991 [1:59:43<1:48:13, 13.50s/batch, batch_loss=18.7, batch_index=510, batch_size=256]

Epoch 1/10:  51%|██████▏     | 510/991 [1:59:55<1:48:13, 13.50s/batch, batch_loss=15.4, batch_index=511, batch_size=256]

Epoch 1/10:  52%|██████▏     | 511/991 [1:59:55<1:46:04, 13.26s/batch, batch_loss=15.4, batch_index=511, batch_size=256]

Epoch 1/10:  52%|██████▏     | 511/991 [2:00:08<1:46:04, 13.26s/batch, batch_loss=12.3, batch_index=512, batch_size=256]

Epoch 1/10:  52%|██████▏     | 512/991 [2:00:08<1:43:42, 12.99s/batch, batch_loss=12.3, batch_index=512, batch_size=256]

Epoch 1/10:  52%|██████▏     | 512/991 [2:00:23<1:43:42, 12.99s/batch, batch_loss=10.3, batch_index=513, batch_size=256]

Epoch 1/10:  52%|██████▏     | 513/991 [2:00:23<1:49:38, 13.76s/batch, batch_loss=10.3, batch_index=513, batch_size=256]

Epoch 1/10:  52%|██████▏     | 513/991 [2:00:37<1:49:38, 13.76s/batch, batch_loss=18.9, batch_index=514, batch_size=256]

Epoch 1/10:  52%|██████▏     | 514/991 [2:00:37<1:48:36, 13.66s/batch, batch_loss=18.9, batch_index=514, batch_size=256]

Epoch 1/10:  52%|██████▏     | 514/991 [2:00:50<1:48:36, 13.66s/batch, batch_loss=19.2, batch_index=515, batch_size=256]

Epoch 1/10:  52%|██████▏     | 515/991 [2:00:50<1:46:11, 13.39s/batch, batch_loss=19.2, batch_index=515, batch_size=256]

Epoch 1/10:  52%|██████▏     | 515/991 [2:01:02<1:46:11, 13.39s/batch, batch_loss=22.4, batch_index=516, batch_size=256]

Epoch 1/10:  52%|██████▏     | 516/991 [2:01:02<1:44:34, 13.21s/batch, batch_loss=22.4, batch_index=516, batch_size=256]

Epoch 1/10:  52%|██████▏     | 516/991 [2:01:15<1:44:34, 13.21s/batch, batch_loss=9.88, batch_index=517, batch_size=256]

Epoch 1/10:  52%|██████▎     | 517/991 [2:01:15<1:43:05, 13.05s/batch, batch_loss=9.88, batch_index=517, batch_size=256]

Epoch 1/10:  52%|███████▎      | 517/991 [2:01:29<1:43:05, 13.05s/batch, batch_loss=29, batch_index=518, batch_size=256]

Epoch 1/10:  52%|███████▎      | 518/991 [2:01:29<1:45:11, 13.34s/batch, batch_loss=29, batch_index=518, batch_size=256]

Epoch 1/10:  52%|██████▎     | 518/991 [2:01:43<1:45:11, 13.34s/batch, batch_loss=14.7, batch_index=519, batch_size=256]

Epoch 1/10:  52%|██████▎     | 519/991 [2:01:43<1:46:51, 13.58s/batch, batch_loss=14.7, batch_index=519, batch_size=256]

Epoch 1/10:  52%|██████▎     | 519/991 [2:01:57<1:46:51, 13.58s/batch, batch_loss=16.1, batch_index=520, batch_size=256]

Epoch 1/10:  52%|██████▎     | 520/991 [2:01:57<1:47:38, 13.71s/batch, batch_loss=16.1, batch_index=520, batch_size=256]

Epoch 1/10:  52%|██████▎     | 520/991 [2:02:11<1:47:38, 13.71s/batch, batch_loss=8.34, batch_index=521, batch_size=256]

Epoch 1/10:  53%|██████▎     | 521/991 [2:02:11<1:47:38, 13.74s/batch, batch_loss=8.34, batch_index=521, batch_size=256]

Epoch 1/10:  53%|██████▎     | 521/991 [2:02:24<1:47:38, 13.74s/batch, batch_loss=10.1, batch_index=522, batch_size=256]

Epoch 1/10:  53%|██████▎     | 522/991 [2:02:24<1:44:40, 13.39s/batch, batch_loss=10.1, batch_index=522, batch_size=256]

Epoch 1/10:  53%|██████▎     | 522/991 [2:02:36<1:44:40, 13.39s/batch, batch_loss=3.67, batch_index=523, batch_size=256]

Epoch 1/10:  53%|██████▎     | 523/991 [2:02:36<1:41:18, 12.99s/batch, batch_loss=3.67, batch_index=523, batch_size=256]

Epoch 1/10:  53%|██████▎     | 523/991 [2:02:48<1:41:18, 12.99s/batch, batch_loss=13.9, batch_index=524, batch_size=256]

Epoch 1/10:  53%|██████▎     | 524/991 [2:02:48<1:39:24, 12.77s/batch, batch_loss=13.9, batch_index=524, batch_size=256]

Epoch 1/10:  53%|██████▎     | 524/991 [2:03:01<1:39:24, 12.77s/batch, batch_loss=7.35, batch_index=525, batch_size=256]

Epoch 1/10:  53%|██████▎     | 525/991 [2:03:01<1:39:17, 12.79s/batch, batch_loss=7.35, batch_index=525, batch_size=256]

Epoch 1/10:  53%|██████▉      | 525/991 [2:03:13<1:39:17, 12.79s/batch, batch_loss=8.2, batch_index=526, batch_size=256]

Epoch 1/10:  53%|██████▉      | 526/991 [2:03:13<1:38:29, 12.71s/batch, batch_loss=8.2, batch_index=526, batch_size=256]

Epoch 1/10:  53%|██████▎     | 526/991 [2:03:26<1:38:29, 12.71s/batch, batch_loss=17.6, batch_index=527, batch_size=256]

Epoch 1/10:  53%|██████▍     | 527/991 [2:03:26<1:38:14, 12.70s/batch, batch_loss=17.6, batch_index=527, batch_size=256]

Epoch 1/10:  53%|██████▍     | 527/991 [2:03:38<1:38:14, 12.70s/batch, batch_loss=17.1, batch_index=528, batch_size=256]

Epoch 1/10:  53%|██████▍     | 528/991 [2:03:38<1:36:59, 12.57s/batch, batch_loss=17.1, batch_index=528, batch_size=256]

Epoch 1/10:  53%|██████▍     | 528/991 [2:03:50<1:36:59, 12.57s/batch, batch_loss=9.72, batch_index=529, batch_size=256]

Epoch 1/10:  53%|██████▍     | 529/991 [2:03:50<1:35:48, 12.44s/batch, batch_loss=9.72, batch_index=529, batch_size=256]

Epoch 1/10:  53%|██████▍     | 529/991 [2:04:06<1:35:48, 12.44s/batch, batch_loss=20.3, batch_index=530, batch_size=256]

Epoch 1/10:  53%|██████▍     | 530/991 [2:04:06<1:42:36, 13.35s/batch, batch_loss=20.3, batch_index=530, batch_size=256]

Epoch 1/10:  53%|██████▍     | 530/991 [2:04:18<1:42:36, 13.35s/batch, batch_loss=19.1, batch_index=531, batch_size=256]

Epoch 1/10:  54%|██████▍     | 531/991 [2:04:18<1:38:43, 12.88s/batch, batch_loss=19.1, batch_index=531, batch_size=256]

Epoch 1/10:  54%|███████▌      | 531/991 [2:04:30<1:38:43, 12.88s/batch, batch_loss=16, batch_index=532, batch_size=256]

Epoch 1/10:  54%|███████▌      | 532/991 [2:04:30<1:37:54, 12.80s/batch, batch_loss=16, batch_index=532, batch_size=256]

Epoch 1/10:  54%|██████▍     | 532/991 [2:04:45<1:37:54, 12.80s/batch, batch_loss=16.8, batch_index=533, batch_size=256]

Epoch 1/10:  54%|██████▍     | 533/991 [2:04:45<1:41:18, 13.27s/batch, batch_loss=16.8, batch_index=533, batch_size=256]

Epoch 1/10:  54%|██████▍     | 533/991 [2:04:57<1:41:18, 13.27s/batch, batch_loss=15.8, batch_index=534, batch_size=256]

Epoch 1/10:  54%|██████▍     | 534/991 [2:04:57<1:39:50, 13.11s/batch, batch_loss=15.8, batch_index=534, batch_size=256]

Epoch 1/10:  54%|██████▍     | 534/991 [2:05:10<1:39:50, 13.11s/batch, batch_loss=23.7, batch_index=535, batch_size=256]

Epoch 1/10:  54%|██████▍     | 535/991 [2:05:10<1:39:24, 13.08s/batch, batch_loss=23.7, batch_index=535, batch_size=256]

Epoch 1/10:  54%|███████▌      | 535/991 [2:05:23<1:39:24, 13.08s/batch, batch_loss=23, batch_index=536, batch_size=256]

Epoch 1/10:  54%|███████▌      | 536/991 [2:05:23<1:38:20, 12.97s/batch, batch_loss=23, batch_index=536, batch_size=256]

Epoch 1/10:  54%|██████▍     | 536/991 [2:05:36<1:38:20, 12.97s/batch, batch_loss=11.8, batch_index=537, batch_size=256]

Epoch 1/10:  54%|██████▌     | 537/991 [2:05:36<1:38:03, 12.96s/batch, batch_loss=11.8, batch_index=537, batch_size=256]

Epoch 1/10:  54%|████▉    | 537/991 [2:05:49<1:38:03, 12.96s/batch, batch_loss=1.79e+3, batch_index=538, batch_size=256]

Epoch 1/10:  54%|████▉    | 538/991 [2:05:49<1:37:39, 12.94s/batch, batch_loss=1.79e+3, batch_index=538, batch_size=256]

Epoch 1/10:  54%|██████▌     | 538/991 [2:06:01<1:37:39, 12.94s/batch, batch_loss=54.4, batch_index=539, batch_size=256]

Epoch 1/10:  54%|██████▌     | 539/991 [2:06:01<1:36:49, 12.85s/batch, batch_loss=54.4, batch_index=539, batch_size=256]

Epoch 1/10:  54%|██████▌     | 539/991 [2:06:15<1:36:49, 12.85s/batch, batch_loss=45.8, batch_index=540, batch_size=256]

Epoch 1/10:  54%|██████▌     | 540/991 [2:06:15<1:37:45, 13.01s/batch, batch_loss=45.8, batch_index=540, batch_size=256]

Epoch 1/10:  54%|█████▍    | 540/991 [2:06:28<1:37:45, 13.01s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 1/10:  55%|█████▍    | 541/991 [2:06:28<1:36:54, 12.92s/batch, batch_loss=1.3e+4, batch_index=541, batch_size=256]

Epoch 1/10:  55%|████▉    | 541/991 [2:06:41<1:36:54, 12.92s/batch, batch_loss=2.88e+3, batch_index=542, batch_size=256]

Epoch 1/10:  55%|████▉    | 542/991 [2:06:41<1:37:14, 13.00s/batch, batch_loss=2.88e+3, batch_index=542, batch_size=256]

Epoch 1/10:  55%|██████▌     | 542/991 [2:06:54<1:37:14, 13.00s/batch, batch_loss=54.2, batch_index=543, batch_size=256]

Epoch 1/10:  55%|██████▌     | 543/991 [2:06:54<1:38:03, 13.13s/batch, batch_loss=54.2, batch_index=543, batch_size=256]

Epoch 1/10:  55%|██████▌     | 543/991 [2:07:07<1:38:03, 13.13s/batch, batch_loss=29.3, batch_index=544, batch_size=256]

Epoch 1/10:  55%|██████▌     | 544/991 [2:07:07<1:37:39, 13.11s/batch, batch_loss=29.3, batch_index=544, batch_size=256]

Epoch 1/10:  55%|██████▌     | 544/991 [2:07:25<1:37:39, 13.11s/batch, batch_loss=20.2, batch_index=545, batch_size=256]

Epoch 1/10:  55%|██████▌     | 545/991 [2:07:25<1:48:00, 14.53s/batch, batch_loss=20.2, batch_index=545, batch_size=256]

Epoch 1/10:  55%|███████▏     | 545/991 [2:07:40<1:48:00, 14.53s/batch, batch_loss=302, batch_index=546, batch_size=256]

Epoch 1/10:  55%|███████▏     | 546/991 [2:07:40<1:48:07, 14.58s/batch, batch_loss=302, batch_index=546, batch_size=256]

Epoch 1/10:  55%|██████▌     | 546/991 [2:07:52<1:48:07, 14.58s/batch, batch_loss=20.7, batch_index=547, batch_size=256]

Epoch 1/10:  55%|██████▌     | 547/991 [2:07:52<1:43:27, 13.98s/batch, batch_loss=20.7, batch_index=547, batch_size=256]

Epoch 1/10:  55%|██████▌     | 547/991 [2:08:05<1:43:27, 13.98s/batch, batch_loss=37.2, batch_index=548, batch_size=256]

Epoch 1/10:  55%|██████▋     | 548/991 [2:08:05<1:40:12, 13.57s/batch, batch_loss=37.2, batch_index=548, batch_size=256]

Epoch 1/10:  55%|██████▋     | 548/991 [2:08:17<1:40:12, 13.57s/batch, batch_loss=13.2, batch_index=549, batch_size=256]

Epoch 1/10:  55%|██████▋     | 549/991 [2:08:17<1:37:29, 13.24s/batch, batch_loss=13.2, batch_index=549, batch_size=256]

Epoch 1/10:  55%|██████▋     | 549/991 [2:08:33<1:37:29, 13.24s/batch, batch_loss=34.5, batch_index=550, batch_size=256]

Epoch 1/10:  55%|██████▋     | 550/991 [2:08:33<1:41:51, 13.86s/batch, batch_loss=34.5, batch_index=550, batch_size=256]

Epoch 1/10:  55%|██████▋     | 550/991 [2:08:46<1:41:51, 13.86s/batch, batch_loss=20.5, batch_index=551, batch_size=256]

Epoch 1/10:  56%|██████▋     | 551/991 [2:08:46<1:39:43, 13.60s/batch, batch_loss=20.5, batch_index=551, batch_size=256]

Epoch 1/10:  56%|██████▋     | 551/991 [2:08:59<1:39:43, 13.60s/batch, batch_loss=14.5, batch_index=552, batch_size=256]

Epoch 1/10:  56%|██████▋     | 552/991 [2:08:59<1:38:10, 13.42s/batch, batch_loss=14.5, batch_index=552, batch_size=256]

Epoch 1/10:  56%|██████▋     | 552/991 [2:09:11<1:38:10, 13.42s/batch, batch_loss=21.1, batch_index=553, batch_size=256]

Epoch 1/10:  56%|██████▋     | 553/991 [2:09:11<1:35:28, 13.08s/batch, batch_loss=21.1, batch_index=553, batch_size=256]

Epoch 1/10:  56%|█████    | 553/991 [2:09:24<1:35:28, 13.08s/batch, batch_loss=5.72e+3, batch_index=554, batch_size=256]

Epoch 1/10:  56%|█████    | 554/991 [2:09:24<1:36:00, 13.18s/batch, batch_loss=5.72e+3, batch_index=554, batch_size=256]

Epoch 1/10:  56%|█████▌    | 554/991 [2:09:37<1:36:00, 13.18s/batch, batch_loss=2.6e+3, batch_index=555, batch_size=256]

Epoch 1/10:  56%|█████▌    | 555/991 [2:09:37<1:35:22, 13.12s/batch, batch_loss=2.6e+3, batch_index=555, batch_size=256]

Epoch 1/10:  56%|██████▋     | 555/991 [2:09:51<1:35:22, 13.12s/batch, batch_loss=26.4, batch_index=556, batch_size=256]

Epoch 1/10:  56%|██████▋     | 556/991 [2:09:51<1:36:05, 13.25s/batch, batch_loss=26.4, batch_index=556, batch_size=256]

Epoch 1/10:  56%|█████    | 556/991 [2:10:04<1:36:05, 13.25s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 1/10:  56%|█████    | 557/991 [2:10:04<1:34:35, 13.08s/batch, batch_loss=1.27e+4, batch_index=557, batch_size=256]

Epoch 1/10:  56%|██████▋     | 557/991 [2:10:18<1:34:35, 13.08s/batch, batch_loss=9.92, batch_index=558, batch_size=256]

Epoch 1/10:  56%|██████▊     | 558/991 [2:10:18<1:36:32, 13.38s/batch, batch_loss=9.92, batch_index=558, batch_size=256]

Epoch 1/10:  56%|███████▉      | 558/991 [2:10:34<1:36:32, 13.38s/batch, batch_loss=19, batch_index=559, batch_size=256]

Epoch 1/10:  56%|███████▉      | 559/991 [2:10:34<1:41:51, 14.15s/batch, batch_loss=19, batch_index=559, batch_size=256]

Epoch 1/10:  56%|██████▊     | 559/991 [2:10:47<1:41:51, 14.15s/batch, batch_loss=8.26, batch_index=560, batch_size=256]

Epoch 1/10:  57%|██████▊     | 560/991 [2:10:47<1:39:50, 13.90s/batch, batch_loss=8.26, batch_index=560, batch_size=256]

Epoch 1/10:  57%|██████▊     | 560/991 [2:11:00<1:39:50, 13.90s/batch, batch_loss=7.63, batch_index=561, batch_size=256]

Epoch 1/10:  57%|██████▊     | 561/991 [2:11:00<1:37:35, 13.62s/batch, batch_loss=7.63, batch_index=561, batch_size=256]

Epoch 1/10:  57%|███████▉      | 561/991 [2:11:13<1:37:35, 13.62s/batch, batch_loss=22, batch_index=562, batch_size=256]

Epoch 1/10:  57%|███████▉      | 562/991 [2:11:13<1:36:43, 13.53s/batch, batch_loss=22, batch_index=562, batch_size=256]

Epoch 1/10:  57%|██████▊     | 562/991 [2:11:27<1:36:43, 13.53s/batch, batch_loss=10.9, batch_index=563, batch_size=256]

Epoch 1/10:  57%|██████▊     | 563/991 [2:11:27<1:36:21, 13.51s/batch, batch_loss=10.9, batch_index=563, batch_size=256]

Epoch 1/10:  57%|██████▊     | 563/991 [2:11:40<1:36:21, 13.51s/batch, batch_loss=12.6, batch_index=564, batch_size=256]

Epoch 1/10:  57%|██████▊     | 564/991 [2:11:40<1:34:39, 13.30s/batch, batch_loss=12.6, batch_index=564, batch_size=256]

Epoch 1/10:  57%|███████▍     | 564/991 [2:11:53<1:34:39, 13.30s/batch, batch_loss=501, batch_index=565, batch_size=256]

Epoch 1/10:  57%|███████▍     | 565/991 [2:11:53<1:34:25, 13.30s/batch, batch_loss=501, batch_index=565, batch_size=256]

Epoch 1/10:  57%|██████▊     | 565/991 [2:12:06<1:34:25, 13.30s/batch, batch_loss=16.8, batch_index=566, batch_size=256]

Epoch 1/10:  57%|██████▊     | 566/991 [2:12:06<1:34:36, 13.36s/batch, batch_loss=16.8, batch_index=566, batch_size=256]

Epoch 1/10:  57%|██████▊     | 566/991 [2:12:20<1:34:36, 13.36s/batch, batch_loss=18.7, batch_index=567, batch_size=256]

Epoch 1/10:  57%|██████▊     | 567/991 [2:12:20<1:34:32, 13.38s/batch, batch_loss=18.7, batch_index=567, batch_size=256]

Epoch 1/10:  57%|███████▍     | 567/991 [2:12:35<1:34:32, 13.38s/batch, batch_loss=313, batch_index=568, batch_size=256]

Epoch 1/10:  57%|███████▍     | 568/991 [2:12:35<1:38:48, 14.02s/batch, batch_loss=313, batch_index=568, batch_size=256]

Epoch 1/10:  57%|██████▉     | 568/991 [2:12:48<1:38:48, 14.02s/batch, batch_loss=54.2, batch_index=569, batch_size=256]

Epoch 1/10:  57%|██████▉     | 569/991 [2:12:48<1:36:20, 13.70s/batch, batch_loss=54.2, batch_index=569, batch_size=256]

Epoch 1/10:  57%|█████▏   | 569/991 [2:13:02<1:36:20, 13.70s/batch, batch_loss=8.47e+3, batch_index=570, batch_size=256]

Epoch 1/10:  58%|█████▏   | 570/991 [2:13:02<1:35:28, 13.61s/batch, batch_loss=8.47e+3, batch_index=570, batch_size=256]

Epoch 1/10:  58%|██████▉     | 570/991 [2:13:14<1:35:28, 13.61s/batch, batch_loss=14.1, batch_index=571, batch_size=256]

Epoch 1/10:  58%|██████▉     | 571/991 [2:13:14<1:33:19, 13.33s/batch, batch_loss=14.1, batch_index=571, batch_size=256]

Epoch 1/10:  58%|████████      | 571/991 [2:13:27<1:33:19, 13.33s/batch, batch_loss=13, batch_index=572, batch_size=256]

Epoch 1/10:  58%|████████      | 572/991 [2:13:27<1:31:44, 13.14s/batch, batch_loss=13, batch_index=572, batch_size=256]

Epoch 1/10:  58%|██████▉     | 572/991 [2:13:40<1:31:44, 13.14s/batch, batch_loss=8.15, batch_index=573, batch_size=256]

Epoch 1/10:  58%|██████▉     | 573/991 [2:13:40<1:32:14, 13.24s/batch, batch_loss=8.15, batch_index=573, batch_size=256]

Epoch 1/10:  58%|██████▉     | 573/991 [2:13:54<1:32:14, 13.24s/batch, batch_loss=12.9, batch_index=574, batch_size=256]

Epoch 1/10:  58%|██████▉     | 574/991 [2:13:54<1:32:01, 13.24s/batch, batch_loss=12.9, batch_index=574, batch_size=256]

Epoch 1/10:  58%|██████▉     | 574/991 [2:14:07<1:32:01, 13.24s/batch, batch_loss=29.9, batch_index=575, batch_size=256]

Epoch 1/10:  58%|██████▉     | 575/991 [2:14:07<1:31:09, 13.15s/batch, batch_loss=29.9, batch_index=575, batch_size=256]

Epoch 1/10:  58%|██████▉     | 575/991 [2:14:20<1:31:09, 13.15s/batch, batch_loss=41.8, batch_index=576, batch_size=256]

Epoch 1/10:  58%|██████▉     | 576/991 [2:14:20<1:30:57, 13.15s/batch, batch_loss=41.8, batch_index=576, batch_size=256]

Epoch 1/10:  58%|██████▉     | 576/991 [2:14:33<1:30:57, 13.15s/batch, batch_loss=11.6, batch_index=577, batch_size=256]

Epoch 1/10:  58%|██████▉     | 577/991 [2:14:33<1:30:59, 13.19s/batch, batch_loss=11.6, batch_index=577, batch_size=256]

Epoch 1/10:  58%|████████▏     | 577/991 [2:14:46<1:30:59, 13.19s/batch, batch_loss=10, batch_index=578, batch_size=256]

Epoch 1/10:  58%|████████▏     | 578/991 [2:14:46<1:31:04, 13.23s/batch, batch_loss=10, batch_index=578, batch_size=256]

Epoch 1/10:  58%|██████▉     | 578/991 [2:14:59<1:31:04, 13.23s/batch, batch_loss=16.3, batch_index=579, batch_size=256]

Epoch 1/10:  58%|███████     | 579/991 [2:14:59<1:29:40, 13.06s/batch, batch_loss=16.3, batch_index=579, batch_size=256]

Epoch 1/10:  58%|███████     | 579/991 [2:15:12<1:29:40, 13.06s/batch, batch_loss=17.8, batch_index=580, batch_size=256]

Epoch 1/10:  59%|███████     | 580/991 [2:15:12<1:29:45, 13.10s/batch, batch_loss=17.8, batch_index=580, batch_size=256]

Epoch 1/10:  59%|███████     | 580/991 [2:15:25<1:29:45, 13.10s/batch, batch_loss=8.92, batch_index=581, batch_size=256]

Epoch 1/10:  59%|███████     | 581/991 [2:15:25<1:29:30, 13.10s/batch, batch_loss=8.92, batch_index=581, batch_size=256]

Epoch 1/10:  59%|███████     | 581/991 [2:15:38<1:29:30, 13.10s/batch, batch_loss=0.65, batch_index=582, batch_size=256]

Epoch 1/10:  59%|███████     | 582/991 [2:15:38<1:29:04, 13.07s/batch, batch_loss=0.65, batch_index=582, batch_size=256]

Epoch 1/10:  59%|█████▎   | 582/991 [2:15:51<1:29:04, 13.07s/batch, batch_loss=6.61e+3, batch_index=583, batch_size=256]

Epoch 1/10:  59%|█████▎   | 583/991 [2:15:51<1:28:49, 13.06s/batch, batch_loss=6.61e+3, batch_index=583, batch_size=256]

Epoch 1/10:  59%|███████     | 583/991 [2:16:05<1:28:49, 13.06s/batch, batch_loss=13.2, batch_index=584, batch_size=256]

Epoch 1/10:  59%|███████     | 584/991 [2:16:05<1:29:20, 13.17s/batch, batch_loss=13.2, batch_index=584, batch_size=256]

Epoch 1/10:  59%|███████     | 584/991 [2:16:19<1:29:20, 13.17s/batch, batch_loss=8.12, batch_index=585, batch_size=256]

Epoch 1/10:  59%|███████     | 585/991 [2:16:19<1:30:15, 13.34s/batch, batch_loss=8.12, batch_index=585, batch_size=256]

Epoch 1/10:  59%|███████     | 585/991 [2:16:32<1:30:15, 13.34s/batch, batch_loss=36.5, batch_index=586, batch_size=256]

Epoch 1/10:  59%|███████     | 586/991 [2:16:32<1:29:57, 13.33s/batch, batch_loss=36.5, batch_index=586, batch_size=256]

Epoch 1/10:  59%|███████     | 586/991 [2:16:47<1:29:57, 13.33s/batch, batch_loss=31.9, batch_index=587, batch_size=256]

Epoch 1/10:  59%|███████     | 587/991 [2:16:47<1:33:43, 13.92s/batch, batch_loss=31.9, batch_index=587, batch_size=256]

Epoch 1/10:  59%|███████     | 587/991 [2:17:00<1:33:43, 13.92s/batch, batch_loss=26.5, batch_index=588, batch_size=256]

Epoch 1/10:  59%|███████     | 588/991 [2:17:00<1:31:26, 13.61s/batch, batch_loss=26.5, batch_index=588, batch_size=256]

Epoch 1/10:  59%|███████     | 588/991 [2:17:13<1:31:26, 13.61s/batch, batch_loss=10.3, batch_index=589, batch_size=256]

Epoch 1/10:  59%|███████▏    | 589/991 [2:17:13<1:29:48, 13.40s/batch, batch_loss=10.3, batch_index=589, batch_size=256]

Epoch 1/10:  59%|███████▏    | 589/991 [2:17:25<1:29:48, 13.40s/batch, batch_loss=19.4, batch_index=590, batch_size=256]

Epoch 1/10:  60%|███████▏    | 590/991 [2:17:25<1:27:12, 13.05s/batch, batch_loss=19.4, batch_index=590, batch_size=256]

Epoch 1/10:  60%|███████▏    | 590/991 [2:17:38<1:27:12, 13.05s/batch, batch_loss=18.3, batch_index=591, batch_size=256]

Epoch 1/10:  60%|███████▏    | 591/991 [2:17:38<1:26:12, 12.93s/batch, batch_loss=18.3, batch_index=591, batch_size=256]

Epoch 1/10:  60%|███████▏    | 591/991 [2:17:50<1:26:12, 12.93s/batch, batch_loss=8.12, batch_index=592, batch_size=256]

Epoch 1/10:  60%|███████▏    | 592/991 [2:17:50<1:25:13, 12.81s/batch, batch_loss=8.12, batch_index=592, batch_size=256]

Epoch 1/10:  60%|████████▎     | 592/991 [2:18:04<1:25:13, 12.81s/batch, batch_loss=12, batch_index=593, batch_size=256]

Epoch 1/10:  60%|████████▍     | 593/991 [2:18:04<1:26:24, 13.03s/batch, batch_loss=12, batch_index=593, batch_size=256]

Epoch 1/10:  60%|███████▏    | 593/991 [2:18:17<1:26:24, 13.03s/batch, batch_loss=13.2, batch_index=594, batch_size=256]

Epoch 1/10:  60%|███████▏    | 594/991 [2:18:17<1:25:54, 12.98s/batch, batch_loss=13.2, batch_index=594, batch_size=256]

Epoch 1/10:  60%|███████▏    | 594/991 [2:18:30<1:25:54, 12.98s/batch, batch_loss=8.25, batch_index=595, batch_size=256]

Epoch 1/10:  60%|███████▏    | 595/991 [2:18:30<1:25:40, 12.98s/batch, batch_loss=8.25, batch_index=595, batch_size=256]

Epoch 1/10:  60%|███████▊     | 595/991 [2:18:43<1:25:40, 12.98s/batch, batch_loss=9.3, batch_index=596, batch_size=256]

Epoch 1/10:  60%|███████▊     | 596/991 [2:18:43<1:26:23, 13.12s/batch, batch_loss=9.3, batch_index=596, batch_size=256]

Epoch 1/10:  60%|███████▏    | 596/991 [2:18:56<1:26:23, 13.12s/batch, batch_loss=34.9, batch_index=597, batch_size=256]

Epoch 1/10:  60%|███████▏    | 597/991 [2:18:56<1:25:29, 13.02s/batch, batch_loss=34.9, batch_index=597, batch_size=256]

Epoch 1/10:  60%|███████▏    | 597/991 [2:19:09<1:25:29, 13.02s/batch, batch_loss=11.7, batch_index=598, batch_size=256]

Epoch 1/10:  60%|███████▏    | 598/991 [2:19:09<1:25:29, 13.05s/batch, batch_loss=11.7, batch_index=598, batch_size=256]

Epoch 1/10:  60%|███████▏    | 598/991 [2:19:23<1:25:29, 13.05s/batch, batch_loss=24.2, batch_index=599, batch_size=256]

Epoch 1/10:  60%|███████▎    | 599/991 [2:19:23<1:26:15, 13.20s/batch, batch_loss=24.2, batch_index=599, batch_size=256]

Epoch 1/10:  60%|███████▎    | 599/991 [2:19:36<1:26:15, 13.20s/batch, batch_loss=18.3, batch_index=600, batch_size=256]

Epoch 1/10:  61%|███████▎    | 600/991 [2:19:36<1:27:02, 13.36s/batch, batch_loss=18.3, batch_index=600, batch_size=256]

Epoch 1/10:  61%|███████▎    | 600/991 [2:19:50<1:27:02, 13.36s/batch, batch_loss=19.9, batch_index=601, batch_size=256]

Epoch 1/10:  61%|███████▎    | 601/991 [2:19:50<1:26:29, 13.31s/batch, batch_loss=19.9, batch_index=601, batch_size=256]

Epoch 1/10:  61%|███████▎    | 601/991 [2:20:06<1:26:29, 13.31s/batch, batch_loss=15.1, batch_index=602, batch_size=256]

Epoch 1/10:  61%|███████▎    | 602/991 [2:20:06<1:31:29, 14.11s/batch, batch_loss=15.1, batch_index=602, batch_size=256]

Epoch 1/10:  61%|███████▎    | 602/991 [2:20:18<1:31:29, 14.11s/batch, batch_loss=11.3, batch_index=603, batch_size=256]

Epoch 1/10:  61%|███████▎    | 603/991 [2:20:18<1:28:47, 13.73s/batch, batch_loss=11.3, batch_index=603, batch_size=256]

Epoch 1/10:  61%|█████▍   | 603/991 [2:20:31<1:28:47, 13.73s/batch, batch_loss=1.02e+4, batch_index=604, batch_size=256]

Epoch 1/10:  61%|█████▍   | 604/991 [2:20:31<1:25:30, 13.26s/batch, batch_loss=1.02e+4, batch_index=604, batch_size=256]

Epoch 1/10:  61%|███████▎    | 604/991 [2:20:44<1:25:30, 13.26s/batch, batch_loss=10.7, batch_index=605, batch_size=256]

Epoch 1/10:  61%|███████▎    | 605/991 [2:20:44<1:25:05, 13.23s/batch, batch_loss=10.7, batch_index=605, batch_size=256]

Epoch 1/10:  61%|████████▌     | 605/991 [2:20:56<1:25:05, 13.23s/batch, batch_loss=11, batch_index=606, batch_size=256]

Epoch 1/10:  61%|████████▌     | 606/991 [2:20:56<1:23:47, 13.06s/batch, batch_loss=11, batch_index=606, batch_size=256]

Epoch 1/10:  61%|███████▎    | 606/991 [2:21:10<1:23:47, 13.06s/batch, batch_loss=9.78, batch_index=607, batch_size=256]

Epoch 1/10:  61%|███████▎    | 607/991 [2:21:10<1:24:13, 13.16s/batch, batch_loss=9.78, batch_index=607, batch_size=256]

Epoch 1/10:  61%|███████▎    | 607/991 [2:21:23<1:24:13, 13.16s/batch, batch_loss=18.9, batch_index=608, batch_size=256]

Epoch 1/10:  61%|███████▎    | 608/991 [2:21:23<1:23:47, 13.13s/batch, batch_loss=18.9, batch_index=608, batch_size=256]

Epoch 1/10:  61%|███████▎    | 608/991 [2:21:37<1:23:47, 13.13s/batch, batch_loss=18.4, batch_index=609, batch_size=256]

Epoch 1/10:  61%|███████▎    | 609/991 [2:21:37<1:25:12, 13.38s/batch, batch_loss=18.4, batch_index=609, batch_size=256]

Epoch 1/10:  61%|███████▎    | 609/991 [2:21:50<1:25:12, 13.38s/batch, batch_loss=24.9, batch_index=610, batch_size=256]

Epoch 1/10:  62%|███████▍    | 610/991 [2:21:50<1:25:14, 13.42s/batch, batch_loss=24.9, batch_index=610, batch_size=256]

Epoch 1/10:  62%|███████▍    | 610/991 [2:22:06<1:25:14, 13.42s/batch, batch_loss=39.3, batch_index=611, batch_size=256]

Epoch 1/10:  62%|███████▍    | 611/991 [2:22:06<1:28:46, 14.02s/batch, batch_loss=39.3, batch_index=611, batch_size=256]

Epoch 1/10:  62%|███████▍    | 611/991 [2:22:19<1:28:46, 14.02s/batch, batch_loss=12.6, batch_index=612, batch_size=256]

Epoch 1/10:  62%|███████▍    | 612/991 [2:22:19<1:27:04, 13.79s/batch, batch_loss=12.6, batch_index=612, batch_size=256]

Epoch 1/10:  62%|████████▋     | 612/991 [2:22:32<1:27:04, 13.79s/batch, batch_loss=15, batch_index=613, batch_size=256]

Epoch 1/10:  62%|████████▋     | 613/991 [2:22:32<1:25:37, 13.59s/batch, batch_loss=15, batch_index=613, batch_size=256]

Epoch 1/10:  62%|█████▌   | 613/991 [2:22:45<1:25:37, 13.59s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 1/10:  62%|█████▌   | 614/991 [2:22:45<1:24:01, 13.37s/batch, batch_loss=1.73e+4, batch_index=614, batch_size=256]

Epoch 1/10:  62%|███████▍    | 614/991 [2:22:59<1:24:01, 13.37s/batch, batch_loss=1e+3, batch_index=615, batch_size=256]

Epoch 1/10:  62%|███████▍    | 615/991 [2:22:59<1:24:29, 13.48s/batch, batch_loss=1e+3, batch_index=615, batch_size=256]

Epoch 1/10:  62%|███████▍    | 615/991 [2:23:12<1:24:29, 13.48s/batch, batch_loss=11.4, batch_index=616, batch_size=256]

Epoch 1/10:  62%|███████▍    | 616/991 [2:23:12<1:23:34, 13.37s/batch, batch_loss=11.4, batch_index=616, batch_size=256]

Epoch 1/10:  62%|███████▍    | 616/991 [2:23:25<1:23:34, 13.37s/batch, batch_loss=27.2, batch_index=617, batch_size=256]

Epoch 1/10:  62%|███████▍    | 617/991 [2:23:25<1:22:59, 13.31s/batch, batch_loss=27.2, batch_index=617, batch_size=256]

Epoch 1/10:  62%|███████▍    | 617/991 [2:23:38<1:22:59, 13.31s/batch, batch_loss=14.2, batch_index=618, batch_size=256]

Epoch 1/10:  62%|███████▍    | 618/991 [2:23:38<1:22:14, 13.23s/batch, batch_loss=14.2, batch_index=618, batch_size=256]

Epoch 1/10:  62%|███████▍    | 618/991 [2:23:51<1:22:14, 13.23s/batch, batch_loss=30.9, batch_index=619, batch_size=256]

Epoch 1/10:  62%|███████▍    | 619/991 [2:23:51<1:21:57, 13.22s/batch, batch_loss=30.9, batch_index=619, batch_size=256]

Epoch 1/10:  62%|███████▍    | 619/991 [2:24:05<1:21:57, 13.22s/batch, batch_loss=17.9, batch_index=620, batch_size=256]

Epoch 1/10:  63%|███████▌    | 620/991 [2:24:05<1:23:02, 13.43s/batch, batch_loss=17.9, batch_index=620, batch_size=256]

Epoch 1/10:  63%|███████▌    | 620/991 [2:24:18<1:23:02, 13.43s/batch, batch_loss=8.73, batch_index=621, batch_size=256]

Epoch 1/10:  63%|███████▌    | 621/991 [2:24:18<1:22:29, 13.38s/batch, batch_loss=8.73, batch_index=621, batch_size=256]

Epoch 1/10:  63%|█████▋   | 621/991 [2:24:31<1:22:29, 13.38s/batch, batch_loss=5.49e+3, batch_index=622, batch_size=256]

Epoch 1/10:  63%|█████▋   | 622/991 [2:24:31<1:20:41, 13.12s/batch, batch_loss=5.49e+3, batch_index=622, batch_size=256]

Epoch 1/10:  63%|███████▌    | 622/991 [2:24:44<1:20:41, 13.12s/batch, batch_loss=43.6, batch_index=623, batch_size=256]

Epoch 1/10:  63%|███████▌    | 623/991 [2:24:44<1:20:33, 13.13s/batch, batch_loss=43.6, batch_index=623, batch_size=256]

Epoch 1/10:  63%|██████▎   | 623/991 [2:24:57<1:20:33, 13.13s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 1/10:  63%|██████▎   | 624/991 [2:24:57<1:20:46, 13.21s/batch, batch_loss=1.6e+4, batch_index=624, batch_size=256]

Epoch 1/10:  63%|███████▌    | 624/991 [2:25:11<1:20:46, 13.21s/batch, batch_loss=10.5, batch_index=625, batch_size=256]

Epoch 1/10:  63%|███████▌    | 625/991 [2:25:11<1:20:45, 13.24s/batch, batch_loss=10.5, batch_index=625, batch_size=256]

Epoch 1/10:  63%|███████▌    | 625/991 [2:25:25<1:20:45, 13.24s/batch, batch_loss=8.06, batch_index=626, batch_size=256]

Epoch 1/10:  63%|███████▌    | 626/991 [2:25:25<1:22:45, 13.60s/batch, batch_loss=8.06, batch_index=626, batch_size=256]

Epoch 1/10:  63%|█████▋   | 626/991 [2:25:39<1:22:45, 13.60s/batch, batch_loss=4.31e+3, batch_index=627, batch_size=256]

Epoch 1/10:  63%|█████▋   | 627/991 [2:25:39<1:22:08, 13.54s/batch, batch_loss=4.31e+3, batch_index=627, batch_size=256]

Epoch 1/10:  63%|█████▋   | 627/991 [2:25:51<1:22:08, 13.54s/batch, batch_loss=1.06e+3, batch_index=628, batch_size=256]

Epoch 1/10:  63%|█████▋   | 628/991 [2:25:51<1:19:55, 13.21s/batch, batch_loss=1.06e+3, batch_index=628, batch_size=256]

Epoch 1/10:  63%|████████▊     | 628/991 [2:26:04<1:19:55, 13.21s/batch, batch_loss=14, batch_index=629, batch_size=256]

Epoch 1/10:  63%|████████▉     | 629/991 [2:26:04<1:19:03, 13.10s/batch, batch_loss=14, batch_index=629, batch_size=256]

Epoch 1/10:  63%|███████▌    | 629/991 [2:26:16<1:19:03, 13.10s/batch, batch_loss=37.3, batch_index=630, batch_size=256]

Epoch 1/10:  64%|███████▋    | 630/991 [2:26:16<1:17:12, 12.83s/batch, batch_loss=37.3, batch_index=630, batch_size=256]

Epoch 1/10:  64%|███████▋    | 630/991 [2:26:29<1:17:12, 12.83s/batch, batch_loss=29.7, batch_index=631, batch_size=256]

Epoch 1/10:  64%|███████▋    | 631/991 [2:26:29<1:16:49, 12.80s/batch, batch_loss=29.7, batch_index=631, batch_size=256]

Epoch 1/10:  64%|███████▋    | 631/991 [2:26:42<1:16:49, 12.80s/batch, batch_loss=4.17, batch_index=632, batch_size=256]

Epoch 1/10:  64%|███████▋    | 632/991 [2:26:42<1:17:18, 12.92s/batch, batch_loss=4.17, batch_index=632, batch_size=256]

Epoch 1/10:  64%|███████▋    | 632/991 [2:26:56<1:17:18, 12.92s/batch, batch_loss=33.2, batch_index=633, batch_size=256]

Epoch 1/10:  64%|███████▋    | 633/991 [2:26:56<1:18:32, 13.16s/batch, batch_loss=33.2, batch_index=633, batch_size=256]

Epoch 1/10:  64%|███████▋    | 633/991 [2:27:08<1:18:32, 13.16s/batch, batch_loss=27.3, batch_index=634, batch_size=256]

Epoch 1/10:  64%|███████▋    | 634/991 [2:27:08<1:17:21, 13.00s/batch, batch_loss=27.3, batch_index=634, batch_size=256]

Epoch 1/10:  64%|███████▋    | 634/991 [2:27:22<1:17:21, 13.00s/batch, batch_loss=28.7, batch_index=635, batch_size=256]

Epoch 1/10:  64%|███████▋    | 635/991 [2:27:22<1:18:06, 13.16s/batch, batch_loss=28.7, batch_index=635, batch_size=256]

Epoch 1/10:  64%|███████▋    | 635/991 [2:27:36<1:18:06, 13.16s/batch, batch_loss=18.1, batch_index=636, batch_size=256]

Epoch 1/10:  64%|███████▋    | 636/991 [2:27:36<1:19:00, 13.35s/batch, batch_loss=18.1, batch_index=636, batch_size=256]

Epoch 1/10:  64%|███████▋    | 636/991 [2:27:49<1:19:00, 13.35s/batch, batch_loss=28.9, batch_index=637, batch_size=256]

Epoch 1/10:  64%|███████▋    | 637/991 [2:27:49<1:19:22, 13.45s/batch, batch_loss=28.9, batch_index=637, batch_size=256]

Epoch 1/10:  64%|███████▋    | 637/991 [2:28:03<1:19:22, 13.45s/batch, batch_loss=30.6, batch_index=638, batch_size=256]

Epoch 1/10:  64%|███████▋    | 638/991 [2:28:03<1:19:22, 13.49s/batch, batch_loss=30.6, batch_index=638, batch_size=256]

Epoch 1/10:  64%|███████▋    | 638/991 [2:28:17<1:19:22, 13.49s/batch, batch_loss=12.3, batch_index=639, batch_size=256]

Epoch 1/10:  64%|███████▋    | 639/991 [2:28:17<1:19:23, 13.53s/batch, batch_loss=12.3, batch_index=639, batch_size=256]

Epoch 1/10:  64%|████████▍    | 639/991 [2:28:30<1:19:23, 13.53s/batch, batch_loss=693, batch_index=640, batch_size=256]

Epoch 1/10:  65%|████████▍    | 640/991 [2:28:30<1:18:25, 13.41s/batch, batch_loss=693, batch_index=640, batch_size=256]

Epoch 1/10:  65%|███████▋    | 640/991 [2:28:43<1:18:25, 13.41s/batch, batch_loss=21.7, batch_index=641, batch_size=256]

Epoch 1/10:  65%|███████▊    | 641/991 [2:28:43<1:17:16, 13.25s/batch, batch_loss=21.7, batch_index=641, batch_size=256]

Epoch 1/10:  65%|███████▊    | 641/991 [2:28:55<1:17:16, 13.25s/batch, batch_loss=10.7, batch_index=642, batch_size=256]

Epoch 1/10:  65%|███████▊    | 642/991 [2:28:55<1:16:10, 13.09s/batch, batch_loss=10.7, batch_index=642, batch_size=256]

Epoch 1/10:  65%|█████▊   | 642/991 [2:29:08<1:16:10, 13.09s/batch, batch_loss=2.13e+4, batch_index=643, batch_size=256]

Epoch 1/10:  65%|█████▊   | 643/991 [2:29:08<1:15:49, 13.07s/batch, batch_loss=2.13e+4, batch_index=643, batch_size=256]

Epoch 1/10:  65%|█████▊   | 643/991 [2:29:22<1:15:49, 13.07s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 1/10:  65%|█████▊   | 644/991 [2:29:22<1:15:47, 13.11s/batch, batch_loss=1.76e+4, batch_index=644, batch_size=256]

Epoch 1/10:  65%|█████▊   | 644/991 [2:29:35<1:15:47, 13.11s/batch, batch_loss=2.21e+3, batch_index=645, batch_size=256]

Epoch 1/10:  65%|█████▊   | 645/991 [2:29:35<1:15:34, 13.11s/batch, batch_loss=2.21e+3, batch_index=645, batch_size=256]

Epoch 1/10:  65%|█████████     | 645/991 [2:29:48<1:15:34, 13.11s/batch, batch_loss=21, batch_index=646, batch_size=256]

Epoch 1/10:  65%|█████████▏    | 646/991 [2:29:48<1:15:00, 13.04s/batch, batch_loss=21, batch_index=646, batch_size=256]

Epoch 1/10:  65%|███████▊    | 646/991 [2:30:01<1:15:00, 13.04s/batch, batch_loss=12.8, batch_index=647, batch_size=256]

Epoch 1/10:  65%|███████▊    | 647/991 [2:30:01<1:16:07, 13.28s/batch, batch_loss=12.8, batch_index=647, batch_size=256]

Epoch 1/10:  65%|█████████▏    | 647/991 [2:30:15<1:16:07, 13.28s/batch, batch_loss=18, batch_index=648, batch_size=256]

Epoch 1/10:  65%|█████████▏    | 648/991 [2:30:15<1:16:45, 13.43s/batch, batch_loss=18, batch_index=648, batch_size=256]

Epoch 1/10:  65%|███████▊    | 648/991 [2:30:30<1:16:45, 13.43s/batch, batch_loss=13.4, batch_index=649, batch_size=256]

Epoch 1/10:  65%|███████▊    | 649/991 [2:30:30<1:18:02, 13.69s/batch, batch_loss=13.4, batch_index=649, batch_size=256]

Epoch 1/10:  65%|█████▉   | 649/991 [2:30:45<1:18:02, 13.69s/batch, batch_loss=1.34e+4, batch_index=650, batch_size=256]

Epoch 1/10:  66%|█████▉   | 650/991 [2:30:45<1:20:03, 14.09s/batch, batch_loss=1.34e+4, batch_index=650, batch_size=256]

Epoch 1/10:  66%|███████▊    | 650/991 [2:30:58<1:20:03, 14.09s/batch, batch_loss=12.1, batch_index=651, batch_size=256]

Epoch 1/10:  66%|███████▉    | 651/991 [2:30:58<1:18:52, 13.92s/batch, batch_loss=12.1, batch_index=651, batch_size=256]

Epoch 1/10:  66%|███████▉    | 651/991 [2:31:12<1:18:52, 13.92s/batch, batch_loss=15.3, batch_index=652, batch_size=256]

Epoch 1/10:  66%|███████▉    | 652/991 [2:31:12<1:19:10, 14.01s/batch, batch_loss=15.3, batch_index=652, batch_size=256]

Epoch 1/10:  66%|███████▉    | 652/991 [2:31:27<1:19:10, 14.01s/batch, batch_loss=21.1, batch_index=653, batch_size=256]

Epoch 1/10:  66%|███████▉    | 653/991 [2:31:27<1:19:22, 14.09s/batch, batch_loss=21.1, batch_index=653, batch_size=256]

Epoch 1/10:  66%|█████████▏    | 653/991 [2:31:41<1:19:22, 14.09s/batch, batch_loss=21, batch_index=654, batch_size=256]

Epoch 1/10:  66%|█████████▏    | 654/991 [2:31:41<1:19:56, 14.23s/batch, batch_loss=21, batch_index=654, batch_size=256]

Epoch 1/10:  66%|█████▉   | 654/991 [2:31:55<1:19:56, 14.23s/batch, batch_loss=3.86e+3, batch_index=655, batch_size=256]

Epoch 1/10:  66%|█████▉   | 655/991 [2:31:55<1:19:39, 14.22s/batch, batch_loss=3.86e+3, batch_index=655, batch_size=256]

Epoch 1/10:  66%|██████▌   | 655/991 [2:32:10<1:19:39, 14.22s/batch, batch_loss=5.2e+3, batch_index=656, batch_size=256]

Epoch 1/10:  66%|██████▌   | 656/991 [2:32:10<1:19:48, 14.29s/batch, batch_loss=5.2e+3, batch_index=656, batch_size=256]

Epoch 1/10:  66%|█████▉   | 656/991 [2:32:24<1:19:48, 14.29s/batch, batch_loss=4.24e+3, batch_index=657, batch_size=256]

Epoch 1/10:  66%|█████▉   | 657/991 [2:32:24<1:19:09, 14.22s/batch, batch_loss=4.24e+3, batch_index=657, batch_size=256]

Epoch 1/10:  66%|██████▋   | 657/991 [2:32:38<1:19:09, 14.22s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 1/10:  66%|██████▋   | 658/991 [2:32:38<1:18:58, 14.23s/batch, batch_loss=2.2e+4, batch_index=658, batch_size=256]

Epoch 1/10:  66%|███████▉    | 658/991 [2:32:53<1:18:58, 14.23s/batch, batch_loss=5.75, batch_index=659, batch_size=256]

Epoch 1/10:  66%|███████▉    | 659/991 [2:32:53<1:20:10, 14.49s/batch, batch_loss=5.75, batch_index=659, batch_size=256]

Epoch 1/10:  66%|███████▉    | 659/991 [2:33:08<1:20:10, 14.49s/batch, batch_loss=6.98, batch_index=660, batch_size=256]

Epoch 1/10:  67%|███████▉    | 660/991 [2:33:08<1:20:44, 14.64s/batch, batch_loss=6.98, batch_index=660, batch_size=256]

Epoch 1/10:  67%|███████▉    | 660/991 [2:33:23<1:20:44, 14.64s/batch, batch_loss=16.8, batch_index=661, batch_size=256]

Epoch 1/10:  67%|████████    | 661/991 [2:33:23<1:20:37, 14.66s/batch, batch_loss=16.8, batch_index=661, batch_size=256]

Epoch 1/10:  67%|████████    | 661/991 [2:33:37<1:20:37, 14.66s/batch, batch_loss=18.2, batch_index=662, batch_size=256]

Epoch 1/10:  67%|████████    | 662/991 [2:33:37<1:19:46, 14.55s/batch, batch_loss=18.2, batch_index=662, batch_size=256]

Epoch 1/10:  67%|████████    | 662/991 [2:33:51<1:19:46, 14.55s/batch, batch_loss=29.2, batch_index=663, batch_size=256]

Epoch 1/10:  67%|████████    | 663/991 [2:33:51<1:18:38, 14.39s/batch, batch_loss=29.2, batch_index=663, batch_size=256]

Epoch 1/10:  67%|██████   | 663/991 [2:34:08<1:18:38, 14.39s/batch, batch_loss=3.02e+3, batch_index=664, batch_size=256]

Epoch 1/10:  67%|██████   | 664/991 [2:34:08<1:21:55, 15.03s/batch, batch_loss=3.02e+3, batch_index=664, batch_size=256]

Epoch 1/10:  67%|████████    | 664/991 [2:34:21<1:21:55, 15.03s/batch, batch_loss=23.6, batch_index=665, batch_size=256]

Epoch 1/10:  67%|████████    | 665/991 [2:34:21<1:19:39, 14.66s/batch, batch_loss=23.6, batch_index=665, batch_size=256]

Epoch 1/10:  67%|██████   | 665/991 [2:34:35<1:19:39, 14.66s/batch, batch_loss=3.04e+3, batch_index=666, batch_size=256]

Epoch 1/10:  67%|██████   | 666/991 [2:34:35<1:16:44, 14.17s/batch, batch_loss=3.04e+3, batch_index=666, batch_size=256]

Epoch 1/10:  67%|████████    | 666/991 [2:34:47<1:16:44, 14.17s/batch, batch_loss=21.7, batch_index=667, batch_size=256]

Epoch 1/10:  67%|████████    | 667/991 [2:34:47<1:14:31, 13.80s/batch, batch_loss=21.7, batch_index=667, batch_size=256]

Epoch 1/10:  67%|████████▋    | 667/991 [2:35:03<1:14:31, 13.80s/batch, batch_loss=389, batch_index=668, batch_size=256]

Epoch 1/10:  67%|████████▊    | 668/991 [2:35:03<1:16:44, 14.25s/batch, batch_loss=389, batch_index=668, batch_size=256]

Epoch 1/10:  67%|██████   | 668/991 [2:35:17<1:16:44, 14.25s/batch, batch_loss=2.98e+3, batch_index=669, batch_size=256]

Epoch 1/10:  68%|██████   | 669/991 [2:35:17<1:16:42, 14.29s/batch, batch_loss=2.98e+3, batch_index=669, batch_size=256]

Epoch 1/10:  68%|████████▊    | 669/991 [2:35:31<1:16:42, 14.29s/batch, batch_loss=998, batch_index=670, batch_size=256]

Epoch 1/10:  68%|████████▊    | 670/991 [2:35:31<1:16:11, 14.24s/batch, batch_loss=998, batch_index=670, batch_size=256]

Epoch 1/10:  68%|████████    | 670/991 [2:35:45<1:16:11, 14.24s/batch, batch_loss=11.1, batch_index=671, batch_size=256]

Epoch 1/10:  68%|████████▏   | 671/991 [2:35:45<1:15:50, 14.22s/batch, batch_loss=11.1, batch_index=671, batch_size=256]

Epoch 1/10:  68%|████████▏   | 671/991 [2:36:02<1:15:50, 14.22s/batch, batch_loss=16.7, batch_index=672, batch_size=256]

Epoch 1/10:  68%|████████▏   | 672/991 [2:36:02<1:20:07, 15.07s/batch, batch_loss=16.7, batch_index=672, batch_size=256]

Epoch 1/10:  68%|████████▏   | 672/991 [2:36:17<1:20:07, 15.07s/batch, batch_loss=23.5, batch_index=673, batch_size=256]

Epoch 1/10:  68%|████████▏   | 673/991 [2:36:17<1:18:18, 14.77s/batch, batch_loss=23.5, batch_index=673, batch_size=256]

Epoch 1/10:  68%|████████▏   | 673/991 [2:36:31<1:18:18, 14.77s/batch, batch_loss=21.8, batch_index=674, batch_size=256]

Epoch 1/10:  68%|████████▏   | 674/991 [2:36:31<1:17:11, 14.61s/batch, batch_loss=21.8, batch_index=674, batch_size=256]

Epoch 1/10:  68%|████████▏   | 674/991 [2:36:44<1:17:11, 14.61s/batch, batch_loss=5.07, batch_index=675, batch_size=256]

Epoch 1/10:  68%|████████▏   | 675/991 [2:36:44<1:15:08, 14.27s/batch, batch_loss=5.07, batch_index=675, batch_size=256]

Epoch 1/10:  68%|████████▏   | 675/991 [2:36:59<1:15:08, 14.27s/batch, batch_loss=19.2, batch_index=676, batch_size=256]

Epoch 1/10:  68%|████████▏   | 676/991 [2:36:59<1:15:09, 14.32s/batch, batch_loss=19.2, batch_index=676, batch_size=256]

Epoch 1/10:  68%|████████▏   | 676/991 [2:37:13<1:15:09, 14.32s/batch, batch_loss=39.9, batch_index=677, batch_size=256]

Epoch 1/10:  68%|████████▏   | 677/991 [2:37:13<1:14:44, 14.28s/batch, batch_loss=39.9, batch_index=677, batch_size=256]

Epoch 1/10:  68%|████████▏   | 677/991 [2:37:28<1:14:44, 14.28s/batch, batch_loss=7.79, batch_index=678, batch_size=256]

Epoch 1/10:  68%|████████▏   | 678/991 [2:37:28<1:15:33, 14.48s/batch, batch_loss=7.79, batch_index=678, batch_size=256]

Epoch 1/10:  68%|██████▏  | 678/991 [2:37:42<1:15:33, 14.48s/batch, batch_loss=3.84e+3, batch_index=679, batch_size=256]

Epoch 1/10:  69%|██████▏  | 679/991 [2:37:42<1:15:04, 14.44s/batch, batch_loss=3.84e+3, batch_index=679, batch_size=256]

Epoch 1/10:  69%|██████▏  | 679/991 [2:38:00<1:15:04, 14.44s/batch, batch_loss=6.16e+3, batch_index=680, batch_size=256]

Epoch 1/10:  69%|██████▏  | 680/991 [2:38:00<1:19:40, 15.37s/batch, batch_loss=6.16e+3, batch_index=680, batch_size=256]

Epoch 1/10:  69%|██████▏  | 680/991 [2:38:15<1:19:40, 15.37s/batch, batch_loss=7.23e+4, batch_index=681, batch_size=256]

Epoch 1/10:  69%|██████▏  | 681/991 [2:38:15<1:18:44, 15.24s/batch, batch_loss=7.23e+4, batch_index=681, batch_size=256]

Epoch 1/10:  69%|█████████▌    | 681/991 [2:38:29<1:18:44, 15.24s/batch, batch_loss=17, batch_index=682, batch_size=256]

Epoch 1/10:  69%|█████████▋    | 682/991 [2:38:29<1:17:25, 15.03s/batch, batch_loss=17, batch_index=682, batch_size=256]

Epoch 1/10:  69%|████████▉    | 682/991 [2:38:43<1:17:25, 15.03s/batch, batch_loss=377, batch_index=683, batch_size=256]

Epoch 1/10:  69%|████████▉    | 683/991 [2:38:43<1:15:08, 14.64s/batch, batch_loss=377, batch_index=683, batch_size=256]

Epoch 1/10:  69%|████████▎   | 683/991 [2:38:57<1:15:08, 14.64s/batch, batch_loss=6.08, batch_index=684, batch_size=256]

Epoch 1/10:  69%|████████▎   | 684/991 [2:38:57<1:14:10, 14.50s/batch, batch_loss=6.08, batch_index=684, batch_size=256]

Epoch 1/10:  69%|████████▎   | 684/991 [2:39:11<1:14:10, 14.50s/batch, batch_loss=15.1, batch_index=685, batch_size=256]

Epoch 1/10:  69%|████████▎   | 685/991 [2:39:11<1:13:42, 14.45s/batch, batch_loss=15.1, batch_index=685, batch_size=256]

Epoch 1/10:  69%|████████▎   | 685/991 [2:39:26<1:13:42, 14.45s/batch, batch_loss=12.1, batch_index=686, batch_size=256]

Epoch 1/10:  69%|████████▎   | 686/991 [2:39:26<1:12:57, 14.35s/batch, batch_loss=12.1, batch_index=686, batch_size=256]

Epoch 1/10:  69%|████████▉    | 686/991 [2:39:41<1:12:57, 14.35s/batch, batch_loss=537, batch_index=687, batch_size=256]

Epoch 1/10:  69%|█████████    | 687/991 [2:39:41<1:13:45, 14.56s/batch, batch_loss=537, batch_index=687, batch_size=256]

Epoch 1/10:  69%|████████▎   | 687/991 [2:39:56<1:13:45, 14.56s/batch, batch_loss=6.13, batch_index=688, batch_size=256]

Epoch 1/10:  69%|████████▎   | 688/991 [2:39:56<1:14:48, 14.81s/batch, batch_loss=6.13, batch_index=688, batch_size=256]

Epoch 1/10:  69%|████████▎   | 688/991 [2:40:11<1:14:48, 14.81s/batch, batch_loss=7.66, batch_index=689, batch_size=256]

Epoch 1/10:  70%|████████▎   | 689/991 [2:40:11<1:14:56, 14.89s/batch, batch_loss=7.66, batch_index=689, batch_size=256]

Epoch 1/10:  70%|████████▎   | 689/991 [2:40:25<1:14:56, 14.89s/batch, batch_loss=14.4, batch_index=690, batch_size=256]

Epoch 1/10:  70%|████████▎   | 690/991 [2:40:25<1:13:54, 14.73s/batch, batch_loss=14.4, batch_index=690, batch_size=256]

Epoch 1/10:  70%|████████▎   | 690/991 [2:40:42<1:13:54, 14.73s/batch, batch_loss=21.1, batch_index=691, batch_size=256]

Epoch 1/10:  70%|████████▎   | 691/991 [2:40:42<1:16:21, 15.27s/batch, batch_loss=21.1, batch_index=691, batch_size=256]

Epoch 1/10:  70%|█████████    | 691/991 [2:40:56<1:16:21, 15.27s/batch, batch_loss=5.5, batch_index=692, batch_size=256]

Epoch 1/10:  70%|█████████    | 692/991 [2:40:56<1:14:37, 14.98s/batch, batch_loss=5.5, batch_index=692, batch_size=256]

Epoch 1/10:  70%|██████▎  | 692/991 [2:41:11<1:14:37, 14.98s/batch, batch_loss=4.66e+3, batch_index=693, batch_size=256]

Epoch 1/10:  70%|██████▎  | 693/991 [2:41:11<1:14:19, 14.97s/batch, batch_loss=4.66e+3, batch_index=693, batch_size=256]

Epoch 1/10:  70%|█████████    | 693/991 [2:41:27<1:14:19, 14.97s/batch, batch_loss=462, batch_index=694, batch_size=256]

Epoch 1/10:  70%|█████████    | 694/991 [2:41:27<1:14:40, 15.09s/batch, batch_loss=462, batch_index=694, batch_size=256]

Epoch 1/10:  70%|█████████    | 694/991 [2:41:41<1:14:40, 15.09s/batch, batch_loss=794, batch_index=695, batch_size=256]

Epoch 1/10:  70%|█████████    | 695/991 [2:41:41<1:13:09, 14.83s/batch, batch_loss=794, batch_index=695, batch_size=256]

Epoch 1/10:  70%|████████▍   | 695/991 [2:41:55<1:13:09, 14.83s/batch, batch_loss=12.8, batch_index=696, batch_size=256]

Epoch 1/10:  70%|████████▍   | 696/991 [2:41:55<1:12:14, 14.69s/batch, batch_loss=12.8, batch_index=696, batch_size=256]

Epoch 1/10:  70%|██████▎  | 696/991 [2:42:12<1:12:14, 14.69s/batch, batch_loss=6.79e+3, batch_index=697, batch_size=256]

Epoch 1/10:  70%|██████▎  | 697/991 [2:42:12<1:15:06, 15.33s/batch, batch_loss=6.79e+3, batch_index=697, batch_size=256]

Epoch 1/10:  70%|████████▍   | 697/991 [2:42:27<1:15:06, 15.33s/batch, batch_loss=11.4, batch_index=698, batch_size=256]

Epoch 1/10:  70%|████████▍   | 698/991 [2:42:27<1:14:36, 15.28s/batch, batch_loss=11.4, batch_index=698, batch_size=256]

Epoch 1/10:  70%|████████▍   | 698/991 [2:42:41<1:14:36, 15.28s/batch, batch_loss=8.15, batch_index=699, batch_size=256]

Epoch 1/10:  71%|████████▍   | 699/991 [2:42:41<1:12:42, 14.94s/batch, batch_loss=8.15, batch_index=699, batch_size=256]

Epoch 1/10:  71%|████████▍   | 699/991 [2:42:56<1:12:42, 14.94s/batch, batch_loss=10.2, batch_index=700, batch_size=256]

Epoch 1/10:  71%|████████▍   | 700/991 [2:42:56<1:11:53, 14.82s/batch, batch_loss=10.2, batch_index=700, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 700/991 [2:43:10<1:11:53, 14.82s/batch, batch_loss=212, batch_index=701, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 701/991 [2:43:10<1:10:35, 14.60s/batch, batch_loss=212, batch_index=701, batch_size=256]

Epoch 1/10:  71%|████████▍   | 701/991 [2:43:24<1:10:35, 14.60s/batch, batch_loss=22.4, batch_index=702, batch_size=256]

Epoch 1/10:  71%|████████▌   | 702/991 [2:43:24<1:10:04, 14.55s/batch, batch_loss=22.4, batch_index=702, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 702/991 [2:43:38<1:10:04, 14.55s/batch, batch_loss=277, batch_index=703, batch_size=256]

Epoch 1/10:  71%|█████████▏   | 703/991 [2:43:38<1:09:06, 14.40s/batch, batch_loss=277, batch_index=703, batch_size=256]

Epoch 1/10:  71%|████████▌   | 703/991 [2:43:53<1:09:06, 14.40s/batch, batch_loss=8.96, batch_index=704, batch_size=256]

Epoch 1/10:  71%|████████▌   | 704/991 [2:43:53<1:09:00, 14.43s/batch, batch_loss=8.96, batch_index=704, batch_size=256]

Epoch 1/10:  71%|████████▌   | 704/991 [2:44:06<1:09:00, 14.43s/batch, batch_loss=13.2, batch_index=705, batch_size=256]

Epoch 1/10:  71%|████████▌   | 705/991 [2:44:06<1:07:33, 14.17s/batch, batch_loss=13.2, batch_index=705, batch_size=256]

Epoch 1/10:  71%|████████▌   | 705/991 [2:44:21<1:07:33, 14.17s/batch, batch_loss=21.4, batch_index=706, batch_size=256]

Epoch 1/10:  71%|████████▌   | 706/991 [2:44:21<1:07:39, 14.25s/batch, batch_loss=21.4, batch_index=706, batch_size=256]

Epoch 1/10:  71%|████████▌   | 706/991 [2:44:35<1:07:39, 14.25s/batch, batch_loss=18.5, batch_index=707, batch_size=256]

Epoch 1/10:  71%|████████▌   | 707/991 [2:44:35<1:07:54, 14.35s/batch, batch_loss=18.5, batch_index=707, batch_size=256]

Epoch 1/10:  71%|████████▌   | 707/991 [2:44:50<1:07:54, 14.35s/batch, batch_loss=8.26, batch_index=708, batch_size=256]

Epoch 1/10:  71%|████████▌   | 708/991 [2:44:50<1:07:37, 14.34s/batch, batch_loss=8.26, batch_index=708, batch_size=256]

Epoch 1/10:  71%|████████▌   | 708/991 [2:45:06<1:07:37, 14.34s/batch, batch_loss=8.21, batch_index=709, batch_size=256]

Epoch 1/10:  72%|████████▌   | 709/991 [2:45:06<1:10:41, 15.04s/batch, batch_loss=8.21, batch_index=709, batch_size=256]

Epoch 1/10:  72%|████████▌   | 709/991 [2:45:21<1:10:41, 15.04s/batch, batch_loss=42.7, batch_index=710, batch_size=256]

Epoch 1/10:  72%|████████▌   | 710/991 [2:45:21<1:10:20, 15.02s/batch, batch_loss=42.7, batch_index=710, batch_size=256]

Epoch 1/10:  72%|█████████▎   | 710/991 [2:45:36<1:10:20, 15.02s/batch, batch_loss=104, batch_index=711, batch_size=256]

Epoch 1/10:  72%|█████████▎   | 711/991 [2:45:36<1:09:29, 14.89s/batch, batch_loss=104, batch_index=711, batch_size=256]

Epoch 1/10:  72%|████████▌   | 711/991 [2:45:50<1:09:29, 14.89s/batch, batch_loss=21.1, batch_index=712, batch_size=256]

Epoch 1/10:  72%|████████▌   | 712/991 [2:45:50<1:08:36, 14.76s/batch, batch_loss=21.1, batch_index=712, batch_size=256]

Epoch 1/10:  72%|████████▌   | 712/991 [2:46:05<1:08:36, 14.76s/batch, batch_loss=90.8, batch_index=713, batch_size=256]

Epoch 1/10:  72%|████████▋   | 713/991 [2:46:05<1:07:25, 14.55s/batch, batch_loss=90.8, batch_index=713, batch_size=256]

Epoch 1/10:  72%|████████▋   | 713/991 [2:46:19<1:07:25, 14.55s/batch, batch_loss=39.8, batch_index=714, batch_size=256]

Epoch 1/10:  72%|████████▋   | 714/991 [2:46:19<1:06:34, 14.42s/batch, batch_loss=39.8, batch_index=714, batch_size=256]

Epoch 1/10:  72%|██████████    | 714/991 [2:46:34<1:06:34, 14.42s/batch, batch_loss=26, batch_index=715, batch_size=256]

Epoch 1/10:  72%|██████████    | 715/991 [2:46:34<1:07:20, 14.64s/batch, batch_loss=26, batch_index=715, batch_size=256]

Epoch 1/10:  72%|████████▋   | 715/991 [2:46:48<1:07:20, 14.64s/batch, batch_loss=15.9, batch_index=716, batch_size=256]

Epoch 1/10:  72%|████████▋   | 716/991 [2:46:48<1:06:32, 14.52s/batch, batch_loss=15.9, batch_index=716, batch_size=256]

Epoch 1/10:  72%|████████▋   | 716/991 [2:47:02<1:06:32, 14.52s/batch, batch_loss=43.1, batch_index=717, batch_size=256]

Epoch 1/10:  72%|████████▋   | 717/991 [2:47:02<1:05:49, 14.41s/batch, batch_loss=43.1, batch_index=717, batch_size=256]

Epoch 1/10:  72%|████████▋   | 717/991 [2:47:17<1:05:49, 14.41s/batch, batch_loss=27.3, batch_index=718, batch_size=256]

Epoch 1/10:  72%|████████▋   | 718/991 [2:47:17<1:05:43, 14.45s/batch, batch_loss=27.3, batch_index=718, batch_size=256]

Epoch 1/10:  72%|████████▋   | 718/991 [2:47:31<1:05:43, 14.45s/batch, batch_loss=13.5, batch_index=719, batch_size=256]

Epoch 1/10:  73%|████████▋   | 719/991 [2:47:31<1:05:25, 14.43s/batch, batch_loss=13.5, batch_index=719, batch_size=256]

Epoch 1/10:  73%|████████▋   | 719/991 [2:47:45<1:05:25, 14.43s/batch, batch_loss=17.5, batch_index=720, batch_size=256]

Epoch 1/10:  73%|████████▋   | 720/991 [2:47:45<1:04:46, 14.34s/batch, batch_loss=17.5, batch_index=720, batch_size=256]

Epoch 1/10:  73%|████████▋   | 720/991 [2:47:58<1:04:46, 14.34s/batch, batch_loss=21.1, batch_index=721, batch_size=256]

Epoch 1/10:  73%|████████▋   | 721/991 [2:47:58<1:02:59, 14.00s/batch, batch_loss=21.1, batch_index=721, batch_size=256]

Epoch 1/10:  73%|████████▋   | 721/991 [2:48:11<1:02:59, 14.00s/batch, batch_loss=25.4, batch_index=722, batch_size=256]

Epoch 1/10:  73%|████████▋   | 722/991 [2:48:11<1:01:04, 13.62s/batch, batch_loss=25.4, batch_index=722, batch_size=256]

Epoch 1/10:  73%|██████▌  | 722/991 [2:48:26<1:01:04, 13.62s/batch, batch_loss=7.23e+3, batch_index=723, batch_size=256]

Epoch 1/10:  73%|██████▌  | 723/991 [2:48:26<1:02:52, 14.08s/batch, batch_loss=7.23e+3, batch_index=723, batch_size=256]

Epoch 1/10:  73%|████████▊   | 723/991 [2:48:40<1:02:52, 14.08s/batch, batch_loss=5.39, batch_index=724, batch_size=256]

Epoch 1/10:  73%|████████▊   | 724/991 [2:48:40<1:01:40, 13.86s/batch, batch_loss=5.39, batch_index=724, batch_size=256]

Epoch 1/10:  73%|████████▊   | 724/991 [2:48:53<1:01:40, 13.86s/batch, batch_loss=20.6, batch_index=725, batch_size=256]

Epoch 1/10:  73%|████████▊   | 725/991 [2:48:53<1:00:14, 13.59s/batch, batch_loss=20.6, batch_index=725, batch_size=256]

Epoch 1/10:  73%|████████▊   | 725/991 [2:49:05<1:00:14, 13.59s/batch, batch_loss=14.4, batch_index=726, batch_size=256]

Epoch 1/10:  73%|██████████▎   | 726/991 [2:49:05<58:52, 13.33s/batch, batch_loss=14.4, batch_index=726, batch_size=256]

Epoch 1/10:  73%|████████   | 726/991 [2:49:19<58:52, 13.33s/batch, batch_loss=1.29e+4, batch_index=727, batch_size=256]

Epoch 1/10:  73%|████████   | 727/991 [2:49:19<58:25, 13.28s/batch, batch_loss=1.29e+4, batch_index=727, batch_size=256]

Epoch 1/10:  73%|███████████▋    | 727/991 [2:49:32<58:25, 13.28s/batch, batch_loss=13, batch_index=728, batch_size=256]

Epoch 1/10:  73%|███████████▊    | 728/991 [2:49:32<58:14, 13.29s/batch, batch_loss=13, batch_index=728, batch_size=256]

Epoch 1/10:  73%|███████████    | 728/991 [2:49:45<58:14, 13.29s/batch, batch_loss=136, batch_index=729, batch_size=256]

Epoch 1/10:  74%|███████████    | 729/991 [2:49:45<57:45, 13.23s/batch, batch_loss=136, batch_index=729, batch_size=256]

Epoch 1/10:  74%|██████████▎   | 729/991 [2:49:58<57:45, 13.23s/batch, batch_loss=9.72, batch_index=730, batch_size=256]

Epoch 1/10:  74%|██████████▎   | 730/991 [2:49:58<57:22, 13.19s/batch, batch_loss=9.72, batch_index=730, batch_size=256]

Epoch 1/10:  74%|███████████    | 730/991 [2:50:11<57:22, 13.19s/batch, batch_loss=120, batch_index=731, batch_size=256]

Epoch 1/10:  74%|███████████    | 731/991 [2:50:11<56:40, 13.08s/batch, batch_loss=120, batch_index=731, batch_size=256]

Epoch 1/10:  74%|████████   | 731/991 [2:50:22<56:40, 13.08s/batch, batch_loss=1.39e+4, batch_index=732, batch_size=256]

Epoch 1/10:  74%|████████▏  | 732/991 [2:50:22<54:00, 12.51s/batch, batch_loss=1.39e+4, batch_index=732, batch_size=256]

Epoch 1/10:  74%|██████████▎   | 732/991 [2:50:33<54:00, 12.51s/batch, batch_loss=26.6, batch_index=733, batch_size=256]

Epoch 1/10:  74%|██████████▎   | 733/991 [2:50:33<52:24, 12.19s/batch, batch_loss=26.6, batch_index=733, batch_size=256]

Epoch 1/10:  74%|████████▏  | 733/991 [2:50:48<52:24, 12.19s/batch, batch_loss=6.79e+3, batch_index=734, batch_size=256]

Epoch 1/10:  74%|████████▏  | 734/991 [2:50:48<55:20, 12.92s/batch, batch_loss=6.79e+3, batch_index=734, batch_size=256]

Epoch 1/10:  74%|███████████▊    | 734/991 [2:51:01<55:20, 12.92s/batch, batch_loss=29, batch_index=735, batch_size=256]

Epoch 1/10:  74%|███████████▊    | 735/991 [2:51:01<55:27, 13.00s/batch, batch_loss=29, batch_index=735, batch_size=256]

Epoch 1/10:  74%|██████████▍   | 735/991 [2:51:14<55:27, 13.00s/batch, batch_loss=22.2, batch_index=736, batch_size=256]

Epoch 1/10:  74%|██████████▍   | 736/991 [2:51:14<54:14, 12.76s/batch, batch_loss=22.2, batch_index=736, batch_size=256]

Epoch 1/10:  74%|██████████▍   | 736/991 [2:51:25<54:14, 12.76s/batch, batch_loss=11.4, batch_index=737, batch_size=256]

Epoch 1/10:  74%|██████████▍   | 737/991 [2:51:25<52:41, 12.45s/batch, batch_loss=11.4, batch_index=737, batch_size=256]

Epoch 1/10:  74%|████████▉   | 737/991 [2:51:38<52:41, 12.45s/batch, batch_loss=1.5e+3, batch_index=738, batch_size=256]

Epoch 1/10:  74%|████████▉   | 738/991 [2:51:38<52:51, 12.54s/batch, batch_loss=1.5e+3, batch_index=738, batch_size=256]

Epoch 1/10:  74%|██████████▍   | 738/991 [2:51:51<52:51, 12.54s/batch, batch_loss=47.1, batch_index=739, batch_size=256]

Epoch 1/10:  75%|██████████▍   | 739/991 [2:51:51<53:50, 12.82s/batch, batch_loss=47.1, batch_index=739, batch_size=256]

Epoch 1/10:  75%|██████████▍   | 739/991 [2:52:05<53:50, 12.82s/batch, batch_loss=9.33, batch_index=740, batch_size=256]

Epoch 1/10:  75%|██████████▍   | 740/991 [2:52:05<54:17, 12.98s/batch, batch_loss=9.33, batch_index=740, batch_size=256]

Epoch 1/10:  75%|████████▏  | 740/991 [2:52:19<54:17, 12.98s/batch, batch_loss=1.81e+4, batch_index=741, batch_size=256]

Epoch 1/10:  75%|████████▏  | 741/991 [2:52:19<55:18, 13.27s/batch, batch_loss=1.81e+4, batch_index=741, batch_size=256]

Epoch 1/10:  75%|████████▏  | 741/991 [2:52:33<55:18, 13.27s/batch, batch_loss=2.27e+3, batch_index=742, batch_size=256]

Epoch 1/10:  75%|████████▏  | 742/991 [2:52:33<56:49, 13.69s/batch, batch_loss=2.27e+3, batch_index=742, batch_size=256]

Epoch 1/10:  75%|██████████▍   | 742/991 [2:52:47<56:49, 13.69s/batch, batch_loss=12.4, batch_index=743, batch_size=256]

Epoch 1/10:  75%|██████████▍   | 743/991 [2:52:47<56:53, 13.76s/batch, batch_loss=12.4, batch_index=743, batch_size=256]

Epoch 1/10:  75%|██████████▍   | 743/991 [2:53:01<56:53, 13.76s/batch, batch_loss=15.3, batch_index=744, batch_size=256]

Epoch 1/10:  75%|██████████▌   | 744/991 [2:53:01<56:58, 13.84s/batch, batch_loss=15.3, batch_index=744, batch_size=256]

Epoch 1/10:  75%|██████████▌   | 744/991 [2:53:15<56:58, 13.84s/batch, batch_loss=20.2, batch_index=745, batch_size=256]

Epoch 1/10:  75%|██████████▌   | 745/991 [2:53:15<56:53, 13.87s/batch, batch_loss=20.2, batch_index=745, batch_size=256]

Epoch 1/10:  75%|████████▎  | 745/991 [2:53:29<56:53, 13.87s/batch, batch_loss=1.17e+3, batch_index=746, batch_size=256]

Epoch 1/10:  75%|████████▎  | 746/991 [2:53:29<56:26, 13.82s/batch, batch_loss=1.17e+3, batch_index=746, batch_size=256]

Epoch 1/10:  75%|████████▎  | 746/991 [2:53:43<56:26, 13.82s/batch, batch_loss=3.91e+3, batch_index=747, batch_size=256]

Epoch 1/10:  75%|████████▎  | 747/991 [2:53:43<56:52, 13.98s/batch, batch_loss=3.91e+3, batch_index=747, batch_size=256]

Epoch 1/10:  75%|██████████▌   | 747/991 [2:53:56<56:52, 13.98s/batch, batch_loss=13.5, batch_index=748, batch_size=256]

Epoch 1/10:  75%|██████████▌   | 748/991 [2:53:56<55:28, 13.70s/batch, batch_loss=13.5, batch_index=748, batch_size=256]

Epoch 1/10:  75%|██████████▌   | 748/991 [2:54:10<55:28, 13.70s/batch, batch_loss=15.5, batch_index=749, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 749/991 [2:54:10<55:41, 13.81s/batch, batch_loss=15.5, batch_index=749, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 749/991 [2:54:24<55:41, 13.81s/batch, batch_loss=13.7, batch_index=750, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 750/991 [2:54:24<54:31, 13.57s/batch, batch_loss=13.7, batch_index=750, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 750/991 [2:54:37<54:31, 13.57s/batch, batch_loss=9.83, batch_index=751, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 751/991 [2:54:37<54:11, 13.55s/batch, batch_loss=9.83, batch_index=751, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 751/991 [2:54:51<54:11, 13.55s/batch, batch_loss=8.93, batch_index=752, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 752/991 [2:54:51<54:51, 13.77s/batch, batch_loss=8.93, batch_index=752, batch_size=256]

Epoch 1/10:  76%|██████████▌   | 752/991 [2:55:06<54:51, 13.77s/batch, batch_loss=7.81, batch_index=753, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 753/991 [2:55:06<55:52, 14.09s/batch, batch_loss=7.81, batch_index=753, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 753/991 [2:55:21<55:52, 14.09s/batch, batch_loss=5.09, batch_index=754, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 754/991 [2:55:21<56:18, 14.25s/batch, batch_loss=5.09, batch_index=754, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 754/991 [2:55:34<56:18, 14.25s/batch, batch_loss=26.3, batch_index=755, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 755/991 [2:55:34<55:24, 14.09s/batch, batch_loss=26.3, batch_index=755, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 755/991 [2:55:49<55:24, 14.09s/batch, batch_loss=22.5, batch_index=756, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 756/991 [2:55:49<55:12, 14.10s/batch, batch_loss=22.5, batch_index=756, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 756/991 [2:56:03<55:12, 14.10s/batch, batch_loss=5.37, batch_index=757, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 757/991 [2:56:03<55:49, 14.31s/batch, batch_loss=5.37, batch_index=757, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 757/991 [2:56:19<55:49, 14.31s/batch, batch_loss=15.8, batch_index=758, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 758/991 [2:56:19<56:37, 14.58s/batch, batch_loss=15.8, batch_index=758, batch_size=256]

Epoch 1/10:  76%|██████████▋   | 758/991 [2:56:33<56:37, 14.58s/batch, batch_loss=14.9, batch_index=759, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 759/991 [2:56:33<56:31, 14.62s/batch, batch_loss=14.9, batch_index=759, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 759/991 [2:56:47<56:31, 14.62s/batch, batch_loss=22.3, batch_index=760, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 760/991 [2:56:47<55:29, 14.41s/batch, batch_loss=22.3, batch_index=760, batch_size=256]

Epoch 1/10:  77%|██████████▋   | 760/991 [2:57:01<55:29, 14.41s/batch, batch_loss=35.6, batch_index=761, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 761/991 [2:57:01<54:51, 14.31s/batch, batch_loss=35.6, batch_index=761, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 761/991 [2:57:16<54:51, 14.31s/batch, batch_loss=25.3, batch_index=762, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 762/991 [2:57:16<54:29, 14.28s/batch, batch_loss=25.3, batch_index=762, batch_size=256]

Epoch 1/10:  77%|███████████▌   | 762/991 [2:57:30<54:29, 14.28s/batch, batch_loss=529, batch_index=763, batch_size=256]

Epoch 1/10:  77%|███████████▌   | 763/991 [2:57:30<54:56, 14.46s/batch, batch_loss=529, batch_index=763, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 763/991 [2:57:45<54:56, 14.46s/batch, batch_loss=13.9, batch_index=764, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 764/991 [2:57:45<55:21, 14.63s/batch, batch_loss=13.9, batch_index=764, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 764/991 [2:58:00<55:21, 14.63s/batch, batch_loss=3.51, batch_index=765, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 765/991 [2:58:00<55:06, 14.63s/batch, batch_loss=3.51, batch_index=765, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 765/991 [2:58:14<55:06, 14.63s/batch, batch_loss=13.1, batch_index=766, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 766/991 [2:58:14<53:49, 14.35s/batch, batch_loss=13.1, batch_index=766, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 766/991 [2:58:31<53:49, 14.35s/batch, batch_loss=20.4, batch_index=767, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 767/991 [2:58:31<56:49, 15.22s/batch, batch_loss=20.4, batch_index=767, batch_size=256]

Epoch 1/10:  77%|███████████▌   | 767/991 [2:58:45<56:49, 15.22s/batch, batch_loss=4.3, batch_index=768, batch_size=256]

Epoch 1/10:  77%|███████████▌   | 768/991 [2:58:45<55:33, 14.95s/batch, batch_loss=4.3, batch_index=768, batch_size=256]

Epoch 1/10:  77%|██████████▊   | 768/991 [2:59:00<55:33, 14.95s/batch, batch_loss=3.18, batch_index=769, batch_size=256]

Epoch 1/10:  78%|██████████▊   | 769/991 [2:59:00<55:23, 14.97s/batch, batch_loss=3.18, batch_index=769, batch_size=256]

Epoch 1/10:  78%|██████████▊   | 769/991 [2:59:15<55:23, 14.97s/batch, batch_loss=11.7, batch_index=770, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 770/991 [2:59:15<54:45, 14.87s/batch, batch_loss=11.7, batch_index=770, batch_size=256]

Epoch 1/10:  78%|████████▌  | 770/991 [2:59:30<54:45, 14.87s/batch, batch_loss=2.76e+3, batch_index=771, batch_size=256]

Epoch 1/10:  78%|████████▌  | 771/991 [2:59:30<54:46, 14.94s/batch, batch_loss=2.76e+3, batch_index=771, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 771/991 [2:59:44<54:46, 14.94s/batch, batch_loss=4.44, batch_index=772, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 772/991 [2:59:44<53:41, 14.71s/batch, batch_loss=4.44, batch_index=772, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 772/991 [2:59:57<53:41, 14.71s/batch, batch_loss=1.44, batch_index=773, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 773/991 [2:59:57<51:45, 14.25s/batch, batch_loss=1.44, batch_index=773, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 773/991 [3:00:12<51:45, 14.25s/batch, batch_loss=7.02, batch_index=774, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 774/991 [3:00:12<51:29, 14.24s/batch, batch_loss=7.02, batch_index=774, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 774/991 [3:00:27<51:29, 14.24s/batch, batch_loss=9.36, batch_index=775, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 775/991 [3:00:27<52:02, 14.46s/batch, batch_loss=9.36, batch_index=775, batch_size=256]

Epoch 1/10:  78%|███████████▋   | 775/991 [3:00:41<52:02, 14.46s/batch, batch_loss=263, batch_index=776, batch_size=256]

Epoch 1/10:  78%|███████████▋   | 776/991 [3:00:41<52:08, 14.55s/batch, batch_loss=263, batch_index=776, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 776/991 [3:00:56<52:08, 14.55s/batch, batch_loss=0.66, batch_index=777, batch_size=256]

Epoch 1/10:  78%|██████████▉   | 777/991 [3:00:56<52:29, 14.72s/batch, batch_loss=0.66, batch_index=777, batch_size=256]

Epoch 1/10:  78%|██████████▏  | 777/991 [3:01:14<52:29, 14.72s/batch, batch_loss=0.517, batch_index=778, batch_size=256]

Epoch 1/10:  79%|██████████▏  | 778/991 [3:01:14<54:56, 15.48s/batch, batch_loss=0.517, batch_index=778, batch_size=256]

Epoch 1/10:  79%|██████████▉   | 778/991 [3:01:28<54:56, 15.48s/batch, batch_loss=4.05, batch_index=779, batch_size=256]

Epoch 1/10:  79%|███████████   | 779/991 [3:01:28<53:51, 15.24s/batch, batch_loss=4.05, batch_index=779, batch_size=256]

Epoch 1/10:  79%|███████████   | 779/991 [3:01:42<53:51, 15.24s/batch, batch_loss=2.51, batch_index=780, batch_size=256]

Epoch 1/10:  79%|███████████   | 780/991 [3:01:42<52:06, 14.82s/batch, batch_loss=2.51, batch_index=780, batch_size=256]

Epoch 1/10:  79%|███████████   | 780/991 [3:01:56<52:06, 14.82s/batch, batch_loss=3.01, batch_index=781, batch_size=256]

Epoch 1/10:  79%|███████████   | 781/991 [3:01:56<51:02, 14.58s/batch, batch_loss=3.01, batch_index=781, batch_size=256]

Epoch 1/10:  79%|█████████▍  | 781/991 [3:02:11<51:02, 14.58s/batch, batch_loss=2.5e+4, batch_index=782, batch_size=256]

Epoch 1/10:  79%|█████████▍  | 782/991 [3:02:11<50:32, 14.51s/batch, batch_loss=2.5e+4, batch_index=782, batch_size=256]

Epoch 1/10:  79%|████████████▋   | 782/991 [3:02:25<50:32, 14.51s/batch, batch_loss=25, batch_index=783, batch_size=256]

Epoch 1/10:  79%|████████████▋   | 783/991 [3:02:25<49:57, 14.41s/batch, batch_loss=25, batch_index=783, batch_size=256]

Epoch 1/10:  79%|███████████   | 783/991 [3:02:39<49:57, 14.41s/batch, batch_loss=14.3, batch_index=784, batch_size=256]

Epoch 1/10:  79%|███████████   | 784/991 [3:02:39<49:28, 14.34s/batch, batch_loss=14.3, batch_index=784, batch_size=256]

Epoch 1/10:  79%|███████████   | 784/991 [3:02:52<49:28, 14.34s/batch, batch_loss=14.5, batch_index=785, batch_size=256]

Epoch 1/10:  79%|███████████   | 785/991 [3:02:52<48:18, 14.07s/batch, batch_loss=14.5, batch_index=785, batch_size=256]

Epoch 1/10:  79%|███████████   | 785/991 [3:03:07<48:18, 14.07s/batch, batch_loss=9.58, batch_index=786, batch_size=256]

Epoch 1/10:  79%|███████████   | 786/991 [3:03:07<48:20, 14.15s/batch, batch_loss=9.58, batch_index=786, batch_size=256]

Epoch 1/10:  79%|████████▋  | 786/991 [3:03:21<48:20, 14.15s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 1/10:  79%|████████▋  | 787/991 [3:03:21<47:45, 14.04s/batch, batch_loss=2.48e+4, batch_index=787, batch_size=256]

Epoch 1/10:  79%|███████████▉   | 787/991 [3:03:36<47:45, 14.04s/batch, batch_loss=683, batch_index=788, batch_size=256]

Epoch 1/10:  80%|███████████▉   | 788/991 [3:03:36<48:29, 14.33s/batch, batch_loss=683, batch_index=788, batch_size=256]

Epoch 1/10:  80%|████████████▋   | 788/991 [3:03:51<48:29, 14.33s/batch, batch_loss=26, batch_index=789, batch_size=256]

Epoch 1/10:  80%|████████████▋   | 789/991 [3:03:51<49:17, 14.64s/batch, batch_loss=26, batch_index=789, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 789/991 [3:04:08<49:17, 14.64s/batch, batch_loss=19.9, batch_index=790, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 790/991 [3:04:08<51:27, 15.36s/batch, batch_loss=19.9, batch_index=790, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 790/991 [3:04:22<51:27, 15.36s/batch, batch_loss=15.5, batch_index=791, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 791/991 [3:04:22<49:23, 14.82s/batch, batch_loss=15.5, batch_index=791, batch_size=256]

Epoch 1/10:  80%|████████▊  | 791/991 [3:04:35<49:23, 14.82s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 1/10:  80%|████████▊  | 792/991 [3:04:35<47:39, 14.37s/batch, batch_loss=1.04e+4, batch_index=792, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 792/991 [3:04:48<47:39, 14.37s/batch, batch_loss=10.1, batch_index=793, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 793/991 [3:04:48<46:16, 14.02s/batch, batch_loss=10.1, batch_index=793, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 793/991 [3:05:03<46:16, 14.02s/batch, batch_loss=2.08, batch_index=794, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 794/991 [3:05:03<47:06, 14.35s/batch, batch_loss=2.08, batch_index=794, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 794/991 [3:05:17<47:06, 14.35s/batch, batch_loss=6.54, batch_index=795, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 795/991 [3:05:17<46:46, 14.32s/batch, batch_loss=6.54, batch_index=795, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 795/991 [3:05:31<46:46, 14.32s/batch, batch_loss=17.2, batch_index=796, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 796/991 [3:05:31<46:15, 14.23s/batch, batch_loss=17.2, batch_index=796, batch_size=256]

Epoch 1/10:  80%|███████████▏  | 796/991 [3:05:46<46:15, 14.23s/batch, batch_loss=32.8, batch_index=797, batch_size=256]

Epoch 1/10:  80%|███████████▎  | 797/991 [3:05:46<46:38, 14.43s/batch, batch_loss=32.8, batch_index=797, batch_size=256]

Epoch 1/10:  80%|████████████   | 797/991 [3:06:00<46:38, 14.43s/batch, batch_loss=345, batch_index=798, batch_size=256]

Epoch 1/10:  81%|████████████   | 798/991 [3:06:00<45:48, 14.24s/batch, batch_loss=345, batch_index=798, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 798/991 [3:06:14<45:48, 14.24s/batch, batch_loss=12.4, batch_index=799, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 799/991 [3:06:14<44:55, 14.04s/batch, batch_loss=12.4, batch_index=799, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 799/991 [3:06:28<44:55, 14.04s/batch, batch_loss=26.7, batch_index=800, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 800/991 [3:06:28<45:04, 14.16s/batch, batch_loss=26.7, batch_index=800, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 800/991 [3:06:45<45:04, 14.16s/batch, batch_loss=12.2, batch_index=801, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 801/991 [3:06:45<47:48, 15.10s/batch, batch_loss=12.2, batch_index=801, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 801/991 [3:07:00<47:48, 15.10s/batch, batch_loss=16.2, batch_index=802, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 802/991 [3:07:00<46:58, 14.91s/batch, batch_loss=16.2, batch_index=802, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 802/991 [3:07:13<46:58, 14.91s/batch, batch_loss=6.99, batch_index=803, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 803/991 [3:07:13<45:19, 14.46s/batch, batch_loss=6.99, batch_index=803, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 803/991 [3:07:27<45:19, 14.46s/batch, batch_loss=15.8, batch_index=804, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 804/991 [3:07:27<44:11, 14.18s/batch, batch_loss=15.8, batch_index=804, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 804/991 [3:07:41<44:11, 14.18s/batch, batch_loss=6.88, batch_index=805, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 805/991 [3:07:41<44:20, 14.30s/batch, batch_loss=6.88, batch_index=805, batch_size=256]

Epoch 1/10:  81%|███████████▎  | 805/991 [3:07:56<44:20, 14.30s/batch, batch_loss=11.3, batch_index=806, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 806/991 [3:07:56<44:15, 14.36s/batch, batch_loss=11.3, batch_index=806, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 806/991 [3:08:09<44:15, 14.36s/batch, batch_loss=9.84, batch_index=807, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 807/991 [3:08:09<43:10, 14.08s/batch, batch_loss=9.84, batch_index=807, batch_size=256]

Epoch 1/10:  81%|███████████▍  | 807/991 [3:08:23<43:10, 14.08s/batch, batch_loss=20.9, batch_index=808, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 808/991 [3:08:23<42:30, 13.94s/batch, batch_loss=20.9, batch_index=808, batch_size=256]

Epoch 1/10:  82%|████████▉  | 808/991 [3:08:39<42:30, 13.94s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 1/10:  82%|████████▉  | 809/991 [3:08:39<44:31, 14.68s/batch, batch_loss=1.21e+4, batch_index=809, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 809/991 [3:08:53<44:31, 14.68s/batch, batch_loss=18.6, batch_index=810, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 810/991 [3:08:53<43:38, 14.47s/batch, batch_loss=18.6, batch_index=810, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 810/991 [3:09:08<43:38, 14.47s/batch, batch_loss=10.3, batch_index=811, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 811/991 [3:09:08<43:30, 14.50s/batch, batch_loss=10.3, batch_index=811, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 811/991 [3:09:23<43:30, 14.50s/batch, batch_loss=6.31, batch_index=812, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 812/991 [3:09:23<43:27, 14.57s/batch, batch_loss=6.31, batch_index=812, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 812/991 [3:09:37<43:27, 14.57s/batch, batch_loss=10.1, batch_index=813, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 813/991 [3:09:37<42:42, 14.40s/batch, batch_loss=10.1, batch_index=813, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 813/991 [3:09:51<42:42, 14.40s/batch, batch_loss=14.9, batch_index=814, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 814/991 [3:09:51<42:21, 14.36s/batch, batch_loss=14.9, batch_index=814, batch_size=256]

Epoch 1/10:  82%|███████████▍  | 814/991 [3:10:05<42:21, 14.36s/batch, batch_loss=9.05, batch_index=815, batch_size=256]

Epoch 1/10:  82%|███████████▌  | 815/991 [3:10:05<42:16, 14.41s/batch, batch_loss=9.05, batch_index=815, batch_size=256]

Epoch 1/10:  82%|███████████▌  | 815/991 [3:10:19<42:16, 14.41s/batch, batch_loss=93.2, batch_index=816, batch_size=256]

Epoch 1/10:  82%|███████████▌  | 816/991 [3:10:19<41:38, 14.28s/batch, batch_loss=93.2, batch_index=816, batch_size=256]

Epoch 1/10:  82%|████████████▎  | 816/991 [3:10:37<41:38, 14.28s/batch, batch_loss=366, batch_index=817, batch_size=256]

Epoch 1/10:  82%|████████████▎  | 817/991 [3:10:37<43:55, 15.14s/batch, batch_loss=366, batch_index=817, batch_size=256]

Epoch 1/10:  82%|████████████▎  | 817/991 [3:10:51<43:55, 15.14s/batch, batch_loss=374, batch_index=818, batch_size=256]

Epoch 1/10:  83%|████████████▍  | 818/991 [3:10:51<42:46, 14.84s/batch, batch_loss=374, batch_index=818, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 818/991 [3:11:04<42:46, 14.84s/batch, batch_loss=13.2, batch_index=819, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 819/991 [3:11:04<41:17, 14.40s/batch, batch_loss=13.2, batch_index=819, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 819/991 [3:11:19<41:17, 14.40s/batch, batch_loss=7.43, batch_index=820, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 820/991 [3:11:19<41:04, 14.41s/batch, batch_loss=7.43, batch_index=820, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 820/991 [3:11:33<41:04, 14.41s/batch, batch_loss=7.06, batch_index=821, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 821/991 [3:11:33<40:44, 14.38s/batch, batch_loss=7.06, batch_index=821, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 821/991 [3:11:47<40:44, 14.38s/batch, batch_loss=9.36, batch_index=822, batch_size=256]

Epoch 1/10:  83%|███████████▌  | 822/991 [3:11:47<40:33, 14.40s/batch, batch_loss=9.36, batch_index=822, batch_size=256]

Epoch 1/10:  83%|████████████▍  | 822/991 [3:12:02<40:33, 14.40s/batch, batch_loss=155, batch_index=823, batch_size=256]

Epoch 1/10:  83%|████████████▍  | 823/991 [3:12:02<40:54, 14.61s/batch, batch_loss=155, batch_index=823, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 823/991 [3:12:16<40:54, 14.61s/batch, batch_loss=6.73, batch_index=824, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 824/991 [3:12:16<40:15, 14.46s/batch, batch_loss=6.73, batch_index=824, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 824/991 [3:12:34<40:15, 14.46s/batch, batch_loss=14.3, batch_index=825, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 825/991 [3:12:34<42:21, 15.31s/batch, batch_loss=14.3, batch_index=825, batch_size=256]

Epoch 1/10:  83%|█████████▉  | 825/991 [3:12:48<42:21, 15.31s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 1/10:  83%|██████████  | 826/991 [3:12:48<41:03, 14.93s/batch, batch_loss=2.6e+3, batch_index=826, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 826/991 [3:13:02<41:03, 14.93s/batch, batch_loss=24.5, batch_index=827, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 827/991 [3:13:02<40:21, 14.77s/batch, batch_loss=24.5, batch_index=827, batch_size=256]

Epoch 1/10:  83%|███████████▋  | 827/991 [3:13:17<40:21, 14.77s/batch, batch_loss=31.4, batch_index=828, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 828/991 [3:13:17<39:55, 14.70s/batch, batch_loss=31.4, batch_index=828, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 828/991 [3:13:31<39:55, 14.70s/batch, batch_loss=6.21, batch_index=829, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 829/991 [3:13:31<39:37, 14.67s/batch, batch_loss=6.21, batch_index=829, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 829/991 [3:13:46<39:37, 14.67s/batch, batch_loss=16.6, batch_index=830, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 830/991 [3:13:46<39:08, 14.59s/batch, batch_loss=16.6, batch_index=830, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 830/991 [3:14:00<39:08, 14.59s/batch, batch_loss=11.1, batch_index=831, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 831/991 [3:14:00<38:50, 14.57s/batch, batch_loss=11.1, batch_index=831, batch_size=256]

Epoch 1/10:  84%|███████████▋  | 831/991 [3:14:14<38:50, 14.57s/batch, batch_loss=19.1, batch_index=832, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 832/991 [3:14:14<38:00, 14.34s/batch, batch_loss=19.1, batch_index=832, batch_size=256]

Epoch 1/10:  84%|████████████▌  | 832/991 [3:14:29<38:00, 14.34s/batch, batch_loss=222, batch_index=833, batch_size=256]

Epoch 1/10:  84%|████████████▌  | 833/991 [3:14:29<38:21, 14.57s/batch, batch_loss=222, batch_index=833, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 833/991 [3:14:44<38:21, 14.57s/batch, batch_loss=26.3, batch_index=834, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 834/991 [3:14:44<38:16, 14.63s/batch, batch_loss=26.3, batch_index=834, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 834/991 [3:14:59<38:16, 14.63s/batch, batch_loss=15.4, batch_index=835, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 835/991 [3:14:59<38:14, 14.71s/batch, batch_loss=15.4, batch_index=835, batch_size=256]

Epoch 1/10:  84%|█████████▎ | 835/991 [3:15:13<38:14, 14.71s/batch, batch_loss=3.28e+3, batch_index=836, batch_size=256]

Epoch 1/10:  84%|█████████▎ | 836/991 [3:15:13<37:23, 14.47s/batch, batch_loss=3.28e+3, batch_index=836, batch_size=256]

Epoch 1/10:  84%|██████████  | 836/991 [3:15:27<37:23, 14.47s/batch, batch_loss=4.9e+3, batch_index=837, batch_size=256]

Epoch 1/10:  84%|██████████▏ | 837/991 [3:15:27<37:16, 14.52s/batch, batch_loss=4.9e+3, batch_index=837, batch_size=256]

Epoch 1/10:  84%|███████████▊  | 837/991 [3:15:42<37:16, 14.52s/batch, batch_loss=19.4, batch_index=838, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 838/991 [3:15:42<37:20, 14.65s/batch, batch_loss=19.4, batch_index=838, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 838/991 [3:15:57<37:20, 14.65s/batch, batch_loss=4.69, batch_index=839, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 839/991 [3:15:57<36:51, 14.55s/batch, batch_loss=4.69, batch_index=839, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 839/991 [3:16:10<36:51, 14.55s/batch, batch_loss=4.21, batch_index=840, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 840/991 [3:16:10<36:01, 14.31s/batch, batch_loss=4.21, batch_index=840, batch_size=256]

Epoch 1/10:  85%|███████████▊  | 840/991 [3:16:27<36:01, 14.31s/batch, batch_loss=21.3, batch_index=841, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 841/991 [3:16:27<37:37, 15.05s/batch, batch_loss=21.3, batch_index=841, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 841/991 [3:16:41<37:37, 15.05s/batch, batch_loss=19.4, batch_index=842, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 842/991 [3:16:41<36:42, 14.78s/batch, batch_loss=19.4, batch_index=842, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 842/991 [3:16:55<36:42, 14.78s/batch, batch_loss=9.31, batch_index=843, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 843/991 [3:16:55<35:39, 14.46s/batch, batch_loss=9.31, batch_index=843, batch_size=256]

Epoch 1/10:  85%|██████████▏ | 843/991 [3:17:09<35:39, 14.46s/batch, batch_loss=1.7e+3, batch_index=844, batch_size=256]

Epoch 1/10:  85%|██████████▏ | 844/991 [3:17:09<35:05, 14.32s/batch, batch_loss=1.7e+3, batch_index=844, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 844/991 [3:17:24<35:05, 14.32s/batch, batch_loss=21.4, batch_index=845, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 845/991 [3:17:24<34:57, 14.37s/batch, batch_loss=21.4, batch_index=845, batch_size=256]

Epoch 1/10:  85%|█████████▍ | 845/991 [3:17:38<34:57, 14.37s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 1/10:  85%|█████████▍ | 846/991 [3:17:38<34:58, 14.47s/batch, batch_loss=1.18e+4, batch_index=846, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 846/991 [3:17:54<34:58, 14.47s/batch, batch_loss=43.5, batch_index=847, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 847/991 [3:17:54<35:24, 14.75s/batch, batch_loss=43.5, batch_index=847, batch_size=256]

Epoch 1/10:  85%|███████████▉  | 847/991 [3:18:08<35:24, 14.75s/batch, batch_loss=49.9, batch_index=848, batch_size=256]

Epoch 1/10:  86%|███████████▉  | 848/991 [3:18:08<34:47, 14.60s/batch, batch_loss=49.9, batch_index=848, batch_size=256]

Epoch 1/10:  86%|█████████▍ | 848/991 [3:18:22<34:47, 14.60s/batch, batch_loss=1.03e+3, batch_index=849, batch_size=256]

Epoch 1/10:  86%|█████████▍ | 849/991 [3:18:22<34:18, 14.49s/batch, batch_loss=1.03e+3, batch_index=849, batch_size=256]

Epoch 1/10:  86%|███████████▉  | 849/991 [3:18:36<34:18, 14.49s/batch, batch_loss=9.22, batch_index=850, batch_size=256]

Epoch 1/10:  86%|████████████  | 850/991 [3:18:36<33:45, 14.36s/batch, batch_loss=9.22, batch_index=850, batch_size=256]

Epoch 1/10:  86%|████████████  | 850/991 [3:18:52<33:45, 14.36s/batch, batch_loss=24.2, batch_index=851, batch_size=256]

Epoch 1/10:  86%|████████████  | 851/991 [3:18:52<34:11, 14.65s/batch, batch_loss=24.2, batch_index=851, batch_size=256]

Epoch 1/10:  86%|████████████  | 851/991 [3:19:05<34:11, 14.65s/batch, batch_loss=14.6, batch_index=852, batch_size=256]

Epoch 1/10:  86%|████████████  | 852/991 [3:19:05<33:20, 14.39s/batch, batch_loss=14.6, batch_index=852, batch_size=256]

Epoch 1/10:  86%|█████████▍ | 852/991 [3:19:20<33:20, 14.39s/batch, batch_loss=7.63e+3, batch_index=853, batch_size=256]

Epoch 1/10:  86%|█████████▍ | 853/991 [3:19:20<33:00, 14.35s/batch, batch_loss=7.63e+3, batch_index=853, batch_size=256]

Epoch 1/10:  86%|████████████  | 853/991 [3:19:34<33:00, 14.35s/batch, batch_loss=21.6, batch_index=854, batch_size=256]

Epoch 1/10:  86%|████████████  | 854/991 [3:19:34<33:09, 14.52s/batch, batch_loss=21.6, batch_index=854, batch_size=256]

Epoch 1/10:  86%|████████████  | 854/991 [3:19:50<33:09, 14.52s/batch, batch_loss=7.61, batch_index=855, batch_size=256]

Epoch 1/10:  86%|████████████  | 855/991 [3:19:50<33:22, 14.72s/batch, batch_loss=7.61, batch_index=855, batch_size=256]

Epoch 1/10:  86%|████████████  | 855/991 [3:20:03<33:22, 14.72s/batch, batch_loss=9.16, batch_index=856, batch_size=256]

Epoch 1/10:  86%|████████████  | 856/991 [3:20:03<32:25, 14.41s/batch, batch_loss=9.16, batch_index=856, batch_size=256]

Epoch 1/10:  86%|████████████  | 856/991 [3:20:18<32:25, 14.41s/batch, batch_loss=8.86, batch_index=857, batch_size=256]

Epoch 1/10:  86%|████████████  | 857/991 [3:20:18<32:12, 14.42s/batch, batch_loss=8.86, batch_index=857, batch_size=256]

Epoch 1/10:  86%|████████████  | 857/991 [3:20:35<32:12, 14.42s/batch, batch_loss=35.1, batch_index=858, batch_size=256]

Epoch 1/10:  87%|████████████  | 858/991 [3:20:35<34:03, 15.37s/batch, batch_loss=35.1, batch_index=858, batch_size=256]

Epoch 1/10:  87%|█████████████▊  | 858/991 [3:20:49<34:03, 15.37s/batch, batch_loss=19, batch_index=859, batch_size=256]

Epoch 1/10:  87%|█████████████▊  | 859/991 [3:20:49<32:51, 14.94s/batch, batch_loss=19, batch_index=859, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 859/991 [3:21:05<32:51, 14.94s/batch, batch_loss=29.8, batch_index=860, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 860/991 [3:21:05<33:02, 15.13s/batch, batch_loss=29.8, batch_index=860, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 860/991 [3:21:20<33:02, 15.13s/batch, batch_loss=9.39, batch_index=861, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 861/991 [3:21:20<32:50, 15.16s/batch, batch_loss=9.39, batch_index=861, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 861/991 [3:21:35<32:50, 15.16s/batch, batch_loss=22.4, batch_index=862, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 862/991 [3:21:35<32:22, 15.06s/batch, batch_loss=22.4, batch_index=862, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 862/991 [3:21:50<32:22, 15.06s/batch, batch_loss=35.5, batch_index=863, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 863/991 [3:21:50<32:24, 15.19s/batch, batch_loss=35.5, batch_index=863, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 863/991 [3:22:08<32:24, 15.19s/batch, batch_loss=11.3, batch_index=864, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 864/991 [3:22:08<33:51, 16.00s/batch, batch_loss=11.3, batch_index=864, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 864/991 [3:22:23<33:51, 16.00s/batch, batch_loss=20.7, batch_index=865, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 865/991 [3:22:23<32:35, 15.52s/batch, batch_loss=20.7, batch_index=865, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 865/991 [3:22:36<32:35, 15.52s/batch, batch_loss=30.2, batch_index=866, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 866/991 [3:22:36<31:11, 14.97s/batch, batch_loss=30.2, batch_index=866, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 866/991 [3:22:51<31:11, 14.97s/batch, batch_loss=31.1, batch_index=867, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 867/991 [3:22:51<30:30, 14.76s/batch, batch_loss=31.1, batch_index=867, batch_size=256]

Epoch 1/10:  87%|████████████▏ | 867/991 [3:23:05<30:30, 14.76s/batch, batch_loss=25.7, batch_index=868, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 868/991 [3:23:05<30:14, 14.75s/batch, batch_loss=25.7, batch_index=868, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 868/991 [3:23:20<30:14, 14.75s/batch, batch_loss=13.4, batch_index=869, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 869/991 [3:23:20<29:55, 14.71s/batch, batch_loss=13.4, batch_index=869, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 869/991 [3:23:34<29:55, 14.71s/batch, batch_loss=16.3, batch_index=870, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 870/991 [3:23:34<29:06, 14.43s/batch, batch_loss=16.3, batch_index=870, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 870/991 [3:23:48<29:06, 14.43s/batch, batch_loss=9.24, batch_index=871, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 871/991 [3:23:48<29:00, 14.50s/batch, batch_loss=9.24, batch_index=871, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 871/991 [3:24:03<29:00, 14.50s/batch, batch_loss=25.5, batch_index=872, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 872/991 [3:24:03<28:38, 14.44s/batch, batch_loss=25.5, batch_index=872, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 872/991 [3:24:18<28:38, 14.44s/batch, batch_loss=17.6, batch_index=873, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 873/991 [3:24:18<28:35, 14.54s/batch, batch_loss=17.6, batch_index=873, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 873/991 [3:24:32<28:35, 14.54s/batch, batch_loss=8.35, batch_index=874, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 874/991 [3:24:32<28:06, 14.41s/batch, batch_loss=8.35, batch_index=874, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 874/991 [3:24:46<28:06, 14.41s/batch, batch_loss=16.2, batch_index=875, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 875/991 [3:24:46<27:56, 14.45s/batch, batch_loss=16.2, batch_index=875, batch_size=256]

Epoch 1/10:  88%|████████████▎ | 875/991 [3:25:00<27:56, 14.45s/batch, batch_loss=34.4, batch_index=876, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 876/991 [3:25:00<27:35, 14.39s/batch, batch_loss=34.4, batch_index=876, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 876/991 [3:25:15<27:35, 14.39s/batch, batch_loss=22.7, batch_index=877, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 877/991 [3:25:15<27:34, 14.51s/batch, batch_loss=22.7, batch_index=877, batch_size=256]

Epoch 1/10:  88%|████████████▍ | 877/991 [3:25:30<27:34, 14.51s/batch, batch_loss=37.1, batch_index=878, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 878/991 [3:25:30<27:23, 14.55s/batch, batch_loss=37.1, batch_index=878, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 878/991 [3:25:44<27:23, 14.55s/batch, batch_loss=19.8, batch_index=879, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 879/991 [3:25:44<27:01, 14.48s/batch, batch_loss=19.8, batch_index=879, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 879/991 [3:25:58<27:01, 14.48s/batch, batch_loss=15.9, batch_index=880, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 880/991 [3:25:58<26:27, 14.30s/batch, batch_loss=15.9, batch_index=880, batch_size=256]

Epoch 1/10:  89%|█████████▊ | 880/991 [3:26:13<26:27, 14.30s/batch, batch_loss=5.13e+3, batch_index=881, batch_size=256]

Epoch 1/10:  89%|█████████▊ | 881/991 [3:26:13<26:20, 14.37s/batch, batch_loss=5.13e+3, batch_index=881, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 881/991 [3:26:27<26:20, 14.37s/batch, batch_loss=16.7, batch_index=882, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 882/991 [3:26:27<26:04, 14.35s/batch, batch_loss=16.7, batch_index=882, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 882/991 [3:26:42<26:04, 14.35s/batch, batch_loss=21.6, batch_index=883, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 883/991 [3:26:42<26:17, 14.60s/batch, batch_loss=21.6, batch_index=883, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 883/991 [3:26:56<26:17, 14.60s/batch, batch_loss=11.8, batch_index=884, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 884/991 [3:26:56<25:40, 14.39s/batch, batch_loss=11.8, batch_index=884, batch_size=256]

Epoch 1/10:  89%|████████████▍ | 884/991 [3:27:10<25:40, 14.39s/batch, batch_loss=17.4, batch_index=885, batch_size=256]

Epoch 1/10:  89%|████████████▌ | 885/991 [3:27:10<25:12, 14.27s/batch, batch_loss=17.4, batch_index=885, batch_size=256]

Epoch 1/10:  89%|████████████▌ | 885/991 [3:27:25<25:12, 14.27s/batch, batch_loss=18.4, batch_index=886, batch_size=256]

Epoch 1/10:  89%|████████████▌ | 886/991 [3:27:25<25:19, 14.47s/batch, batch_loss=18.4, batch_index=886, batch_size=256]

Epoch 1/10:  89%|█████████▊ | 886/991 [3:27:40<25:19, 14.47s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 1/10:  90%|█████████▊ | 887/991 [3:27:40<25:12, 14.55s/batch, batch_loss=1.93e+4, batch_index=887, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 887/991 [3:27:55<25:12, 14.55s/batch, batch_loss=20.4, batch_index=888, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 888/991 [3:27:55<25:08, 14.64s/batch, batch_loss=20.4, batch_index=888, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 888/991 [3:28:09<25:08, 14.64s/batch, batch_loss=22.9, batch_index=889, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 889/991 [3:28:09<24:49, 14.60s/batch, batch_loss=22.9, batch_index=889, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 889/991 [3:28:24<24:49, 14.60s/batch, batch_loss=11.9, batch_index=890, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 890/991 [3:28:24<24:36, 14.62s/batch, batch_loss=11.9, batch_index=890, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 890/991 [3:28:38<24:36, 14.62s/batch, batch_loss=12.9, batch_index=891, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 891/991 [3:28:38<23:59, 14.39s/batch, batch_loss=12.9, batch_index=891, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 891/991 [3:28:52<23:59, 14.39s/batch, batch_loss=26.7, batch_index=892, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 892/991 [3:28:52<23:48, 14.43s/batch, batch_loss=26.7, batch_index=892, batch_size=256]

Epoch 1/10:  90%|█████████▉ | 892/991 [3:29:06<23:48, 14.43s/batch, batch_loss=3.69e+3, batch_index=893, batch_size=256]

Epoch 1/10:  90%|█████████▉ | 893/991 [3:29:06<23:12, 14.21s/batch, batch_loss=3.69e+3, batch_index=893, batch_size=256]

Epoch 1/10:  90%|████████████▌ | 893/991 [3:29:20<23:12, 14.21s/batch, batch_loss=14.1, batch_index=894, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 894/991 [3:29:20<22:44, 14.07s/batch, batch_loss=14.1, batch_index=894, batch_size=256]

Epoch 1/10:  90%|██████████████▍ | 894/991 [3:29:34<22:44, 14.07s/batch, batch_loss=18, batch_index=895, batch_size=256]

Epoch 1/10:  90%|██████████████▍ | 895/991 [3:29:34<22:34, 14.11s/batch, batch_loss=18, batch_index=895, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 895/991 [3:29:48<22:34, 14.11s/batch, batch_loss=14.5, batch_index=896, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 896/991 [3:29:48<22:19, 14.10s/batch, batch_loss=14.5, batch_index=896, batch_size=256]

Epoch 1/10:  90%|████████████▋ | 896/991 [3:30:02<22:19, 14.10s/batch, batch_loss=27.5, batch_index=897, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 897/991 [3:30:02<22:09, 14.15s/batch, batch_loss=27.5, batch_index=897, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 897/991 [3:30:17<22:09, 14.15s/batch, batch_loss=25.7, batch_index=898, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 898/991 [3:30:17<22:07, 14.27s/batch, batch_loss=25.7, batch_index=898, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 898/991 [3:30:31<22:07, 14.27s/batch, batch_loss=27.4, batch_index=899, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 899/991 [3:30:31<21:49, 14.23s/batch, batch_loss=27.4, batch_index=899, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 899/991 [3:30:46<21:49, 14.23s/batch, batch_loss=25.4, batch_index=900, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 900/991 [3:30:46<21:55, 14.46s/batch, batch_loss=25.4, batch_index=900, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 900/991 [3:30:59<21:55, 14.46s/batch, batch_loss=31.2, batch_index=901, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 901/991 [3:30:59<21:20, 14.23s/batch, batch_loss=31.2, batch_index=901, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 901/991 [3:31:15<21:20, 14.23s/batch, batch_loss=14.9, batch_index=902, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 902/991 [3:31:15<21:37, 14.58s/batch, batch_loss=14.9, batch_index=902, batch_size=256]

Epoch 1/10:  91%|████████████▋ | 902/991 [3:31:29<21:37, 14.58s/batch, batch_loss=8.11, batch_index=903, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 903/991 [3:31:29<21:18, 14.53s/batch, batch_loss=8.11, batch_index=903, batch_size=256]

Epoch 1/10:  91%|██████████████▌ | 903/991 [3:31:44<21:18, 14.53s/batch, batch_loss=11, batch_index=904, batch_size=256]

Epoch 1/10:  91%|██████████████▌ | 904/991 [3:31:44<21:09, 14.59s/batch, batch_loss=11, batch_index=904, batch_size=256]

Epoch 1/10:  91%|██████████████▌ | 904/991 [3:32:01<21:09, 14.59s/batch, batch_loss=45, batch_index=905, batch_size=256]

Epoch 1/10:  91%|██████████████▌ | 905/991 [3:32:01<22:02, 15.37s/batch, batch_loss=45, batch_index=905, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 905/991 [3:32:16<22:02, 15.37s/batch, batch_loss=34.2, batch_index=906, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 906/991 [3:32:16<21:28, 15.15s/batch, batch_loss=34.2, batch_index=906, batch_size=256]

Epoch 1/10:  91%|████████████▊ | 906/991 [3:32:31<21:28, 15.15s/batch, batch_loss=28.1, batch_index=907, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 907/991 [3:32:31<21:04, 15.05s/batch, batch_loss=28.1, batch_index=907, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 907/991 [3:32:44<21:04, 15.05s/batch, batch_loss=11.1, batch_index=908, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 908/991 [3:32:44<20:17, 14.67s/batch, batch_loss=11.1, batch_index=908, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 908/991 [3:32:59<20:17, 14.67s/batch, batch_loss=6.28, batch_index=909, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 909/991 [3:32:59<20:08, 14.74s/batch, batch_loss=6.28, batch_index=909, batch_size=256]

Epoch 1/10:  92%|█████████████▊ | 909/991 [3:33:13<20:08, 14.74s/batch, batch_loss=688, batch_index=910, batch_size=256]

Epoch 1/10:  92%|█████████████▊ | 910/991 [3:33:13<19:30, 14.44s/batch, batch_loss=688, batch_index=910, batch_size=256]

Epoch 1/10:  92%|██████████ | 910/991 [3:33:28<19:30, 14.44s/batch, batch_loss=1.03e+3, batch_index=911, batch_size=256]

Epoch 1/10:  92%|██████████ | 911/991 [3:33:28<19:18, 14.49s/batch, batch_loss=1.03e+3, batch_index=911, batch_size=256]

Epoch 1/10:  92%|████████████▊ | 911/991 [3:33:42<19:18, 14.49s/batch, batch_loss=29.9, batch_index=912, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 912/991 [3:33:42<19:12, 14.59s/batch, batch_loss=29.9, batch_index=912, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 912/991 [3:34:00<19:12, 14.59s/batch, batch_loss=34.6, batch_index=913, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 913/991 [3:34:00<20:01, 15.40s/batch, batch_loss=34.6, batch_index=913, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 913/991 [3:34:13<20:01, 15.40s/batch, batch_loss=27.1, batch_index=914, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 914/991 [3:34:13<18:58, 14.78s/batch, batch_loss=27.1, batch_index=914, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 914/991 [3:34:26<18:58, 14.78s/batch, batch_loss=34.5, batch_index=915, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 915/991 [3:34:26<18:11, 14.36s/batch, batch_loss=34.5, batch_index=915, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 915/991 [3:34:39<18:11, 14.36s/batch, batch_loss=17.4, batch_index=916, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 916/991 [3:34:39<17:20, 13.88s/batch, batch_loss=17.4, batch_index=916, batch_size=256]

Epoch 1/10:  92%|████████████▉ | 916/991 [3:34:53<17:20, 13.88s/batch, batch_loss=9.81, batch_index=917, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 917/991 [3:34:53<17:14, 13.98s/batch, batch_loss=9.81, batch_index=917, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 917/991 [3:35:09<17:14, 13.98s/batch, batch_loss=19.1, batch_index=918, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 918/991 [3:35:09<17:42, 14.55s/batch, batch_loss=19.1, batch_index=918, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 918/991 [3:35:24<17:42, 14.55s/batch, batch_loss=15, batch_index=919, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 919/991 [3:35:24<17:22, 14.49s/batch, batch_loss=15, batch_index=919, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 919/991 [3:35:38<17:22, 14.49s/batch, batch_loss=18, batch_index=920, batch_size=256]

Epoch 1/10:  93%|██████████████▊ | 920/991 [3:35:38<17:08, 14.48s/batch, batch_loss=18, batch_index=920, batch_size=256]

Epoch 1/10:  93%|████████████▉ | 920/991 [3:35:53<17:08, 14.48s/batch, batch_loss=21.6, batch_index=921, batch_size=256]

Epoch 1/10:  93%|█████████████ | 921/991 [3:35:53<16:51, 14.44s/batch, batch_loss=21.6, batch_index=921, batch_size=256]

Epoch 1/10:  93%|█████████████ | 921/991 [3:36:10<16:51, 14.44s/batch, batch_loss=34.2, batch_index=922, batch_size=256]

Epoch 1/10:  93%|█████████████ | 922/991 [3:36:10<17:49, 15.50s/batch, batch_loss=34.2, batch_index=922, batch_size=256]

Epoch 1/10:  93%|█████████████ | 922/991 [3:36:26<17:49, 15.50s/batch, batch_loss=10.1, batch_index=923, batch_size=256]

Epoch 1/10:  93%|█████████████ | 923/991 [3:36:26<17:30, 15.44s/batch, batch_loss=10.1, batch_index=923, batch_size=256]

Epoch 1/10:  93%|█████████████ | 923/991 [3:36:41<17:30, 15.44s/batch, batch_loss=12.2, batch_index=924, batch_size=256]

Epoch 1/10:  93%|█████████████ | 924/991 [3:36:41<17:07, 15.33s/batch, batch_loss=12.2, batch_index=924, batch_size=256]

Epoch 1/10:  93%|█████████████ | 924/991 [3:36:56<17:07, 15.33s/batch, batch_loss=15.6, batch_index=925, batch_size=256]

Epoch 1/10:  93%|█████████████ | 925/991 [3:36:56<16:43, 15.20s/batch, batch_loss=15.6, batch_index=925, batch_size=256]

Epoch 1/10:  93%|█████████████ | 925/991 [3:37:10<16:43, 15.20s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 1/10:  93%|█████████████ | 926/991 [3:37:10<16:04, 14.83s/batch, batch_loss=3e+4, batch_index=926, batch_size=256]

Epoch 1/10:  93%|█████████████ | 926/991 [3:37:24<16:04, 14.83s/batch, batch_loss=6.45, batch_index=927, batch_size=256]

Epoch 1/10:  94%|█████████████ | 927/991 [3:37:24<15:44, 14.76s/batch, batch_loss=6.45, batch_index=927, batch_size=256]

Epoch 1/10:  94%|██████████████ | 927/991 [3:37:39<15:44, 14.76s/batch, batch_loss=857, batch_index=928, batch_size=256]

Epoch 1/10:  94%|██████████████ | 928/991 [3:37:39<15:23, 14.66s/batch, batch_loss=857, batch_index=928, batch_size=256]

Epoch 1/10:  94%|█████████████ | 928/991 [3:37:52<15:23, 14.66s/batch, batch_loss=10.8, batch_index=929, batch_size=256]

Epoch 1/10:  94%|█████████████ | 929/991 [3:37:52<14:50, 14.36s/batch, batch_loss=10.8, batch_index=929, batch_size=256]

Epoch 1/10:  94%|█████████████ | 929/991 [3:38:10<14:50, 14.36s/batch, batch_loss=10.8, batch_index=930, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 930/991 [3:38:10<15:26, 15.19s/batch, batch_loss=10.8, batch_index=930, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 930/991 [3:38:24<15:26, 15.19s/batch, batch_loss=16.6, batch_index=931, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 931/991 [3:38:24<15:01, 15.03s/batch, batch_loss=16.6, batch_index=931, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 931/991 [3:38:40<15:01, 15.03s/batch, batch_loss=12.8, batch_index=932, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 932/991 [3:38:40<14:55, 15.18s/batch, batch_loss=12.8, batch_index=932, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 932/991 [3:38:55<14:55, 15.18s/batch, batch_loss=12.3, batch_index=933, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 933/991 [3:38:55<14:45, 15.26s/batch, batch_loss=12.3, batch_index=933, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 933/991 [3:39:09<14:45, 15.26s/batch, batch_loss=1.49, batch_index=934, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 934/991 [3:39:09<14:11, 14.94s/batch, batch_loss=1.49, batch_index=934, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 934/991 [3:39:24<14:11, 14.94s/batch, batch_loss=1.64, batch_index=935, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 935/991 [3:39:24<13:44, 14.72s/batch, batch_loss=1.64, batch_index=935, batch_size=256]

Epoch 1/10:  94%|██████████████▏| 935/991 [3:39:37<13:44, 14.72s/batch, batch_loss=165, batch_index=936, batch_size=256]

Epoch 1/10:  94%|██████████████▏| 936/991 [3:39:37<13:09, 14.36s/batch, batch_loss=165, batch_index=936, batch_size=256]

Epoch 1/10:  94%|█████████████▏| 936/991 [3:39:51<13:09, 14.36s/batch, batch_loss=37.7, batch_index=937, batch_size=256]

Epoch 1/10:  95%|█████████████▏| 937/991 [3:39:51<12:48, 14.23s/batch, batch_loss=37.7, batch_index=937, batch_size=256]

Epoch 1/10:  95%|█████████████▏| 937/991 [3:40:06<12:48, 14.23s/batch, batch_loss=11.3, batch_index=938, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 938/991 [3:40:06<12:41, 14.37s/batch, batch_loss=11.3, batch_index=938, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 938/991 [3:40:21<12:41, 14.37s/batch, batch_loss=9.74, batch_index=939, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 939/991 [3:40:21<12:37, 14.56s/batch, batch_loss=9.74, batch_index=939, batch_size=256]

Epoch 1/10:  95%|██████████████▏| 939/991 [3:40:36<12:37, 14.56s/batch, batch_loss=428, batch_index=940, batch_size=256]

Epoch 1/10:  95%|██████████████▏| 940/991 [3:40:36<12:27, 14.65s/batch, batch_loss=428, batch_index=940, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 940/991 [3:40:49<12:27, 14.65s/batch, batch_loss=25.2, batch_index=941, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 941/991 [3:40:49<11:50, 14.21s/batch, batch_loss=25.2, batch_index=941, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 941/991 [3:41:05<11:50, 14.21s/batch, batch_loss=19.6, batch_index=942, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 942/991 [3:41:05<11:59, 14.69s/batch, batch_loss=19.6, batch_index=942, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 942/991 [3:41:17<11:59, 14.69s/batch, batch_loss=19.7, batch_index=943, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 943/991 [3:41:17<11:17, 14.11s/batch, batch_loss=19.7, batch_index=943, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 943/991 [3:41:41<11:17, 14.11s/batch, batch_loss=15.9, batch_index=944, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 944/991 [3:41:41<13:21, 17.05s/batch, batch_loss=15.9, batch_index=944, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 944/991 [3:42:00<13:21, 17.05s/batch, batch_loss=2.35, batch_index=945, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 945/991 [3:42:00<13:26, 17.54s/batch, batch_loss=2.35, batch_index=945, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 945/991 [3:42:27<13:26, 17.54s/batch, batch_loss=20.4, batch_index=946, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 946/991 [3:42:27<15:15, 20.35s/batch, batch_loss=20.4, batch_index=946, batch_size=256]

Epoch 1/10:  95%|█████████████▎| 946/991 [3:42:41<15:15, 20.35s/batch, batch_loss=17.9, batch_index=947, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 947/991 [3:42:41<13:36, 18.55s/batch, batch_loss=17.9, batch_index=947, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 947/991 [3:42:56<13:36, 18.55s/batch, batch_loss=14.8, batch_index=948, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 948/991 [3:42:56<12:33, 17.52s/batch, batch_loss=14.8, batch_index=948, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 948/991 [3:43:10<12:33, 17.52s/batch, batch_loss=5.98, batch_index=949, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 949/991 [3:43:10<11:30, 16.44s/batch, batch_loss=5.98, batch_index=949, batch_size=256]

Epoch 1/10:  96%|██████████████▎| 949/991 [3:43:24<11:30, 16.44s/batch, batch_loss=9.4, batch_index=950, batch_size=256]

Epoch 1/10:  96%|██████████████▍| 950/991 [3:43:24<10:44, 15.72s/batch, batch_loss=9.4, batch_index=950, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 950/991 [3:43:38<10:44, 15.72s/batch, batch_loss=21.4, batch_index=951, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 951/991 [3:43:38<10:09, 15.23s/batch, batch_loss=21.4, batch_index=951, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 951/991 [3:43:52<10:09, 15.23s/batch, batch_loss=28.9, batch_index=952, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 952/991 [3:43:52<09:38, 14.83s/batch, batch_loss=28.9, batch_index=952, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 952/991 [3:44:10<09:38, 14.83s/batch, batch_loss=6.99, batch_index=953, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 953/991 [3:44:10<09:54, 15.63s/batch, batch_loss=6.99, batch_index=953, batch_size=256]

Epoch 1/10:  96%|██████████████▍| 953/991 [3:44:25<09:54, 15.63s/batch, batch_loss=336, batch_index=954, batch_size=256]

Epoch 1/10:  96%|██████████████▍| 954/991 [3:44:25<09:31, 15.44s/batch, batch_loss=336, batch_index=954, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 954/991 [3:44:40<09:31, 15.44s/batch, batch_loss=15.9, batch_index=955, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 955/991 [3:44:40<09:10, 15.28s/batch, batch_loss=15.9, batch_index=955, batch_size=256]

Epoch 1/10:  96%|█████████████▍| 955/991 [3:44:54<09:10, 15.28s/batch, batch_loss=21.9, batch_index=956, batch_size=256]

Epoch 1/10:  96%|█████████████▌| 956/991 [3:44:54<08:43, 14.95s/batch, batch_loss=21.9, batch_index=956, batch_size=256]

Epoch 1/10:  96%|█████████████▌| 956/991 [3:45:08<08:43, 14.95s/batch, batch_loss=21.3, batch_index=957, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 957/991 [3:45:08<08:17, 14.64s/batch, batch_loss=21.3, batch_index=957, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 957/991 [3:45:25<08:17, 14.64s/batch, batch_loss=21.5, batch_index=958, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 958/991 [3:45:25<08:28, 15.40s/batch, batch_loss=21.5, batch_index=958, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 958/991 [3:45:39<08:28, 15.40s/batch, batch_loss=10.3, batch_index=959, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 959/991 [3:45:39<07:59, 14.99s/batch, batch_loss=10.3, batch_index=959, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 959/991 [3:45:52<07:59, 14.99s/batch, batch_loss=16.8, batch_index=960, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 960/991 [3:45:52<07:31, 14.56s/batch, batch_loss=16.8, batch_index=960, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 960/991 [3:46:07<07:31, 14.56s/batch, batch_loss=23.8, batch_index=961, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 961/991 [3:46:07<07:15, 14.50s/batch, batch_loss=23.8, batch_index=961, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 961/991 [3:46:21<07:15, 14.50s/batch, batch_loss=7.09, batch_index=962, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 962/991 [3:46:21<06:57, 14.40s/batch, batch_loss=7.09, batch_index=962, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 962/991 [3:46:35<06:57, 14.40s/batch, batch_loss=8.56, batch_index=963, batch_size=256]

Epoch 1/10:  97%|█████████████▌| 963/991 [3:46:35<06:39, 14.25s/batch, batch_loss=8.56, batch_index=963, batch_size=256]

Epoch 1/10:  97%|██████████▋| 963/991 [3:46:48<06:39, 14.25s/batch, batch_loss=9.42e+3, batch_index=964, batch_size=256]

Epoch 1/10:  97%|██████████▋| 964/991 [3:46:48<06:11, 13.77s/batch, batch_loss=9.42e+3, batch_index=964, batch_size=256]

Epoch 1/10:  97%|███████████████▌| 964/991 [3:47:01<06:11, 13.77s/batch, batch_loss=24, batch_index=965, batch_size=256]

Epoch 1/10:  97%|███████████████▌| 965/991 [3:47:01<05:53, 13.60s/batch, batch_loss=24, batch_index=965, batch_size=256]

Epoch 1/10:  97%|█████████████▋| 965/991 [3:47:15<05:53, 13.60s/batch, batch_loss=17.9, batch_index=966, batch_size=256]

Epoch 1/10:  97%|█████████████▋| 966/991 [3:47:15<05:47, 13.92s/batch, batch_loss=17.9, batch_index=966, batch_size=256]

Epoch 1/10:  97%|███████████▋| 966/991 [3:47:29<05:47, 13.92s/batch, batch_loss=2.4e+4, batch_index=967, batch_size=256]

Epoch 1/10:  98%|███████████▋| 967/991 [3:47:29<05:31, 13.82s/batch, batch_loss=2.4e+4, batch_index=967, batch_size=256]

Epoch 1/10:  98%|██████████████▋| 967/991 [3:47:43<05:31, 13.82s/batch, batch_loss=429, batch_index=968, batch_size=256]

Epoch 1/10:  98%|██████████████▋| 968/991 [3:47:43<05:21, 13.98s/batch, batch_loss=429, batch_index=968, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 968/991 [3:47:58<05:21, 13.98s/batch, batch_loss=27.1, batch_index=969, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 969/991 [3:47:58<05:11, 14.15s/batch, batch_loss=27.1, batch_index=969, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 969/991 [3:48:13<05:11, 14.15s/batch, batch_loss=0.98, batch_index=970, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 970/991 [3:48:13<05:05, 14.53s/batch, batch_loss=0.98, batch_index=970, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 970/991 [3:48:27<05:05, 14.53s/batch, batch_loss=8.59, batch_index=971, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 971/991 [3:48:27<04:47, 14.38s/batch, batch_loss=8.59, batch_index=971, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 971/991 [3:48:43<04:47, 14.38s/batch, batch_loss=29.7, batch_index=972, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 972/991 [3:48:43<04:37, 14.63s/batch, batch_loss=29.7, batch_index=972, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 972/991 [3:48:57<04:37, 14.63s/batch, batch_loss=25.2, batch_index=973, batch_size=256]

Epoch 1/10:  98%|█████████████▋| 973/991 [3:48:57<04:24, 14.67s/batch, batch_loss=25.2, batch_index=973, batch_size=256]

Epoch 1/10:  98%|███████████████▋| 973/991 [3:49:12<04:24, 14.67s/batch, batch_loss=16, batch_index=974, batch_size=256]

Epoch 1/10:  98%|███████████████▋| 974/991 [3:49:12<04:07, 14.55s/batch, batch_loss=16, batch_index=974, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 974/991 [3:49:26<04:07, 14.55s/batch, batch_loss=9.97, batch_index=975, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 975/991 [3:49:26<03:51, 14.46s/batch, batch_loss=9.97, batch_index=975, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 975/991 [3:49:40<03:51, 14.46s/batch, batch_loss=25.1, batch_index=976, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 976/991 [3:49:40<03:33, 14.24s/batch, batch_loss=25.1, batch_index=976, batch_size=256]

Epoch 1/10:  98%|█████████████▊| 976/991 [3:49:53<03:33, 14.24s/batch, batch_loss=11.3, batch_index=977, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 977/991 [3:49:53<03:13, 13.85s/batch, batch_loss=11.3, batch_index=977, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 977/991 [3:50:06<03:13, 13.85s/batch, batch_loss=9.81, batch_index=978, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 978/991 [3:50:06<02:57, 13.67s/batch, batch_loss=9.81, batch_index=978, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 978/991 [3:50:18<02:57, 13.67s/batch, batch_loss=8.34, batch_index=979, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 979/991 [3:50:18<02:40, 13.35s/batch, batch_loss=8.34, batch_index=979, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 979/991 [3:50:31<02:40, 13.35s/batch, batch_loss=6.83, batch_index=980, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 980/991 [3:50:31<02:25, 13.25s/batch, batch_loss=6.83, batch_index=980, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 980/991 [3:50:45<02:25, 13.25s/batch, batch_loss=5.41, batch_index=981, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 981/991 [3:50:45<02:12, 13.23s/batch, batch_loss=5.41, batch_index=981, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 981/991 [3:50:58<02:12, 13.23s/batch, batch_loss=4.22, batch_index=982, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 982/991 [3:50:58<01:59, 13.28s/batch, batch_loss=4.22, batch_index=982, batch_size=256]

Epoch 1/10:  99%|█████████████▊| 982/991 [3:51:13<01:59, 13.28s/batch, batch_loss=3.28, batch_index=983, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 983/991 [3:51:13<01:49, 13.71s/batch, batch_loss=3.28, batch_index=983, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 983/991 [3:51:25<01:49, 13.71s/batch, batch_loss=2.47, batch_index=984, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 984/991 [3:51:25<01:33, 13.29s/batch, batch_loss=2.47, batch_index=984, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 984/991 [3:51:38<01:33, 13.29s/batch, batch_loss=1.86, batch_index=985, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 985/991 [3:51:38<01:19, 13.30s/batch, batch_loss=1.86, batch_index=985, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 985/991 [3:51:51<01:19, 13.30s/batch, batch_loss=1.39, batch_index=986, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 986/991 [3:51:51<01:06, 13.24s/batch, batch_loss=1.39, batch_index=986, batch_size=256]

Epoch 1/10:  99%|█████████████▉| 986/991 [3:52:04<01:06, 13.24s/batch, batch_loss=1.01, batch_index=987, batch_size=256]

Epoch 1/10: 100%|█████████████▉| 987/991 [3:52:04<00:52, 13.05s/batch, batch_loss=1.01, batch_index=987, batch_size=256]

Epoch 1/10: 100%|████████████▉| 987/991 [3:52:17<00:52, 13.05s/batch, batch_loss=0.717, batch_index=988, batch_size=256]

Epoch 1/10: 100%|████████████▉| 988/991 [3:52:17<00:39, 13.01s/batch, batch_loss=0.717, batch_index=988, batch_size=256]

Epoch 1/10: 100%|████████████▉| 988/991 [3:52:30<00:39, 13.01s/batch, batch_loss=0.493, batch_index=989, batch_size=256]

Epoch 1/10: 100%|████████████▉| 989/991 [3:52:30<00:26, 13.07s/batch, batch_loss=0.493, batch_index=989, batch_size=256]

Epoch 1/10: 100%|████████████▉| 989/991 [3:52:43<00:26, 13.07s/batch, batch_loss=0.333, batch_index=990, batch_size=256]

Epoch 1/10: 100%|████████████▉| 990/991 [3:52:43<00:12, 12.97s/batch, batch_loss=0.333, batch_index=990, batch_size=256]

Epoch 1/10: 100%|████████████▉| 990/991 [3:52:54<00:14, 14.12s/batch, batch_loss=0.333, batch_index=990, batch_size=256]




RuntimeError: The expanded size of the tensor (256) must match the existing size (220) at non-singleton dimension 0.  Target sizes: [256, 4].  Tensor sizes: [220, 4]

In [13]:
device = (torch.device("cuda" if torch.cuda.is_available() else "cpu"))
criterion = torch.nn.MSELoss()
model = NeuralNetwork(batch_size, input_window, prediction_window, device=device).to(device)
model.load_state_dict(torch.load("multimodal_seq2seq.pth", weights_only=True))

test_dataset = DoomMotionDataset(coco_test, TEST_RUN, input_window, prediction_window)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

model.eval()  # Set the model to evaluation mode
running_loss = 0.0


progress_bar = tqdm(test_loader, desc="Testing", unit="batch")

with torch.no_grad():  # Disable gradient calculations for evaluation
    for batch_idx, batch in enumerate(progress_bar):
        prev_motion, next_motion, previous_seg, previous_dep = batch["prev_motion"], batch["next_motion"], batch["previous_seg"], batch["previous_dep"]
        prev_motion, next_motion, previous_seg, previous_dep = prev_motion.to(device), next_motion.to(device), previous_seg.to(device), previous_dep.to(device)

        if prev_motion.size(0) != next_motion.size(0) != previous_seg.size(0) != previous_dep.size(0):
                continue
            
        outputs = model(previous_seg, previous_dep, prev_motion)
        outputs = outputs.permute(1, 0, 2)

        if outputs.size(0) != next_motion.size(0):
            continue
        
        loss = criterion(outputs, next_motion)
        
        running_loss += loss.item()
        
        progress_bar.set_postfix({
            "batch_loss": loss.item(),
            "batch_index": batch_idx + 1,
            "batch_size": prev_motion.size(0)
        })

# Average loss over all batches
test_loss = running_loss / len(test_loader)
print(f"Test Loss: {test_loss:.4f}")

FileNotFoundError: [Errno 2] No such file or directory: 'multimodal_seq2seq.pth'